1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <errno.h>
30 #include <unistd.h>
31 
32 #include <cmath>
33 #include <cstring>
34 #include <limits>
35 
36 #include "simulator-aarch64.h"
37 
38 namespace vixl {
39 namespace aarch64 {
40 
41 using vixl::internal::SimFloat16;
42 
43 const Instruction* Simulator::kEndOfSimAddress = NULL;
44 
45 void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
46   int width = msb - lsb + 1;
47   VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
48 
49   bits <<= lsb;
50   uint32_t mask = ((1 << width) - 1) << lsb;
51   VIXL_ASSERT((mask & write_ignore_mask_) == 0);
52 
53   value_ = (value_ & ~mask) | (bits & mask);
54 }
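// Worked example: SetBits(7, 4, 0xa) computes width = 4 and mask = 0xf0, then
// replaces bits 7..4 of value_ with 0xa while leaving the other bits intact
// (provided none of the affected bits are in write_ignore_mask_).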
55 
56 
57 SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
58   switch (id) {
59     case NZCV:
60       return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
61     case FPCR:
62       return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
63     default:
64       VIXL_UNREACHABLE();
65       return SimSystemRegister();
66   }
67 }
68 
69 const Simulator::FormToVisitorFnMap Simulator::FORM_TO_VISITOR = {
70     DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
71     SIM_AUD_VISITOR_MAP(Simulator),
72     {"smlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
73     {"smlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
74     {"smull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
75     {"sqdmlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
76     {"sqdmlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
77     {"sqdmull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
78     {"umlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
79     {"umlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
80     {"umull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
81     {"fcmla_asimdelem_c_h"_h, &Simulator::SimulateNEONComplexMulByElement},
82     {"fcmla_asimdelem_c_s"_h, &Simulator::SimulateNEONComplexMulByElement},
83     {"fmlal2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
84     {"fmlal_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
85     {"fmlsl2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
86     {"fmlsl_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
87     {"fmla_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
88     {"fmls_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
89     {"fmulx_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
90     {"fmul_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
91     {"fmla_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
92     {"fmls_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
93     {"fmulx_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
94     {"fmul_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
95     {"sdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
96     {"udot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
97     {"adclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
98     {"adclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
99     {"addhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
100     {"addhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
101     {"addp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
102     {"bcax_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
103     {"bdep_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
104     {"bext_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
105     {"bgrp_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
106     {"bsl1n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
107     {"bsl2n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
108     {"bsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
109     {"cadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
110     {"cdot_z_zzz"_h, &Simulator::SimulateSVEComplexDotProduct},
111     {"cdot_z_zzzi_d"_h, &Simulator::SimulateSVEComplexDotProduct},
112     {"cdot_z_zzzi_s"_h, &Simulator::SimulateSVEComplexDotProduct},
113     {"cmla_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
114     {"cmla_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
115     {"cmla_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
116     {"eor3_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
117     {"eorbt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
118     {"eortb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
119     {"ext_z_zi_con"_h, &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm},
120     {"faddp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
121     {"fcvtlt_z_p_z_h2s"_h, &Simulator::SimulateSVEFPConvertLong},
122     {"fcvtlt_z_p_z_s2d"_h, &Simulator::SimulateSVEFPConvertLong},
123     {"fcvtnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
124     {"fcvtnt_z_p_z_s2h"_h, &Simulator::Simulate_ZdH_PgM_ZnS},
125     {"fcvtx_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
126     {"fcvtxnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
127     {"flogb_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
128     {"fmaxnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
129     {"fmaxp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
130     {"fminnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
131     {"fminp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
132     {"fmlalb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
133     {"fmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
134     {"fmlalt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
135     {"fmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
136     {"fmlslb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
137     {"fmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
138     {"fmlslt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
139     {"fmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
140     {"histcnt_z_p_zz"_h, &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT},
141     {"histseg_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
142     {"ldnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
143     {"ldnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
144     {"ldnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
145     {"ldnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
146     {"ldnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
147     {"ldnt1sb_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
148     {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
149     {"ldnt1sh_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
150     {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
151     {"ldnt1sw_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
152     {"ldnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
153     {"ldnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
154     {"match_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
155     {"mla_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
156     {"mla_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
157     {"mla_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
158     {"mls_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
159     {"mls_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
160     {"mls_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
161     {"mul_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
162     {"mul_z_zzi_d"_h, &Simulator::SimulateSVEMulIndex},
163     {"mul_z_zzi_h"_h, &Simulator::SimulateSVEMulIndex},
164     {"mul_z_zzi_s"_h, &Simulator::SimulateSVEMulIndex},
165     {"nbsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
166     {"nmatch_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
167     {"pmul_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
168     {"pmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
169     {"pmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
170     {"raddhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
171     {"raddhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
172     {"rshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
173     {"rshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
174     {"rsubhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
175     {"rsubhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
176     {"saba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
177     {"sabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
178     {"sabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
179     {"sabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
180     {"sabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
181     {"sadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
182     {"saddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
183     {"saddlbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
184     {"saddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
185     {"saddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
186     {"saddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
187     {"sbclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
188     {"sbclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
189     {"shadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
190     {"shrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
191     {"shrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
192     {"shsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
193     {"shsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
194     {"sli_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
195     {"smaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
196     {"sminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
197     {"smlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
198     {"smlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
199     {"smlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
200     {"smlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
201     {"smlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
202     {"smlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
203     {"smlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
204     {"smlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
205     {"smlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
206     {"smlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
207     {"smlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
208     {"smlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
209     {"smulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
210     {"smullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
211     {"smullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
212     {"smullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
213     {"smullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
214     {"smullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
215     {"smullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
216     {"splice_z_p_zz_con"_h, &Simulator::VisitSVEVectorSplice},
217     {"sqabs_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
218     {"sqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
219     {"sqcadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
220     {"sqdmlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
221     {"sqdmlalb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
222     {"sqdmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
223     {"sqdmlalbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
224     {"sqdmlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
225     {"sqdmlalt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
226     {"sqdmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
227     {"sqdmlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
228     {"sqdmlslb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
229     {"sqdmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
230     {"sqdmlslbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
231     {"sqdmlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
232     {"sqdmlslt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
233     {"sqdmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
234     {"sqdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
235     {"sqdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
236     {"sqdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
237     {"sqdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
238     {"sqdmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
239     {"sqdmullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
240     {"sqdmullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
241     {"sqdmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
242     {"sqdmullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
243     {"sqdmullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
244     {"sqneg_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
245     {"sqrdcmlah_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
246     {"sqrdcmlah_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
247     {"sqrdcmlah_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
248     {"sqrdmlah_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
249     {"sqrdmlah_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
250     {"sqrdmlah_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
251     {"sqrdmlah_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
252     {"sqrdmlsh_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
253     {"sqrdmlsh_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
254     {"sqrdmlsh_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
255     {"sqrdmlsh_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
256     {"sqrdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
257     {"sqrdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
258     {"sqrdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
259     {"sqrdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
260     {"sqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
261     {"sqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
262     {"sqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
263     {"sqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
264     {"sqrshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
265     {"sqrshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
266     {"sqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
267     {"sqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
268     {"sqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
269     {"sqshlu_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
270     {"sqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
271     {"sqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
272     {"sqshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
273     {"sqshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
274     {"sqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
275     {"sqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
276     {"sqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
277     {"sqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
278     {"sqxtunb_z_zz"_h, &Simulator::SimulateSVENarrow},
279     {"sqxtunt_z_zz"_h, &Simulator::SimulateSVENarrow},
280     {"srhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
281     {"sri_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
282     {"srshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
283     {"srshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
284     {"srshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
285     {"srsra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
286     {"sshllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
287     {"sshllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
288     {"ssra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
289     {"ssublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
290     {"ssublbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
291     {"ssublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
292     {"ssubltb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
293     {"ssubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
294     {"ssubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
295     {"stnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
296     {"stnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
297     {"stnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
298     {"stnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
299     {"stnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
300     {"stnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
301     {"stnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
302     {"subhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
303     {"subhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
304     {"suqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
305     {"tbl_z_zz_2"_h, &Simulator::VisitSVETableLookup},
306     {"tbx_z_zz"_h, &Simulator::VisitSVETableLookup},
307     {"uaba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
308     {"uabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
309     {"uabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
310     {"uabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
311     {"uabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
312     {"uadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
313     {"uaddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
314     {"uaddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
315     {"uaddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
316     {"uaddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
317     {"uhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
318     {"uhsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
319     {"uhsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
320     {"umaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
321     {"uminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
322     {"umlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
323     {"umlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
324     {"umlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
325     {"umlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
326     {"umlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
327     {"umlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
328     {"umlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
329     {"umlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
330     {"umlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
331     {"umlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
332     {"umlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
333     {"umlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
334     {"umulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
335     {"umullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
336     {"umullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
337     {"umullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
338     {"umullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
339     {"umullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
340     {"umullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
341     {"uqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
342     {"uqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
343     {"uqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
344     {"uqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
345     {"uqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
346     {"uqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
347     {"uqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
348     {"uqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
349     {"uqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
350     {"uqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
351     {"uqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
352     {"uqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
353     {"uqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
354     {"uqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
355     {"urecpe_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
356     {"urhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
357     {"urshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
358     {"urshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
359     {"urshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
360     {"ursqrte_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
361     {"ursra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
362     {"ushllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
363     {"ushllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
364     {"usqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
365     {"usra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
366     {"usublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
367     {"usublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
368     {"usubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
369     {"usubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
370     {"whilege_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
371     {"whilegt_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
372     {"whilehi_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
373     {"whilehs_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
374     {"whilerw_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
375     {"whilewr_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
376     {"xar_z_zzi"_h, &Simulator::SimulateSVEExclusiveOrRotate},
377     {"smmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
378     {"ummla_z_zzz"_h, &Simulator::SimulateMatrixMul},
379     {"usmmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
380     {"smmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
381     {"ummla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
382     {"usmmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
383     {"fmmla_z_zzz_s"_h, &Simulator::SimulateSVEFPMatrixMul},
384     {"fmmla_z_zzz_d"_h, &Simulator::SimulateSVEFPMatrixMul},
385     {"ld1row_z_p_bi_u32"_h,
386     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
387     {"ld1row_z_p_br_contiguous"_h,
388     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
389     {"ld1rod_z_p_bi_u64"_h,
390     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
391     {"ld1rod_z_p_br_contiguous"_h,
392     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
393     {"ld1rob_z_p_bi_u8"_h,
394     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
395     {"ld1rob_z_p_br_contiguous"_h,
396     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
397     {"ld1roh_z_p_bi_u16"_h,
398     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
399     {"ld1roh_z_p_br_contiguous"_h,
400     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
401     {"usdot_z_zzz_s"_h, &Simulator::VisitSVEIntMulAddUnpredicated},
402     {"sudot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
403     {"usdot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
404     {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra},
405     {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
406     {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
407 };
408 
409 const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
410   return &FORM_TO_VISITOR;
411 }
412 
413 #ifndef PANDA_BUILD
414 Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
415     : memory_(std::move(stack)),
416       last_instr_(NULL),
417       cpu_features_auditor_(decoder, CPUFeatures::All()) {
418 #else
419 Simulator::Simulator(PandaAllocator* allocator, Decoder* decoder, SimStack::Allocated stack, FILE* stream)
420     : memory_(std::move(stack)),
421       last_instr_(NULL),
422       allocator_(allocator),
423       cpu_features_auditor_(decoder, CPUFeatures::All()),
424       saved_cpu_features_(allocator_.Adapter()) {
425 #endif
426   // Ensure that shift operations act as the simulator expects.
427   VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
428   VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);
429 
430   // Set up a placeholder pipe for CanReadMemory.
431   VIXL_CHECK(pipe(placeholder_pipe_fd_) == 0);
432 
433   // Set up the decoder.
434   decoder_ = decoder;
435   decoder_->AppendVisitor(this);
436 
437   stream_ = stream;
438 
439 #ifndef PANDA_BUILD
440   print_disasm_ = new PrintDisassembler(stream_);
441 #else
442   print_disasm_ = allocator_.New<PrintDisassembler>(allocator, stream_);
443 #endif
444   // The Simulator and Disassembler share the same available list, held by the
445   // auditor. The Disassembler only annotates instructions with features that
446   // are _not_ available, so registering the auditor should have no effect
447   // unless the simulator is about to abort (due to missing features). In
448   // practice, this means that with trace enabled, the simulator will crash just
449   // after the disassembler prints the instruction, with the missing features
450   // enumerated.
451   print_disasm_->RegisterCPUFeaturesAuditor(&cpu_features_auditor_);
452 
453   SetColouredTrace(false);
454   trace_parameters_ = LOG_NONE;
455 
456   // We have to configure the SVE vector register length before calling
457   // ResetState().
458   SetVectorLengthInBits(kZRegMinSize);
459 
460   ResetState();
461 
462   // Print a warning about exclusive-access instructions, but only the first
463   // time they are encountered. This warning can be silenced using
464   // SilenceExclusiveAccessWarning().
465   print_exclusive_access_warning_ = true;
466 
467   guard_pages_ = false;
468 
469   // Initialize the common state of RNDR and RNDRRS.
470   uint16_t seed[3] = {11, 22, 33};
471   VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
472   memcpy(rand_state_, seed, sizeof(rand_state_));
473 
474   // Initialize all bits of the pseudo predicate register to true.
475   LogicPRegister ones(pregister_all_true_);
476   ones.SetAllBits();
477 }
478 
479 void Simulator::ResetSystemRegisters() {
480   // Reset the system registers.
481   nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
482   fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
483   ResetFFR();
484 }
485 
486 void Simulator::ResetRegisters() {
487   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
488     WriteXRegister(i, 0xbadbeef);
489   }
490   // Returning to address 0 exits the Simulator.
491   WriteLr(kEndOfSimAddress);
492 }
493 
494 void Simulator::ResetVRegisters() {
495   // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
496   VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0);
497   int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes;
498   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
499     VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes());
500     vregisters_[i].NotifyAccessAsZ();
501     for (int lane = 0; lane < lane_count; lane++) {
502       // Encode the register number and (D-sized) lane into each NaN, to
503       // make them easier to trace.
504       uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) |
505                           (0x0000000000000001 * lane);
506       VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
507       VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
508       vregisters_[i].Insert(lane, nan_bits);
509     }
510   }
511 }
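// For example, with the encoding above, D-sized lane 3 of z2 is reset to
// 0x7ff0f0027f80f003: read as a double this is a signalling NaN, and its low
// 32 bits (0x7f80f003) form a signalling NaN float, so stale register contents
// stand out clearly in traces.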
512 
513 void Simulator::ResetPRegisters() {
514   VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
515   int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes;
516   // Ensure the register configuration fits in this bit encoding.
517   VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX);
518   VIXL_ASSERT(lane_count <= UINT8_MAX);
519   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
520     VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes());
521     for (int lane = 0; lane < lane_count; lane++) {
522       // Encode the register number and (H-sized) lane into each lane slot.
523       uint16_t bits = (0x0100 * lane) | i;
524       pregisters_[i].Insert(lane, bits);
525     }
526   }
527 }
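// For example, H-sized lane 2 of p3 is reset to 0x0203, i.e. (0x0100 * 2) | 3,
// so a trace of an uninitialised predicate register reveals both the register
// number and the lane index.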
528 
529 void Simulator::ResetFFR() {
530   VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
531   int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes;
532   ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes)));
533 }
534 
535 void Simulator::ResetState() {
536   ResetSystemRegisters();
537   ResetRegisters();
538   ResetVRegisters();
539   ResetPRegisters();
540 
541   WriteSp(memory_.GetStack().GetBase());
542 
543   pc_ = NULL;
544   pc_modified_ = false;
545 
546   // BTI state.
547   btype_ = DefaultBType;
548   next_btype_ = DefaultBType;
549 }
550 
551 void Simulator::SetVectorLengthInBits(unsigned vector_length) {
552   VIXL_ASSERT((vector_length >= kZRegMinSize) &&
553               (vector_length <= kZRegMaxSize));
554   VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
555   vector_length_ = vector_length;
556 
557   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
558     vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes());
559   }
560   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
561     pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes());
562   }
563 
564   ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes());
565 
566   ResetVRegisters();
567   ResetPRegisters();
568   ResetFFR();
569 }
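// For example, SetVectorLengthInBits(256) resizes every Z register to 32 bytes
// and every P register (and the FFR) to 4 bytes, then resets them, discarding
// any previously written V, Z, P and FFR state.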
570 
571 Simulator::~Simulator() {
572   // The decoder may outlive the simulator.
573   decoder_->RemoveVisitor(print_disasm_);
574 #ifndef VIXL_USE_PANDA_ALLOC
575   delete print_disasm_;
576 #endif
577   close(placeholder_pipe_fd_[0]);
578   close(placeholder_pipe_fd_[1]);
579 }
580 
581 
582 void Simulator::Run() {
583   // Flush any written registers before executing anything, so that
584   // manually-set registers are logged _before_ the first instruction.
585   LogAllWrittenRegisters();
586 
587   while (pc_ != kEndOfSimAddress) {
588     ExecuteInstruction();
589   }
590 }
591 
592 
593 void Simulator::RunFrom(const Instruction* first) {
594   WritePc(first, NoBranchLog);
595   Run();
596 }
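// Informal usage sketch (the construction arguments and `masm` are assumptions
// about the embedding code; the actual defaults depend on the project):
//
//   Decoder decoder;
//   Simulator simulator(&decoder, stdout, SimStack().Allocate());
//   simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());
//
// RunFrom() sets the PC and then executes instructions until pc_ reaches
// kEndOfSimAddress, the sentinel planted in the link register by
// ResetRegisters().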
597 
598 
599 // clang-format off
600 const char* Simulator::xreg_names[] = {"x0",  "x1",  "x2",  "x3",  "x4",  "x5",
601                                        "x6",  "x7",  "x8",  "x9",  "x10", "x11",
602                                        "x12", "x13", "x14", "x15", "x16", "x17",
603                                        "x18", "x19", "x20", "x21", "x22", "x23",
604                                        "x24", "x25", "x26", "x27", "x28", "x29",
605                                        "lr",  "xzr", "sp"};
606 
607 const char* Simulator::wreg_names[] = {"w0",  "w1",  "w2",  "w3",  "w4",  "w5",
608                                        "w6",  "w7",  "w8",  "w9",  "w10", "w11",
609                                        "w12", "w13", "w14", "w15", "w16", "w17",
610                                        "w18", "w19", "w20", "w21", "w22", "w23",
611                                        "w24", "w25", "w26", "w27", "w28", "w29",
612                                        "w30", "wzr", "wsp"};
613 
614 const char* Simulator::breg_names[] = {"b0",  "b1",  "b2",  "b3",  "b4",  "b5",
615                                        "b6",  "b7",  "b8",  "b9",  "b10", "b11",
616                                        "b12", "b13", "b14", "b15", "b16", "b17",
617                                        "b18", "b19", "b20", "b21", "b22", "b23",
618                                        "b24", "b25", "b26", "b27", "b28", "b29",
619                                        "b30", "b31"};
620 
621 const char* Simulator::hreg_names[] = {"h0",  "h1",  "h2",  "h3",  "h4",  "h5",
622                                        "h6",  "h7",  "h8",  "h9",  "h10", "h11",
623                                        "h12", "h13", "h14", "h15", "h16", "h17",
624                                        "h18", "h19", "h20", "h21", "h22", "h23",
625                                        "h24", "h25", "h26", "h27", "h28", "h29",
626                                        "h30", "h31"};
627 
628 const char* Simulator::sreg_names[] = {"s0",  "s1",  "s2",  "s3",  "s4",  "s5",
629                                        "s6",  "s7",  "s8",  "s9",  "s10", "s11",
630                                        "s12", "s13", "s14", "s15", "s16", "s17",
631                                        "s18", "s19", "s20", "s21", "s22", "s23",
632                                        "s24", "s25", "s26", "s27", "s28", "s29",
633                                        "s30", "s31"};
634 
635 const char* Simulator::dreg_names[] = {"d0",  "d1",  "d2",  "d3",  "d4",  "d5",
636                                        "d6",  "d7",  "d8",  "d9",  "d10", "d11",
637                                        "d12", "d13", "d14", "d15", "d16", "d17",
638                                        "d18", "d19", "d20", "d21", "d22", "d23",
639                                        "d24", "d25", "d26", "d27", "d28", "d29",
640                                        "d30", "d31"};
641 
642 const char* Simulator::vreg_names[] = {"v0",  "v1",  "v2",  "v3",  "v4",  "v5",
643                                        "v6",  "v7",  "v8",  "v9",  "v10", "v11",
644                                        "v12", "v13", "v14", "v15", "v16", "v17",
645                                        "v18", "v19", "v20", "v21", "v22", "v23",
646                                        "v24", "v25", "v26", "v27", "v28", "v29",
647                                        "v30", "v31"};
648 
649 const char* Simulator::zreg_names[] = {"z0",  "z1",  "z2",  "z3",  "z4",  "z5",
650                                        "z6",  "z7",  "z8",  "z9",  "z10", "z11",
651                                        "z12", "z13", "z14", "z15", "z16", "z17",
652                                        "z18", "z19", "z20", "z21", "z22", "z23",
653                                        "z24", "z25", "z26", "z27", "z28", "z29",
654                                        "z30", "z31"};
655 
656 const char* Simulator::preg_names[] = {"p0",  "p1",  "p2",  "p3",  "p4",  "p5",
657                                        "p6",  "p7",  "p8",  "p9",  "p10", "p11",
658                                        "p12", "p13", "p14", "p15"};
659 // clang-format on
660 
661 
662 const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
663   // If the code represents the stack pointer, index the name after zr.
664   if ((code == kSPRegInternalCode) ||
665       ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
666     code = kZeroRegCode + 1;
667   }
668   VIXL_ASSERT(code < ArrayLength(wreg_names));
669   return wreg_names[code];
670 }
671 
672 
673 const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
674   // If the code represents the stack pointer, index the name after zr.
675   if ((code == kSPRegInternalCode) ||
676       ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
677     code = kZeroRegCode + 1;
678   }
679   VIXL_ASSERT(code < ArrayLength(xreg_names));
680   return xreg_names[code];
681 }
682 
683 
684 const char* Simulator::BRegNameForCode(unsigned code) {
685   VIXL_ASSERT(code < kNumberOfVRegisters);
686   return breg_names[code];
687 }
688 
689 
690 const char* Simulator::HRegNameForCode(unsigned code) {
691   VIXL_ASSERT(code < kNumberOfVRegisters);
692   return hreg_names[code];
693 }
694 
695 
696 const char* Simulator::SRegNameForCode(unsigned code) {
697   VIXL_ASSERT(code < kNumberOfVRegisters);
698   return sreg_names[code];
699 }
700 
701 
702 const char* Simulator::DRegNameForCode(unsigned code) {
703   VIXL_ASSERT(code < kNumberOfVRegisters);
704   return dreg_names[code];
705 }
706 
707 
708 const char* Simulator::VRegNameForCode(unsigned code) {
709   VIXL_ASSERT(code < kNumberOfVRegisters);
710   return vreg_names[code];
711 }
712 
713 
714 const char* Simulator::ZRegNameForCode(unsigned code) {
715   VIXL_ASSERT(code < kNumberOfZRegisters);
716   return zreg_names[code];
717 }
718 
719 
720 const char* Simulator::PRegNameForCode(unsigned code) {
721   VIXL_ASSERT(code < kNumberOfPRegisters);
722   return preg_names[code];
723 }
724 
725 SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) {
726   SimVRegister ones, result;
727   dup_immediate(kFormatVnB, ones, 0xff);
728   mov_zeroing(kFormatVnB, result, pg, ones);
729   return result;
730 }
731 
732 void Simulator::ExtractFromSimVRegister(VectorFormat vform,
733                                         SimPRegister& pd,
734                                         SimVRegister vreg) {
735   SimVRegister zero;
736   dup_immediate(kFormatVnB, zero, 0);
737   SVEIntCompareVectorsHelper(ne,
738                              vform,
739                              pd,
740                              GetPTrue(),
741                              vreg,
742                              zero,
743                              false,
744                              LeaveFlags);
745 }
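// ExpandToSimVRegister() materialises a predicate as a vector with 0xff in
// every active byte lane and 0x00 elsewhere; ExtractFromSimVRegister() performs
// the reverse mapping by comparing the vector lanes against zero under an
// all-true governing predicate.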
746 
747 #define COLOUR(colour_code) "\033[0;" colour_code "m"
748 #define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
749 #define COLOUR_HIGHLIGHT "\033[43m"
750 #define NORMAL ""
751 #define GREY "30"
752 #define RED "31"
753 #define GREEN "32"
754 #define YELLOW "33"
755 #define BLUE "34"
756 #define MAGENTA "35"
757 #define CYAN "36"
758 #define WHITE "37"
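// The macros above assemble ANSI SGR escape sequences; for example,
// COLOUR_BOLD(CYAN) expands to "\033[1;36m", and COLOUR(NORMAL) expands to
// "\033[0;m", which resets the attributes on most terminals.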
759 void Simulator::SetColouredTrace(bool value) {
760   coloured_trace_ = value;
761 
762   clr_normal = value ? COLOUR(NORMAL) : "";
763   clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
764   clr_flag_value = value ? COLOUR(NORMAL) : "";
765   clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
766   clr_reg_value = value ? COLOUR(CYAN) : "";
767   clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
768   clr_vreg_value = value ? COLOUR(MAGENTA) : "";
769   clr_preg_name = value ? COLOUR_BOLD(GREEN) : "";
770   clr_preg_value = value ? COLOUR(GREEN) : "";
771   clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
772   clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
773   clr_warning_message = value ? COLOUR(YELLOW) : "";
774   clr_printf = value ? COLOUR(GREEN) : "";
775   clr_branch_marker = value ? COLOUR(GREY) COLOUR_HIGHLIGHT : "";
776 
777   if (value) {
778     print_disasm_->SetCPUFeaturesPrefix("// Needs: " COLOUR_BOLD(RED));
779     print_disasm_->SetCPUFeaturesSuffix(COLOUR(NORMAL));
780   } else {
781     print_disasm_->SetCPUFeaturesPrefix("// Needs: ");
782     print_disasm_->SetCPUFeaturesSuffix("");
783   }
784 }
785 
786 
787 void Simulator::SetTraceParameters(int parameters) {
788   bool disasm_before = trace_parameters_ & LOG_DISASM;
789   trace_parameters_ = parameters;
790   bool disasm_after = trace_parameters_ & LOG_DISASM;
791 
792   if (disasm_before != disasm_after) {
793     if (disasm_after) {
794       decoder_->InsertVisitorBefore(print_disasm_, this);
795     } else {
796       decoder_->RemoveVisitor(print_disasm_);
797     }
798   }
799 }
800 
801 // Helpers ---------------------------------------------------------------------
802 uint64_t Simulator::AddWithCarry(unsigned reg_size,
803                                  bool set_flags,
804                                  uint64_t left,
805                                  uint64_t right,
806                                  int carry_in) {
807   std::pair<uint64_t, uint8_t> result_and_flags =
808       AddWithCarry(reg_size, left, right, carry_in);
809   if (set_flags) {
810     uint8_t flags = result_and_flags.second;
811     ReadNzcv().SetN((flags >> 3) & 1);
812     ReadNzcv().SetZ((flags >> 2) & 1);
813     ReadNzcv().SetC((flags >> 1) & 1);
814     ReadNzcv().SetV((flags >> 0) & 1);
815     LogSystemRegister(NZCV);
816   }
817   return result_and_flags.first;
818 }
819 
820 std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size,
821                                                      uint64_t left,
822                                                      uint64_t right,
823                                                      int carry_in) {
824   VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
825   VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
826 
827   uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
828   uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
829   uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
830 
831   left &= reg_mask;
832   right &= reg_mask;
833   uint64_t result = (left + right + carry_in) & reg_mask;
834 
835   // NZCV bits, ordered N in bit 3 to V in bit 0.
836   uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0;
837   nzcv |= CalcZFlag(result) ? 4 : 0;
838 
839   // Compute the C flag by checking whether the unsigned sum (left + right +
839   // carry_in) would exceed the maximum unsigned integer.
840   uint64_t max_uint_2op = max_uint - carry_in;
841   bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
842   nzcv |= C ? 2 : 0;
843 
844   // Overflow iff the sign bit is the same for the two inputs and different
845   // for the result.
846   uint64_t left_sign = left & sign_mask;
847   uint64_t right_sign = right & sign_mask;
848   uint64_t result_sign = result & sign_mask;
849   bool V = (left_sign == right_sign) && (left_sign != result_sign);
850   nzcv |= V ? 1 : 0;
851 
852   return std::make_pair(result, nzcv);
853 }
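// Worked example: AddWithCarry(kWRegSize, 0xffffffff, 0x1, 0) wraps to a result
// of zero and returns the flags 0b0110: Z is set by the zero result, C by the
// unsigned overflow, and N and V stay clear (the operands have opposite signs).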
854 
855 using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
856 
857 vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) {
858   std::pair<uint64_t, uint8_t> sum_lo =
859       AddWithCarry(kXRegSize, x.second, y.second, 0);
860   int carry_in = (sum_lo.second & 0x2) >> 1;  // C flag in NZCV result.
861   std::pair<uint64_t, uint8_t> sum_hi =
862       AddWithCarry(kXRegSize, x.first, y.first, carry_in);
863   return std::make_pair(sum_hi.first, sum_lo.first);
864 }
865 
866 vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
867   // Negate the 128-bit value (two's complement). Assert that the input is not
867   // INT128_MIN, which has no representable negation.
868   VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
869   x.first = ~x.first;
870   x.second = ~x.second;
871   return Add128(x, {0, 1});
872 }
873 
874 vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
875   bool neg_result = false;
876   if ((x >> 63) == 1) {
877     x = -x;
878     neg_result = !neg_result;
879   }
880   if ((y >> 63) == 1) {
881     y = -y;
882     neg_result = !neg_result;
883   }
884 
885   uint64_t x_lo = x & 0xffffffff;
886   uint64_t x_hi = x >> 32;
887   uint64_t y_lo = y & 0xffffffff;
888   uint64_t y_hi = y >> 32;
889 
890   uint64_t t1 = x_lo * y_hi;
891   uint64_t t2 = x_hi * y_lo;
892   vixl_uint128_t a = std::make_pair(0, x_lo * y_lo);
893   vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32);
894   vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32);
895   vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0);
896 
897   vixl_uint128_t result = Add128(a, b);
898   result = Add128(result, c);
899   result = Add128(result, d);
900   // Neg128() propagates the carry into the high word when the low word is zero.
901   return neg_result ? Neg128(result) : result;
902 }
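// Mul64() computes a signed 64x64->128-bit product using the schoolbook
// decomposition of the operand magnitudes,
//   |x| * |y| = ((x_hi * y_hi) << 64) + ((x_hi * y_lo + x_lo * y_hi) << 32)
//               + (x_lo * y_lo),
// and then negates the 128-bit magnitude if exactly one operand was negative.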
903 
904 int64_t Simulator::ShiftOperand(unsigned reg_size,
905                                 uint64_t uvalue,
906                                 Shift shift_type,
907                                 unsigned amount) const {
908   VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
909               (reg_size == kSRegSize) || (reg_size == kDRegSize));
910   if (amount > 0) {
911     uint64_t mask = GetUintMask(reg_size);
912     bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
913     // The behavior is undefined in C++ if the shift amount is greater than
914     // or equal to the register lane size. Work out the shifted result based
915     // on architectural behavior before performing the C++ shift operations.
916     switch (shift_type) {
917       case LSL:
918         if (amount >= reg_size) {
919           return UINT64_C(0);
920         }
921         uvalue <<= amount;
922         break;
923       case LSR:
924         if (amount >= reg_size) {
925           return UINT64_C(0);
926         }
927         uvalue >>= amount;
928         break;
929       case ASR:
930         if (amount >= reg_size) {
931           return is_negative ? ~UINT64_C(0) : UINT64_C(0);
932         }
933         uvalue >>= amount;
934         if (is_negative) {
935           // Simulate sign-extension to 64 bits.
936           uvalue |= ~UINT64_C(0) << (reg_size - amount);
937         }
938         break;
939       case ROR: {
940         uvalue = RotateRight(uvalue, amount, reg_size);
941         break;
942       }
943       default:
944         VIXL_UNIMPLEMENTED();
945         return 0;
946     }
947     uvalue &= mask;
948   }
949 
950   int64_t result;
951   memcpy(&result, &uvalue, sizeof(result));
952   return result;
953 }
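// For in-range shift amounts the result is masked to reg_size, so callers get
// the lane-sized bit pattern in the low bits of the returned int64_t. Amounts
// of reg_size or more take the early-return paths above: zero for LSL and LSR,
// and all ones or zero for ASR depending on the sign of the input.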
954 
955 
956 int64_t Simulator::ExtendValue(unsigned reg_size,
957                                int64_t value,
958                                Extend extend_type,
959                                unsigned left_shift) const {
960   switch (extend_type) {
961     case UXTB:
962       value &= kByteMask;
963       break;
964     case UXTH:
965       value &= kHalfWordMask;
966       break;
967     case UXTW:
968       value &= kWordMask;
969       break;
970     case SXTB:
971       value &= kByteMask;
972       if ((value & 0x80) != 0) {
973         value |= ~UINT64_C(0) << 8;
974       }
975       break;
976     case SXTH:
977       value &= kHalfWordMask;
978       if ((value & 0x8000) != 0) {
979         value |= ~UINT64_C(0) << 16;
980       }
981       break;
982     case SXTW:
983       value &= kWordMask;
984       if ((value & 0x80000000) != 0) {
985         value |= ~UINT64_C(0) << 32;
986       }
987       break;
988     case UXTX:
989     case SXTX:
990       break;
991     default:
992       VIXL_UNREACHABLE();
993   }
994   return ShiftOperand(reg_size, value, LSL, left_shift);
995 }
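// Worked example: ExtendValue(kXRegSize, 0x80, SXTB, 2) first sign-extends the
// byte 0x80 to 0xffffffffffffff80, then shifts it left by two, producing
// 0xfffffffffffffe00, i.e. the "extended register" operand form used by
// instructions such as ADD (extended register).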
996 
997 
998 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
999   AssertSupportedFPCR();
1000 
1001   // TODO: This assumes that the C++ implementation handles comparisons in the
1002   // way that we expect (as per AssertSupportedFPCR()).
1003   bool process_exception = false;
1004   if ((IsNaN(val0) != 0) || (IsNaN(val1) != 0)) {
1005     ReadNzcv().SetRawValue(FPUnorderedFlag);
1006     if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
1007         (trap == EnableTrap)) {
1008       process_exception = true;
1009     }
1010   } else if (val0 < val1) {
1011     ReadNzcv().SetRawValue(FPLessThanFlag);
1012   } else if (val0 > val1) {
1013     ReadNzcv().SetRawValue(FPGreaterThanFlag);
1014   } else if (val0 == val1) {
1015     ReadNzcv().SetRawValue(FPEqualFlag);
1016   } else {
1017     VIXL_UNREACHABLE();
1018   }
1019   LogSystemRegister(NZCV);
1020   if (process_exception) FPProcessException();
1021 }
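// The raw NZCV values written above follow the AArch64 FCMP convention:
// equal sets Z and C (0b0110), less-than sets only N (0b1000), greater-than
// sets only C (0b0010), and unordered comparisons set C and V (0b0011).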
1022 
1023 
1024 uint64_t Simulator::ComputeMemOperandAddress(const MemOperand& mem_op) const {
1025   VIXL_ASSERT(mem_op.IsValid());
1026   int64_t base = ReadRegister<int64_t>(mem_op.GetBaseRegister());
1027   if (mem_op.IsImmediateOffset()) {
1028     return base + mem_op.GetOffset();
1029   } else {
1030     VIXL_ASSERT(mem_op.GetRegisterOffset().IsValid());
1031     int64_t offset = ReadRegister<int64_t>(mem_op.GetRegisterOffset());
1032     unsigned shift_amount = mem_op.GetShiftAmount();
1033     if (mem_op.GetShift() != NO_SHIFT) {
1034       offset = ShiftOperand(kXRegSize, offset, mem_op.GetShift(), shift_amount);
1035     }
1036     if (mem_op.GetExtend() != NO_EXTEND) {
1037       offset = ExtendValue(kXRegSize, offset, mem_op.GetExtend(), shift_amount);
1038     }
1039     return static_cast<uint64_t>(base + offset);
1040   }
1041 }
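// Informal example (MemOperand forms as in the usual VIXL AArch64 API): for
// MemOperand(x1, w2, SXTW, 2) the returned address is x1 plus the
// sign-extended value of w2 shifted left by two, computed here via
// ReadRegister(), ExtendValue() and ShiftOperand().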
1042 
1043 
1044 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
1045     unsigned reg_size, unsigned lane_size) {
1046   VIXL_ASSERT(reg_size >= lane_size);
1047 
1048   uint32_t format = 0;
1049   if (reg_size != lane_size) {
1050     switch (reg_size) {
1051       default:
1052         VIXL_UNREACHABLE();
1053         break;
1054       case kQRegSizeInBytes:
1055         format = kPrintRegAsQVector;
1056         break;
1057       case kDRegSizeInBytes:
1058         format = kPrintRegAsDVector;
1059         break;
1060     }
1061   }
1062 
1063   switch (lane_size) {
1064     default:
1065       VIXL_UNREACHABLE();
1066       break;
1067     case kQRegSizeInBytes:
1068       format |= kPrintReg1Q;
1069       break;
1070     case kDRegSizeInBytes:
1071       format |= kPrintReg1D;
1072       break;
1073     case kSRegSizeInBytes:
1074       format |= kPrintReg1S;
1075       break;
1076     case kHRegSizeInBytes:
1077       format |= kPrintReg1H;
1078       break;
1079     case kBRegSizeInBytes:
1080       format |= kPrintReg1B;
1081       break;
1082   }
1083   // These sizes would be duplicate case labels.
1084   VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
1085   VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
1086   VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
1087   VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
1088 
1089   return static_cast<PrintRegisterFormat>(format);
1090 }
1091 
1092 
1093 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
1094     VectorFormat vform) {
1095   switch (vform) {
1096     default:
1097       VIXL_UNREACHABLE();
1098       return kPrintReg16B;
1099     case kFormat16B:
1100       return kPrintReg16B;
1101     case kFormat8B:
1102       return kPrintReg8B;
1103     case kFormat8H:
1104       return kPrintReg8H;
1105     case kFormat4H:
1106       return kPrintReg4H;
1107     case kFormat4S:
1108       return kPrintReg4S;
1109     case kFormat2S:
1110       return kPrintReg2S;
1111     case kFormat2D:
1112       return kPrintReg2D;
1113     case kFormat1D:
1114       return kPrintReg1D;
1115 
1116     case kFormatB:
1117       return kPrintReg1B;
1118     case kFormatH:
1119       return kPrintReg1H;
1120     case kFormatS:
1121       return kPrintReg1S;
1122     case kFormatD:
1123       return kPrintReg1D;
1124 
1125     case kFormatVnB:
1126       return kPrintRegVnB;
1127     case kFormatVnH:
1128       return kPrintRegVnH;
1129     case kFormatVnS:
1130       return kPrintRegVnS;
1131     case kFormatVnD:
1132       return kPrintRegVnD;
1133   }
1134 }
1135 
1136 
1137 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
1138     VectorFormat vform) {
1139   switch (vform) {
1140     default:
1141       VIXL_UNREACHABLE();
1142       return kPrintReg16B;
1143     case kFormat8H:
1144       return kPrintReg8HFP;
1145     case kFormat4H:
1146       return kPrintReg4HFP;
1147     case kFormat4S:
1148       return kPrintReg4SFP;
1149     case kFormat2S:
1150       return kPrintReg2SFP;
1151     case kFormat2D:
1152       return kPrintReg2DFP;
1153     case kFormat1D:
1154       return kPrintReg1DFP;
1155     case kFormatH:
1156       return kPrintReg1HFP;
1157     case kFormatS:
1158       return kPrintReg1SFP;
1159     case kFormatD:
1160       return kPrintReg1DFP;
1161   }
1162 }
1163 
1164 void Simulator::PrintRegisters() {
1165   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1166     if (i == kSpRegCode) i = kSPRegInternalCode;
1167     PrintRegister(i);
1168   }
1169 }
1170 
1171 void Simulator::PrintVRegisters() {
1172   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1173     PrintVRegister(i);
1174   }
1175 }
1176 
1177 void Simulator::PrintZRegisters() {
1178   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
1179     PrintZRegister(i);
1180   }
1181 }
1182 
1183 void Simulator::PrintWrittenRegisters() {
1184   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1185     if (registers_[i].WrittenSinceLastLog()) {
1186       if (i == kSpRegCode) i = kSPRegInternalCode;
1187       PrintRegister(i);
1188     }
1189   }
1190 }
1191 
1192 void Simulator::PrintWrittenVRegisters() {
1193   bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
1194   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1195     if (vregisters_[i].WrittenSinceLastLog()) {
1196       // Z registers are initialised in the constructor before the user can
1197       // configure the CPU features, so we must also check for SVE here.
1198       if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
1199         PrintZRegister(i);
1200       } else {
1201         PrintVRegister(i);
1202       }
1203     }
1204   }
1205 }
1206 
1207 void Simulator::PrintWrittenPRegisters() {
1208   // P registers are initialised in the constructor before the user can
1209   // configure the CPU features, so we must check for SVE here.
1210   if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return;
1211   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
1212     if (pregisters_[i].WrittenSinceLastLog()) {
1213       PrintPRegister(i);
1214     }
1215   }
1216   if (ReadFFR().WrittenSinceLastLog()) PrintFFR();
1217 }
1218 
1219 void Simulator::PrintSystemRegisters() {
1220   PrintSystemRegister(NZCV);
1221   PrintSystemRegister(FPCR);
1222 }
1223 
1224 void Simulator::PrintRegisterValue(const uint8_t* value,
1225                                    int value_size,
1226                                    PrintRegisterFormat format) {
1227   int print_width = GetPrintRegSizeInBytes(format);
1228   VIXL_ASSERT(print_width <= value_size);
1229   for (int i = value_size - 1; i >= print_width; i--) {
1230     // Pad with spaces so that values align vertically.
1231     fprintf(stream_, "  ");
1232     // If we aren't explicitly printing a partial value, ensure that the
1233     // unprinted bits are zero.
1234     VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0));
1235   }
1236   fprintf(stream_, "0x");
1237   for (int i = print_width - 1; i >= 0; i--) {
1238     fprintf(stream_, "%02x", value[i]);
1239   }
1240 }
1241 
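// Print the FP interpretation of each active lane of 'value', for example
// " (..., 1.500, ..., 2.000)" for a 4S vector where only lanes 0 and 2 are
// selected by 'lane_mask'. Runs of inactive lanes collapse to "...".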
1242 void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value,
1243                                                 uint16_t lane_mask,
1244                                                 PrintRegisterFormat format) {
1245   VIXL_ASSERT((format & kPrintRegAsFP) != 0);
1246   int lane_size = GetPrintRegLaneSizeInBytes(format);
1247   fprintf(stream_, " (");
1248   bool last_inactive = false;
1249   const char* sep = "";
1250   for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") {
1251     bool access = (lane_mask & (1 << (i * lane_size))) != 0;
1252     if (access) {
1253       // Read the lane as a double, so we can format all FP types in the same
1254       // way. We squash NaNs, and a double can exactly represent any other value
1255       // that the smaller types can represent, so this is lossless.
1256       double element;
1257       switch (lane_size) {
1258         case kHRegSizeInBytes: {
1259           Float16 element_fp16;
1260           VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes);
1261           memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16));
1262           element = FPToDouble(element_fp16, kUseDefaultNaN);
1263           break;
1264         }
1265         case kSRegSizeInBytes: {
1266           float element_fp32;
1267           memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32));
1268           element = static_cast<double>(element_fp32);
1269           break;
1270         }
1271         case kDRegSizeInBytes: {
1272           memcpy(&element, &value[i * lane_size], sizeof(element));
1273           break;
1274         }
1275         default:
1276           VIXL_UNREACHABLE();
1277           fprintf(stream_, "{UnknownFPValue}");
1278           continue;
1279       }
1280       if (IsNaN(element)) {
1281         // The fprintf behaviour for NaNs is implementation-defined. Always
1282         // print "nan", so that traces are consistent.
1283         fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal);
1284       } else {
1285         fprintf(stream_,
1286                 "%s%s%#.4g%s",
1287                 sep,
1288                 clr_vreg_value,
1289                 element,
1290                 clr_normal);
1291       }
1292       last_inactive = false;
1293     } else if (!last_inactive) {
1294       // Replace each contiguous sequence of inactive lanes with "...".
1295       fprintf(stream_, "%s...", sep);
1296       last_inactive = true;
1297     }
1298   }
1299   fprintf(stream_, ")");
1300 }
1301 
1302 void Simulator::PrintRegister(int code,
1303                               PrintRegisterFormat format,
1304                               const char* suffix) {
1305   VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) ||
1306               (static_cast<unsigned>(code) == kSPRegInternalCode));
1307   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar);
1308   VIXL_ASSERT((format & kPrintRegAsFP) == 0);
1309 
1310   SimRegister* reg;
1311   SimRegister zero;
1312   if (code == kZeroRegCode) {
1313     reg = &zero;
1314   } else {
1315     // registers_[31] holds the SP.
1316     VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
1317     reg = &registers_[code % kNumberOfRegisters];
1318   }
1319 
1320   // We trace register writes as whole register values, implying that any
1321   // unprinted bits are all zero:
1322   //   "#       x{code}: 0x{-----value----}"
1323   //   "#       w{code}:         0x{-value}"
1324   // Stores trace partial register values, implying nothing about the unprinted
1325   // bits:
1326   //   "# x{code}<63:0>: 0x{-----value----}"
1327   //   "# x{code}<31:0>:         0x{-value}"
1328   //   "# x{code}<15:0>:             0x{--}"
1329   //   "#  x{code}<7:0>:               0x{}"
1330 
1331   bool is_partial = (format & kPrintRegPartial) != 0;
1332   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1333   std::stringstream name;
1334   if (is_partial) {
1335     name << XRegNameForCode(code) << GetPartialRegSuffix(format);
1336   } else {
1337     // Notify the register that it has been logged, but only if we're printing
1338     // all of it.
1339     reg->NotifyRegisterLogged();
1340     switch (print_reg_size) {
1341       case kWRegSize:
1342         name << WRegNameForCode(code);
1343         break;
1344       case kXRegSize:
1345         name << XRegNameForCode(code);
1346         break;
1347       default:
1348         VIXL_UNREACHABLE();
1349         return;
1350     }
1351   }
1352 
1353   fprintf(stream_,
1354           "# %s%*s: %s",
1355           clr_reg_name,
1356           kPrintRegisterNameFieldWidth,
1357           name.str().c_str(),
1358           clr_reg_value);
1359   PrintRegisterValue(*reg, format);
1360   fprintf(stream_, "%s%s", clr_normal, suffix);
1361 }
1362 
1363 void Simulator::PrintVRegister(int code,
1364                                PrintRegisterFormat format,
1365                                const char* suffix) {
1366   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters);
1367   VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) ||
1368               ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) ||
1369               ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector));
1370 
1371   // We trace register writes as whole register values, implying that any
1372   // unprinted bits are all zero:
1373   //   "#        v{code}: 0x{-------------value------------}"
1374   //   "#        d{code}:                 0x{-----value----}"
1375   //   "#        s{code}:                         0x{-value}"
1376   //   "#        h{code}:                             0x{--}"
1377   //   "#        b{code}:                               0x{}"
1378   // Stores trace partial register values, implying nothing about the unprinted
1379   // bits:
1380   //   "# v{code}<127:0>: 0x{-------------value------------}"
1381   //   "#  v{code}<63:0>:                 0x{-----value----}"
1382   //   "#  v{code}<31:0>:                         0x{-value}"
1383   //   "#  v{code}<15:0>:                             0x{--}"
1384   //   "#   v{code}<7:0>:                               0x{}"
1385 
1386   bool is_partial = ((format & kPrintRegPartial) != 0);
1387   std::stringstream name;
1388   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1389   if (is_partial) {
1390     name << VRegNameForCode(code) << GetPartialRegSuffix(format);
1391   } else {
1392     // Notify the register that it has been logged, but only if we're printing
1393     // all of it.
1394     vregisters_[code].NotifyRegisterLogged();
1395     switch (print_reg_size) {
1396       case kBRegSize:
1397         name << BRegNameForCode(code);
1398         break;
1399       case kHRegSize:
1400         name << HRegNameForCode(code);
1401         break;
1402       case kSRegSize:
1403         name << SRegNameForCode(code);
1404         break;
1405       case kDRegSize:
1406         name << DRegNameForCode(code);
1407         break;
1408       case kQRegSize:
1409         name << VRegNameForCode(code);
1410         break;
1411       default:
1412         VIXL_UNREACHABLE();
1413         return;
1414     }
1415   }
1416 
1417   fprintf(stream_,
1418           "# %s%*s: %s",
1419           clr_vreg_name,
1420           kPrintRegisterNameFieldWidth,
1421           name.str().c_str(),
1422           clr_vreg_value);
1423   PrintRegisterValue(vregisters_[code], format);
1424   fprintf(stream_, "%s", clr_normal);
1425   if ((format & kPrintRegAsFP) != 0) {
1426     PrintRegisterValueFPAnnotations(vregisters_[code], format);
1427   }
1428   fprintf(stream_, "%s", suffix);
1429 }
1430 
1431 void Simulator::PrintVRegistersForStructuredAccess(int rt_code,
1432                                                    int reg_count,
1433                                                    uint16_t focus_mask,
1434                                                    PrintRegisterFormat format) {
1435   bool print_fp = (format & kPrintRegAsFP) != 0;
1436   // Suppress FP formatting, so we can specify the lanes we're interested in.
1437   PrintRegisterFormat format_no_fp =
1438       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1439 
1440   for (int r = 0; r < reg_count; r++) {
1441     int code = (rt_code + r) % kNumberOfVRegisters;
1442     PrintVRegister(code, format_no_fp, "");
1443     if (print_fp) {
1444       PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format);
1445     }
1446     fprintf(stream_, "\n");
1447   }
1448 }
1449 
1450 void Simulator::PrintZRegistersForStructuredAccess(int rt_code,
1451                                                    int q_index,
1452                                                    int reg_count,
1453                                                    uint16_t focus_mask,
1454                                                    PrintRegisterFormat format) {
1455   bool print_fp = (format & kPrintRegAsFP) != 0;
1456   // Suppress FP formatting, so we can specify the lanes we're interested in.
1457   PrintRegisterFormat format_no_fp =
1458       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1459 
1460   PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format);
1461 
1462   const unsigned size = kQRegSizeInBytes;
1463   unsigned byte_index = q_index * size;
1464   const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index;
1465   VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes());
1466 
1467   for (int r = 0; r < reg_count; r++) {
1468     int code = (rt_code + r) % kNumberOfZRegisters;
1469     PrintPartialZRegister(code, q_index, format_no_fp, "");
1470     if (print_fp) {
1471       PrintRegisterValueFPAnnotations(value, focus_mask, format_q);
1472     }
1473     fprintf(stream_, "\n");
1474   }
1475 }
1476 
1477 void Simulator::PrintZRegister(int code, PrintRegisterFormat format) {
1478   // We're going to print the register in parts, so force a partial format.
1479   format = GetPrintRegPartial(format);
1480   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1481   int vl = GetVectorLengthInBits();
1482   VIXL_ASSERT((vl % kQRegSize) == 0);
1483   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1484     PrintPartialZRegister(code, i, format);
1485   }
1486   vregisters_[code].NotifyRegisterLogged();
1487 }
1488 
1489 void Simulator::PrintPRegister(int code, PrintRegisterFormat format) {
1490   // We're going to print the register in parts, so force a partial format.
1491   format = GetPrintRegPartial(format);
1492   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1493   int vl = GetVectorLengthInBits();
1494   VIXL_ASSERT((vl % kQRegSize) == 0);
1495   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1496     PrintPartialPRegister(code, i, format);
1497   }
1498   pregisters_[code].NotifyRegisterLogged();
1499 }
1500 
1501 void Simulator::PrintFFR(PrintRegisterFormat format) {
1502   // We're going to print the register in parts, so force a partial format.
1503   format = GetPrintRegPartial(format);
1504   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1505   int vl = GetVectorLengthInBits();
1506   VIXL_ASSERT((vl % kQRegSize) == 0);
1507   SimPRegister& ffr = ReadFFR();
1508   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1509     PrintPartialPRegister("FFR", ffr, i, format);
1510   }
1511   ffr.NotifyRegisterLogged();
1512 }
1513 
1514 void Simulator::PrintPartialZRegister(int code,
1515                                       int q_index,
1516                                       PrintRegisterFormat format,
1517                                       const char* suffix) {
1518   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters);
1519   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1520   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1521   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1522 
1523   // We _only_ trace partial Z register values in Q-sized chunks, because
1524   // they're often too large to reasonably fit on a single line. Each line
1525   // implies nothing about the unprinted bits.
1526   //   "# z{code}<127:0>: 0x{-------------value------------}"
1527 
1528   format = GetPrintRegAsQChunkOfSVE(format);
1529 
1530   const unsigned size = kQRegSizeInBytes;
1531   unsigned byte_index = q_index * size;
1532   const uint8_t* value = vregisters_[code].GetBytes() + byte_index;
1533   VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes());
1534 
1535   int lsb = q_index * kQRegSize;
1536   int msb = lsb + kQRegSize - 1;
1537   std::stringstream name;
1538   name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>';
1539 
1540   fprintf(stream_,
1541           "# %s%*s: %s",
1542           clr_vreg_name,
1543           kPrintRegisterNameFieldWidth,
1544           name.str().c_str(),
1545           clr_vreg_value);
1546   PrintRegisterValue(value, size, format);
1547   fprintf(stream_, "%s", clr_normal);
1548   if ((format & kPrintRegAsFP) != 0) {
1549     PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format);
1550   }
1551   fprintf(stream_, "%s", suffix);
1552 }
1553 
1554 void Simulator::PrintPartialPRegister(const char* name,
1555                                       const SimPRegister& reg,
1556                                       int q_index,
1557                                       PrintRegisterFormat format,
1558                                       const char* suffix) {
1559   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1560   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1561   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1562 
1563   // We don't currently use the format for anything here.
1564   USE(format);
1565 
1566   // We _only_ trace partial P register values, because they're often too large
1567   // to reasonably fit on a single line. Each line implies nothing about the
1568   // unprinted bits.
1569   //
1570   // We print values in binary, with spaces between each bit, in order for the
1571   // bits to align with the Z register bytes that they predicate.
1572   //   "# {name}<15:0>: 0b{-------------value------------}"
1573 
1574   int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit;
1575   int lsb = q_index * print_size_in_bits;
1576   int msb = lsb + print_size_in_bits - 1;
1577   std::stringstream prefix;
1578   prefix << name << '<' << msb << ':' << lsb << '>';
1579 
1580   fprintf(stream_,
1581           "# %s%*s: %s0b",
1582           clr_preg_name,
1583           kPrintRegisterNameFieldWidth,
1584           prefix.str().c_str(),
1585           clr_preg_value);
1586   for (int i = msb; i >= lsb; i--) {
1587     fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0');
1588   }
1589   fprintf(stream_, "%s%s", clr_normal, suffix);
1590 }
1591 
1592 void Simulator::PrintPartialPRegister(int code,
1593                                       int q_index,
1594                                       PrintRegisterFormat format,
1595                                       const char* suffix) {
1596   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters);
1597   PrintPartialPRegister(PRegNameForCode(code),
1598                         pregisters_[code],
1599                         q_index,
1600                         format,
1601                         suffix);
1602 }
1603 
1604 void Simulator::PrintSystemRegister(SystemRegister id) {
1605   switch (id) {
1606     case NZCV:
1607       fprintf(stream_,
1608               "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
1609               clr_flag_name,
1610               clr_flag_value,
1611               ReadNzcv().GetN(),
1612               ReadNzcv().GetZ(),
1613               ReadNzcv().GetC(),
1614               ReadNzcv().GetV(),
1615               clr_normal);
1616       break;
1617     case FPCR: {
1618       static const char* rmode[] = {"0b00 (Round to Nearest)",
1619                                     "0b01 (Round towards Plus Infinity)",
1620                                     "0b10 (Round towards Minus Infinity)",
1621                                     "0b11 (Round towards Zero)"};
1622       VIXL_ASSERT(ReadFpcr().GetRMode() < ArrayLength(rmode));
1623       fprintf(stream_,
1624               "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
1625               clr_flag_name,
1626               clr_flag_value,
1627               ReadFpcr().GetAHP(),
1628               ReadFpcr().GetDN(),
1629               ReadFpcr().GetFZ(),
1630               rmode[ReadFpcr().GetRMode()],
1631               clr_normal);
1632       break;
1633     }
1634     default:
1635       VIXL_UNREACHABLE();
1636   }
1637 }
1638 
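// Annotate one struct access: draw guides up to the lanes named by
// 'access_mask', vertical guides through lanes in 'future_access_mask' that
// later calls will annotate, then print the struct value read from 'address'.
// Returns 'future_access_mask' with the newly-annotated lanes cleared.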
1639 uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
1640                                        uint16_t future_access_mask,
1641                                        int struct_element_count,
1642                                        int lane_size_in_bytes,
1643                                        const char* op,
1644                                        uintptr_t address,
1645                                        int reg_size_in_bytes) {
1646   // We want to assume that we'll access at least one lane.
1647   VIXL_ASSERT(access_mask != 0);
1648   VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) ||
1649               (reg_size_in_bytes == kQRegSizeInBytes));
1650   bool started_annotation = false;
1651   // Indent to match the register field, the fixed formatting, and the value
1652   // prefix ("0x"): "# {name}: 0x"
1653   fprintf(stream_, "# %*s    ", kPrintRegisterNameFieldWidth, "");
1654   // First, annotate the lanes (byte by byte).
1655   for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
1656     bool access = (access_mask & (1 << lane)) != 0;
1657     bool future = (future_access_mask & (1 << lane)) != 0;
1658     if (started_annotation) {
1659       // If we've started an annotation, draw a horizontal line in addition to
1660       // any other symbols.
1661       if (access) {
1662         fprintf(stream_, "─╨");
1663       } else if (future) {
1664         fprintf(stream_, "─║");
1665       } else {
1666         fprintf(stream_, "──");
1667       }
1668     } else {
1669       if (access) {
1670         started_annotation = true;
1671         fprintf(stream_, " ╙");
1672       } else if (future) {
1673         fprintf(stream_, " ║");
1674       } else {
1675         fprintf(stream_, "  ");
1676       }
1677     }
1678   }
1679   VIXL_ASSERT(started_annotation);
1680   fprintf(stream_, "─ 0x");
1681   int lane_size_in_nibbles = lane_size_in_bytes * 2;
1682   // Print the most-significant struct element first.
1683   const char* sep = "";
1684   for (int i = struct_element_count - 1; i >= 0; i--) {
1685     int offset = lane_size_in_bytes * i;
1686     uint64_t element = MemReadUint(lane_size_in_bytes, address + offset);
1687     fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, element);
1688     sep = "'";
1689   }
1690   fprintf(stream_,
1691           " %s %s0x%016" PRIxPTR "%s\n",
1692           op,
1693           clr_memory_address,
1694           address,
1695           clr_normal);
1696   return future_access_mask & ~access_mask;
1697 }
1698 
1699 void Simulator::PrintAccess(int code,
1700                             PrintRegisterFormat format,
1701                             const char* op,
1702                             uintptr_t address) {
1703   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1704   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1705   if ((format & kPrintRegPartial) == 0) {
1706     registers_[code].NotifyRegisterLogged();
1707   }
1708   // Scalar-format accesses use a simple format:
1709   //   "# {reg}: 0x{value} -> {address}"
1710 
1711   // Suppress the newline, so the access annotation goes on the same line.
1712   PrintRegister(code, format, "");
1713   fprintf(stream_,
1714           " %s %s0x%016" PRIxPTR "%s\n",
1715           op,
1716           clr_memory_address,
1717           address,
1718           clr_normal);
1719 }
1720 
1721 void Simulator::PrintVAccess(int code,
1722                              PrintRegisterFormat format,
1723                              const char* op,
1724                              uintptr_t address) {
1725   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1726 
1727   // Scalar-format accesses use a simple format:
1728   //   "# v{code}: 0x{value} -> {address}"
1729 
1730   // Suppress the newline, so the access annotation goes on the same line.
1731   PrintVRegister(code, format, "");
1732   fprintf(stream_,
1733           " %s %s0x%016" PRIxPTR "%s\n",
1734           op,
1735           clr_memory_address,
1736           address,
1737           clr_normal);
1738 }
1739 
1740 void Simulator::PrintVStructAccess(int rt_code,
1741                                    int reg_count,
1742                                    PrintRegisterFormat format,
1743                                    const char* op,
1744                                    uintptr_t address) {
1745   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1746 
1747   // For example:
1748   //   "# v{code}: 0x{value}"
1749   //   "#     ...: 0x{value}"
1750   //   "#              ║   ╙─ {struct_value} -> {lowest_address}"
1751   //   "#              ╙───── {struct_value} -> {highest_address}"
1752 
1753   uint16_t lane_mask = GetPrintRegLaneMask(format);
1754   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1755 
1756   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
1757   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1758   for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) {
1759     uint16_t access_mask = 1 << i;
1760     VIXL_ASSERT((lane_mask & access_mask) != 0);
1761     lane_mask = PrintPartialAccess(access_mask,
1762                                    lane_mask,
1763                                    reg_count,
1764                                    lane_size_in_bytes,
1765                                    op,
1766                                    address + (i * reg_count));
1767   }
1768 }
1769 
1770 void Simulator::PrintVSingleStructAccess(int rt_code,
1771                                          int reg_count,
1772                                          int lane,
1773                                          PrintRegisterFormat format,
1774                                          const char* op,
1775                                          uintptr_t address) {
1776   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1777 
1778   // For example:
1779   //   "# v{code}: 0x{value}"
1780   //   "#     ...: 0x{value}"
1781   //   "#              ╙───── {struct_value} -> {address}"
1782 
1783   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1784   uint16_t lane_mask = 1 << (lane * lane_size_in_bytes);
1785   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1786   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1787 }
1788 
1789 void Simulator::PrintVReplicatingStructAccess(int rt_code,
1790                                               int reg_count,
1791                                               PrintRegisterFormat format,
1792                                               const char* op,
1793                                               uintptr_t address) {
1794   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1795 
1796   // For example:
1797   //   "# v{code}: 0x{value}"
1798   //   "#     ...: 0x{value}"
1799   //   "#            ╙─╨─╨─╨─ {struct_value} -> {address}"
1800 
1801   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1802   uint16_t lane_mask = GetPrintRegLaneMask(format);
1803   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1804   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1805 }
1806 
1807 void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) {
1808   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1809 
1810   // Scalar-format accesses are split into separate chunks, each of which uses a
1811   // simple format:
1812   //   "#   z{code}<127:0>: 0x{value} -> {address}"
1813   //   "# z{code}<255:128>: 0x{value} -> {address + 16}"
1814   //   "# z{code}<383:256>: 0x{value} -> {address + 32}"
1815   // etc
1816 
1817   int vl = GetVectorLengthInBits();
1818   VIXL_ASSERT((vl % kQRegSize) == 0);
1819   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1820     // Suppress the newline, so the access annotation goes on the same line.
1821     PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, "");
1822     fprintf(stream_,
1823             " %s %s0x%016" PRIxPTR "%s\n",
1824             op,
1825             clr_memory_address,
1826             address,
1827             clr_normal);
1828     address += kQRegSizeInBytes;
1829   }
1830 }
1831 
1832 void Simulator::PrintZStructAccess(int rt_code,
1833                                    int reg_count,
1834                                    const LogicPRegister& pg,
1835                                    PrintRegisterFormat format,
1836                                    int msize_in_bytes,
1837                                    const char* op,
1838                                    const LogicSVEAddressVector& addr) {
1839   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1840 
1841   // For example:
1842   //   "# z{code}<255:128>: 0x{value}"
1843   //   "#     ...<255:128>: 0x{value}"
1844   //   "#                       ║   ╙─ {struct_value} -> {first_address}"
1845   //   "#                       ╙───── {struct_value} -> {last_address}"
1846 
1847   // We're going to print the register in parts, so force a partial format.
1848   bool skip_inactive_chunks = (format & kPrintRegPartial) != 0;
1849   format = GetPrintRegPartial(format);
1850 
1851   int esize_in_bytes = GetPrintRegLaneSizeInBytes(format);
1852   int vl = GetVectorLengthInBits();
1853   VIXL_ASSERT((vl % kQRegSize) == 0);
1854   int lanes_per_q = kQRegSizeInBytes / esize_in_bytes;
1855   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1856     uint16_t pred =
1857         pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format);
1858     if ((pred == 0) && skip_inactive_chunks) continue;
1859 
1860     PrintZRegistersForStructuredAccess(rt_code,
1861                                        q_index,
1862                                        reg_count,
1863                                        pred,
1864                                        format);
1865     if (pred == 0) {
1866       // This register chunk has no active lanes. The loop below would print
1867       // nothing, so leave a blank line to keep structures grouped together.
1868       fprintf(stream_, "#\n");
1869       continue;
1870     }
1871     for (int i = 0; i < lanes_per_q; i++) {
1872       uint16_t access = 1 << (i * esize_in_bytes);
1873       int lane = (q_index * lanes_per_q) + i;
1874       // Skip inactive lanes.
1875       if ((pred & access) == 0) continue;
1876       pred = PrintPartialAccess(access,
1877                                 pred,
1878                                 reg_count,
1879                                 msize_in_bytes,
1880                                 op,
1881                                 addr.GetStructAddress(lane));
1882     }
1883   }
1884 
1885   // We print the whole register, even for stores.
1886   for (int i = 0; i < reg_count; i++) {
1887     vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged();
1888   }
1889 }
1890 
1891 void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) {
1892   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1893 
1894   // Scalar-format accesses are split into separate chunks, each of which uses a
1895   // simple format:
1896   //   "#  p{code}<15:0>: 0b{value} -> {address}"
1897   //   "# p{code}<31:16>: 0b{value} -> {address + 2}"
1898   //   "# p{code}<47:32>: 0b{value} -> {address + 4}"
1899   // etc
1900 
1901   int vl = GetVectorLengthInBits();
1902   VIXL_ASSERT((vl % kQRegSize) == 0);
1903   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1904     // Suppress the newline, so the access annotation goes on the same line.
1905     PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, "");
1906     fprintf(stream_,
1907             " %s %s0x%016" PRIxPTR "%s\n",
1908             op,
1909             clr_memory_address,
1910             address,
1911             clr_normal);
1912     address += kQRegSizeInBytes;
1913   }
1914 }
1915 
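// Memory access trace helpers. Reads notify the destination register that it
// has been logged (the trace shows its new value), whereas writes print only
// the accessed part of the source register, since a store does not change it.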
1916 void Simulator::PrintRead(int rt_code,
1917                           PrintRegisterFormat format,
1918                           uintptr_t address) {
1919   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1920   registers_[rt_code].NotifyRegisterLogged();
1921   PrintAccess(rt_code, format, "<-", address);
1922 }
1923 
1924 void Simulator::PrintExtendingRead(int rt_code,
1925                                    PrintRegisterFormat format,
1926                                    int access_size_in_bytes,
1927                                    uintptr_t address) {
1928   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
1929   if (access_size_in_bytes == reg_size_in_bytes) {
1930     // There is no extension here, so print a simple load.
1931     PrintRead(rt_code, format, address);
1932     return;
1933   }
1934   VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes);
1935 
1936   // For sign- and zero-extension, make it clear that the resulting register
1937   // value is different from what is loaded from memory.
1938   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1939   registers_[rt_code].NotifyRegisterLogged();
1940   PrintRegister(rt_code, format);
1941   PrintPartialAccess(1,
1942                      0,
1943                      1,
1944                      access_size_in_bytes,
1945                      "<-",
1946                      address,
1947                      kXRegSizeInBytes);
1948 }
1949 
1950 void Simulator::PrintVRead(int rt_code,
1951                            PrintRegisterFormat format,
1952                            uintptr_t address) {
1953   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1954   vregisters_[rt_code].NotifyRegisterLogged();
1955   PrintVAccess(rt_code, format, "<-", address);
1956 }
1957 
1958 void Simulator::PrintWrite(int rt_code,
1959                            PrintRegisterFormat format,
1960                            uintptr_t address) {
1961   // Because this trace doesn't represent a change to the source register's
1962   // value, only print the relevant part of the value.
1963   format = GetPrintRegPartial(format);
1964   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1965   registers_[rt_code].NotifyRegisterLogged();
1966   PrintAccess(rt_code, format, "->", address);
1967 }
1968 
1969 void Simulator::PrintVWrite(int rt_code,
1970                             PrintRegisterFormat format,
1971                             uintptr_t address) {
1972   // Because this trace doesn't represent a change to the source register's
1973   // value, only print the relevant part of the value.
1974   format = GetPrintRegPartial(format);
1975   // It only makes sense to write scalar values here. Vectors are handled by
1976   // PrintVStructAccess.
1977   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1978   PrintVAccess(rt_code, format, "->", address);
1979 }
1980 
1981 void Simulator::PrintTakenBranch(const Instruction* target) {
1982   fprintf(stream_,
1983           "# %sBranch%s to 0x%016" PRIx64 ".\n",
1984           clr_branch_marker,
1985           clr_normal,
1986           reinterpret_cast<uint64_t>(target));
1987 }
1988 
1989 // Visitors ---------------------------------------------------------------------
1990 
1991 
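// Decode dispatch: look up the visitor registered for this instruction's
// "form" string (keyed by its hash) and invoke it, falling back to
// VisitUnimplemented when no handler is registered for the form.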
1992 void Simulator::Visit(Metadata* metadata, const Instruction* instr) {
1993   VIXL_ASSERT(metadata->count("form") > 0);
1994   const std::string& form = (*metadata)["form"];
1995   form_hash_ = Hash(form.c_str());
1996   const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap();
1997   FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
1998   if (it == fv->end()) {
1999     VisitUnimplemented(instr);
2000   } else {
2001     (it->second)(this, instr);
2002   }
2003 }
2004 
2005 void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2006   VectorFormat vform = instr->GetSVEVectorFormat();
2007   SimPRegister& pd = ReadPRegister(instr->GetPd());
2008   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2009   SimVRegister& zm = ReadVRegister(instr->GetRm());
2010   SimVRegister& zn = ReadVRegister(instr->GetRn());
2011 
2012   switch (form_hash_) {
2013     case "match_p_p_zz"_h:
2014       match(vform, pd, zn, zm, /* negate_match = */ false);
2015       break;
2016     case "nmatch_p_p_zz"_h:
2017       match(vform, pd, zn, zm, /* negate_match = */ true);
2018       break;
2019     default:
2020       VIXL_UNIMPLEMENTED();
2021   }
2022   mov_zeroing(pd, pg, pd);
2023   PredTest(vform, pg, pd);
2024 }
2025 
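// WHILERW / WHILEWR: the number of leading active lanes is the distance
// between the two pointers, measured in elements; if the accesses cannot
// conflict, every lane is active.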
2026 void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) {
2027   VectorFormat vform = instr->GetSVEVectorFormat();
2028   SimPRegister& pd = ReadPRegister(instr->GetPd());
2029   uint64_t src1 = ReadXRegister(instr->GetRn());
2030   uint64_t src2 = ReadXRegister(instr->GetRm());
2031 
2032   uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1);
2033   absdiff >>= LaneSizeInBytesLog2FromFormat(vform);
2034 
2035   bool no_conflict = false;
2036   switch (form_hash_) {
2037     case "whilerw_p_rr"_h:
2038       no_conflict = (absdiff == 0);
2039       break;
2040     case "whilewr_p_rr"_h:
2041       no_conflict = (absdiff == 0) || (src2 <= src1);
2042       break;
2043     default:
2044       VIXL_UNIMPLEMENTED();
2045   }
2046 
2047   LogicPRegister dst(pd);
2048   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2049     dst.SetActive(vform,
2050                   i,
2051                   no_conflict || (static_cast<uint64_t>(i) < absdiff));
2052   }
2053 
2054   PredTest(vform, GetPTrue(), pd);
2055 }
2056 
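// EXT (constructive, vector pair form): extract a byte-granule window
// starting at the immediate index from the {zn, zn + 1} register pair. An
// out-of-range index is treated as zero, which simply copies zn.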
2057 void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) {
2058   VIXL_ASSERT(form_hash_ == "ext_z_zi_con"_h);
2059 
2060   SimVRegister& zd = ReadVRegister(instr->GetRd());
2061   SimVRegister& zn = ReadVRegister(instr->GetRn());
2062   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
2063 
2064   int index = instr->GetSVEExtractImmediate();
2065   int vl = GetVectorLengthInBytes();
2066   index = (index >= vl) ? 0 : index;
2067 
2068   ext(kFormatVnB, zd, zn, zn2, index);
2069 }
2070 
2071 void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) {
2072   SimVRegister& zd = ReadVRegister(instr->GetRd());
2073   SimVRegister& zm = ReadVRegister(instr->GetRm());
2074   SimVRegister& zn = ReadVRegister(instr->GetRn());
2075 
2076   switch (form_hash_) {
2077     case "histseg_z_zz"_h:
2078       if (instr->GetSVEVectorFormat() == kFormatVnB) {
2079         histogram(kFormatVnB,
2080                   zd,
2081                   GetPTrue(),
2082                   zn,
2083                   zm,
2084                   /* do_segmented = */ true);
2085       } else {
2086         VIXL_UNIMPLEMENTED();
2087       }
2088       break;
2089     case "pmul_z_zz"_h:
2090       pmul(kFormatVnB, zd, zn, zm);
2091       break;
2092     default:
2093       VIXL_UNIMPLEMENTED();
2094   }
2095 }
2096 
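// Indexed multiplies (and the indexed multiply-accumulate and saturating
// variants below) broadcast the selected Zm element to every segment with
// dup_elements_to_segments, then reuse the plain vector multiply logic.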
2097 void Simulator::SimulateSVEMulIndex(const Instruction* instr) {
2098   VectorFormat vform = instr->GetSVEVectorFormat();
2099   SimVRegister& zd = ReadVRegister(instr->GetRd());
2100   SimVRegister& zn = ReadVRegister(instr->GetRn());
2101 
2102   // The encodings for B- and H-sized lanes are redefined so that they
2103   // encode the most significant bit of the index for H-sized lanes. B-sized
2104   // lanes are not supported.
2105   if (vform == kFormatVnB) vform = kFormatVnH;
2106 
2107   VIXL_ASSERT((form_hash_ == "mul_z_zzi_d"_h) ||
2108               (form_hash_ == "mul_z_zzi_h"_h) ||
2109               (form_hash_ == "mul_z_zzi_s"_h));
2110 
2111   SimVRegister temp;
2112   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2113   mul(vform, zd, zn, temp);
2114 }
2115 
2116 void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) {
2117   VectorFormat vform = instr->GetSVEVectorFormat();
2118   SimVRegister& zda = ReadVRegister(instr->GetRd());
2119   SimVRegister& zn = ReadVRegister(instr->GetRn());
2120 
2121   // The encodings for B- and H-sized lanes are redefined so that they
2122   // encode the most significant bit of the index for H-sized lanes. B-sized
2123   // lanes are not supported.
2124   if (vform == kFormatVnB) vform = kFormatVnH;
2125 
2126   VIXL_ASSERT(
2127       (form_hash_ == "mla_z_zzzi_d"_h) || (form_hash_ == "mla_z_zzzi_h"_h) ||
2128       (form_hash_ == "mla_z_zzzi_s"_h) || (form_hash_ == "mls_z_zzzi_d"_h) ||
2129       (form_hash_ == "mls_z_zzzi_h"_h) || (form_hash_ == "mls_z_zzzi_s"_h));
2130 
2131   SimVRegister temp;
2132   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2133   if (instr->ExtractBit(10) == 0) {
2134     mla(vform, zda, zda, zn, temp);
2135   } else {
2136     mls(vform, zda, zda, zn, temp);
2137   }
2138 }
2139 
2140 void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) {
2141   VectorFormat vform = instr->GetSVEVectorFormat();
2142   SimVRegister& zd = ReadVRegister(instr->GetRd());
2143   SimVRegister& zn = ReadVRegister(instr->GetRn());
2144 
2145   // The encodings for B- and H-sized lanes are redefined so that they
2146   // encode the most significant bit of the index for H-sized lanes. B-sized
2147   // lanes are not supported.
2148   if (vform == kFormatVnB) {
2149     vform = kFormatVnH;
2150   }
2151 
2152   SimVRegister temp;
2153   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2154   switch (form_hash_) {
2155     case "sqdmulh_z_zzi_h"_h:
2156     case "sqdmulh_z_zzi_s"_h:
2157     case "sqdmulh_z_zzi_d"_h:
2158       sqdmulh(vform, zd, zn, temp);
2159       break;
2160     case "sqrdmulh_z_zzi_h"_h:
2161     case "sqrdmulh_z_zzi_s"_h:
2162     case "sqrdmulh_z_zzi_d"_h:
2163       sqrdmulh(vform, zd, zn, temp);
2164       break;
2165     default:
2166       VIXL_UNIMPLEMENTED();
2167   }
2168 }
2169 
2170 void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
2171   VectorFormat vform = instr->GetSVEVectorFormat();
2172   SimVRegister& zd = ReadVRegister(instr->GetRd());
2173   SimVRegister& zn = ReadVRegister(instr->GetRn());
2174 
2175   SimVRegister temp, zm_idx, zn_b, zn_t;
2176   // Instead of calling the indexed form of the instruction logic, we call
2177   // the vector form, which can reuse the existing function logic without
2178   // modification. Select the specified elements based on the index input and
2179   // then pack them into the corresponding positions.
2180   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2181   dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
2182   pack_even_elements(vform_half, zm_idx, temp);
2183 
2184   pack_even_elements(vform_half, zn_b, zn);
2185   pack_odd_elements(vform_half, zn_t, zn);
2186 
2187   switch (form_hash_) {
2188     case "smullb_z_zzi_s"_h:
2189     case "smullb_z_zzi_d"_h:
2190       smull(vform, zd, zn_b, zm_idx);
2191       break;
2192     case "smullt_z_zzi_s"_h:
2193     case "smullt_z_zzi_d"_h:
2194       smull(vform, zd, zn_t, zm_idx);
2195       break;
2196     case "sqdmullb_z_zzi_d"_h:
2197       sqdmull(vform, zd, zn_b, zm_idx);
2198       break;
2199     case "sqdmullt_z_zzi_d"_h:
2200       sqdmull(vform, zd, zn_t, zm_idx);
2201       break;
2202     case "umullb_z_zzi_s"_h:
2203     case "umullb_z_zzi_d"_h:
2204       umull(vform, zd, zn_b, zm_idx);
2205       break;
2206     case "umullt_z_zzi_s"_h:
2207     case "umullt_z_zzi_d"_h:
2208       umull(vform, zd, zn_t, zm_idx);
2209       break;
2210     case "sqdmullb_z_zzi_s"_h:
2211       sqdmull(vform, zd, zn_b, zm_idx);
2212       break;
2213     case "sqdmullt_z_zzi_s"_h:
2214       sqdmull(vform, zd, zn_t, zm_idx);
2215       break;
2216     case "smlalb_z_zzzi_s"_h:
2217     case "smlalb_z_zzzi_d"_h:
2218       smlal(vform, zd, zn_b, zm_idx);
2219       break;
2220     case "smlalt_z_zzzi_s"_h:
2221     case "smlalt_z_zzzi_d"_h:
2222       smlal(vform, zd, zn_t, zm_idx);
2223       break;
2224     case "smlslb_z_zzzi_s"_h:
2225     case "smlslb_z_zzzi_d"_h:
2226       smlsl(vform, zd, zn_b, zm_idx);
2227       break;
2228     case "smlslt_z_zzzi_s"_h:
2229     case "smlslt_z_zzzi_d"_h:
2230       smlsl(vform, zd, zn_t, zm_idx);
2231       break;
2232     case "umlalb_z_zzzi_s"_h:
2233     case "umlalb_z_zzzi_d"_h:
2234       umlal(vform, zd, zn_b, zm_idx);
2235       break;
2236     case "umlalt_z_zzzi_s"_h:
2237     case "umlalt_z_zzzi_d"_h:
2238       umlal(vform, zd, zn_t, zm_idx);
2239       break;
2240     case "umlslb_z_zzzi_s"_h:
2241     case "umlslb_z_zzzi_d"_h:
2242       umlsl(vform, zd, zn_b, zm_idx);
2243       break;
2244     case "umlslt_z_zzzi_s"_h:
2245     case "umlslt_z_zzzi_d"_h:
2246       umlsl(vform, zd, zn_t, zm_idx);
2247       break;
2248     default:
2249       VIXL_UNIMPLEMENTED();
2250   }
2251 }
2252 
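// Narrowing conversions with a "top" destination (such as FCVTNT) write
// their results to the odd-numbered destination elements and preserve the
// even-numbered ones; this is modelled by packing, converting and
// re-interleaving with zip1.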
2253 void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) {
2254   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2255   SimVRegister& zd = ReadVRegister(instr->GetRd());
2256   SimVRegister& zn = ReadVRegister(instr->GetRn());
2257   SimVRegister result, zd_b;
2258 
2259   pack_even_elements(kFormatVnH, zd_b, zd);
2260 
2261   switch (form_hash_) {
2262     case "fcvtnt_z_p_z_s2h"_h:
2263       fcvt(kFormatVnH, kFormatVnS, result, pg, zn);
2264       pack_even_elements(kFormatVnH, result, result);
2265       zip1(kFormatVnH, result, zd_b, result);
2266       break;
2267     default:
2268       VIXL_UNIMPLEMENTED();
2269   }
2270   mov_merging(kFormatVnS, zd, pg, result);
2271 }
2272 
2273 void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) {
2274   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2275   SimVRegister& zd = ReadVRegister(instr->GetRd());
2276   SimVRegister& zn = ReadVRegister(instr->GetRn());
2277   SimVRegister result, zero, zd_b;
2278 
2279   zero.Clear();
2280   pack_even_elements(kFormatVnS, zd_b, zd);
2281 
2282   switch (form_hash_) {
2283     case "fcvtnt_z_p_z_d2s"_h:
2284       fcvt(kFormatVnS, kFormatVnD, result, pg, zn);
2285       pack_even_elements(kFormatVnS, result, result);
2286       zip1(kFormatVnS, result, zd_b, result);
2287       break;
2288     case "fcvtx_z_p_z_d2s"_h:
2289       fcvtxn(kFormatVnS, result, zn);
2290       zip1(kFormatVnS, result, result, zero);
2291       break;
2292     case "fcvtxnt_z_p_z_d2s"_h:
2293       fcvtxn(kFormatVnS, result, zn);
2294       zip1(kFormatVnS, result, zd_b, result);
2295       break;
2296     default:
2297       VIXL_UNIMPLEMENTED();
2298   }
2299   mov_merging(kFormatVnD, zd, pg, result);
2300 }
2301 
2302 void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) {
2303   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2304   SimVRegister& zd = ReadVRegister(instr->GetRd());
2305   SimVRegister& zn = ReadVRegister(instr->GetRn());
2306   SimVRegister result;
2307 
2308   switch (form_hash_) {
2309     case "fcvtlt_z_p_z_h2s"_h:
2310       ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes);
2311       fcvt(kFormatVnS, kFormatVnH, zd, pg, result);
2312       break;
2313     case "fcvtlt_z_p_z_s2d"_h:
2314       ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes);
2315       fcvt(kFormatVnD, kFormatVnS, zd, pg, result);
2316       break;
2317     default:
2318       VIXL_UNIMPLEMENTED();
2319   }
2320 }
2321 
2322 void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
2323   VectorFormat vform = instr->GetSVEVectorFormat();
2324   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2325   SimVRegister& zd = ReadVRegister(instr->GetRd());
2326   SimVRegister& zn = ReadVRegister(instr->GetRn());
2327   SimVRegister result;
2328 
2329   if (vform != kFormatVnS) {
2330     VIXL_UNIMPLEMENTED();
2331   }
2332 
2333   switch (form_hash_) {
2334     case "urecpe_z_p_z"_h:
2335       urecpe(vform, result, zn);
2336       break;
2337     case "ursqrte_z_p_z"_h:
2338       ursqrte(vform, result, zn);
2339       break;
2340     default:
2341       VIXL_UNIMPLEMENTED();
2342   }
2343   mov_merging(vform, zd, pg, result);
2344 }
2345 
2346 void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) {
2347   VectorFormat vform = instr->GetSVEVectorFormat();
2348   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2349   SimVRegister& zd = ReadVRegister(instr->GetRd());
2350   SimVRegister& zn = ReadVRegister(instr->GetRn());
2351   SimVRegister result;
2352 
2353   switch (form_hash_) {
2354     case "flogb_z_p_z"_h:
2355       vform = instr->GetSVEVectorFormat(17);
2356       flogb(vform, result, zn);
2357       break;
2358     case "sqabs_z_p_z"_h:
2359       abs(vform, result, zn).SignedSaturate(vform);
2360       break;
2361     case "sqneg_z_p_z"_h:
2362       neg(vform, result, zn).SignedSaturate(vform);
2363       break;
2364     default:
2365       VIXL_UNIMPLEMENTED();
2366   }
2367   mov_merging(vform, zd, pg, result);
2368 }
2369 
2370 void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2371   VectorFormat vform = instr->GetSVEVectorFormat();
2372   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2373   SimVRegister& zd = ReadVRegister(instr->GetRd());
2374   SimVRegister& zm = ReadVRegister(instr->GetRm());
2375   SimVRegister& zn = ReadVRegister(instr->GetRn());
2376   SimVRegister result;
2377 
2378   VIXL_ASSERT(form_hash_ == "histcnt_z_p_zz"_h);
2379   if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
2380     histogram(vform, result, pg, zn, zm);
2381     mov_zeroing(vform, zd, pg, result);
2382   } else {
2383     VIXL_UNIMPLEMENTED();
2384   }
2385 }
2386 
2387 void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
2388   VectorFormat vform = instr->GetSVEVectorFormat();
2389   SimVRegister& zd = ReadVRegister(instr->GetRd());
2390   SimVRegister& zm = ReadVRegister(instr->GetRm());
2391   SimVRegister& zn = ReadVRegister(instr->GetRn());
2392   SimVRegister result;
2393   bool do_bext = false;
2394 
2395   switch (form_hash_) {
2396     case "bdep_z_zz"_h:
2397       bdep(vform, zd, zn, zm);
2398       break;
2399     case "bext_z_zz"_h:
2400       do_bext = true;
2401       VIXL_FALLTHROUGH();
2402     case "bgrp_z_zz"_h:
2403       bgrp(vform, zd, zn, zm, do_bext);
2404       break;
2405     case "eorbt_z_zz"_h:
2406       rotate_elements_right(vform, result, zm, 1);
2407       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2408       mov_alternating(vform, zd, result, 0);
2409       break;
2410     case "eortb_z_zz"_h:
2411       rotate_elements_right(vform, result, zm, -1);
2412       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2413       mov_alternating(vform, zd, result, 1);
2414       break;
2415     case "mul_z_zz"_h:
2416       mul(vform, zd, zn, zm);
2417       break;
2418     case "smulh_z_zz"_h:
2419       smulh(vform, zd, zn, zm);
2420       break;
2421     case "sqdmulh_z_zz"_h:
2422       sqdmulh(vform, zd, zn, zm);
2423       break;
2424     case "sqrdmulh_z_zz"_h:
2425       sqrdmulh(vform, zd, zn, zm);
2426       break;
2427     case "umulh_z_zz"_h:
2428       umulh(vform, zd, zn, zm);
2429       break;
2430     default:
2431       VIXL_UNIMPLEMENTED();
2432   }
2433 }
2434 
2435 void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) {
2436   VectorFormat vform = instr->GetSVEVectorFormat();
2437   SimVRegister& zd = ReadVRegister(instr->GetRd());
2438   SimVRegister& zm = ReadVRegister(instr->GetRm());
2439   SimVRegister& zn = ReadVRegister(instr->GetRn());
2440 
2441   SimVRegister zm_b, zm_t;
2442   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2443   pack_even_elements(vform_half, zm_b, zm);
2444   pack_odd_elements(vform_half, zm_t, zm);
2445 
2446   switch (form_hash_) {
2447     case "saddwb_z_zz"_h:
2448       saddw(vform, zd, zn, zm_b);
2449       break;
2450     case "saddwt_z_zz"_h:
2451       saddw(vform, zd, zn, zm_t);
2452       break;
2453     case "ssubwb_z_zz"_h:
2454       ssubw(vform, zd, zn, zm_b);
2455       break;
2456     case "ssubwt_z_zz"_h:
2457       ssubw(vform, zd, zn, zm_t);
2458       break;
2459     case "uaddwb_z_zz"_h:
2460       uaddw(vform, zd, zn, zm_b);
2461       break;
2462     case "uaddwt_z_zz"_h:
2463       uaddw(vform, zd, zn, zm_t);
2464       break;
2465     case "usubwb_z_zz"_h:
2466       usubw(vform, zd, zn, zm_b);
2467       break;
2468     case "usubwt_z_zz"_h:
2469       usubw(vform, zd, zn, zm_t);
2470       break;
2471     default:
2472       VIXL_UNIMPLEMENTED();
2473   }
2474 }
2475 
2476 void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) {
2477   SimVRegister& zd = ReadVRegister(instr->GetRd());
2478   SimVRegister& zn = ReadVRegister(instr->GetRn());
2479 
2480   std::pair<int, int> shift_and_lane_size =
2481       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2482   int lane_size = shift_and_lane_size.second;
2483   VIXL_ASSERT((lane_size >= 0) &&
2484               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2485   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2486   int shift_dist = shift_and_lane_size.first;
2487 
2488   switch (form_hash_) {
2489     case "sli_z_zzi"_h:
2490       // The shift distance is encoded differently for left shifts; convert
2491       // the encoded value here.
2492       shift_dist = (8 << lane_size) - shift_dist;
2493       sli(vform, zd, zn, shift_dist);
2494       break;
2495     case "sri_z_zzi"_h:
2496       sri(vform, zd, zn, shift_dist);
2497       break;
2498     default:
2499       VIXL_UNIMPLEMENTED();
2500   }
2501 }
2502 
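// Narrowing operations come in "bottom" (B) and "top" (T) forms: bottom forms
// write the even-numbered destination elements and zero the odd-numbered
// ones, while top forms write the odd-numbered elements and preserve the even
// ones. The common tail below re-interleaves the results accordingly.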
2503 void Simulator::SimulateSVENarrow(const Instruction* instr) {
2504   SimVRegister& zd = ReadVRegister(instr->GetRd());
2505   SimVRegister& zn = ReadVRegister(instr->GetRn());
2506   SimVRegister result;
2507 
2508   std::pair<int, int> shift_and_lane_size =
2509       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2510   int lane_size = shift_and_lane_size.second;
2511   VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
2512               (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)));
2513   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2514   int right_shift_dist = shift_and_lane_size.first;
2515   bool top = false;
2516 
2517   switch (form_hash_) {
2518     case "sqxtnt_z_zz"_h:
2519       top = true;
2520       VIXL_FALLTHROUGH();
2521     case "sqxtnb_z_zz"_h:
2522       sqxtn(vform, result, zn);
2523       break;
2524     case "sqxtunt_z_zz"_h:
2525       top = true;
2526       VIXL_FALLTHROUGH();
2527     case "sqxtunb_z_zz"_h:
2528       sqxtun(vform, result, zn);
2529       break;
2530     case "uqxtnt_z_zz"_h:
2531       top = true;
2532       VIXL_FALLTHROUGH();
2533     case "uqxtnb_z_zz"_h:
2534       uqxtn(vform, result, zn);
2535       break;
2536     case "rshrnt_z_zi"_h:
2537       top = true;
2538       VIXL_FALLTHROUGH();
2539     case "rshrnb_z_zi"_h:
2540       rshrn(vform, result, zn, right_shift_dist);
2541       break;
2542     case "shrnt_z_zi"_h:
2543       top = true;
2544       VIXL_FALLTHROUGH();
2545     case "shrnb_z_zi"_h:
2546       shrn(vform, result, zn, right_shift_dist);
2547       break;
2548     case "sqrshrnt_z_zi"_h:
2549       top = true;
2550       VIXL_FALLTHROUGH();
2551     case "sqrshrnb_z_zi"_h:
2552       sqrshrn(vform, result, zn, right_shift_dist);
2553       break;
2554     case "sqrshrunt_z_zi"_h:
2555       top = true;
2556       VIXL_FALLTHROUGH();
2557     case "sqrshrunb_z_zi"_h:
2558       sqrshrun(vform, result, zn, right_shift_dist);
2559       break;
2560     case "sqshrnt_z_zi"_h:
2561       top = true;
2562       VIXL_FALLTHROUGH();
2563     case "sqshrnb_z_zi"_h:
2564       sqshrn(vform, result, zn, right_shift_dist);
2565       break;
2566     case "sqshrunt_z_zi"_h:
2567       top = true;
2568       VIXL_FALLTHROUGH();
2569     case "sqshrunb_z_zi"_h:
2570       sqshrun(vform, result, zn, right_shift_dist);
2571       break;
2572     case "uqrshrnt_z_zi"_h:
2573       top = true;
2574       VIXL_FALLTHROUGH();
2575     case "uqrshrnb_z_zi"_h:
2576       uqrshrn(vform, result, zn, right_shift_dist);
2577       break;
2578     case "uqshrnt_z_zi"_h:
2579       top = true;
2580       VIXL_FALLTHROUGH();
2581     case "uqshrnb_z_zi"_h:
2582       uqshrn(vform, result, zn, right_shift_dist);
2583       break;
2584     default:
2585       VIXL_UNIMPLEMENTED();
2586   }
2587 
2588   if (top) {
2589     // Keep even elements, replace odd elements with the results.
2590     xtn(vform, zd, zd);
2591     zip1(vform, zd, zd, result);
2592   } else {
2593     // Zero odd elements, replace even elements with the results.
2594     SimVRegister zero;
2595     zero.Clear();
2596     zip1(vform, zd, result, zero);
2597   }
2598 }
2599 
2600 void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) {
2601   VectorFormat vform = instr->GetSVEVectorFormat();
2602   SimVRegister& zd = ReadVRegister(instr->GetRd());
2603   SimVRegister& zm = ReadVRegister(instr->GetRm());
2604   SimVRegister& zn = ReadVRegister(instr->GetRn());
2605   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2606 
2607   // Construct temporary registers containing the even (bottom) and odd (top)
2608   // elements.
2609   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2610   pack_even_elements(vform_half, zn_b, zn);
2611   pack_even_elements(vform_half, zm_b, zm);
2612   pack_odd_elements(vform_half, zn_t, zn);
2613   pack_odd_elements(vform_half, zm_t, zm);
2614 
2615   switch (form_hash_) {
2616     case "sabdlb_z_zz"_h:
2617       sabdl(vform, zd, zn_b, zm_b);
2618       break;
2619     case "sabdlt_z_zz"_h:
2620       sabdl(vform, zd, zn_t, zm_t);
2621       break;
2622     case "saddlb_z_zz"_h:
2623       saddl(vform, zd, zn_b, zm_b);
2624       break;
2625     case "saddlbt_z_zz"_h:
2626       saddl(vform, zd, zn_b, zm_t);
2627       break;
2628     case "saddlt_z_zz"_h:
2629       saddl(vform, zd, zn_t, zm_t);
2630       break;
2631     case "ssublb_z_zz"_h:
2632       ssubl(vform, zd, zn_b, zm_b);
2633       break;
2634     case "ssublbt_z_zz"_h:
2635       ssubl(vform, zd, zn_b, zm_t);
2636       break;
2637     case "ssublt_z_zz"_h:
2638       ssubl(vform, zd, zn_t, zm_t);
2639       break;
2640     case "ssubltb_z_zz"_h:
2641       ssubl(vform, zd, zn_t, zm_b);
2642       break;
2643     case "uabdlb_z_zz"_h:
2644       uabdl(vform, zd, zn_b, zm_b);
2645       break;
2646     case "uabdlt_z_zz"_h:
2647       uabdl(vform, zd, zn_t, zm_t);
2648       break;
2649     case "uaddlb_z_zz"_h:
2650       uaddl(vform, zd, zn_b, zm_b);
2651       break;
2652     case "uaddlt_z_zz"_h:
2653       uaddl(vform, zd, zn_t, zm_t);
2654       break;
2655     case "usublb_z_zz"_h:
2656       usubl(vform, zd, zn_b, zm_b);
2657       break;
2658     case "usublt_z_zz"_h:
2659       usubl(vform, zd, zn_t, zm_t);
2660       break;
2661     case "sabalb_z_zzz"_h:
2662       sabal(vform, zd, zn_b, zm_b);
2663       break;
2664     case "sabalt_z_zzz"_h:
2665       sabal(vform, zd, zn_t, zm_t);
2666       break;
2667     case "uabalb_z_zzz"_h:
2668       uabal(vform, zd, zn_b, zm_b);
2669       break;
2670     case "uabalt_z_zzz"_h:
2671       uabal(vform, zd, zn_t, zm_t);
2672       break;
2673     default:
2674       VIXL_UNIMPLEMENTED();
2675   }
2676 }
2677 
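// Widening multiplies operate on either the even (bottom) or odd (top) source
// elements; these are extracted with pack_even_elements and
// pack_odd_elements, then passed to the existing widening multiply helpers.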
2678 void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
2679   VectorFormat vform = instr->GetSVEVectorFormat();
2680   SimVRegister& zd = ReadVRegister(instr->GetRd());
2681   SimVRegister& zm = ReadVRegister(instr->GetRm());
2682   SimVRegister& zn = ReadVRegister(instr->GetRn());
2683   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2684   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2685   pack_even_elements(vform_half, zn_b, zn);
2686   pack_even_elements(vform_half, zm_b, zm);
2687   pack_odd_elements(vform_half, zn_t, zn);
2688   pack_odd_elements(vform_half, zm_t, zm);
2689 
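  // As in the interleaved arithmetic above, the "b" forms multiply the even
  // (bottom) source elements and the "t" forms multiply the odd (top) source
  // elements, producing double-width products. pmull is a polynomial
  // (carry-less) multiply; sqdmull doubles and saturates the signed product.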
2690   switch (form_hash_) {
2691     case "pmullb_z_zz"_h:
2692       // '00' is reserved for Q-sized lane.
2693       if (vform == kFormatVnB) {
2694         VIXL_UNIMPLEMENTED();
2695       }
2696       pmull(vform, zd, zn_b, zm_b);
2697       break;
2698     case "pmullt_z_zz"_h:
2699       // '00' is reserved for Q-sized lane.
2700       if (vform == kFormatVnB) {
2701         VIXL_UNIMPLEMENTED();
2702       }
2703       pmull(vform, zd, zn_t, zm_t);
2704       break;
2705     case "smullb_z_zz"_h:
2706       smull(vform, zd, zn_b, zm_b);
2707       break;
2708     case "smullt_z_zz"_h:
2709       smull(vform, zd, zn_t, zm_t);
2710       break;
2711     case "sqdmullb_z_zz"_h:
2712       sqdmull(vform, zd, zn_b, zm_b);
2713       break;
2714     case "sqdmullt_z_zz"_h:
2715       sqdmull(vform, zd, zn_t, zm_t);
2716       break;
2717     case "umullb_z_zz"_h:
2718       umull(vform, zd, zn_b, zm_b);
2719       break;
2720     case "umullt_z_zz"_h:
2721       umull(vform, zd, zn_t, zm_t);
2722       break;
2723     default:
2724       VIXL_UNIMPLEMENTED();
2725   }
2726 }
2727 
2728 void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) {
2729   SimVRegister& zd = ReadVRegister(instr->GetRd());
2730   SimVRegister& zm = ReadVRegister(instr->GetRm());
2731   SimVRegister& zn = ReadVRegister(instr->GetRn());
2732   SimVRegister result;
2733   bool top = false;
2734 
2735   VectorFormat vform_src = instr->GetSVEVectorFormat();
2736   if (vform_src == kFormatVnB) {
2737     VIXL_UNIMPLEMENTED();
2738   }
2739   VectorFormat vform = VectorFormatHalfWidth(vform_src);
2740 
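  // These are narrowing "high half" operations: the full-width sum or
  // difference is computed and only the most significant half of each result
  // element is kept; the r-prefixed forms round before discarding the low
  // half. The *hnb forms place the narrowed results in the even (bottom)
  // destination elements and zero the odd ones, while the *hnt forms place
  // them in the odd (top) elements and preserve the even elements of zd, as
  // handled by the zip1 sequences after the switch.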
2741   switch (form_hash_) {
2742     case "addhnt_z_zz"_h:
2743       top = true;
2744       VIXL_FALLTHROUGH();
2745     case "addhnb_z_zz"_h:
2746       addhn(vform, result, zn, zm);
2747       break;
2748     case "raddhnt_z_zz"_h:
2749       top = true;
2750       VIXL_FALLTHROUGH();
2751     case "raddhnb_z_zz"_h:
2752       raddhn(vform, result, zn, zm);
2753       break;
2754     case "rsubhnt_z_zz"_h:
2755       top = true;
2756       VIXL_FALLTHROUGH();
2757     case "rsubhnb_z_zz"_h:
2758       rsubhn(vform, result, zn, zm);
2759       break;
2760     case "subhnt_z_zz"_h:
2761       top = true;
2762       VIXL_FALLTHROUGH();
2763     case "subhnb_z_zz"_h:
2764       subhn(vform, result, zn, zm);
2765       break;
2766     default:
2767       VIXL_UNIMPLEMENTED();
2768   }
2769 
2770   if (top) {
2771     // Keep even elements, replace odd elements with the results.
2772     xtn(vform, zd, zd);
2773     zip1(vform, zd, zd, result);
2774   } else {
2775     // Zero odd elements, replace even elements with the results.
2776     SimVRegister zero;
2777     zero.Clear();
2778     zip1(vform, zd, result, zero);
2779   }
2780 }
2781 
2782 void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) {
2783   SimVRegister& zd = ReadVRegister(instr->GetRd());
2784   SimVRegister& zn = ReadVRegister(instr->GetRn());
2785   SimVRegister zn_b, zn_t;
2786 
2787   std::pair<int, int> shift_and_lane_size =
2788       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2789   int lane_size = shift_and_lane_size.second;
2790   VIXL_ASSERT((lane_size >= 0) &&
2791               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2792   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1);
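  // The destination format uses lane_size + 1 because the shifted results are
  // double width. The decode helper expresses the immediate as a right-shift
  // distance, so the left-shift amount needed by SSHLL/USHLL is recovered by
  // subtracting it from the source element size in bits.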
2793   int right_shift_dist = shift_and_lane_size.first;
2794   int left_shift_dist = (8 << lane_size) - right_shift_dist;
2795 
2796   // Construct temporary registers containing the even (bottom) and odd (top)
2797   // elements.
2798   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2799   pack_even_elements(vform_half, zn_b, zn);
2800   pack_odd_elements(vform_half, zn_t, zn);
2801 
2802   switch (form_hash_) {
2803     case "sshllb_z_zi"_h:
2804       sshll(vform, zd, zn_b, left_shift_dist);
2805       break;
2806     case "sshllt_z_zi"_h:
2807       sshll(vform, zd, zn_t, left_shift_dist);
2808       break;
2809     case "ushllb_z_zi"_h:
2810       ushll(vform, zd, zn_b, left_shift_dist);
2811       break;
2812     case "ushllt_z_zi"_h:
2813       ushll(vform, zd, zn_t, left_shift_dist);
2814       break;
2815     default:
2816       VIXL_UNIMPLEMENTED();
2817   }
2818 }
2819 
2820 void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) {
2821   VectorFormat vform = instr->GetSVEVectorFormat();
2822   SimVRegister& zda = ReadVRegister(instr->GetRd());
2823   SimVRegister& zn = ReadVRegister(instr->GetRn());
2824   unsigned zm_code = instr->GetRm();
2825   int index = -1;
2826   bool is_mla = false;
2827 
2828   switch (form_hash_) {
2829     case "sqrdmlah_z_zzz"_h:
2830       is_mla = true;
2831       VIXL_FALLTHROUGH();
2832     case "sqrdmlsh_z_zzz"_h:
2833       // Nothing to do.
2834       break;
2835     case "sqrdmlah_z_zzzi_h"_h:
2836       is_mla = true;
2837       VIXL_FALLTHROUGH();
2838     case "sqrdmlsh_z_zzzi_h"_h:
2839       vform = kFormatVnH;
2840       index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
2841       zm_code = instr->ExtractBits(18, 16);
2842       break;
2843     case "sqrdmlah_z_zzzi_s"_h:
2844       is_mla = true;
2845       VIXL_FALLTHROUGH();
2846     case "sqrdmlsh_z_zzzi_s"_h:
2847       vform = kFormatVnS;
2848       index = instr->ExtractBits(20, 19);
2849       zm_code = instr->ExtractBits(18, 16);
2850       break;
2851     case "sqrdmlah_z_zzzi_d"_h:
2852       is_mla = true;
2853       VIXL_FALLTHROUGH();
2854     case "sqrdmlsh_z_zzzi_d"_h:
2855       vform = kFormatVnD;
2856       index = instr->ExtractBit(20);
2857       zm_code = instr->ExtractBits(19, 16);
2858       break;
2859     default:
2860       VIXL_UNIMPLEMENTED();
2861   }
2862 
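  // For the indexed forms, the element of zm selected by the index is
  // replicated across every element of its 128-bit segment before the
  // multiply, so each segment uses its own copy of the indexed element.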
2863   SimVRegister& zm = ReadVRegister(zm_code);
2864   SimVRegister zm_idx;
2865   if (index >= 0) {
2866     dup_elements_to_segments(vform, zm_idx, zm, index);
2867   }
2868 
2869   if (is_mla) {
2870     sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm);
2871   } else {
2872     sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm);
2873   }
2874 }
2875 
2876 void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
2877   SimVRegister& zda = ReadVRegister(instr->GetRd());
2878   SimVRegister& zn = ReadVRegister(instr->GetRn());
2879   SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
2880 
2881   SimVRegister temp, zm_idx, zn_b, zn_t;
2882   Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
2883   dup_elements_to_segments(kFormatVnS, temp, zm, index);
2884   pack_even_elements(kFormatVnS, zm_idx, temp);
2885   pack_even_elements(kFormatVnS, zn_b, zn);
2886   pack_odd_elements(kFormatVnS, zn_t, zn);
2887 
2888   switch (form_hash_) {
2889     case "sqdmlalb_z_zzzi_d"_h:
2890       sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
2891       break;
2892     case "sqdmlalt_z_zzzi_d"_h:
2893       sqdmlal(kFormatVnD, zda, zn_t, zm_idx);
2894       break;
2895     case "sqdmlslb_z_zzzi_d"_h:
2896       sqdmlsl(kFormatVnD, zda, zn_b, zm_idx);
2897       break;
2898     case "sqdmlslt_z_zzzi_d"_h:
2899       sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
2900       break;
2901     default:
2902       VIXL_UNIMPLEMENTED();
2903   }
2904 }
2905 
2906 void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
2907   SimVRegister& zda = ReadVRegister(instr->GetRd());
2908   SimVRegister& zm = ReadVRegister(instr->GetRm());
2909   SimVRegister& zn = ReadVRegister(instr->GetRn());
2910 
2911   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2912   pack_even_elements(kFormatVnH, zn_b, zn);
2913   pack_even_elements(kFormatVnH, zm_b, zm);
2914   pack_odd_elements(kFormatVnH, zn_t, zn);
2915   pack_odd_elements(kFormatVnH, zm_t, zm);
2916 
2917   switch (form_hash_) {
2918     case "fmlalb_z_zzz"_h:
2919       fmlal(kFormatVnS, zda, zn_b, zm_b);
2920       break;
2921     case "fmlalt_z_zzz"_h:
2922       fmlal(kFormatVnS, zda, zn_t, zm_t);
2923       break;
2924     case "fmlslb_z_zzz"_h:
2925       fmlsl(kFormatVnS, zda, zn_b, zm_b);
2926       break;
2927     case "fmlslt_z_zzz"_h:
2928       fmlsl(kFormatVnS, zda, zn_t, zm_t);
2929       break;
2930     default:
2931       VIXL_UNIMPLEMENTED();
2932   }
2933 }
2934 
2935 void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
2936   SimVRegister& zda = ReadVRegister(instr->GetRd());
2937   SimVRegister& zn = ReadVRegister(instr->GetRn());
2938   SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
2939 
2940   SimVRegister temp, zm_idx, zn_b, zn_t;
2941   Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
2942   dup_elements_to_segments(kFormatVnH, temp, zm, index);
2943   pack_even_elements(kFormatVnH, zm_idx, temp);
2944   pack_even_elements(kFormatVnH, zn_b, zn);
2945   pack_odd_elements(kFormatVnH, zn_t, zn);
2946 
2947   switch (form_hash_) {
2948     case "fmlalb_z_zzzi_s"_h:
2949       fmlal(kFormatVnS, zda, zn_b, zm_idx);
2950       break;
2951     case "fmlalt_z_zzzi_s"_h:
2952       fmlal(kFormatVnS, zda, zn_t, zm_idx);
2953       break;
2954     case "fmlslb_z_zzzi_s"_h:
2955       fmlsl(kFormatVnS, zda, zn_b, zm_idx);
2956       break;
2957     case "fmlslt_z_zzzi_s"_h:
2958       fmlsl(kFormatVnS, zda, zn_t, zm_idx);
2959       break;
2960     case "sqdmlalb_z_zzzi_s"_h:
2961       sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
2962       break;
2963     case "sqdmlalt_z_zzzi_s"_h:
2964       sqdmlal(kFormatVnS, zda, zn_t, zm_idx);
2965       break;
2966     case "sqdmlslb_z_zzzi_s"_h:
2967       sqdmlsl(kFormatVnS, zda, zn_b, zm_idx);
2968       break;
2969     case "sqdmlslt_z_zzzi_s"_h:
2970       sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
2971       break;
2972     default:
2973       VIXL_UNIMPLEMENTED();
2974   }
2975 }
2976 
2977 void Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) {
2978   VectorFormat vform = instr->GetSVEVectorFormat();
2979   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2980   SimVRegister& zda = ReadVRegister(instr->GetRd());
2981   SimVRegister& zn = ReadVRegister(instr->GetRn());
2982   SimVRegister result;
2983 
2984   switch (form_hash_) {
2985     case "sadalp_z_p_z"_h:
2986       sadalp(vform, result, zn);
2987       break;
2988     case "uadalp_z_p_z"_h:
2989       uadalp(vform, result, zn);
2990       break;
2991     default:
2992       VIXL_UNIMPLEMENTED();
2993   }
2994   mov_merging(vform, zda, pg, result);
2995 }
2996 
2997 void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) {
2998   VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD;
2999   SimVRegister& zda = ReadVRegister(instr->GetRd());
3000   SimVRegister& zm = ReadVRegister(instr->GetRm());
3001   SimVRegister& zn = ReadVRegister(instr->GetRn());
3002 
3003   SimVRegister not_zn;
3004   not_(vform, not_zn, zn);
3005 
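  // The sbcl forms reuse adcl with zn inverted: subtracting with carry is
  // equivalent to adding the one's complement of the operand with carry.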
3006   switch (form_hash_) {
3007     case "adclb_z_zzz"_h:
3008       adcl(vform, zda, zn, zm, /* top = */ false);
3009       break;
3010     case "adclt_z_zzz"_h:
3011       adcl(vform, zda, zn, zm, /* top = */ true);
3012       break;
3013     case "sbclb_z_zzz"_h:
3014       adcl(vform, zda, not_zn, zm, /* top = */ false);
3015       break;
3016     case "sbclt_z_zzz"_h:
3017       adcl(vform, zda, not_zn, zm, /* top = */ true);
3018       break;
3019     default:
3020       VIXL_UNIMPLEMENTED();
3021   }
3022 }
3023 
3024 void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) {
3025   VectorFormat vform = instr->GetSVEVectorFormat();
3026   SimVRegister& zda = ReadVRegister(instr->GetRd());
3027   SimVRegister& zm = ReadVRegister(instr->GetRm());
3028   SimVRegister& zn = ReadVRegister(instr->GetRn());
3029 
3030   switch (form_hash_) {
3031     case "saba_z_zzz"_h:
3032       saba(vform, zda, zn, zm);
3033       break;
3034     case "uaba_z_zzz"_h:
3035       uaba(vform, zda, zn, zm);
3036       break;
3037     default:
3038       VIXL_UNIMPLEMENTED();
3039   }
3040 }
3041 
3042 void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) {
3043   SimVRegister& zda = ReadVRegister(instr->GetRd());
3044   SimVRegister& zn = ReadVRegister(instr->GetRn());
3045   int rot = instr->ExtractBits(11, 10) * 90;
3046   // vform and zm are only valid for the vector form of the instruction.
3047   VectorFormat vform = instr->GetSVEVectorFormat();
3048   SimVRegister& zm = ReadVRegister(instr->GetRm());
3049 
3050   // Inputs for the indexed forms of the instruction.
3051   SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16));
3052   SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16));
3053   int idx_h = instr->ExtractBits(20, 19);
3054   int idx_s = instr->ExtractBit(20);
3055 
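  // Bits <11:10> encode the rotation applied to the complex multiplicand, in
  // multiples of 90 degrees. The indexed (_zzzi) forms take the zm register
  // number and index from the narrower fields decoded above, while the vector
  // form uses the full Rm field and the SVE vector format.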
3056   switch (form_hash_) {
3057     case "cmla_z_zzz"_h:
3058       cmla(vform, zda, zda, zn, zm, rot);
3059       break;
3060     case "cmla_z_zzzi_h"_h:
3061       cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3062       break;
3063     case "cmla_z_zzzi_s"_h:
3064       cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3065       break;
3066     case "sqrdcmlah_z_zzz"_h:
3067       sqrdcmlah(vform, zda, zda, zn, zm, rot);
3068       break;
3069     case "sqrdcmlah_z_zzzi_h"_h:
3070       sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3071       break;
3072     case "sqrdcmlah_z_zzzi_s"_h:
3073       sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3074       break;
3075     default:
3076       VIXL_UNIMPLEMENTED();
3077   }
3078 }
3079 
3080 void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) {
3081   SimVRegister& zd = ReadVRegister(instr->GetRd());
3082   SimVRegister& zn = ReadVRegister(instr->GetRn());
3083 
3084   std::pair<int, int> shift_and_lane_size =
3085       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3086   int lane_size = shift_and_lane_size.second;
3087   VIXL_ASSERT((lane_size >= 0) &&
3088               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
3089   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3090   int shift_dist = shift_and_lane_size.first;
3091 
3092   switch (form_hash_) {
3093     case "srsra_z_zi"_h:
3094       srsra(vform, zd, zn, shift_dist);
3095       break;
3096     case "ssra_z_zi"_h:
3097       ssra(vform, zd, zn, shift_dist);
3098       break;
3099     case "ursra_z_zi"_h:
3100       ursra(vform, zd, zn, shift_dist);
3101       break;
3102     case "usra_z_zi"_h:
3103       usra(vform, zd, zn, shift_dist);
3104       break;
3105     default:
3106       VIXL_UNIMPLEMENTED();
3107   }
3108 }
3109 
3110 void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
3111   VectorFormat vform = instr->GetSVEVectorFormat();
3112   SimVRegister& zda = ReadVRegister(instr->GetRd());
3113   SimVRegister& zm = ReadVRegister(instr->GetRm());
3114   SimVRegister& zn = ReadVRegister(instr->GetRn());
3115 
3116   SimVRegister zero, zn_b, zm_b, zn_t, zm_t;
3117   zero.Clear();
3118 
3119   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3120   uzp1(vform_half, zn_b, zn, zero);
3121   uzp1(vform_half, zm_b, zm, zero);
3122   uzp2(vform_half, zn_t, zn, zero);
3123   uzp2(vform_half, zm_t, zm, zero);
3124 
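  // uzp1/uzp2 with a zero second operand extract the even (bottom) and odd
  // (top) source elements respectively into the low half of the temporaries,
  // matching what pack_even_elements/pack_odd_elements do elsewhere.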
3125   switch (form_hash_) {
3126     case "smlalb_z_zzz"_h:
3127       smlal(vform, zda, zn_b, zm_b);
3128       break;
3129     case "smlalt_z_zzz"_h:
3130       smlal(vform, zda, zn_t, zm_t);
3131       break;
3132     case "smlslb_z_zzz"_h:
3133       smlsl(vform, zda, zn_b, zm_b);
3134       break;
3135     case "smlslt_z_zzz"_h:
3136       smlsl(vform, zda, zn_t, zm_t);
3137       break;
3138     case "sqdmlalb_z_zzz"_h:
3139       sqdmlal(vform, zda, zn_b, zm_b);
3140       break;
3141     case "sqdmlalbt_z_zzz"_h:
3142       sqdmlal(vform, zda, zn_b, zm_t);
3143       break;
3144     case "sqdmlalt_z_zzz"_h:
3145       sqdmlal(vform, zda, zn_t, zm_t);
3146       break;
3147     case "sqdmlslb_z_zzz"_h:
3148       sqdmlsl(vform, zda, zn_b, zm_b);
3149       break;
3150     case "sqdmlslbt_z_zzz"_h:
3151       sqdmlsl(vform, zda, zn_b, zm_t);
3152       break;
3153     case "sqdmlslt_z_zzz"_h:
3154       sqdmlsl(vform, zda, zn_t, zm_t);
3155       break;
3156     case "umlalb_z_zzz"_h:
3157       umlal(vform, zda, zn_b, zm_b);
3158       break;
3159     case "umlalt_z_zzz"_h:
3160       umlal(vform, zda, zn_t, zm_t);
3161       break;
3162     case "umlslb_z_zzz"_h:
3163       umlsl(vform, zda, zn_b, zm_b);
3164       break;
3165     case "umlslt_z_zzz"_h:
3166       umlsl(vform, zda, zn_t, zm_t);
3167       break;
3168     default:
3169       VIXL_UNIMPLEMENTED();
3170   }
3171 }
3172 
3173 void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
3174   VectorFormat vform = instr->GetSVEVectorFormat();
3175   SimVRegister& zda = ReadVRegister(instr->GetRd());
3176   SimVRegister& zn = ReadVRegister(instr->GetRn());
3177   int rot = instr->ExtractBits(11, 10) * 90;
3178   unsigned zm_code = instr->GetRm();
3179   int index = -1;
3180 
3181   switch (form_hash_) {
3182     case "cdot_z_zzz"_h:
3183       // Nothing to do.
3184       break;
3185     case "cdot_z_zzzi_s"_h:
3186       index = zm_code >> 3;
3187       zm_code &= 0x7;
3188       break;
3189     case "cdot_z_zzzi_d"_h:
3190       index = zm_code >> 4;
3191       zm_code &= 0xf;
3192       break;
3193     default:
3194       VIXL_UNIMPLEMENTED();
3195   }
3196 
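  // For the indexed forms, the destination-sized element of zm selected by
  // the index is replicated across each 128-bit segment before the dot
  // product; rot gives the complex rotation in multiples of 90 degrees.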
3197   SimVRegister temp;
3198   SimVRegister& zm = ReadVRegister(zm_code);
3199   if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
3200   cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
3201 }
3202 
3203 void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
3204   VectorFormat vform = kFormatVnD;
3205   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3206   SimVRegister& zm = ReadVRegister(instr->GetRm());
3207   SimVRegister& zk = ReadVRegister(instr->GetRn());
3208   SimVRegister temp;
3209 
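  // These are bitwise, so the D-sized lane arrangement is arbitrary. bcax
  // computes zdn ^ (zm & ~zk) and eor3 computes zdn ^ zm ^ zk, built here from
  // bic/eor. The bsl-based forms assume bsl(vform, dst, mask, s1, s2) selects
  // bits of s1 where the mask (zk) is set and bits of s2 elsewhere; bsl1n and
  // bsl2n invert the first or second selected input, and nbsl inverts the
  // final result.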
3210   switch (form_hash_) {
3211     case "bcax_z_zzz"_h:
3212       bic(vform, temp, zm, zk);
3213       eor(vform, zdn, temp, zdn);
3214       break;
3215     case "bsl1n_z_zzz"_h:
3216       not_(vform, temp, zdn);
3217       bsl(vform, zdn, zk, temp, zm);
3218       break;
3219     case "bsl2n_z_zzz"_h:
3220       not_(vform, temp, zm);
3221       bsl(vform, zdn, zk, zdn, temp);
3222       break;
3223     case "bsl_z_zzz"_h:
3224       bsl(vform, zdn, zk, zdn, zm);
3225       break;
3226     case "eor3_z_zzz"_h:
3227       eor(vform, temp, zdn, zm);
3228       eor(vform, zdn, temp, zk);
3229       break;
3230     case "nbsl_z_zzz"_h:
3231       bsl(vform, zdn, zk, zdn, zm);
3232       not_(vform, zdn, zdn);
3233       break;
3234     default:
3235       VIXL_UNIMPLEMENTED();
3236   }
3237 }
3238 
3239 void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) {
3240   VectorFormat vform = instr->GetSVEVectorFormat();
3241   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3242   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3243   SimVRegister& zm = ReadVRegister(instr->GetRn());
3244   SimVRegister result;
3245 
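  // Halving operations compute (a op b) >> 1 without losing the carry or
  // borrow; the rounding forms (srhadd/urhadd) effectively compute
  // (a + b + 1) >> 1. Halve and Uhalve select signed or unsigned halving of
  // the intermediate result.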
3246   switch (form_hash_) {
3247     case "shadd_z_p_zz"_h:
3248       add(vform, result, zdn, zm).Halve(vform);
3249       break;
3250     case "shsub_z_p_zz"_h:
3251       sub(vform, result, zdn, zm).Halve(vform);
3252       break;
3253     case "shsubr_z_p_zz"_h:
3254       sub(vform, result, zm, zdn).Halve(vform);
3255       break;
3256     case "srhadd_z_p_zz"_h:
3257       add(vform, result, zdn, zm).Halve(vform).Round(vform);
3258       break;
3259     case "uhadd_z_p_zz"_h:
3260       add(vform, result, zdn, zm).Uhalve(vform);
3261       break;
3262     case "uhsub_z_p_zz"_h:
3263       sub(vform, result, zdn, zm).Uhalve(vform);
3264       break;
3265     case "uhsubr_z_p_zz"_h:
3266       sub(vform, result, zm, zdn).Uhalve(vform);
3267       break;
3268     case "urhadd_z_p_zz"_h:
3269       add(vform, result, zdn, zm).Uhalve(vform).Round(vform);
3270       break;
3271     default:
3272       VIXL_UNIMPLEMENTED();
3273       break;
3274   }
3275   mov_merging(vform, zdn, pg, result);
3276 }
3277 
3278 void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) {
3279   VectorFormat vform = instr->GetSVEVectorFormat();
3280   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3281   SimVRegister& zm = ReadVRegister(instr->GetRn());
3282   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3283   SimVRegister result;
3284 
3285   switch (form_hash_) {
3286     case "sqadd_z_p_zz"_h:
3287       add(vform, result, zdn, zm).SignedSaturate(vform);
3288       break;
3289     case "sqsub_z_p_zz"_h:
3290       sub(vform, result, zdn, zm).SignedSaturate(vform);
3291       break;
3292     case "sqsubr_z_p_zz"_h:
3293       sub(vform, result, zm, zdn).SignedSaturate(vform);
3294       break;
3295     case "suqadd_z_p_zz"_h:
3296       suqadd(vform, result, zdn, zm);
3297       break;
3298     case "uqadd_z_p_zz"_h:
3299       add(vform, result, zdn, zm).UnsignedSaturate(vform);
3300       break;
3301     case "uqsub_z_p_zz"_h:
3302       sub(vform, result, zdn, zm).UnsignedSaturate(vform);
3303       break;
3304     case "uqsubr_z_p_zz"_h:
3305       sub(vform, result, zm, zdn).UnsignedSaturate(vform);
3306       break;
3307     case "usqadd_z_p_zz"_h:
3308       usqadd(vform, result, zdn, zm);
3309       break;
3310     default:
3311       VIXL_UNIMPLEMENTED();
3312       break;
3313   }
3314   mov_merging(vform, zdn, pg, result);
3315 }
3316 
3317 void Simulator::SimulateSVEIntArithPair(const Instruction* instr) {
3318   VectorFormat vform = instr->GetSVEVectorFormat();
3319   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3320   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3321   SimVRegister& zm = ReadVRegister(instr->GetRn());
3322   SimVRegister result;
3323 
3324   switch (form_hash_) {
3325     case "addp_z_p_zz"_h:
3326       addp(vform, result, zdn, zm);
3327       break;
3328     case "smaxp_z_p_zz"_h:
3329       smaxp(vform, result, zdn, zm);
3330       break;
3331     case "sminp_z_p_zz"_h:
3332       sminp(vform, result, zdn, zm);
3333       break;
3334     case "umaxp_z_p_zz"_h:
3335       umaxp(vform, result, zdn, zm);
3336       break;
3337     case "uminp_z_p_zz"_h:
3338       uminp(vform, result, zdn, zm);
3339       break;
3340     default:
3341       VIXL_UNIMPLEMENTED();
3342       break;
3343   }
3344   mov_merging(vform, zdn, pg, result);
3345 }
3346 
3347 void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) {
3348   VectorFormat vform = instr->GetSVEVectorFormat();
3349   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3350   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3351   SimVRegister& zm = ReadVRegister(instr->GetRn());
3352   SimVRegister result;
3353 
3354   switch (form_hash_) {
3355     case "faddp_z_p_zz"_h:
3356       faddp(vform, result, zdn, zm);
3357       break;
3358     case "fmaxnmp_z_p_zz"_h:
3359       fmaxnmp(vform, result, zdn, zm);
3360       break;
3361     case "fmaxp_z_p_zz"_h:
3362       fmaxp(vform, result, zdn, zm);
3363       break;
3364     case "fminnmp_z_p_zz"_h:
3365       fminnmp(vform, result, zdn, zm);
3366       break;
3367     case "fminp_z_p_zz"_h:
3368       fminp(vform, result, zdn, zm);
3369       break;
3370     default:
3371       VIXL_UNIMPLEMENTED();
3372   }
3373   mov_merging(vform, zdn, pg, result);
3374 }
3375 
3376 void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) {
3377   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3378   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3379 
3380   std::pair<int, int> shift_and_lane_size =
3381       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
3382   unsigned lane_size = shift_and_lane_size.second;
3383   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3384   int right_shift_dist = shift_and_lane_size.first;
3385   int left_shift_dist = (8 << lane_size) - right_shift_dist;
3386   SimVRegister result;
3387 
3388   switch (form_hash_) {
3389     case "sqshl_z_p_zi"_h:
3390       sqshl(vform, result, zdn, left_shift_dist);
3391       break;
3392     case "sqshlu_z_p_zi"_h:
3393       sqshlu(vform, result, zdn, left_shift_dist);
3394       break;
3395     case "srshr_z_p_zi"_h:
3396       sshr(vform, result, zdn, right_shift_dist).Round(vform);
3397       break;
3398     case "uqshl_z_p_zi"_h:
3399       uqshl(vform, result, zdn, left_shift_dist);
3400       break;
3401     case "urshr_z_p_zi"_h:
3402       ushr(vform, result, zdn, right_shift_dist).Round(vform);
3403       break;
3404     default:
3405       VIXL_UNIMPLEMENTED();
3406   }
3407   mov_merging(vform, zdn, pg, result);
3408 }
3409 
3410 void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) {
3411   VIXL_ASSERT(form_hash_ == "xar_z_zzi"_h);
3412 
3413   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3414   SimVRegister& zm = ReadVRegister(instr->GetRn());
3415 
3416   std::pair<int, int> shift_and_lane_size =
3417       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3418   unsigned lane_size = shift_and_lane_size.second;
3419   VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
3420   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3421   int shift_dist = shift_and_lane_size.first;
3422   eor(vform, zdn, zdn, zm);
3423   ror(vform, zdn, zdn, shift_dist);
3424 }
3425 
3426 void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) {
3427   VectorFormat vform = instr->GetSVEVectorFormat();
3428   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3429   SimVRegister& zm = ReadVRegister(instr->GetRn());
3430   int rot = (instr->ExtractBit(10) == 0) ? 90 : 270;
3431 
3432   switch (form_hash_) {
3433     case "cadd_z_zz"_h:
3434       cadd(vform, zdn, zdn, zm, rot);
3435       break;
3436     case "sqcadd_z_zz"_h:
3437       cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true);
3438       break;
3439     default:
3440       VIXL_UNIMPLEMENTED();
3441   }
3442 }
3443 
3444 void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) {
3445   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3446   SimVRegister& zn = ReadVRegister(instr->GetRn());
3447   uint64_t xm = ReadXRegister(instr->GetRm());
3448 
3449   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3450   int msize = -1;
3451   bool is_signed = false;
3452 
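  // msize is log2 of the memory access size in bytes for each element; the
  // "s" variants sign-extend the loaded value to the D-sized vector element.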
3453   switch (form_hash_) {
3454     case "ldnt1b_z_p_ar_d_64_unscaled"_h:
3455       msize = 0;
3456       break;
3457     case "ldnt1d_z_p_ar_d_64_unscaled"_h:
3458       msize = 3;
3459       break;
3460     case "ldnt1h_z_p_ar_d_64_unscaled"_h:
3461       msize = 1;
3462       break;
3463     case "ldnt1sb_z_p_ar_d_64_unscaled"_h:
3464       msize = 0;
3465       is_signed = true;
3466       break;
3467     case "ldnt1sh_z_p_ar_d_64_unscaled"_h:
3468       msize = 1;
3469       is_signed = true;
3470       break;
3471     case "ldnt1sw_z_p_ar_d_64_unscaled"_h:
3472       msize = 2;
3473       is_signed = true;
3474       break;
3475     case "ldnt1w_z_p_ar_d_64_unscaled"_h:
3476       msize = 2;
3477       break;
3478     default:
3479       VIXL_UNIMPLEMENTED();
3480   }
3481   addr.SetMsizeInBytesLog2(msize);
3482   SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed);
3483 }
3484 
3485 void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) {
3486   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3487   SimVRegister& zn = ReadVRegister(instr->GetRn());
3488   uint64_t xm = ReadXRegister(instr->GetRm());
3489 
3490   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3491   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_d_64_unscaled"_h) ||
3492               (form_hash_ == "stnt1d_z_p_ar_d_64_unscaled"_h) ||
3493               (form_hash_ == "stnt1h_z_p_ar_d_64_unscaled"_h) ||
3494               (form_hash_ == "stnt1w_z_p_ar_d_64_unscaled"_h));
3495 
3496   addr.SetMsizeInBytesLog2(
3497       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3498   SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr);
3499 }
3500 
3501 void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) {
3502   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3503   SimVRegister& zn = ReadVRegister(instr->GetRn());
3504   uint64_t xm = ReadXRegister(instr->GetRm());
3505 
3506   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3507   int msize = -1;
3508   bool is_signed = false;
3509 
3510   switch (form_hash_) {
3511     case "ldnt1b_z_p_ar_s_x32_unscaled"_h:
3512       msize = 0;
3513       break;
3514     case "ldnt1h_z_p_ar_s_x32_unscaled"_h:
3515       msize = 1;
3516       break;
3517     case "ldnt1sb_z_p_ar_s_x32_unscaled"_h:
3518       msize = 0;
3519       is_signed = true;
3520       break;
3521     case "ldnt1sh_z_p_ar_s_x32_unscaled"_h:
3522       msize = 1;
3523       is_signed = true;
3524       break;
3525     case "ldnt1w_z_p_ar_s_x32_unscaled"_h:
3526       msize = 2;
3527       break;
3528     default:
3529       VIXL_UNIMPLEMENTED();
3530   }
3531   addr.SetMsizeInBytesLog2(msize);
3532   SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed);
3533 }
3534 
3535 void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) {
3536   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3537   SimVRegister& zn = ReadVRegister(instr->GetRn());
3538   uint64_t xm = ReadXRegister(instr->GetRm());
3539 
3540   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3541   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_s_x32_unscaled"_h) ||
3542               (form_hash_ == "stnt1h_z_p_ar_s_x32_unscaled"_h) ||
3543               (form_hash_ == "stnt1w_z_p_ar_s_x32_unscaled"_h));
3544 
3545   addr.SetMsizeInBytesLog2(
3546       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3547   SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr);
3548 }
3549 
3550 void Simulator::VisitReserved(const Instruction* instr) {
3551   // UDF is the only instruction in this group, and the Decoder is precise here.
3552   VIXL_ASSERT(instr->Mask(ReservedMask) == UDF);
3553 
3554   printf("UDF (permanently undefined) instruction at %p: 0x%08" PRIx32 "\n",
3555          reinterpret_cast<const void*>(instr),
3556          instr->GetInstructionBits());
3557   VIXL_ABORT_WITH_MSG("UNDEFINED (UDF)\n");
3558 }
3559 
3560 
3561 void Simulator::VisitUnimplemented(const Instruction* instr) {
3562   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
3563          reinterpret_cast<const void*>(instr),
3564          instr->GetInstructionBits());
3565   VIXL_UNIMPLEMENTED();
3566 }
3567 
3568 
3569 void Simulator::VisitUnallocated(const Instruction* instr) {
3570   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
3571          reinterpret_cast<const void*>(instr),
3572          instr->GetInstructionBits());
3573   VIXL_UNIMPLEMENTED();
3574 }
3575 
3576 
3577 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
3578   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
3579               (instr->Mask(PCRelAddressingMask) == ADRP));
3580 
3581   WriteRegister(instr->GetRd(), instr->GetImmPCOffsetTarget());
3582 }
3583 
3584 
3585 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
3586   switch (instr->Mask(UnconditionalBranchMask)) {
3587     case BL:
3588       WriteLr(instr->GetNextInstruction());
3589       VIXL_FALLTHROUGH();
3590     case B:
3591       WritePc(instr->GetImmPCOffsetTarget());
3592       break;
3593     default:
3594       VIXL_UNREACHABLE();
3595   }
3596 }
3597 
3598 
3599 void Simulator::VisitConditionalBranch(const Instruction* instr) {
3600   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
3601   if (ConditionPassed(instr->GetConditionBranch())) {
3602     WritePc(instr->GetImmPCOffsetTarget());
3603   }
3604 }
3605 
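// Derive the branch type used for BTI (Branch Target Identification) checks:
// branch-and-link instructions, indirect branches via the intra-procedure-call
// registers (x16/x17) or from an unguarded page, and other indirect branches
// from guarded pages are distinguished so that the BTI requirements of the
// branch target can be checked.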
3606 BType Simulator::GetBTypeFromInstruction(const Instruction* instr) const {
3607   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3608     case BLR:
3609     case BLRAA:
3610     case BLRAB:
3611     case BLRAAZ:
3612     case BLRABZ:
3613       return BranchAndLink;
3614     case BR:
3615     case BRAA:
3616     case BRAB:
3617     case BRAAZ:
3618     case BRABZ:
3619       if ((instr->GetRn() == 16) || (instr->GetRn() == 17) ||
3620           !PcIsInGuardedPage()) {
3621         return BranchFromUnguardedOrToIP;
3622       }
3623       return BranchFromGuardedNotToIP;
3624   }
3625   return DefaultBType;
3626 }
3627 
3628 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
3629   bool authenticate = false;
3630   bool link = false;
3631   uint64_t addr = ReadXRegister(instr->GetRn());
3632   uint64_t context = 0;
3633 
3634   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3635     case BLR:
3636       link = true;
3637       VIXL_FALLTHROUGH();
3638     case BR:
3639     case RET:
3640       break;
3641 
3642     case BLRAAZ:
3643     case BLRABZ:
3644       link = true;
3645       VIXL_FALLTHROUGH();
3646     case BRAAZ:
3647     case BRABZ:
3648       authenticate = true;
3649       break;
3650 
3651     case BLRAA:
3652     case BLRAB:
3653       link = true;
3654       VIXL_FALLTHROUGH();
3655     case BRAA:
3656     case BRAB:
3657       authenticate = true;
3658       context = ReadXRegister(instr->GetRd());
3659       break;
3660 
3661     case RETAA:
3662     case RETAB:
3663       authenticate = true;
3664       addr = ReadXRegister(kLinkRegCode);
3665       context = ReadXRegister(31, Reg31IsStackPointer);
3666       break;
3667     default:
3668       VIXL_UNREACHABLE();
3669   }
3670 
3671   if (link) {
3672     WriteLr(instr->GetNextInstruction());
3673   }
3674 
3675   if (authenticate) {
3676     PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
3677     addr = AuthPAC(addr, context, key, kInstructionPointer);
3678 
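    // On an authentication failure, AuthPAC is expected to leave a non-zero
    // error code in the two bits just below the most significant PAC bit, so
    // testing those bits detects the failure and aborts rather than branching
    // to a corrupted address.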
3679     int error_lsb = GetTopPACBit(addr, kInstructionPointer) - 2;
3680     if (((addr >> error_lsb) & 0x3) != 0x0) {
3681       VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
3682     }
3683   }
3684 
3685   WritePc(Instruction::Cast(addr));
3686   WriteNextBType(GetBTypeFromInstruction(instr));
3687 }
3688 
3689 
3690 void Simulator::VisitTestBranch(const Instruction* instr) {
3691   unsigned bit_pos =
3692       (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40();
3693   bool bit_zero = ((ReadXRegister(instr->GetRt()) >> bit_pos) & 1) == 0;
3694   bool take_branch = false;
3695   switch (instr->Mask(TestBranchMask)) {
3696     case TBZ:
3697       take_branch = bit_zero;
3698       break;
3699     case TBNZ:
3700       take_branch = !bit_zero;
3701       break;
3702     default:
3703       VIXL_UNIMPLEMENTED();
3704   }
3705   if (take_branch) {
3706     WritePc(instr->GetImmPCOffsetTarget());
3707   }
3708 }
3709 
3710 
3711 void Simulator::VisitCompareBranch(const Instruction* instr) {
3712   unsigned rt = instr->GetRt();
3713   bool take_branch = false;
3714   switch (instr->Mask(CompareBranchMask)) {
3715     case CBZ_w:
3716       take_branch = (ReadWRegister(rt) == 0);
3717       break;
3718     case CBZ_x:
3719       take_branch = (ReadXRegister(rt) == 0);
3720       break;
3721     case CBNZ_w:
3722       take_branch = (ReadWRegister(rt) != 0);
3723       break;
3724     case CBNZ_x:
3725       take_branch = (ReadXRegister(rt) != 0);
3726       break;
3727     default:
3728       VIXL_UNIMPLEMENTED();
3729   }
3730   if (take_branch) {
3731     WritePc(instr->GetImmPCOffsetTarget());
3732   }
3733 }
3734 
3735 
3736 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
3737   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3738   bool set_flags = instr->GetFlagsUpdate();
3739   int64_t new_val = 0;
3740   Instr operation = instr->Mask(AddSubOpMask);
3741 
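  // SUB and SUBS are implemented as an add-with-carry of the one's complement
  // of op2 with a carry-in of 1, since -op2 == ~op2 + 1 in two's complement.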
3742   switch (operation) {
3743     case ADD:
3744     case ADDS: {
3745       new_val = AddWithCarry(reg_size,
3746                              set_flags,
3747                              ReadRegister(reg_size,
3748                                           instr->GetRn(),
3749                                           instr->GetRnMode()),
3750                              op2);
3751       break;
3752     }
3753     case SUB:
3754     case SUBS: {
3755       new_val = AddWithCarry(reg_size,
3756                              set_flags,
3757                              ReadRegister(reg_size,
3758                                           instr->GetRn(),
3759                                           instr->GetRnMode()),
3760                              ~op2,
3761                              1);
3762       break;
3763     }
3764     default:
3765       VIXL_UNREACHABLE();
3766   }
3767 
3768   WriteRegister(reg_size,
3769                 instr->GetRd(),
3770                 new_val,
3771                 LogRegWrites,
3772                 instr->GetRdMode());
3773 }
3774 
3775 
3776 void Simulator::VisitAddSubShifted(const Instruction* instr) {
3777   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3778   int64_t op2 = ShiftOperand(reg_size,
3779                              ReadRegister(reg_size, instr->GetRm()),
3780                              static_cast<Shift>(instr->GetShiftDP()),
3781                              instr->GetImmDPShift());
3782   AddSubHelper(instr, op2);
3783 }
3784 
3785 
3786 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
3787   int64_t op2 = instr->GetImmAddSub()
3788                 << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
3789   AddSubHelper(instr, op2);
3790 }
3791 
3792 
3793 void Simulator::VisitAddSubExtended(const Instruction* instr) {
3794   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3795   int64_t op2 = ExtendValue(reg_size,
3796                             ReadRegister(reg_size, instr->GetRm()),
3797                             static_cast<Extend>(instr->GetExtendMode()),
3798                             instr->GetImmExtendShift());
3799   AddSubHelper(instr, op2);
3800 }
3801 
3802 
3803 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
3804   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3805   int64_t op2 = ReadRegister(reg_size, instr->GetRm());
3806   int64_t new_val;
3807 
3808   if ((instr->Mask(AddSubOpMask) == SUB) ||
3809       (instr->Mask(AddSubOpMask) == SUBS)) {
3810     op2 = ~op2;
3811   }
3812 
3813   new_val = AddWithCarry(reg_size,
3814                          instr->GetFlagsUpdate(),
3815                          ReadRegister(reg_size, instr->GetRn()),
3816                          op2,
3817                          ReadC());
3818 
3819   WriteRegister(reg_size, instr->GetRd(), new_val);
3820 }
3821 
3822 
3823 void Simulator::VisitRotateRightIntoFlags(const Instruction* instr) {
3824   switch (instr->Mask(RotateRightIntoFlagsMask)) {
3825     case RMIF: {
3826       uint64_t value = ReadRegister<uint64_t>(instr->GetRn());
3827       unsigned shift = instr->GetImmRMIFRotation();
3828       unsigned mask = instr->GetNzcv();
3829       uint64_t rotated = RotateRight(value, shift, kXRegSize);
3830 
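      // RMIF rotates the source right by the immediate and moves the low four
      // bits of the result into N, Z, C and V (bits 3 down to 0), updating
      // only the flags selected by the nzcv mask.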
3831       ReadNzcv().SetFlags((rotated & mask) | (ReadNzcv().GetFlags() & ~mask));
3832       break;
3833     }
3834   }
3835 }
3836 
3837 
3838 void Simulator::VisitEvaluateIntoFlags(const Instruction* instr) {
3839   uint32_t value = ReadRegister<uint32_t>(instr->GetRn());
3840   unsigned msb = (instr->Mask(EvaluateIntoFlagsMask) == SETF16) ? 15 : 7;
3841 
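  // SETF8/SETF16 evaluate the flags as if the low 8 or 16 bits were the
  // result of an operation: N is the sign bit, Z is set if those bits are all
  // zero, and V is set if the bit above the sign bit differs from it (the
  // value does not fit in the narrow width). C is left unchanged.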
3842   unsigned sign_bit = (value >> msb) & 1;
3843   unsigned overflow_bit = (value >> (msb + 1)) & 1;
3844   ReadNzcv().SetN(sign_bit);
3845   ReadNzcv().SetZ((value << (31 - msb)) == 0);
3846   ReadNzcv().SetV(sign_bit ^ overflow_bit);
3847 }
3848 
3849 
3850 void Simulator::VisitLogicalShifted(const Instruction* instr) {
3851   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3852   Shift shift_type = static_cast<Shift>(instr->GetShiftDP());
3853   unsigned shift_amount = instr->GetImmDPShift();
3854   int64_t op2 = ShiftOperand(reg_size,
3855                              ReadRegister(reg_size, instr->GetRm()),
3856                              shift_type,
3857                              shift_amount);
3858   if (instr->Mask(NOT) == NOT) {
3859     op2 = ~op2;
3860   }
3861   LogicalHelper(instr, op2);
3862 }
3863 
3864 
3865 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
3866   if (instr->GetImmLogical() == 0) {
3867     VIXL_UNIMPLEMENTED();
3868   } else {
3869     LogicalHelper(instr, instr->GetImmLogical());
3870   }
3871 }
3872 
3873 
3874 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
3875   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3876   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
3877   int64_t result = 0;
3878   bool update_flags = false;
3879 
3880   // Switch on the logical operation, stripping out the NOT bit, as it has a
3881   // different meaning for logical immediate instructions.
3882   switch (instr->Mask(LogicalOpMask & ~NOT)) {
3883     case ANDS:
3884       update_flags = true;
3885       VIXL_FALLTHROUGH();
3886     case AND:
3887       result = op1 & op2;
3888       break;
3889     case ORR:
3890       result = op1 | op2;
3891       break;
3892     case EOR:
3893       result = op1 ^ op2;
3894       break;
3895     default:
3896       VIXL_UNIMPLEMENTED();
3897   }
3898 
3899   if (update_flags) {
3900     ReadNzcv().SetN(CalcNFlag(result, reg_size));
3901     ReadNzcv().SetZ(CalcZFlag(result));
3902     ReadNzcv().SetC(0);
3903     ReadNzcv().SetV(0);
3904     LogSystemRegister(NZCV);
3905   }
3906 
3907   WriteRegister(reg_size,
3908                 instr->GetRd(),
3909                 result,
3910                 LogRegWrites,
3911                 instr->GetRdMode());
3912 }
3913 
3914 
3915 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
3916   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3917   ConditionalCompareHelper(instr, ReadRegister(reg_size, instr->GetRm()));
3918 }
3919 
3920 
3921 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
3922   ConditionalCompareHelper(instr, instr->GetImmCondCmp());
3923 }
3924 
3925 
3926 void Simulator::ConditionalCompareHelper(const Instruction* instr,
3927                                          int64_t op2) {
3928   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3929   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
3930 
3931   if (ConditionPassed(instr->GetCondition())) {
3932     // If the condition passes, set the status flags to the result of comparing
3933     // the operands.
3934     if (instr->Mask(ConditionalCompareMask) == CCMP) {
3935       AddWithCarry(reg_size, true, op1, ~op2, 1);
3936     } else {
3937       VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
3938       AddWithCarry(reg_size, true, op1, op2, 0);
3939     }
3940   } else {
3941     // If the condition fails, set the status flags to the nzcv immediate.
3942     ReadNzcv().SetFlags(instr->GetNzcv());
3943     LogSystemRegister(NZCV);
3944   }
3945 }
3946 
3947 
3948 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
3949   int offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
3950   LoadStoreHelper(instr, offset, Offset);
3951 }
3952 
3953 
3954 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
3955   LoadStoreHelper(instr, instr->GetImmLS(), Offset);
3956 }
3957 
3958 
3959 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
3960   LoadStoreHelper(instr, instr->GetImmLS(), PreIndex);
3961 }
3962 
3963 
3964 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
3965   LoadStoreHelper(instr, instr->GetImmLS(), PostIndex);
3966 }
3967 
3968 
3969 template <typename T1, typename T2>
3970 void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
3971   unsigned rt = instr->GetRt();
3972   unsigned rn = instr->GetRn();
3973 
3974   unsigned element_size = sizeof(T2);
3975   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
3976   int offset = instr->GetImmLS();
3977   address += offset;
3978 
3979   // Verify that the address is available to the host.
3980   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
3981 
3982   // Check the alignment of `address`.
3983   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
3984     VIXL_ALIGNMENT_EXCEPTION();
3985   }
3986 
3987   WriteRegister<T1>(rt, static_cast<T1>(MemRead<T2>(address)));
3988 
3989   // Approximate load-acquire by issuing a full barrier after the load.
3990   __sync_synchronize();
3991 
3992   LogRead(rt, GetPrintRegisterFormat(element_size), address);
3993 }
3994 
3995 
3996 template <typename T>
3997 void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
3998   unsigned rt = instr->GetRt();
3999   unsigned rn = instr->GetRn();
4000 
4001   unsigned element_size = sizeof(T);
4002   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4003   int offset = instr->GetImmLS();
4004   address += offset;
4005 
4006   // Verify that the address is available to the host.
4007   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4008 
4009   // Check the alignment of `address`.
4010   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
4011     VIXL_ALIGNMENT_EXCEPTION();
4012   }
4013 
4014   // Approximate store-release by issuing a full barrier before the store.
4015   __sync_synchronize();
4016 
4017   MemWrite<T>(address, ReadRegister<T>(rt));
4018 
4019   LogWrite(rt, GetPrintRegisterFormat(element_size), address);
4020 }
4021 
4022 
4023 void Simulator::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
4024   switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
4025     case LDAPURB:
4026       LoadAcquireRCpcUnscaledOffsetHelper<uint8_t, uint8_t>(instr);
4027       break;
4028     case LDAPURH:
4029       LoadAcquireRCpcUnscaledOffsetHelper<uint16_t, uint16_t>(instr);
4030       break;
4031     case LDAPUR_w:
4032       LoadAcquireRCpcUnscaledOffsetHelper<uint32_t, uint32_t>(instr);
4033       break;
4034     case LDAPUR_x:
4035       LoadAcquireRCpcUnscaledOffsetHelper<uint64_t, uint64_t>(instr);
4036       break;
4037     case LDAPURSB_w:
4038       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int8_t>(instr);
4039       break;
4040     case LDAPURSB_x:
4041       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int8_t>(instr);
4042       break;
4043     case LDAPURSH_w:
4044       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int16_t>(instr);
4045       break;
4046     case LDAPURSH_x:
4047       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int16_t>(instr);
4048       break;
4049     case LDAPURSW:
4050       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int32_t>(instr);
4051       break;
4052     case STLURB:
4053       StoreReleaseUnscaledOffsetHelper<uint8_t>(instr);
4054       break;
4055     case STLURH:
4056       StoreReleaseUnscaledOffsetHelper<uint16_t>(instr);
4057       break;
4058     case STLUR_w:
4059       StoreReleaseUnscaledOffsetHelper<uint32_t>(instr);
4060       break;
4061     case STLUR_x:
4062       StoreReleaseUnscaledOffsetHelper<uint64_t>(instr);
4063       break;
4064   }
4065 }
4066 
4067 
4068 void Simulator::VisitLoadStorePAC(const Instruction* instr) {
4069   unsigned dst = instr->GetRt();
4070   unsigned addr_reg = instr->GetRn();
4071 
4072   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
4073 
4074   PACKey key = (instr->ExtractBit(23) == 0) ? kPACKeyDA : kPACKeyDB;
4075   address = AuthPAC(address, 0, key, kDataPointer);
4076 
4077   int error_lsb = GetTopPACBit(address, kInstructionPointer) - 2;
4078   if (((address >> error_lsb) & 0x3) != 0x0) {
4079     VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
4080   }
4081 
4082 
4083   if ((addr_reg == 31) && ((address % 16) != 0)) {
4084     // When the base register is SP the stack pointer is required to be
4085     // quadword aligned prior to the address calculation and write-backs.
4086     // Misalignment will cause a stack alignment fault.
4087     VIXL_ALIGNMENT_EXCEPTION();
4088   }
4089 
4090   int64_t offset = instr->GetImmLSPAC();
4091   address += offset;
4092 
4093   if (instr->Mask(LoadStorePACPreBit) == LoadStorePACPreBit) {
4094     // Pre-index mode.
4095     VIXL_ASSERT(offset != 0);
4096     WriteXRegister(addr_reg, address, LogRegWrites, Reg31IsStackPointer);
4097   }
4098 
4099   uintptr_t addr_ptr = static_cast<uintptr_t>(address);
4100 
4101   // Verify that the calculated address is available to the host.
4102   VIXL_ASSERT(address == addr_ptr);
4103 
4104   WriteXRegister(dst, MemRead<uint64_t>(addr_ptr), NoRegLog);
4105   unsigned access_size = 1 << 3;
4106   LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
4107 }
4108 
4109 
4110 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
4111   Extend ext = static_cast<Extend>(instr->GetExtendMode());
4112   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
4113   unsigned shift_amount = instr->GetImmShiftLS() * instr->GetSizeLS();
4114 
4115   int64_t offset =
4116       ExtendValue(kXRegSize, ReadXRegister(instr->GetRm()), ext, shift_amount);
4117   LoadStoreHelper(instr, offset, Offset);
4118 }
4119 
4120 
4121 void Simulator::LoadStoreHelper(const Instruction* instr,
4122                                 int64_t offset,
4123                                 AddrMode addrmode) {
4124   unsigned srcdst = instr->GetRt();
4125   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4126 
4127   bool rt_is_vreg = false;
4128   int extend_to_size = 0;
4129   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
4130   switch (op) {
4131     case LDRB_w:
4132       WriteWRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4133       extend_to_size = kWRegSizeInBytes;
4134       break;
4135     case LDRH_w:
4136       WriteWRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4137       extend_to_size = kWRegSizeInBytes;
4138       break;
4139     case LDR_w:
4140       WriteWRegister(srcdst, MemRead<uint32_t>(address), NoRegLog);
4141       extend_to_size = kWRegSizeInBytes;
4142       break;
4143     case LDR_x:
4144       WriteXRegister(srcdst, MemRead<uint64_t>(address), NoRegLog);
4145       extend_to_size = kXRegSizeInBytes;
4146       break;
4147     case LDRSB_w:
4148       WriteWRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4149       extend_to_size = kWRegSizeInBytes;
4150       break;
4151     case LDRSH_w:
4152       WriteWRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4153       extend_to_size = kWRegSizeInBytes;
4154       break;
4155     case LDRSB_x:
4156       WriteXRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4157       extend_to_size = kXRegSizeInBytes;
4158       break;
4159     case LDRSH_x:
4160       WriteXRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4161       extend_to_size = kXRegSizeInBytes;
4162       break;
4163     case LDRSW_x:
4164       WriteXRegister(srcdst, MemRead<int32_t>(address), NoRegLog);
4165       extend_to_size = kXRegSizeInBytes;
4166       break;
4167     case LDR_b:
4168       WriteBRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4169       rt_is_vreg = true;
4170       break;
4171     case LDR_h:
4172       WriteHRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4173       rt_is_vreg = true;
4174       break;
4175     case LDR_s:
4176       WriteSRegister(srcdst, MemRead<float>(address), NoRegLog);
4177       rt_is_vreg = true;
4178       break;
4179     case LDR_d:
4180       WriteDRegister(srcdst, MemRead<double>(address), NoRegLog);
4181       rt_is_vreg = true;
4182       break;
4183     case LDR_q:
4184       WriteQRegister(srcdst, MemRead<qreg_t>(address), NoRegLog);
4185       rt_is_vreg = true;
4186       break;
4187 
4188     case STRB_w:
4189       MemWrite<uint8_t>(address, ReadWRegister(srcdst));
4190       break;
4191     case STRH_w:
4192       MemWrite<uint16_t>(address, ReadWRegister(srcdst));
4193       break;
4194     case STR_w:
4195       MemWrite<uint32_t>(address, ReadWRegister(srcdst));
4196       break;
4197     case STR_x:
4198       MemWrite<uint64_t>(address, ReadXRegister(srcdst));
4199       break;
4200     case STR_b:
4201       MemWrite<uint8_t>(address, ReadBRegister(srcdst));
4202       rt_is_vreg = true;
4203       break;
4204     case STR_h:
4205       MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst));
4206       rt_is_vreg = true;
4207       break;
4208     case STR_s:
4209       MemWrite<float>(address, ReadSRegister(srcdst));
4210       rt_is_vreg = true;
4211       break;
4212     case STR_d:
4213       MemWrite<double>(address, ReadDRegister(srcdst));
4214       rt_is_vreg = true;
4215       break;
4216     case STR_q:
4217       MemWrite<qreg_t>(address, ReadQRegister(srcdst));
4218       rt_is_vreg = true;
4219       break;
4220 
4221     // Ignore prfm hint instructions.
4222     case PRFM:
4223       break;
4224 
4225     default:
4226       VIXL_UNIMPLEMENTED();
4227   }
4228 
4229   // Print a detailed trace (including the memory address).
4230   bool extend = (extend_to_size != 0);
4231   unsigned access_size = 1 << instr->GetSizeLS();
4232   unsigned result_size = extend ? extend_to_size : access_size;
4233   PrintRegisterFormat print_format =
4234       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4235                  : GetPrintRegisterFormatForSize(result_size);
4236 
4237   if (instr->IsLoad()) {
4238     if (rt_is_vreg) {
4239       LogVRead(srcdst, print_format, address);
4240     } else {
4241       LogExtendingRead(srcdst, print_format, access_size, address);
4242     }
4243   } else if (instr->IsStore()) {
4244     if (rt_is_vreg) {
4245       LogVWrite(srcdst, print_format, address);
4246     } else {
4247       LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address);
4248     }
4249   } else {
4250     VIXL_ASSERT(op == PRFM);
4251   }
4252 
4253   local_monitor_.MaybeClear();
4254 }
4255 
4256 
4257 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
4258   LoadStorePairHelper(instr, Offset);
4259 }
4260 
4261 
4262 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
4263   LoadStorePairHelper(instr, PreIndex);
4264 }
4265 
4266 
4267 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
4268   LoadStorePairHelper(instr, PostIndex);
4269 }
4270 
4271 
4272 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
4273   LoadStorePairHelper(instr, Offset);
4274 }
4275 
4276 
4277 void Simulator::LoadStorePairHelper(const Instruction* instr,
4278                                     AddrMode addrmode) {
4279   unsigned rt = instr->GetRt();
4280   unsigned rt2 = instr->GetRt2();
4281   int element_size = 1 << instr->GetSizeLSPair();
4282   int64_t offset = instr->GetImmLSPair() * element_size;
4283   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4284   uintptr_t address2 = address + element_size;
4285 
4286   LoadStorePairOp op =
4287       static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
4288 
4289   // 'rt' and 'rt2' can only be aliased for stores.
4290   VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
4291 
4292   bool rt_is_vreg = false;
4293   bool sign_extend = false;
4294   switch (op) {
4295     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
4296     // will print a more detailed log.
4297     case LDP_w: {
4298       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4299       WriteWRegister(rt2, MemRead<uint32_t>(address2), NoRegLog);
4300       break;
4301     }
4302     case LDP_s: {
4303       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
4304       WriteSRegister(rt2, MemRead<float>(address2), NoRegLog);
4305       rt_is_vreg = true;
4306       break;
4307     }
4308     case LDP_x: {
4309       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4310       WriteXRegister(rt2, MemRead<uint64_t>(address2), NoRegLog);
4311       break;
4312     }
4313     case LDP_d: {
4314       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
4315       WriteDRegister(rt2, MemRead<double>(address2), NoRegLog);
4316       rt_is_vreg = true;
4317       break;
4318     }
4319     case LDP_q: {
4320       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
4321       WriteQRegister(rt2, MemRead<qreg_t>(address2), NoRegLog);
4322       rt_is_vreg = true;
4323       break;
4324     }
4325     case LDPSW_x: {
4326       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
4327       WriteXRegister(rt2, MemRead<int32_t>(address2), NoRegLog);
4328       sign_extend = true;
4329       break;
4330     }
4331     case STP_w: {
4332       MemWrite<uint32_t>(address, ReadWRegister(rt));
4333       MemWrite<uint32_t>(address2, ReadWRegister(rt2));
4334       break;
4335     }
4336     case STP_s: {
4337       MemWrite<float>(address, ReadSRegister(rt));
4338       MemWrite<float>(address2, ReadSRegister(rt2));
4339       rt_is_vreg = true;
4340       break;
4341     }
4342     case STP_x: {
4343       MemWrite<uint64_t>(address, ReadXRegister(rt));
4344       MemWrite<uint64_t>(address2, ReadXRegister(rt2));
4345       break;
4346     }
4347     case STP_d: {
4348       MemWrite<double>(address, ReadDRegister(rt));
4349       MemWrite<double>(address2, ReadDRegister(rt2));
4350       rt_is_vreg = true;
4351       break;
4352     }
4353     case STP_q: {
4354       MemWrite<qreg_t>(address, ReadQRegister(rt));
4355       MemWrite<qreg_t>(address2, ReadQRegister(rt2));
4356       rt_is_vreg = true;
4357       break;
4358     }
4359     default:
4360       VIXL_UNREACHABLE();
4361   }
4362 
4363   // Print a detailed trace (including the memory address).
4364   unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size;
4365   PrintRegisterFormat print_format =
4366       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4367                  : GetPrintRegisterFormatForSize(result_size);
4368 
4369   if (instr->IsLoad()) {
4370     if (rt_is_vreg) {
4371       LogVRead(rt, print_format, address);
4372       LogVRead(rt2, print_format, address2);
4373     } else if (sign_extend) {
4374       LogExtendingRead(rt, print_format, element_size, address);
4375       LogExtendingRead(rt2, print_format, element_size, address2);
4376     } else {
4377       LogRead(rt, print_format, address);
4378       LogRead(rt2, print_format, address2);
4379     }
4380   } else {
4381     if (rt_is_vreg) {
4382       LogVWrite(rt, print_format, address);
4383       LogVWrite(rt2, print_format, address2);
4384     } else {
4385       LogWrite(rt, print_format, address);
4386       LogWrite(rt2, print_format, address2);
4387     }
4388   }
4389 
4390   local_monitor_.MaybeClear();
4391 }
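
// Note (illustrative, not part of the original sources): the pair helper above
// derives the second access address simply as 'address + element_size'. For
// instance, with a 16-byte aligned stack pointer, `ldp x0, x1, [sp, #16]`
// reads x0 from sp+16 and x1 from sp+24 (element_size == 8), while
// `stp s0, s1, [x2]` writes two 4-byte values at x2 and x2+4.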
4392 
4393 
4394 template <typename T>
4395 void Simulator::CompareAndSwapHelper(const Instruction* instr) {
4396   unsigned rs = instr->GetRs();
4397   unsigned rt = instr->GetRt();
4398   unsigned rn = instr->GetRn();
4399 
4400   unsigned element_size = sizeof(T);
4401   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4402 
4403   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4404 
4405   bool is_acquire = instr->ExtractBit(22) == 1;
4406   bool is_release = instr->ExtractBit(15) == 1;
4407 
4408   T comparevalue = ReadRegister<T>(rs);
4409   T newvalue = ReadRegister<T>(rt);
4410 
4411   // The architecture permits the data read to clear any exclusive monitors
4412   // associated with that location, even if the compare subsequently fails.
4413   local_monitor_.Clear();
4414 
4415   T data = MemRead<T>(address);
4416   if (is_acquire) {
4417     // Approximate load-acquire by issuing a full barrier after the load.
4418     __sync_synchronize();
4419   }
4420 
4421   if (data == comparevalue) {
4422     if (is_release) {
4423       // Approximate store-release by issuing a full barrier before the store.
4424       __sync_synchronize();
4425     }
4426     MemWrite<T>(address, newvalue);
4427     LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
4428   }
4429   WriteRegister<T>(rs, data, NoRegLog);
4430   LogRead(rs, GetPrintRegisterFormatForSize(element_size), address);
4431 }
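
// Illustrative sketch (an interpretation, not part of the original sources):
// ignoring the barriers and trace output, the helper above behaves roughly as
// follows for a CAS instruction such as `cas w0, w1, [x2]`:
//
//   T expected = Rs;        // w0: value to compare against.
//   T desired  = Rt;        // w1: value to store on a match.
//   T observed = *address;  // [x2]
//   if (observed == expected) *address = desired;
//   Rs = observed;          // w0 always receives the old memory value.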
4432 
4433 
4434 template <typename T>
4435 void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
4436   VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
4437   unsigned rs = instr->GetRs();
4438   unsigned rt = instr->GetRt();
4439   unsigned rn = instr->GetRn();
4440 
4441   VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
4442 
4443   unsigned element_size = sizeof(T);
4444   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4445 
4446   CheckIsValidUnalignedAtomicAccess(rn, address, element_size * 2);
4447 
4448   uint64_t address2 = address + element_size;
4449 
4450   bool is_acquire = instr->ExtractBit(22) == 1;
4451   bool is_release = instr->ExtractBit(15) == 1;
4452 
4453   T comparevalue_high = ReadRegister<T>(rs + 1);
4454   T comparevalue_low = ReadRegister<T>(rs);
4455   T newvalue_high = ReadRegister<T>(rt + 1);
4456   T newvalue_low = ReadRegister<T>(rt);
4457 
4458   // The architecture permits the data read to clear any exclusive monitors
4459   // associated with that location, even if the compare subsequently fails.
4460   local_monitor_.Clear();
4461 
4462   T data_low = MemRead<T>(address);
4463   T data_high = MemRead<T>(address2);
4464 
4465   if (is_acquire) {
4466     // Approximate load-acquire by issuing a full barrier after the load.
4467     __sync_synchronize();
4468   }
4469 
4470   bool same =
4471       (data_high == comparevalue_high) && (data_low == comparevalue_low);
4472   if (same) {
4473     if (is_release) {
4474       // Approximate store-release by issuing a full barrier before the store.
4475       __sync_synchronize();
4476     }
4477 
4478     MemWrite<T>(address, newvalue_low);
4479     MemWrite<T>(address2, newvalue_high);
4480   }
4481 
4482   WriteRegister<T>(rs + 1, data_high, NoRegLog);
4483   WriteRegister<T>(rs, data_low, NoRegLog);
4484 
4485   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4486   LogRead(rs, format, address);
4487   LogRead(rs + 1, format, address2);
4488 
4489   if (same) {
4490     LogWrite(rt, format, address);
4491     LogWrite(rt + 1, format, address2);
4492   }
4493 }
4494 
4495 bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
4496   // To simulate fault-tolerant loads, we need to know what host addresses we
4497   // can access without generating a real fault. One way to do that is to
4498   // attempt to `write()` the memory to a placeholder pipe[1]. This is more
4499   // portable and less intrusive than using (global) signal handlers.
4500   //
4501   // [1]: https://stackoverflow.com/questions/7134590
4502 
4503   size_t written = 0;
4504   bool can_read = true;
4505   // `write` will normally return after one invocation, but it is allowed to
4506   // handle only part of the operation, so wrap it in a loop.
4507   while (can_read && (written < size)) {
4508     ssize_t result = write(placeholder_pipe_fd_[1],
4509                            reinterpret_cast<void*>(address + written),
4510                            size - written);
4511     if (result > 0) {
4512       written += result;
4513     } else {
4514       switch (result) {
4515         case -EPERM:
4516         case -EFAULT:
4517           // The address range is not accessible.
4518           // `write` is supposed to return -EFAULT in this case, but in practice
4519           // it seems to return -EPERM, so we accept that too.
4520           can_read = false;
4521           break;
4522         case -EINTR:
4523           // The call was interrupted by a signal. Just try again.
4524           break;
4525         default:
4526           // Any other error is fatal.
4527           VIXL_ABORT();
4528       }
4529     }
4530   }
4531   // Drain the read side of the pipe. If we don't do this, we'll leak memory as
4532   // the placeholder data is buffered. As before, we expect to drain the whole
4533   // write in one invocation, but cannot guarantee that, so we wrap it in a
4534   // loop. This function is primarily intended to implement SVE fault-tolerant
4535   // loads, so the maximum Z register size is a good default buffer size.
4536   char buffer[kZRegMaxSizeInBytes];
4537   while (written > 0) {
4538     ssize_t result = read(placeholder_pipe_fd_[0],
4539                           reinterpret_cast<void*>(buffer),
4540                           sizeof(buffer));
4541     // `read` blocks, and returns 0 only at EOF. We should not hit EOF until
4542     // we've read everything that was written, so treat 0 as an error.
4543     if (result > 0) {
4544       VIXL_ASSERT(static_cast<size_t>(result) <= written);
4545       written -= result;
4546     } else {
4547       // For -EINTR, just try again. We can't handle any other error.
4548       VIXL_CHECK(result == -EINTR);
4549     }
4550   }
4551 
4552   return can_read;
4553 }
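
// Minimal sketch of the probing idea used above, independent of the simulator
// (assumes a POSIX environment; the descriptors here are hypothetical and
// unrelated to placeholder_pipe_fd_):
//
//   int fds[2];
//   VIXL_CHECK(pipe(fds) == 0);
//   char c = 0;
//   // A readable address: write() consumes one byte and returns 1.
//   bool ok = (write(fds[1], &c, 1) == 1);
//   // An unmapped address: write() fails inside the kernel instead of
//   // faulting the caller, so the probe reports failure rather than crashing.
//   bool bad = (write(fds[1], NULL, 1) <= 0);
//   close(fds[0]);
//   close(fds[1]);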
4554 
4555 void Simulator::PrintExclusiveAccessWarning() {
4556   if (print_exclusive_access_warning_) {
4557     fprintf(stderr,
4558             "%sWARNING:%s VIXL simulator support for "
4559             "load-/store-/clear-exclusive "
4560             "instructions is limited. Refer to the README for details.%s\n",
4561             clr_warning,
4562             clr_warning_message,
4563             clr_normal);
4564     print_exclusive_access_warning_ = false;
4565   }
4566 }
4567 
4568 void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
4569   LoadStoreExclusive op =
4570       static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
4571 
4572   switch (op) {
4573     case CAS_w:
4574     case CASA_w:
4575     case CASL_w:
4576     case CASAL_w:
4577       CompareAndSwapHelper<uint32_t>(instr);
4578       break;
4579     case CAS_x:
4580     case CASA_x:
4581     case CASL_x:
4582     case CASAL_x:
4583       CompareAndSwapHelper<uint64_t>(instr);
4584       break;
4585     case CASB:
4586     case CASAB:
4587     case CASLB:
4588     case CASALB:
4589       CompareAndSwapHelper<uint8_t>(instr);
4590       break;
4591     case CASH:
4592     case CASAH:
4593     case CASLH:
4594     case CASALH:
4595       CompareAndSwapHelper<uint16_t>(instr);
4596       break;
4597     case CASP_w:
4598     case CASPA_w:
4599     case CASPL_w:
4600     case CASPAL_w:
4601       CompareAndSwapPairHelper<uint32_t>(instr);
4602       break;
4603     case CASP_x:
4604     case CASPA_x:
4605     case CASPL_x:
4606     case CASPAL_x:
4607       CompareAndSwapPairHelper<uint64_t>(instr);
4608       break;
4609     default:
4610       PrintExclusiveAccessWarning();
4611 
4612       unsigned rs = instr->GetRs();
4613       unsigned rt = instr->GetRt();
4614       unsigned rt2 = instr->GetRt2();
4615       unsigned rn = instr->GetRn();
4616 
4617       bool is_exclusive = !instr->GetLdStXNotExclusive();
4618       bool is_acquire_release =
4619           !is_exclusive || instr->GetLdStXAcquireRelease();
4620       bool is_load = instr->GetLdStXLoad();
4621       bool is_pair = instr->GetLdStXPair();
4622 
4623       unsigned element_size = 1 << instr->GetLdStXSizeLog2();
4624       unsigned access_size = is_pair ? element_size * 2 : element_size;
4625       uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4626 
4627       CheckIsValidUnalignedAtomicAccess(rn, address, access_size);
4628 
4629       if (is_load) {
4630         if (is_exclusive) {
4631           local_monitor_.MarkExclusive(address, access_size);
4632         } else {
4633           // Any non-exclusive load can clear the local monitor as a side
4634           // effect. We don't need to do this, but it is useful to stress the
4635           // simulated code.
4636           local_monitor_.Clear();
4637         }
4638 
4639         // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
4640         // We will print a more detailed log.
4641         unsigned reg_size = 0;
4642         switch (op) {
4643           case LDXRB_w:
4644           case LDAXRB_w:
4645           case LDARB_w:
4646           case LDLARB:
4647             WriteWRegister(rt, MemRead<uint8_t>(address), NoRegLog);
4648             reg_size = kWRegSizeInBytes;
4649             break;
4650           case LDXRH_w:
4651           case LDAXRH_w:
4652           case LDARH_w:
4653           case LDLARH:
4654             WriteWRegister(rt, MemRead<uint16_t>(address), NoRegLog);
4655             reg_size = kWRegSizeInBytes;
4656             break;
4657           case LDXR_w:
4658           case LDAXR_w:
4659           case LDAR_w:
4660           case LDLAR_w:
4661             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4662             reg_size = kWRegSizeInBytes;
4663             break;
4664           case LDXR_x:
4665           case LDAXR_x:
4666           case LDAR_x:
4667           case LDLAR_x:
4668             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4669             reg_size = kXRegSizeInBytes;
4670             break;
4671           case LDXP_w:
4672           case LDAXP_w:
4673             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4674             WriteWRegister(rt2,
4675                            MemRead<uint32_t>(address + element_size),
4676                            NoRegLog);
4677             reg_size = kWRegSizeInBytes;
4678             break;
4679           case LDXP_x:
4680           case LDAXP_x:
4681             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4682             WriteXRegister(rt2,
4683                            MemRead<uint64_t>(address + element_size),
4684                            NoRegLog);
4685             reg_size = kXRegSizeInBytes;
4686             break;
4687           default:
4688             VIXL_UNREACHABLE();
4689         }
4690 
4691         if (is_acquire_release) {
4692           // Approximate load-acquire by issuing a full barrier after the load.
4693           __sync_synchronize();
4694         }
4695 
4696         PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
4697         LogExtendingRead(rt, format, element_size, address);
4698         if (is_pair) {
4699           LogExtendingRead(rt2, format, element_size, address + element_size);
4700         }
4701       } else {
4702         if (is_acquire_release) {
4703           // Approximate store-release by issuing a full barrier before the
4704           // store.
4705           __sync_synchronize();
4706         }
4707 
4708         bool do_store = true;
4709         if (is_exclusive) {
4710           do_store = local_monitor_.IsExclusive(address, access_size) &&
4711                      global_monitor_.IsExclusive(address, access_size);
4712           WriteWRegister(rs, do_store ? 0 : 1);
4713 
4714           //  - All exclusive stores explicitly clear the local monitor.
4715           local_monitor_.Clear();
4716         } else {
4717           //  - Any other store can clear the local monitor as a side effect.
4718           local_monitor_.MaybeClear();
4719         }
4720 
4721         if (do_store) {
4722           switch (op) {
4723             case STXRB_w:
4724             case STLXRB_w:
4725             case STLRB_w:
4726             case STLLRB:
4727               MemWrite<uint8_t>(address, ReadWRegister(rt));
4728               break;
4729             case STXRH_w:
4730             case STLXRH_w:
4731             case STLRH_w:
4732             case STLLRH:
4733               MemWrite<uint16_t>(address, ReadWRegister(rt));
4734               break;
4735             case STXR_w:
4736             case STLXR_w:
4737             case STLR_w:
4738             case STLLR_w:
4739               MemWrite<uint32_t>(address, ReadWRegister(rt));
4740               break;
4741             case STXR_x:
4742             case STLXR_x:
4743             case STLR_x:
4744             case STLLR_x:
4745               MemWrite<uint64_t>(address, ReadXRegister(rt));
4746               break;
4747             case STXP_w:
4748             case STLXP_w:
4749               MemWrite<uint32_t>(address, ReadWRegister(rt));
4750               MemWrite<uint32_t>(address + element_size, ReadWRegister(rt2));
4751               break;
4752             case STXP_x:
4753             case STLXP_x:
4754               MemWrite<uint64_t>(address, ReadXRegister(rt));
4755               MemWrite<uint64_t>(address + element_size, ReadXRegister(rt2));
4756               break;
4757             default:
4758               VIXL_UNREACHABLE();
4759           }
4760 
4761           PrintRegisterFormat format =
4762               GetPrintRegisterFormatForSize(element_size);
4763           LogWrite(rt, format, address);
4764           if (is_pair) {
4765             LogWrite(rt2, format, address + element_size);
4766           }
4767         }
4768       }
4769   }
4770 }
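
// For context (not part of the original sources): the monitor model above is
// what lets a conventional exclusive retry loop terminate in the simulator,
// e.g. an atomic increment of the word at x0:
//
//   retry:
//     ldxr w1, [x0]        // Load and mark [x0, x0+4) as exclusive.
//     add  w1, w1, #1
//     stxr w2, w1, [x0]    // w2 == 0 on success, 1 if the monitor was lost.
//     cbnz w2, retry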
4771 
4772 template <typename T>
4773 void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
4774   unsigned rs = instr->GetRs();
4775   unsigned rt = instr->GetRt();
4776   unsigned rn = instr->GetRn();
4777 
4778   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
4779   bool is_release = instr->ExtractBit(22) == 1;
4780 
4781   unsigned element_size = sizeof(T);
4782   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4783 
4784   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4785 
4786   T value = ReadRegister<T>(rs);
4787 
4788   T data = MemRead<T>(address);
4789 
4790   if (is_acquire) {
4791     // Approximate load-acquire by issuing a full barrier after the load.
4792     __sync_synchronize();
4793   }
4794 
4795   T result = 0;
4796   switch (instr->Mask(AtomicMemorySimpleOpMask)) {
4797     case LDADDOp:
4798       result = data + value;
4799       break;
4800     case LDCLROp:
4801       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4802       result = data & ~value;
4803       break;
4804     case LDEOROp:
4805       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4806       result = data ^ value;
4807       break;
4808     case LDSETOp:
4809       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4810       result = data | value;
4811       break;
4812 
4813     // Signed/Unsigned difference is done via the templated type T.
4814     case LDSMAXOp:
4815     case LDUMAXOp:
4816       result = (data > value) ? data : value;
4817       break;
4818     case LDSMINOp:
4819     case LDUMINOp:
4820       result = (data > value) ? value : data;
4821       break;
4822   }
4823 
4824   if (is_release) {
4825     // Approximate store-release by issuing a full barrier before the store.
4826     __sync_synchronize();
4827   }
4828 
4829   WriteRegister<T>(rt, data, NoRegLog);
4830 
4831   unsigned register_size = element_size;
4832   if (element_size < kXRegSizeInBytes) {
4833     register_size = kWRegSizeInBytes;
4834   }
4835   PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
4836   LogExtendingRead(rt, format, element_size, address);
4837 
4838   MemWrite<T>(address, result);
4839   format = GetPrintRegisterFormatForSize(element_size);
4840   LogWrite(rs, format, address);
4841 }
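
// Illustrative note (an interpretation, not from the original sources):
// ignoring barriers and traces, the helper above implements the common
// read-modify-write shape of the LD<op> instructions, e.g. for
// `ldadd w0, w1, [x2]`:
//
//   T old = *address;       // [x2]
//   *address = old + Rs;    // Rs is w0; the <op> varies per instruction.
//   Rt = old;               // w1 receives the original memory value.
//
// Signed vs. unsigned min/max fall out of instantiating T as a signed or
// unsigned integer type in VisitAtomicMemory below.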
4842 
4843 template <typename T>
4844 void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
4845   unsigned rs = instr->GetRs();
4846   unsigned rt = instr->GetRt();
4847   unsigned rn = instr->GetRn();
4848 
4849   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
4850   bool is_release = instr->ExtractBit(22) == 1;
4851 
4852   unsigned element_size = sizeof(T);
4853   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4854 
4855   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4856 
4857   T data = MemRead<T>(address);
4858   if (is_acquire) {
4859     // Approximate load-acquire by issuing a full barrier after the load.
4860     __sync_synchronize();
4861   }
4862 
4863   if (is_release) {
4864     // Approximate store-release by issuing a full barrier before the store.
4865     __sync_synchronize();
4866   }
4867   MemWrite<T>(address, ReadRegister<T>(rs));
4868 
4869   WriteRegister<T>(rt, data);
4870 
4871   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4872   LogRead(rt, format, address);
4873   LogWrite(rs, format, address);
4874 }
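
// Similarly (illustrative): SWP is a pure exchange. For `swp w0, w1, [x2]`
// the effect, ignoring barriers, is `Rt = *address; *address = Rs;`, so w1
// receives the old memory value and w0 is stored.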
4875 
4876 template <typename T>
4877 void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
4878   unsigned rt = instr->GetRt();
4879   unsigned rn = instr->GetRn();
4880 
4881   unsigned element_size = sizeof(T);
4882   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4883 
4884   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4885 
4886   WriteRegister<T>(rt, MemRead<T>(address));
4887 
4888   // Approximate load-acquire by issuing a full barrier after the load.
4889   __sync_synchronize();
4890 
4891   LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
4892 }
4893 
4894 #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
4895   V(LDADD)                                \
4896   V(LDCLR)                                \
4897   V(LDEOR)                                \
4898   V(LDSET)                                \
4899   V(LDUMAX)                               \
4900   V(LDUMIN)
4901 
4902 #define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
4903   V(LDSMAX)                              \
4904   V(LDSMIN)
4905 
4906 void Simulator::VisitAtomicMemory(const Instruction* instr) {
4907   switch (instr->Mask(AtomicMemoryMask)) {
4908 // clang-format off
4909 #define SIM_FUNC_B(A) \
4910     case A##B:        \
4911     case A##AB:       \
4912     case A##LB:       \
4913     case A##ALB:
4914 #define SIM_FUNC_H(A) \
4915     case A##H:        \
4916     case A##AH:       \
4917     case A##LH:       \
4918     case A##ALH:
4919 #define SIM_FUNC_w(A) \
4920     case A##_w:       \
4921     case A##A_w:      \
4922     case A##L_w:      \
4923     case A##AL_w:
4924 #define SIM_FUNC_x(A) \
4925     case A##_x:       \
4926     case A##A_x:      \
4927     case A##L_x:      \
4928     case A##AL_x:
4929 
4930     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
4931       AtomicMemorySimpleHelper<uint8_t>(instr);
4932       break;
4933     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
4934       AtomicMemorySimpleHelper<int8_t>(instr);
4935       break;
4936     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
4937       AtomicMemorySimpleHelper<uint16_t>(instr);
4938       break;
4939     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
4940       AtomicMemorySimpleHelper<int16_t>(instr);
4941       break;
4942     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
4943       AtomicMemorySimpleHelper<uint32_t>(instr);
4944       break;
4945     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
4946       AtomicMemorySimpleHelper<int32_t>(instr);
4947       break;
4948     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
4949       AtomicMemorySimpleHelper<uint64_t>(instr);
4950       break;
4951     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
4952       AtomicMemorySimpleHelper<int64_t>(instr);
4953       break;
4954     // clang-format on
4955 
4956     case SWPB:
4957     case SWPAB:
4958     case SWPLB:
4959     case SWPALB:
4960       AtomicMemorySwapHelper<uint8_t>(instr);
4961       break;
4962     case SWPH:
4963     case SWPAH:
4964     case SWPLH:
4965     case SWPALH:
4966       AtomicMemorySwapHelper<uint16_t>(instr);
4967       break;
4968     case SWP_w:
4969     case SWPA_w:
4970     case SWPL_w:
4971     case SWPAL_w:
4972       AtomicMemorySwapHelper<uint32_t>(instr);
4973       break;
4974     case SWP_x:
4975     case SWPA_x:
4976     case SWPL_x:
4977     case SWPAL_x:
4978       AtomicMemorySwapHelper<uint64_t>(instr);
4979       break;
4980     case LDAPRB:
4981       LoadAcquireRCpcHelper<uint8_t>(instr);
4982       break;
4983     case LDAPRH:
4984       LoadAcquireRCpcHelper<uint16_t>(instr);
4985       break;
4986     case LDAPR_w:
4987       LoadAcquireRCpcHelper<uint32_t>(instr);
4988       break;
4989     case LDAPR_x:
4990       LoadAcquireRCpcHelper<uint64_t>(instr);
4991       break;
4992   }
4993 }
4994 
4995 
4996 void Simulator::VisitLoadLiteral(const Instruction* instr) {
4997   unsigned rt = instr->GetRt();
4998   uint64_t address = instr->GetLiteralAddress<uint64_t>();
4999 
5000   // Verify that the calculated address is available to the host.
5001   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5002 
5003   switch (instr->Mask(LoadLiteralMask)) {
5004     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
5005     // print a more detailed log.
5006     case LDR_w_lit:
5007       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
5008       LogRead(rt, kPrintWReg, address);
5009       break;
5010     case LDR_x_lit:
5011       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
5012       LogRead(rt, kPrintXReg, address);
5013       break;
5014     case LDR_s_lit:
5015       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
5016       LogVRead(rt, kPrintSRegFP, address);
5017       break;
5018     case LDR_d_lit:
5019       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
5020       LogVRead(rt, kPrintDRegFP, address);
5021       break;
5022     case LDR_q_lit:
5023       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
5024       LogVRead(rt, kPrintReg1Q, address);
5025       break;
5026     case LDRSW_x_lit:
5027       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
5028       LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
5029       break;
5030 
5031     // Ignore PRFM hint instructions.
5032     case PRFM_lit:
5033       break;
5034 
5035     default:
5036       VIXL_UNREACHABLE();
5037   }
5038 
5039   local_monitor_.MaybeClear();
5040 }
5041 
5042 
5043 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
5044                                        int64_t offset,
5045                                        AddrMode addrmode) {
5046   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
5047 
5048   if ((addr_reg == 31) && ((address % 16) != 0)) {
5049     // When the base register is SP the stack pointer is required to be
5050     // quadword aligned prior to the address calculation and write-backs.
5051     // Misalignment will cause a stack alignment fault.
5052     VIXL_ALIGNMENT_EXCEPTION();
5053   }
5054 
5055   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
5056     VIXL_ASSERT(offset != 0);
5057     // Only pre-index should log the register update here. For post-index, the
5058     // update will be printed automatically by LogWrittenRegisters _after_ the
5059     // memory access itself is logged.
5060     RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
5061     WriteXRegister(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
5062   }
5063 
5064   if ((addrmode == Offset) || (addrmode == PreIndex)) {
5065     address += offset;
5066   }
5067 
5068   // Verify that the calculated address is available to the host.
5069   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5070 
5071   return static_cast<uintptr_t>(address);
5072 }
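
// For reference (illustrative, not from the original sources), the three
// addressing modes handled above correspond to, e.g.:
//
//   ldr x0, [x1, #8]     // Offset:    access x1 + 8, x1 unchanged.
//   ldr x0, [x1, #8]!    // PreIndex:  x1 += 8 first, then access x1.
//   ldr x0, [x1], #8     // PostIndex: access x1, then x1 += 8.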
5073 
5074 
5075 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
5076   MoveWideImmediateOp mov_op =
5077       static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
5078   int64_t new_xn_val = 0;
5079 
5080   bool is_64_bits = instr->GetSixtyFourBits() == 1;
5081   // Shift is limited for W operations.
5082   VIXL_ASSERT(is_64_bits || (instr->GetShiftMoveWide() < 2));
5083 
5084   // Get the shifted immediate.
5085   int64_t shift = instr->GetShiftMoveWide() * 16;
5086   int64_t shifted_imm16 = static_cast<int64_t>(instr->GetImmMoveWide())
5087                           << shift;
5088 
5089   // Compute the new value.
5090   switch (mov_op) {
5091     case MOVN_w:
5092     case MOVN_x: {
5093       new_xn_val = ~shifted_imm16;
5094       if (!is_64_bits) new_xn_val &= kWRegMask;
5095       break;
5096     }
5097     case MOVK_w:
5098     case MOVK_x: {
5099       unsigned reg_code = instr->GetRd();
5100       int64_t prev_xn_val =
5101           is_64_bits ? ReadXRegister(reg_code) : ReadWRegister(reg_code);
5102       new_xn_val = (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
5103       break;
5104     }
5105     case MOVZ_w:
5106     case MOVZ_x: {
5107       new_xn_val = shifted_imm16;
5108       break;
5109     }
5110     default:
5111       VIXL_UNREACHABLE();
5112   }
5113 
5114   // Update the destination register.
5115   WriteXRegister(instr->GetRd(), new_xn_val);
5116 }
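
// Worked example (illustrative): a 64-bit constant is typically materialised
// with one MOVZ followed by MOVKs, each patching one 16-bit slice. For
// 0x123456789abcdef0:
//
//   movz x0, #0xdef0             // x0 = 0x000000000000def0
//   movk x0, #0x9abc, lsl #16    // x0 = 0x000000009abcdef0
//   movk x0, #0x5678, lsl #32    // x0 = 0x000056789abcdef0
//   movk x0, #0x1234, lsl #48    // x0 = 0x123456789abcdef0
//
// which is exactly the (prev & ~(0xffff << shift)) | shifted_imm16 update in
// the MOVK case above.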
5117 
5118 
5119 void Simulator::VisitConditionalSelect(const Instruction* instr) {
5120   uint64_t new_val = ReadXRegister(instr->GetRn());
5121 
5122   if (ConditionFailed(static_cast<Condition>(instr->GetCondition()))) {
5123     new_val = ReadXRegister(instr->GetRm());
5124     switch (instr->Mask(ConditionalSelectMask)) {
5125       case CSEL_w:
5126       case CSEL_x:
5127         break;
5128       case CSINC_w:
5129       case CSINC_x:
5130         new_val++;
5131         break;
5132       case CSINV_w:
5133       case CSINV_x:
5134         new_val = ~new_val;
5135         break;
5136       case CSNEG_w:
5137       case CSNEG_x:
5138         new_val = -new_val;
5139         break;
5140       default:
5141         VIXL_UNIMPLEMENTED();
5142     }
5143   }
5144   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5145   WriteRegister(reg_size, instr->GetRd(), new_val);
5146 }
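
// Illustrative examples (not from the original sources) of the four selects
// handled above:
//
//   csel  x0, x1, x2, eq   // x0 = eq ? x1 : x2
//   csinc x0, x1, x2, eq   // x0 = eq ? x1 : x2 + 1   (cinc/cset are aliases)
//   csinv x0, x1, x2, eq   // x0 = eq ? x1 : ~x2      (cinv/csetm are aliases)
//   csneg x0, x1, x2, eq   // x0 = eq ? x1 : -x2      (cneg is an alias)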
5147 
5148 
5149 #define PAUTH_MODES_REGISTER_CONTEXT(V) \
5150   V(IA, kPACKeyIA, kInstructionPointer) \
5151   V(IB, kPACKeyIB, kInstructionPointer) \
5152   V(DA, kPACKeyDA, kDataPointer)        \
5153   V(DB, kPACKeyDB, kDataPointer)
5154 
5155 #define PAUTH_MODES_ZERO_CONTEXT(V)      \
5156   V(IZA, kPACKeyIA, kInstructionPointer) \
5157   V(IZB, kPACKeyIB, kInstructionPointer) \
5158   V(DZA, kPACKeyDA, kDataPointer)        \
5159   V(DZB, kPACKeyDB, kDataPointer)
5160 
5161 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
5162   unsigned dst = instr->GetRd();
5163   unsigned src = instr->GetRn();
5164 
5165   switch (instr->Mask(DataProcessing1SourceMask)) {
5166 #define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D)          \
5167   case PAC##SUFFIX: {                               \
5168     uint64_t mod = ReadXRegister(src);              \
5169     uint64_t ptr = ReadXRegister(dst);              \
5170     WriteXRegister(dst, AddPAC(ptr, mod, KEY, D));  \
5171     break;                                          \
5172   }                                                 \
5173   case AUT##SUFFIX: {                               \
5174     uint64_t mod = ReadXRegister(src);              \
5175     uint64_t ptr = ReadXRegister(dst);              \
5176     WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
5177     break;                                          \
5178   }
5179 
5180     PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
5181 #undef DEFINE_PAUTH_FUNCS
5182 
5183 #define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D)          \
5184   case PAC##SUFFIX: {                               \
5185     if (src != kZeroRegCode) {                      \
5186       VIXL_UNIMPLEMENTED();                         \
5187     }                                               \
5188     uint64_t ptr = ReadXRegister(dst);              \
5189     WriteXRegister(dst, AddPAC(ptr, 0x0, KEY, D));  \
5190     break;                                          \
5191   }                                                 \
5192   case AUT##SUFFIX: {                               \
5193     if (src != kZeroRegCode) {                      \
5194       VIXL_UNIMPLEMENTED();                         \
5195     }                                               \
5196     uint64_t ptr = ReadXRegister(dst);              \
5197     WriteXRegister(dst, AuthPAC(ptr, 0x0, KEY, D)); \
5198     break;                                          \
5199   }
5200 
5201     PAUTH_MODES_ZERO_CONTEXT(DEFINE_PAUTH_FUNCS)
5202 #undef DEFINE_PAUTH_FUNCS
5203 
5204     case XPACI:
5205       if (src != kZeroRegCode) {
5206         VIXL_UNIMPLEMENTED();
5207       }
5208       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
5209       break;
5210     case XPACD:
5211       if (src != kZeroRegCode) {
5212         VIXL_UNIMPLEMENTED();
5213       }
5214       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
5215       break;
5216     case RBIT_w:
5217       WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
5218       break;
5219     case RBIT_x:
5220       WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
5221       break;
5222     case REV16_w:
5223       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
5224       break;
5225     case REV16_x:
5226       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
5227       break;
5228     case REV_w:
5229       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
5230       break;
5231     case REV32_x:
5232       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
5233       break;
5234     case REV_x:
5235       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
5236       break;
5237     case CLZ_w:
5238       WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
5239       break;
5240     case CLZ_x:
5241       WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
5242       break;
5243     case CLS_w:
5244       WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
5245       break;
5246     case CLS_x:
5247       WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
5248       break;
5249     default:
5250       VIXL_UNIMPLEMENTED();
5251   }
5252 }
5253 
5254 
5255 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
5256   VIXL_ASSERT((n > 32) && (n <= 64));
5257   for (unsigned i = (n - 1); i >= 32; i--) {
5258     if (((data >> i) & 1) != 0) {
5259       uint64_t polysh32 = (uint64_t)poly << (i - 32);
5260       uint64_t mask = (UINT64_C(1) << i) - 1;
5261       data = ((data & mask) ^ polysh32);
5262     }
5263   }
5264   return data & 0xffffffff;
5265 }
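
// In other words (explanatory note, not from the original sources): treating
// the bits of 'data' as coefficients of a polynomial over GF(2), the loop
// above reduces it modulo the degree-32 polynomial x^32 + poly(x). Whenever
// the coefficient of x^i (i >= 32) is set, x^32 * x^(i-32) is replaced by
// poly(x) * x^(i-32), i.e. bit i is cleared and (poly << (i - 32)) is XORed
// in. The low 32 bits that remain are the CRC remainder.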
5266 
5267 
5268 template <typename T>
5269 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
5270   unsigned size = sizeof(val) * 8;  // Number of bits in type T.
5271   VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
5272   uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
5273   uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
5274   return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
5275 }
5276 
5277 
5278 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
5279   // Poly32Mod2 cannot take more than 32 bits of value input at a time, so
5280   // compute the CRC of each 32-bit half of 'val' sequentially.
5281   acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
5282   return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
5283 }
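
// Cross-check sketch (an assumption about equivalence, not part of the
// original sources): because the CRC32 instructions operate on bit-reversed
// data, the byte-sized update above should agree with the usual LSB-first
// software CRC-32 step, e.g. for CRC32B (poly 0x04c11db7, reflected form
// 0xedb88320):
//
//   uint32_t crc = acc ^ byte;
//   for (int i = 0; i < 8; i++) {
//     crc = (crc >> 1) ^ (0xedb88320u & (0u - (crc & 1)));
//   }
//   // 'crc' should now equal Crc32Checksum(acc, byte, CRC32_POLY).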
5284 
5285 
5286 void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
5287   Shift shift_op = NO_SHIFT;
5288   int64_t result = 0;
5289   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5290 
5291   switch (instr->Mask(DataProcessing2SourceMask)) {
5292     case SDIV_w: {
5293       int32_t rn = ReadWRegister(instr->GetRn());
5294       int32_t rm = ReadWRegister(instr->GetRm());
5295       if ((rn == kWMinInt) && (rm == -1)) {
5296         result = kWMinInt;
5297       } else if (rm == 0) {
5298         // Division by zero can be trapped, but not on A-class processors.
5299         result = 0;
5300       } else {
5301         result = rn / rm;
5302       }
5303       break;
5304     }
5305     case SDIV_x: {
5306       int64_t rn = ReadXRegister(instr->GetRn());
5307       int64_t rm = ReadXRegister(instr->GetRm());
5308       if ((rn == kXMinInt) && (rm == -1)) {
5309         result = kXMinInt;
5310       } else if (rm == 0) {
5311         // Division by zero can be trapped, but not on A-class processors.
5312         result = 0;
5313       } else {
5314         result = rn / rm;
5315       }
5316       break;
5317     }
5318     case UDIV_w: {
5319       uint32_t rn = static_cast<uint32_t>(ReadWRegister(instr->GetRn()));
5320       uint32_t rm = static_cast<uint32_t>(ReadWRegister(instr->GetRm()));
5321       if (rm == 0) {
5322         // Division by zero can be trapped, but not on A-class processors.
5323         result = 0;
5324       } else {
5325         result = rn / rm;
5326       }
5327       break;
5328     }
5329     case UDIV_x: {
5330       uint64_t rn = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5331       uint64_t rm = static_cast<uint64_t>(ReadXRegister(instr->GetRm()));
5332       if (rm == 0) {
5333         // Division by zero can be trapped, but not on A-class processors.
5334         result = 0;
5335       } else {
5336         result = rn / rm;
5337       }
5338       break;
5339     }
5340     case LSLV_w:
5341     case LSLV_x:
5342       shift_op = LSL;
5343       break;
5344     case LSRV_w:
5345     case LSRV_x:
5346       shift_op = LSR;
5347       break;
5348     case ASRV_w:
5349     case ASRV_x:
5350       shift_op = ASR;
5351       break;
5352     case RORV_w:
5353     case RORV_x:
5354       shift_op = ROR;
5355       break;
5356     case PACGA: {
5357       uint64_t dst = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5358       uint64_t src = static_cast<uint64_t>(
5359           ReadXRegister(instr->GetRm(), Reg31IsStackPointer));
5360       uint64_t code = ComputePAC(dst, src, kPACKeyGA);
5361       result = code & 0xffffffff00000000;
5362       break;
5363     }
5364     case CRC32B: {
5365       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5366       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5367       result = Crc32Checksum(acc, val, CRC32_POLY);
5368       break;
5369     }
5370     case CRC32H: {
5371       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5372       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5373       result = Crc32Checksum(acc, val, CRC32_POLY);
5374       break;
5375     }
5376     case CRC32W: {
5377       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5378       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5379       result = Crc32Checksum(acc, val, CRC32_POLY);
5380       break;
5381     }
5382     case CRC32X: {
5383       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5384       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5385       result = Crc32Checksum(acc, val, CRC32_POLY);
5386       reg_size = kWRegSize;
5387       break;
5388     }
5389     case CRC32CB: {
5390       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5391       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5392       result = Crc32Checksum(acc, val, CRC32C_POLY);
5393       break;
5394     }
5395     case CRC32CH: {
5396       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5397       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5398       result = Crc32Checksum(acc, val, CRC32C_POLY);
5399       break;
5400     }
5401     case CRC32CW: {
5402       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5403       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5404       result = Crc32Checksum(acc, val, CRC32C_POLY);
5405       break;
5406     }
5407     case CRC32CX: {
5408       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5409       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5410       result = Crc32Checksum(acc, val, CRC32C_POLY);
5411       reg_size = kWRegSize;
5412       break;
5413     }
5414     default:
5415       VIXL_UNIMPLEMENTED();
5416   }
5417 
5418   if (shift_op != NO_SHIFT) {
5419     // The shift distance is encoded in the least-significant five (W) or six
5420     // (X) bits of Rm.
5421     int mask = (instr->GetSixtyFourBits() == 1) ? 0x3f : 0x1f;
5422     unsigned shift = ReadWRegister(instr->GetRm()) & mask;
5423     result = ShiftOperand(reg_size,
5424                           ReadRegister(reg_size, instr->GetRn()),
5425                           shift_op,
5426                           shift);
5427   }
5428   WriteRegister(reg_size, instr->GetRd(), result);
5429 }
5430 
5431 
5432 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
5433   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5434 
5435   uint64_t result = 0;
5436   // Extract and sign- or zero-extend 32-bit arguments for widening operations.
5437   uint64_t rn_u32 = ReadRegister<uint32_t>(instr->GetRn());
5438   uint64_t rm_u32 = ReadRegister<uint32_t>(instr->GetRm());
5439   int64_t rn_s32 = ReadRegister<int32_t>(instr->GetRn());
5440   int64_t rm_s32 = ReadRegister<int32_t>(instr->GetRm());
5441   uint64_t rn_u64 = ReadXRegister(instr->GetRn());
5442   uint64_t rm_u64 = ReadXRegister(instr->GetRm());
5443   switch (instr->Mask(DataProcessing3SourceMask)) {
5444     case MADD_w:
5445     case MADD_x:
5446       result = ReadXRegister(instr->GetRa()) + (rn_u64 * rm_u64);
5447       break;
5448     case MSUB_w:
5449     case MSUB_x:
5450       result = ReadXRegister(instr->GetRa()) - (rn_u64 * rm_u64);
5451       break;
5452     case SMADDL_x:
5453       result = ReadXRegister(instr->GetRa()) +
5454                static_cast<uint64_t>(rn_s32 * rm_s32);
5455       break;
5456     case SMSUBL_x:
5457       result = ReadXRegister(instr->GetRa()) -
5458                static_cast<uint64_t>(rn_s32 * rm_s32);
5459       break;
5460     case UMADDL_x:
5461       result = ReadXRegister(instr->GetRa()) + (rn_u32 * rm_u32);
5462       break;
5463     case UMSUBL_x:
5464       result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32);
5465       break;
5466     case UMULH_x:
5467       result =
5468           internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()),
5469                                      ReadRegister<uint64_t>(instr->GetRm()));
5470       break;
5471     case SMULH_x:
5472       result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()),
5473                                           ReadXRegister(instr->GetRm()));
5474       break;
5475     default:
5476       VIXL_UNIMPLEMENTED();
5477   }
5478   WriteRegister(reg_size, instr->GetRd(), result);
5479 }
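
// Worked example (illustrative): `smaddl x0, w1, w2, x3` computes
// x0 = x3 + sxtw(w1) * sxtw(w2), which is the SMADDL_x case above using the
// sign-extended rn_s32/rm_s32 values; UMADDL uses the zero-extended
// rn_u32/rm_u32 values instead. MADD/MSUB ignore the extension entirely and
// multiply the full 64-bit register values.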
5480 
5481 
5482 void Simulator::VisitBitfield(const Instruction* instr) {
5483   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5484   int64_t reg_mask = instr->GetSixtyFourBits() ? kXRegMask : kWRegMask;
5485   int R = instr->GetImmR();
5486   int S = instr->GetImmS();
5487 
5488   if (instr->GetSixtyFourBits() != instr->GetBitN()) {
5489     VisitUnallocated(instr);
5490   }
5491 
5492   if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) {
5493     VisitUnallocated(instr);
5494   }
5495 
5496   int diff = S - R;
5497   uint64_t mask;
5498   if (diff >= 0) {
5499     mask = ~UINT64_C(0) >> (64 - (diff + 1));
5500     mask = (static_cast<unsigned>(diff) < (reg_size - 1)) ? mask : reg_mask;
5501   } else {
5502     mask = ~UINT64_C(0) >> (64 - (S + 1));
5503     mask = RotateRight(mask, R, reg_size);
5504     diff += reg_size;
5505   }
5506 
5507   // inzero indicates whether the extracted bitfield is inserted into the
5508   // existing destination register value or into a zeroed register.
5509   // If extend is true, the extracted bitfield is sign-extended.
5510   bool inzero = false;
5511   bool extend = false;
5512   switch (instr->Mask(BitfieldMask)) {
5513     case BFM_x:
5514     case BFM_w:
5515       break;
5516     case SBFM_x:
5517     case SBFM_w:
5518       inzero = true;
5519       extend = true;
5520       break;
5521     case UBFM_x:
5522     case UBFM_w:
5523       inzero = true;
5524       break;
5525     default:
5526       VIXL_UNIMPLEMENTED();
5527   }
5528 
5529   uint64_t dst = inzero ? 0 : ReadRegister(reg_size, instr->GetRd());
5530   uint64_t src = ReadRegister(reg_size, instr->GetRn());
5531   // Rotate source bitfield into place.
5532   uint64_t result = RotateRight(src, R, reg_size);
5533   // Determine the sign extension.
5534   uint64_t topbits = (diff == 63) ? 0 : (~UINT64_C(0) << (diff + 1));
5535   uint64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
5536 
5537   // Merge sign extension, dest/zero and bitfield.
5538   result = signbits | (result & mask) | (dst & ~mask);
5539 
5540   WriteRegister(reg_size, instr->GetRd(), result);
5541 }
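
// Worked example (illustrative, not from the original sources): the common
// aliases map onto the mask/rotate logic above. For `ubfx w0, w1, #4, #8`
// (UBFM with R = 4, S = 11): diff = 7, so the mask selects 8 bits, the source
// is rotated right by 4 to bring bits 11:4 down to 7:0, inzero forces the
// rest of the destination to zero, and no sign bits are merged. SBFX is the
// same with 'extend' set, and BFI/BFXIL keep the old destination bits via
// 'dst'.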
5542 
5543 
5544 void Simulator::VisitExtract(const Instruction* instr) {
5545   unsigned lsb = instr->GetImmS();
5546   unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize;
5547   uint64_t low_res =
5548       static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb;
5549   uint64_t high_res = (lsb == 0)
5550                           ? 0
5551                           : ReadRegister<uint64_t>(reg_size, instr->GetRn())
5552                                 << (reg_size - lsb);
5553   WriteRegister(reg_size, instr->GetRd(), low_res | high_res);
5554 }
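
// Illustrative example: `extr x0, x1, x2, #12` returns bits [75:12] of the
// 128-bit concatenation x1:x2, i.e. (x2 >> 12) | (x1 << 52) here. When
// Rn == Rm this degenerates to a rotate, which is how the ROR-immediate
// alias is implemented.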
5555 
5556 
5557 void Simulator::VisitFPImmediate(const Instruction* instr) {
5558   AssertSupportedFPCR();
5559   unsigned dest = instr->GetRd();
5560   switch (instr->Mask(FPImmediateMask)) {
5561     case FMOV_h_imm:
5562       WriteHRegister(dest, Float16ToRawbits(instr->GetImmFP16()));
5563       break;
5564     case FMOV_s_imm:
5565       WriteSRegister(dest, instr->GetImmFP32());
5566       break;
5567     case FMOV_d_imm:
5568       WriteDRegister(dest, instr->GetImmFP64());
5569       break;
5570     default:
5571       VIXL_UNREACHABLE();
5572   }
5573 }
5574 
5575 
5576 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
5577   AssertSupportedFPCR();
5578 
5579   unsigned dst = instr->GetRd();
5580   unsigned src = instr->GetRn();
5581 
5582   FPRounding round = ReadRMode();
5583 
5584   switch (instr->Mask(FPIntegerConvertMask)) {
5585     case FCVTAS_wh:
5586       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieAway));
5587       break;
5588     case FCVTAS_xh:
5589       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieAway));
5590       break;
5591     case FCVTAS_ws:
5592       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieAway));
5593       break;
5594     case FCVTAS_xs:
5595       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieAway));
5596       break;
5597     case FCVTAS_wd:
5598       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieAway));
5599       break;
5600     case FCVTAS_xd:
5601       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieAway));
5602       break;
5603     case FCVTAU_wh:
5604       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieAway));
5605       break;
5606     case FCVTAU_xh:
5607       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieAway));
5608       break;
5609     case FCVTAU_ws:
5610       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieAway));
5611       break;
5612     case FCVTAU_xs:
5613       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieAway));
5614       break;
5615     case FCVTAU_wd:
5616       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieAway));
5617       break;
5618     case FCVTAU_xd:
5619       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieAway));
5620       break;
5621     case FCVTMS_wh:
5622       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPNegativeInfinity));
5623       break;
5624     case FCVTMS_xh:
5625       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPNegativeInfinity));
5626       break;
5627     case FCVTMS_ws:
5628       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPNegativeInfinity));
5629       break;
5630     case FCVTMS_xs:
5631       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPNegativeInfinity));
5632       break;
5633     case FCVTMS_wd:
5634       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPNegativeInfinity));
5635       break;
5636     case FCVTMS_xd:
5637       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPNegativeInfinity));
5638       break;
5639     case FCVTMU_wh:
5640       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPNegativeInfinity));
5641       break;
5642     case FCVTMU_xh:
5643       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPNegativeInfinity));
5644       break;
5645     case FCVTMU_ws:
5646       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPNegativeInfinity));
5647       break;
5648     case FCVTMU_xs:
5649       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPNegativeInfinity));
5650       break;
5651     case FCVTMU_wd:
5652       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPNegativeInfinity));
5653       break;
5654     case FCVTMU_xd:
5655       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPNegativeInfinity));
5656       break;
5657     case FCVTPS_wh:
5658       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPPositiveInfinity));
5659       break;
5660     case FCVTPS_xh:
5661       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPPositiveInfinity));
5662       break;
5663     case FCVTPS_ws:
5664       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPPositiveInfinity));
5665       break;
5666     case FCVTPS_xs:
5667       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPPositiveInfinity));
5668       break;
5669     case FCVTPS_wd:
5670       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPPositiveInfinity));
5671       break;
5672     case FCVTPS_xd:
5673       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPPositiveInfinity));
5674       break;
5675     case FCVTPU_wh:
5676       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPPositiveInfinity));
5677       break;
5678     case FCVTPU_xh:
5679       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPPositiveInfinity));
5680       break;
5681     case FCVTPU_ws:
5682       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPPositiveInfinity));
5683       break;
5684     case FCVTPU_xs:
5685       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPPositiveInfinity));
5686       break;
5687     case FCVTPU_wd:
5688       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPPositiveInfinity));
5689       break;
5690     case FCVTPU_xd:
5691       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPPositiveInfinity));
5692       break;
5693     case FCVTNS_wh:
5694       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieEven));
5695       break;
5696     case FCVTNS_xh:
5697       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieEven));
5698       break;
5699     case FCVTNS_ws:
5700       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieEven));
5701       break;
5702     case FCVTNS_xs:
5703       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieEven));
5704       break;
5705     case FCVTNS_wd:
5706       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieEven));
5707       break;
5708     case FCVTNS_xd:
5709       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieEven));
5710       break;
5711     case FCVTNU_wh:
5712       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieEven));
5713       break;
5714     case FCVTNU_xh:
5715       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieEven));
5716       break;
5717     case FCVTNU_ws:
5718       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieEven));
5719       break;
5720     case FCVTNU_xs:
5721       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieEven));
5722       break;
5723     case FCVTNU_wd:
5724       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieEven));
5725       break;
5726     case FCVTNU_xd:
5727       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieEven));
5728       break;
5729     case FCVTZS_wh:
5730       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPZero));
5731       break;
5732     case FCVTZS_xh:
5733       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPZero));
5734       break;
5735     case FCVTZS_ws:
5736       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPZero));
5737       break;
5738     case FCVTZS_xs:
5739       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPZero));
5740       break;
5741     case FCVTZS_wd:
5742       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPZero));
5743       break;
5744     case FCVTZS_xd:
5745       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPZero));
5746       break;
5747     case FCVTZU_wh:
5748       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPZero));
5749       break;
5750     case FCVTZU_xh:
5751       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPZero));
5752       break;
5753     case FCVTZU_ws:
5754       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPZero));
5755       break;
5756     case FCVTZU_xs:
5757       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPZero));
5758       break;
5759     case FCVTZU_wd:
5760       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPZero));
5761       break;
5762     case FCVTZU_xd:
5763       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPZero));
5764       break;
5765     case FJCVTZS:
5766       WriteWRegister(dst, FPToFixedJS(ReadDRegister(src)));
5767       break;
5768     case FMOV_hw:
5769       WriteHRegister(dst, ReadWRegister(src) & kHRegMask);
5770       break;
5771     case FMOV_wh:
5772       WriteWRegister(dst, ReadHRegisterBits(src));
5773       break;
5774     case FMOV_xh:
5775       WriteXRegister(dst, ReadHRegisterBits(src));
5776       break;
5777     case FMOV_hx:
5778       WriteHRegister(dst, ReadXRegister(src) & kHRegMask);
5779       break;
5780     case FMOV_ws:
5781       WriteWRegister(dst, ReadSRegisterBits(src));
5782       break;
5783     case FMOV_xd:
5784       WriteXRegister(dst, ReadDRegisterBits(src));
5785       break;
5786     case FMOV_sw:
5787       WriteSRegisterBits(dst, ReadWRegister(src));
5788       break;
5789     case FMOV_dx:
5790       WriteDRegisterBits(dst, ReadXRegister(src));
5791       break;
5792     case FMOV_d1_x:
5793       LogicVRegister(ReadVRegister(dst))
5794           .SetUint(kFormatD, 1, ReadXRegister(src));
5795       break;
5796     case FMOV_x_d1:
5797       WriteXRegister(dst, LogicVRegister(ReadVRegister(src)).Uint(kFormatD, 1));
5798       break;
5799 
5800     // A 32-bit input can be handled in the same way as a 64-bit input, since
5801     // the sign- or zero-extension will not affect the conversion.
5802     case SCVTF_dx:
5803       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), 0, round));
5804       break;
5805     case SCVTF_dw:
5806       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), 0, round));
5807       break;
5808     case UCVTF_dx:
5809       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), 0, round));
5810       break;
5811     case UCVTF_dw: {
5812       WriteDRegister(dst,
5813                      UFixedToDouble(ReadRegister<uint32_t>(src), 0, round));
5814       break;
5815     }
5816     case SCVTF_sx:
5817       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), 0, round));
5818       break;
5819     case SCVTF_sw:
5820       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), 0, round));
5821       break;
5822     case UCVTF_sx:
5823       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), 0, round));
5824       break;
5825     case UCVTF_sw: {
5826       WriteSRegister(dst, UFixedToFloat(ReadRegister<uint32_t>(src), 0, round));
5827       break;
5828     }
5829     case SCVTF_hx:
5830       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), 0, round));
5831       break;
5832     case SCVTF_hw:
5833       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), 0, round));
5834       break;
5835     case UCVTF_hx:
5836       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), 0, round));
5837       break;
5838     case UCVTF_hw: {
5839       WriteHRegister(dst,
5840                      UFixedToFloat16(ReadRegister<uint32_t>(src), 0, round));
5841       break;
5842     }
5843 
5844     default:
5845       VIXL_UNREACHABLE();
5846   }
5847 }
5848 
5849 
5850 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
5851   AssertSupportedFPCR();
5852 
5853   unsigned dst = instr->GetRd();
5854   unsigned src = instr->GetRn();
5855   int fbits = 64 - instr->GetFPScale();
5856 
5857   FPRounding round = ReadRMode();
5858 
5859   switch (instr->Mask(FPFixedPointConvertMask)) {
5860     // A 32-bit input can be handled in the same way as a 64-bit input, since
5861     // the sign- or zero-extension will not affect the conversion.
5862     case SCVTF_dx_fixed:
5863       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), fbits, round));
5864       break;
5865     case SCVTF_dw_fixed:
5866       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), fbits, round));
5867       break;
5868     case UCVTF_dx_fixed:
5869       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), fbits, round));
5870       break;
5871     case UCVTF_dw_fixed: {
5872       WriteDRegister(dst,
5873                      UFixedToDouble(ReadRegister<uint32_t>(src), fbits, round));
5874       break;
5875     }
5876     case SCVTF_sx_fixed:
5877       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), fbits, round));
5878       break;
5879     case SCVTF_sw_fixed:
5880       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), fbits, round));
5881       break;
5882     case UCVTF_sx_fixed:
5883       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), fbits, round));
5884       break;
5885     case UCVTF_sw_fixed: {
5886       WriteSRegister(dst,
5887                      UFixedToFloat(ReadRegister<uint32_t>(src), fbits, round));
5888       break;
5889     }
5890     case SCVTF_hx_fixed:
5891       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), fbits, round));
5892       break;
5893     case SCVTF_hw_fixed:
5894       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), fbits, round));
5895       break;
5896     case UCVTF_hx_fixed:
5897       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), fbits, round));
5898       break;
5899     case UCVTF_hw_fixed: {
5900       WriteHRegister(dst,
5901                      UFixedToFloat16(ReadRegister<uint32_t>(src),
5902                                      fbits,
5903                                      round));
5904       break;
5905     }
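         // For the float-to-fixed conversions below, the input is scaled by
         // 2^fbits and then rounded towards zero, so the integer result carries
         // fbits fractional bits.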
5906     case FCVTZS_xd_fixed:
5907       WriteXRegister(dst,
5908                      FPToInt64(ReadDRegister(src) * std::pow(2.0, fbits),
5909                                FPZero));
5910       break;
5911     case FCVTZS_wd_fixed:
5912       WriteWRegister(dst,
5913                      FPToInt32(ReadDRegister(src) * std::pow(2.0, fbits),
5914                                FPZero));
5915       break;
5916     case FCVTZU_xd_fixed:
5917       WriteXRegister(dst,
5918                      FPToUInt64(ReadDRegister(src) * std::pow(2.0, fbits),
5919                                 FPZero));
5920       break;
5921     case FCVTZU_wd_fixed:
5922       WriteWRegister(dst,
5923                      FPToUInt32(ReadDRegister(src) * std::pow(2.0, fbits),
5924                                 FPZero));
5925       break;
5926     case FCVTZS_xs_fixed:
5927       WriteXRegister(dst,
5928                      FPToInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
5929                                FPZero));
5930       break;
5931     case FCVTZS_ws_fixed:
5932       WriteWRegister(dst,
5933                      FPToInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
5934                                FPZero));
5935       break;
5936     case FCVTZU_xs_fixed:
5937       WriteXRegister(dst,
5938                      FPToUInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
5939                                 FPZero));
5940       break;
5941     case FCVTZU_ws_fixed:
5942       WriteWRegister(dst,
5943                      FPToUInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
5944                                 FPZero));
5945       break;
5946     case FCVTZS_xh_fixed: {
5947       double output =
5948           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5949       WriteXRegister(dst, FPToInt64(output, FPZero));
5950       break;
5951     }
5952     case FCVTZS_wh_fixed: {
5953       double output =
5954           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5955       WriteWRegister(dst, FPToInt32(output, FPZero));
5956       break;
5957     }
5958     case FCVTZU_xh_fixed: {
5959       double output =
5960           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5961       WriteXRegister(dst, FPToUInt64(output, FPZero));
5962       break;
5963     }
5964     case FCVTZU_wh_fixed: {
5965       double output =
5966           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5967       WriteWRegister(dst, FPToUInt32(output, FPZero));
5968       break;
5969     }
5970     default:
5971       VIXL_UNREACHABLE();
5972   }
5973 }
5974 
5975 
5976 void Simulator::VisitFPCompare(const Instruction* instr) {
5977   AssertSupportedFPCR();
5978 
5979   FPTrapFlags trap = DisableTrap;
5980   switch (instr->Mask(FPCompareMask)) {
5981     case FCMPE_h:
5982       trap = EnableTrap;
5983       VIXL_FALLTHROUGH();
5984     case FCMP_h:
5985       FPCompare(ReadHRegister(instr->GetRn()),
5986                 ReadHRegister(instr->GetRm()),
5987                 trap);
5988       break;
5989     case FCMPE_s:
5990       trap = EnableTrap;
5991       VIXL_FALLTHROUGH();
5992     case FCMP_s:
5993       FPCompare(ReadSRegister(instr->GetRn()),
5994                 ReadSRegister(instr->GetRm()),
5995                 trap);
5996       break;
5997     case FCMPE_d:
5998       trap = EnableTrap;
5999       VIXL_FALLTHROUGH();
6000     case FCMP_d:
6001       FPCompare(ReadDRegister(instr->GetRn()),
6002                 ReadDRegister(instr->GetRm()),
6003                 trap);
6004       break;
6005     case FCMPE_h_zero:
6006       trap = EnableTrap;
6007       VIXL_FALLTHROUGH();
6008     case FCMP_h_zero:
6009       FPCompare(ReadHRegister(instr->GetRn()), SimFloat16(0.0), trap);
6010       break;
6011     case FCMPE_s_zero:
6012       trap = EnableTrap;
6013       VIXL_FALLTHROUGH();
6014     case FCMP_s_zero:
6015       FPCompare(ReadSRegister(instr->GetRn()), 0.0f, trap);
6016       break;
6017     case FCMPE_d_zero:
6018       trap = EnableTrap;
6019       VIXL_FALLTHROUGH();
6020     case FCMP_d_zero:
6021       FPCompare(ReadDRegister(instr->GetRn()), 0.0, trap);
6022       break;
6023     default:
6024       VIXL_UNIMPLEMENTED();
6025   }
6026 }
6027 
6028 
6029 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
6030   AssertSupportedFPCR();
6031 
6032   FPTrapFlags trap = DisableTrap;
6033   switch (instr->Mask(FPConditionalCompareMask)) {
6034     case FCCMPE_h:
6035       trap = EnableTrap;
6036       VIXL_FALLTHROUGH();
6037     case FCCMP_h:
6038       if (ConditionPassed(instr->GetCondition())) {
6039         FPCompare(ReadHRegister(instr->GetRn()),
6040                   ReadHRegister(instr->GetRm()),
6041                   trap);
6042       } else {
6043         ReadNzcv().SetFlags(instr->GetNzcv());
6044         LogSystemRegister(NZCV);
6045       }
6046       break;
6047     case FCCMPE_s:
6048       trap = EnableTrap;
6049       VIXL_FALLTHROUGH();
6050     case FCCMP_s:
6051       if (ConditionPassed(instr->GetCondition())) {
6052         FPCompare(ReadSRegister(instr->GetRn()),
6053                   ReadSRegister(instr->GetRm()),
6054                   trap);
6055       } else {
6056         ReadNzcv().SetFlags(instr->GetNzcv());
6057         LogSystemRegister(NZCV);
6058       }
6059       break;
6060     case FCCMPE_d:
6061       trap = EnableTrap;
6062       VIXL_FALLTHROUGH();
6063     case FCCMP_d:
6064       if (ConditionPassed(instr->GetCondition())) {
6065         FPCompare(ReadDRegister(instr->GetRn()),
6066                   ReadDRegister(instr->GetRm()),
6067                   trap);
6068       } else {
6069         ReadNzcv().SetFlags(instr->GetNzcv());
6070         LogSystemRegister(NZCV);
6071       }
6072       break;
6073     default:
6074       VIXL_UNIMPLEMENTED();
6075   }
6076 }
6077 
6078 
6079 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
6080   AssertSupportedFPCR();
6081 
6082   Instr selected;
6083   if (ConditionPassed(instr->GetCondition())) {
6084     selected = instr->GetRn();
6085   } else {
6086     selected = instr->GetRm();
6087   }
6088 
6089   switch (instr->Mask(FPConditionalSelectMask)) {
6090     case FCSEL_h:
6091       WriteHRegister(instr->GetRd(), ReadHRegister(selected));
6092       break;
6093     case FCSEL_s:
6094       WriteSRegister(instr->GetRd(), ReadSRegister(selected));
6095       break;
6096     case FCSEL_d:
6097       WriteDRegister(instr->GetRd(), ReadDRegister(selected));
6098       break;
6099     default:
6100       VIXL_UNIMPLEMENTED();
6101   }
6102 }
6103 
6104 
6105 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
6106   AssertSupportedFPCR();
6107 
6108   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6109   VectorFormat vform;
6110   switch (instr->Mask(FPTypeMask)) {
6111     default:
6112       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6113     case FP64:
6114       vform = kFormatD;
6115       break;
6116     case FP32:
6117       vform = kFormatS;
6118       break;
6119     case FP16:
6120       vform = kFormatH;
6121       break;
6122   }
6123 
6124   SimVRegister& rd = ReadVRegister(instr->GetRd());
6125   SimVRegister& rn = ReadVRegister(instr->GetRn());
6126   bool inexact_exception = false;
6127   FrintMode frint_mode = kFrintToInteger;
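       // kFrintToInt32 and kFrintToInt64 (used by the FRINT32* and FRINT64*
       // cases below) additionally limit the rounded result to the range of a
       // 32- or 64-bit signed integer.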
6128 
6129   unsigned fd = instr->GetRd();
6130   unsigned fn = instr->GetRn();
6131 
6132   switch (instr->Mask(FPDataProcessing1SourceMask)) {
6133     case FMOV_h:
6134       WriteHRegister(fd, ReadHRegister(fn));
6135       return;
6136     case FMOV_s:
6137       WriteSRegister(fd, ReadSRegister(fn));
6138       return;
6139     case FMOV_d:
6140       WriteDRegister(fd, ReadDRegister(fn));
6141       return;
6142     case FABS_h:
6143     case FABS_s:
6144     case FABS_d:
6145       fabs_(vform, ReadVRegister(fd), ReadVRegister(fn));
6146       // Explicitly log the register update whilst we have type information.
6147       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6148       return;
6149     case FNEG_h:
6150     case FNEG_s:
6151     case FNEG_d:
6152       fneg(vform, ReadVRegister(fd), ReadVRegister(fn));
6153       // Explicitly log the register update whilst we have type information.
6154       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6155       return;
6156     case FCVT_ds:
6157       WriteDRegister(fd, FPToDouble(ReadSRegister(fn), ReadDN()));
6158       return;
6159     case FCVT_sd:
6160       WriteSRegister(fd, FPToFloat(ReadDRegister(fn), FPTieEven, ReadDN()));
6161       return;
6162     case FCVT_hs:
6163       WriteHRegister(fd,
6164                      Float16ToRawbits(
6165                          FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN())));
6166       return;
6167     case FCVT_sh:
6168       WriteSRegister(fd, FPToFloat(ReadHRegister(fn), ReadDN()));
6169       return;
6170     case FCVT_dh:
6171       WriteDRegister(fd, FPToDouble(ReadHRegister(fn), ReadDN()));
6172       return;
6173     case FCVT_hd:
6174       WriteHRegister(fd,
6175                      Float16ToRawbits(
6176                          FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN())));
6177       return;
6178     case FSQRT_h:
6179     case FSQRT_s:
6180     case FSQRT_d:
6181       fsqrt(vform, rd, rn);
6182       // Explicitly log the register update whilst we have type information.
6183       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6184       return;
6185     case FRINT32X_s:
6186     case FRINT32X_d:
6187       inexact_exception = true;
6188       frint_mode = kFrintToInt32;
6189       break;  // Use FPCR rounding mode.
6190     case FRINT64X_s:
6191     case FRINT64X_d:
6192       inexact_exception = true;
6193       frint_mode = kFrintToInt64;
6194       break;  // Use FPCR rounding mode.
6195     case FRINT32Z_s:
6196     case FRINT32Z_d:
6197       inexact_exception = true;
6198       frint_mode = kFrintToInt32;
6199       fpcr_rounding = FPZero;
6200       break;
6201     case FRINT64Z_s:
6202     case FRINT64Z_d:
6203       inexact_exception = true;
6204       frint_mode = kFrintToInt64;
6205       fpcr_rounding = FPZero;
6206       break;
6207     case FRINTI_h:
6208     case FRINTI_s:
6209     case FRINTI_d:
6210       break;  // Use FPCR rounding mode.
6211     case FRINTX_h:
6212     case FRINTX_s:
6213     case FRINTX_d:
6214       inexact_exception = true;
6215       break;
6216     case FRINTA_h:
6217     case FRINTA_s:
6218     case FRINTA_d:
6219       fpcr_rounding = FPTieAway;
6220       break;
6221     case FRINTM_h:
6222     case FRINTM_s:
6223     case FRINTM_d:
6224       fpcr_rounding = FPNegativeInfinity;
6225       break;
6226     case FRINTN_h:
6227     case FRINTN_s:
6228     case FRINTN_d:
6229       fpcr_rounding = FPTieEven;
6230       break;
6231     case FRINTP_h:
6232     case FRINTP_s:
6233     case FRINTP_d:
6234       fpcr_rounding = FPPositiveInfinity;
6235       break;
6236     case FRINTZ_h:
6237     case FRINTZ_s:
6238     case FRINTZ_d:
6239       fpcr_rounding = FPZero;
6240       break;
6241     default:
6242       VIXL_UNIMPLEMENTED();
6243   }
6244 
6245   // Only FRINT* instructions fall through the switch above.
6246   frint(vform, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
6247   // Explicitly log the register update whilst we have type information.
6248   LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6249 }
6250 
6251 
6252 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
6253   AssertSupportedFPCR();
6254 
6255   VectorFormat vform;
6256   switch (instr->Mask(FPTypeMask)) {
6257     default:
6258       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6259     case FP64:
6260       vform = kFormatD;
6261       break;
6262     case FP32:
6263       vform = kFormatS;
6264       break;
6265     case FP16:
6266       vform = kFormatH;
6267       break;
6268   }
6269   SimVRegister& rd = ReadVRegister(instr->GetRd());
6270   SimVRegister& rn = ReadVRegister(instr->GetRn());
6271   SimVRegister& rm = ReadVRegister(instr->GetRm());
6272 
6273   switch (instr->Mask(FPDataProcessing2SourceMask)) {
6274     case FADD_h:
6275     case FADD_s:
6276     case FADD_d:
6277       fadd(vform, rd, rn, rm);
6278       break;
6279     case FSUB_h:
6280     case FSUB_s:
6281     case FSUB_d:
6282       fsub(vform, rd, rn, rm);
6283       break;
6284     case FMUL_h:
6285     case FMUL_s:
6286     case FMUL_d:
6287       fmul(vform, rd, rn, rm);
6288       break;
6289     case FNMUL_h:
6290     case FNMUL_s:
6291     case FNMUL_d:
6292       fnmul(vform, rd, rn, rm);
6293       break;
6294     case FDIV_h:
6295     case FDIV_s:
6296     case FDIV_d:
6297       fdiv(vform, rd, rn, rm);
6298       break;
6299     case FMAX_h:
6300     case FMAX_s:
6301     case FMAX_d:
6302       fmax(vform, rd, rn, rm);
6303       break;
6304     case FMIN_h:
6305     case FMIN_s:
6306     case FMIN_d:
6307       fmin(vform, rd, rn, rm);
6308       break;
6309     case FMAXNM_h:
6310     case FMAXNM_s:
6311     case FMAXNM_d:
6312       fmaxnm(vform, rd, rn, rm);
6313       break;
6314     case FMINNM_h:
6315     case FMINNM_s:
6316     case FMINNM_d:
6317       fminnm(vform, rd, rn, rm);
6318       break;
6319     default:
6320       VIXL_UNREACHABLE();
6321   }
6322   // Explicitly log the register update whilst we have type information.
6323   LogVRegister(instr->GetRd(), GetPrintRegisterFormatFP(vform));
6324 }
6325 
6326 
6327 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
6328   AssertSupportedFPCR();
6329 
6330   unsigned fd = instr->GetRd();
6331   unsigned fn = instr->GetRn();
6332   unsigned fm = instr->GetRm();
6333   unsigned fa = instr->GetRa();
6334 
6335   switch (instr->Mask(FPDataProcessing3SourceMask)) {
6336     // fd = fa +/- (fn * fm)
6337     case FMADD_h:
6338       WriteHRegister(fd,
6339                      FPMulAdd(ReadHRegister(fa),
6340                               ReadHRegister(fn),
6341                               ReadHRegister(fm)));
6342       break;
6343     case FMSUB_h:
6344       WriteHRegister(fd,
6345                      FPMulAdd(ReadHRegister(fa),
6346                               -ReadHRegister(fn),
6347                               ReadHRegister(fm)));
6348       break;
6349     case FMADD_s:
6350       WriteSRegister(fd,
6351                      FPMulAdd(ReadSRegister(fa),
6352                               ReadSRegister(fn),
6353                               ReadSRegister(fm)));
6354       break;
6355     case FMSUB_s:
6356       WriteSRegister(fd,
6357                      FPMulAdd(ReadSRegister(fa),
6358                               -ReadSRegister(fn),
6359                               ReadSRegister(fm)));
6360       break;
6361     case FMADD_d:
6362       WriteDRegister(fd,
6363                      FPMulAdd(ReadDRegister(fa),
6364                               ReadDRegister(fn),
6365                               ReadDRegister(fm)));
6366       break;
6367     case FMSUB_d:
6368       WriteDRegister(fd,
6369                      FPMulAdd(ReadDRegister(fa),
6370                               -ReadDRegister(fn),
6371                               ReadDRegister(fm)));
6372       break;
6373     // Negated variants of the above: fd = -fa -/+ (fn * fm).
6374     case FNMADD_h:
6375       WriteHRegister(fd,
6376                      FPMulAdd(-ReadHRegister(fa),
6377                               -ReadHRegister(fn),
6378                               ReadHRegister(fm)));
6379       break;
6380     case FNMSUB_h:
6381       WriteHRegister(fd,
6382                      FPMulAdd(-ReadHRegister(fa),
6383                               ReadHRegister(fn),
6384                               ReadHRegister(fm)));
6385       break;
6386     case FNMADD_s:
6387       WriteSRegister(fd,
6388                      FPMulAdd(-ReadSRegister(fa),
6389                               -ReadSRegister(fn),
6390                               ReadSRegister(fm)));
6391       break;
6392     case FNMSUB_s:
6393       WriteSRegister(fd,
6394                      FPMulAdd(-ReadSRegister(fa),
6395                               ReadSRegister(fn),
6396                               ReadSRegister(fm)));
6397       break;
6398     case FNMADD_d:
6399       WriteDRegister(fd,
6400                      FPMulAdd(-ReadDRegister(fa),
6401                               -ReadDRegister(fn),
6402                               ReadDRegister(fm)));
6403       break;
6404     case FNMSUB_d:
6405       WriteDRegister(fd,
6406                      FPMulAdd(-ReadDRegister(fa),
6407                               ReadDRegister(fn),
6408                               ReadDRegister(fm)));
6409       break;
6410     default:
6411       VIXL_UNIMPLEMENTED();
6412   }
6413 }
6414 
6415 
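     // Handle NaN propagation for a two-operand FP instruction. Returns true if
     // a NaN result has been written to the destination, which happens when at
     // least one input is a NaN.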
6416 bool Simulator::FPProcessNaNs(const Instruction* instr) {
6417   unsigned fd = instr->GetRd();
6418   unsigned fn = instr->GetRn();
6419   unsigned fm = instr->GetRm();
6420   bool done = false;
6421 
6422   if (instr->Mask(FP64) == FP64) {
6423     double result = FPProcessNaNs(ReadDRegister(fn), ReadDRegister(fm));
6424     if (IsNaN(result)) {
6425       WriteDRegister(fd, result);
6426       done = true;
6427     }
6428   } else if (instr->Mask(FP32) == FP32) {
6429     float result = FPProcessNaNs(ReadSRegister(fn), ReadSRegister(fm));
6430     if (IsNaN(result)) {
6431       WriteSRegister(fd, result);
6432       done = true;
6433     }
6434   } else {
6435     VIXL_ASSERT(instr->Mask(FP16) == FP16);
6436     VIXL_UNIMPLEMENTED();
6437   }
6438 
6439   return done;
6440 }
6441 
6442 
6443 void Simulator::SysOp_W(int op, int64_t val) {
6444   switch (op) {
6445     case IVAU:
6446     case CVAC:
6447     case CVAU:
6448     case CVAP:
6449     case CVADP:
6450     case CIVAC: {
6451       // Perform a placeholder memory access to ensure that we have read access
6452       // to the specified address.
6453       volatile uint8_t y = MemRead<uint8_t>(val);
6454       USE(y);
6455       // TODO: Implement "case ZVA:".
6456       break;
6457     }
6458     default:
6459       VIXL_UNIMPLEMENTED();
6460   }
6461 }
6462 
6463 
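     // Each mode below supplies the destination/source register, the PAC
     // modifier value, and the key used by the corresponding PACI<mode> and
     // AUTI<mode> cases in VisitSystem.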
6464 // clang-format off
6465 #define PAUTH_SYSTEM_MODES(V)                                     \
6466   V(A1716, 17, ReadXRegister(16),                      kPACKeyIA) \
6467   V(B1716, 17, ReadXRegister(16),                      kPACKeyIB) \
6468   V(AZ,    30, 0x00000000,                             kPACKeyIA) \
6469   V(BZ,    30, 0x00000000,                             kPACKeyIB) \
6470   V(ASP,   30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIA) \
6471   V(BSP,   30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIB)
6472 // clang-format on
6473 
6474 
6475 void Simulator::VisitSystem(const Instruction* instr) {
6476   // Some system instructions hijack their Op and Cp fields to represent a
6477   // range of immediates instead of indicating a different instruction. This
6478   // makes the decoding tricky.
6479   if (instr->GetInstructionBits() == XPACLRI) {
6480     WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
6481   } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
6482     switch (instr->Mask(SystemPStateMask)) {
6483       case CFINV:
6484         ReadNzcv().SetC(!ReadC());
6485         break;
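           // AXFLAG converts the FP comparison flags to an alternative
           // ("external") format; XAFLAG below performs the inverse conversion.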
6486       case AXFLAG:
6487         ReadNzcv().SetN(0);
6488         ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
6489         ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
6490         ReadNzcv().SetV(0);
6491         break;
6492       case XAFLAG: {
6493         // Can't set the flags in place due to the logical dependencies.
6494         uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
6495         uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
6496         uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
6497         uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
6498         ReadNzcv().SetN(n);
6499         ReadNzcv().SetZ(z);
6500         ReadNzcv().SetC(c);
6501         ReadNzcv().SetV(v);
6502         break;
6503       }
6504     }
6505   } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) {
6506     // Check that BType allows PACI[AB]SP instructions.
6507     if (PcIsInGuardedPage()) {
6508       Instr i = instr->Mask(SystemPAuthMask);
6509       if ((i == PACIASP) || (i == PACIBSP)) {
6510         switch (ReadBType()) {
6511           case BranchFromGuardedNotToIP:
6512           // TODO: This case depends on the value of SCTLR_EL1.BT0, which we
6513           // assume here to be zero. This allows execution of PACI[AB]SP when
6514           // BTYPE is BranchFromGuardedNotToIP (0b11).
6515           case DefaultBType:
6516           case BranchFromUnguardedOrToIP:
6517           case BranchAndLink:
6518             break;
6519         }
6520       }
6521     }
6522 
6523     switch (instr->Mask(SystemPAuthMask)) {
6524 #define DEFINE_PAUTH_FUNCS(SUFFIX, DST, MOD, KEY)                              \
6525   case PACI##SUFFIX:                                                           \
6526     WriteXRegister(DST,                                                        \
6527                    AddPAC(ReadXRegister(DST), MOD, KEY, kInstructionPointer)); \
6528     break;                                                                     \
6529   case AUTI##SUFFIX:                                                           \
6530     WriteXRegister(DST,                                                        \
6531                    AuthPAC(ReadXRegister(DST),                                 \
6532                            MOD,                                                \
6533                            KEY,                                                \
6534                            kInstructionPointer));                              \
6535     break;
6536 
6537       PAUTH_SYSTEM_MODES(DEFINE_PAUTH_FUNCS)
6538 #undef DEFINE_PAUTH_FUNCS
6539     }
6540   } else if (instr->Mask(SystemExclusiveMonitorFMask) ==
6541              SystemExclusiveMonitorFixed) {
6542     VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
6543     switch (instr->Mask(SystemExclusiveMonitorMask)) {
6544       case CLREX: {
6545         PrintExclusiveAccessWarning();
6546         ClearLocalMonitor();
6547         break;
6548       }
6549     }
6550   } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
6551     switch (instr->Mask(SystemSysRegMask)) {
6552       case MRS: {
6553         switch (instr->GetImmSystemRegister()) {
6554           case NZCV:
6555             WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
6556             break;
6557           case FPCR:
6558             WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
6559             break;
6560           case RNDR:
6561           case RNDRRS: {
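                 // jrand48() yields 32 random bits per call, so two calls are
                 // combined to form a 64-bit random value.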
6562             uint64_t high = jrand48(rand_state_);
6563             uint64_t low = jrand48(rand_state_);
6564             uint64_t rand_num = (high << 32) | (low & 0xffffffff);
6565             WriteXRegister(instr->GetRt(), rand_num);
6566             // Simulate successful random number generation.
6567             // TODO: Occasionally report failure, since a random number
6568             // might not be available within a reasonable period of time.
6569             ReadNzcv().SetRawValue(NoFlag);
6570             LogSystemRegister(NZCV);
6571             break;
6572           }
6573           default:
6574             VIXL_UNIMPLEMENTED();
6575         }
6576         break;
6577       }
6578       case MSR: {
6579         switch (instr->GetImmSystemRegister()) {
6580           case NZCV:
6581             ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
6582             LogSystemRegister(NZCV);
6583             break;
6584           case FPCR:
6585             ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
6586             LogSystemRegister(FPCR);
6587             break;
6588           default:
6589             VIXL_UNIMPLEMENTED();
6590         }
6591         break;
6592       }
6593     }
6594   } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
6595     VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
6596     switch (instr->GetImmHint()) {
6597       case NOP:
6598       case ESB:
6599       case CSDB:
6600       case BTI_jc:
6601         break;
6602       case BTI:
6603         if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
6604           VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
6605         }
6606         break;
6607       case BTI_c:
6608         if (PcIsInGuardedPage() && (ReadBType() == BranchFromGuardedNotToIP)) {
6609           VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
6610         }
6611         break;
6612       case BTI_j:
6613         if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
6614           VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
6615         }
6616         break;
6617       default:
6618         VIXL_UNIMPLEMENTED();
6619     }
6620   } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
6621     __sync_synchronize();
6622   } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
6623     switch (instr->Mask(SystemSysMask)) {
6624       case SYS:
6625         SysOp_W(instr->GetSysOp(), ReadXRegister(instr->GetRt()));
6626         break;
6627       default:
6628         VIXL_UNIMPLEMENTED();
6629     }
6630   } else {
6631     VIXL_UNIMPLEMENTED();
6632   }
6633 }
6634 
6635 
6636 void Simulator::VisitException(const Instruction* instr) {
6637   switch (instr->Mask(ExceptionMask)) {
6638     case HLT:
6639       switch (instr->GetImmException()) {
6640         case kUnreachableOpcode:
6641           DoUnreachable(instr);
6642           return;
6643         case kTraceOpcode:
6644           DoTrace(instr);
6645           return;
6646         case kLogOpcode:
6647           DoLog(instr);
6648           return;
6649         case kPrintfOpcode:
6650           DoPrintf(instr);
6651           return;
6652         case kRuntimeCallOpcode:
6653           DoRuntimeCall(instr);
6654           return;
6655         case kSetCPUFeaturesOpcode:
6656         case kEnableCPUFeaturesOpcode:
6657         case kDisableCPUFeaturesOpcode:
6658           DoConfigureCPUFeatures(instr);
6659           return;
6660         case kSaveCPUFeaturesOpcode:
6661           DoSaveCPUFeatures(instr);
6662           return;
6663         case kRestoreCPUFeaturesOpcode:
6664           DoRestoreCPUFeatures(instr);
6665           return;
6666         default:
6667           HostBreakpoint();
6668           return;
6669       }
6670     case BRK:
6671       HostBreakpoint();
6672       return;
6673     default:
6674       VIXL_UNIMPLEMENTED();
6675   }
6676 }
6677 
6678 
6679 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
6680   VisitUnimplemented(instr);
6681 }
6682 
6683 
6684 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
6685   VisitUnimplemented(instr);
6686 }
6687 
6688 
6689 void Simulator::VisitCryptoAES(const Instruction* instr) {
6690   VisitUnimplemented(instr);
6691 }
6692 
6693 
6694 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
6695   NEONFormatDecoder nfd(instr);
6696   VectorFormat vf = nfd.GetVectorFormat();
6697 
6698   static const NEONFormatMap map_lp =
6699       {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}};
6700   VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
6701 
6702   static const NEONFormatMap map_fcvtl = {{22}, {NF_4S, NF_2D}};
6703   VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
6704 
6705   static const NEONFormatMap map_fcvtn = {{22, 30},
6706                                           {NF_4H, NF_8H, NF_2S, NF_4S}};
6707   VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
6708 
6709   SimVRegister& rd = ReadVRegister(instr->GetRd());
6710   SimVRegister& rn = ReadVRegister(instr->GetRn());
6711 
6712   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
6713     // These instructions all use a two-bit size field, except NOT and RBIT,
6714     // which use the field to encode the operation.
6715     switch (instr->Mask(NEON2RegMiscMask)) {
6716       case NEON_REV64:
6717         rev64(vf, rd, rn);
6718         break;
6719       case NEON_REV32:
6720         rev32(vf, rd, rn);
6721         break;
6722       case NEON_REV16:
6723         rev16(vf, rd, rn);
6724         break;
6725       case NEON_SUQADD:
6726         suqadd(vf, rd, rd, rn);
6727         break;
6728       case NEON_USQADD:
6729         usqadd(vf, rd, rd, rn);
6730         break;
6731       case NEON_CLS:
6732         cls(vf, rd, rn);
6733         break;
6734       case NEON_CLZ:
6735         clz(vf, rd, rn);
6736         break;
6737       case NEON_CNT:
6738         cnt(vf, rd, rn);
6739         break;
6740       case NEON_SQABS:
6741         abs(vf, rd, rn).SignedSaturate(vf);
6742         break;
6743       case NEON_SQNEG:
6744         neg(vf, rd, rn).SignedSaturate(vf);
6745         break;
6746       case NEON_CMGT_zero:
6747         cmp(vf, rd, rn, 0, gt);
6748         break;
6749       case NEON_CMGE_zero:
6750         cmp(vf, rd, rn, 0, ge);
6751         break;
6752       case NEON_CMEQ_zero:
6753         cmp(vf, rd, rn, 0, eq);
6754         break;
6755       case NEON_CMLE_zero:
6756         cmp(vf, rd, rn, 0, le);
6757         break;
6758       case NEON_CMLT_zero:
6759         cmp(vf, rd, rn, 0, lt);
6760         break;
6761       case NEON_ABS:
6762         abs(vf, rd, rn);
6763         break;
6764       case NEON_NEG:
6765         neg(vf, rd, rn);
6766         break;
6767       case NEON_SADDLP:
6768         saddlp(vf_lp, rd, rn);
6769         break;
6770       case NEON_UADDLP:
6771         uaddlp(vf_lp, rd, rn);
6772         break;
6773       case NEON_SADALP:
6774         sadalp(vf_lp, rd, rn);
6775         break;
6776       case NEON_UADALP:
6777         uadalp(vf_lp, rd, rn);
6778         break;
6779       case NEON_RBIT_NOT:
6780         vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
6781         switch (instr->GetFPType()) {
6782           case 0:
6783             not_(vf, rd, rn);
6784             break;
6785           case 1:
6786             rbit(vf, rd, rn);
6787             break;
6788           default:
6789             VIXL_UNIMPLEMENTED();
6790         }
6791         break;
6792     }
6793   } else {
6794     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
6795     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6796     bool inexact_exception = false;
6797     FrintMode frint_mode = kFrintToInteger;
6798 
6799     // These instructions all use a one-bit size field, except XTN, SQXTUN,
6800     // SHLL, SQXTN and UQXTN, which use a two-bit size field.
6801     switch (instr->Mask(NEON2RegMiscFPMask)) {
6802       case NEON_FABS:
6803         fabs_(fpf, rd, rn);
6804         return;
6805       case NEON_FNEG:
6806         fneg(fpf, rd, rn);
6807         return;
6808       case NEON_FSQRT:
6809         fsqrt(fpf, rd, rn);
6810         return;
6811       case NEON_FCVTL:
6812         if (instr->Mask(NEON_Q)) {
6813           fcvtl2(vf_fcvtl, rd, rn);
6814         } else {
6815           fcvtl(vf_fcvtl, rd, rn);
6816         }
6817         return;
6818       case NEON_FCVTN:
6819         if (instr->Mask(NEON_Q)) {
6820           fcvtn2(vf_fcvtn, rd, rn);
6821         } else {
6822           fcvtn(vf_fcvtn, rd, rn);
6823         }
6824         return;
6825       case NEON_FCVTXN:
6826         if (instr->Mask(NEON_Q)) {
6827           fcvtxn2(vf_fcvtn, rd, rn);
6828         } else {
6829           fcvtxn(vf_fcvtn, rd, rn);
6830         }
6831         return;
6832 
6833       // The following instructions break from the switch statement, rather
6834       // than return.
6835       case NEON_FRINT32X:
6836         inexact_exception = true;
6837         frint_mode = kFrintToInt32;
6838         break;  // Use FPCR rounding mode.
6839       case NEON_FRINT32Z:
6840         inexact_exception = true;
6841         frint_mode = kFrintToInt32;
6842         fpcr_rounding = FPZero;
6843         break;
6844       case NEON_FRINT64X:
6845         inexact_exception = true;
6846         frint_mode = kFrintToInt64;
6847         break;  // Use FPCR rounding mode.
6848       case NEON_FRINT64Z:
6849         inexact_exception = true;
6850         frint_mode = kFrintToInt64;
6851         fpcr_rounding = FPZero;
6852         break;
6853       case NEON_FRINTI:
6854         break;  // Use FPCR rounding mode.
6855       case NEON_FRINTX:
6856         inexact_exception = true;
6857         break;
6858       case NEON_FRINTA:
6859         fpcr_rounding = FPTieAway;
6860         break;
6861       case NEON_FRINTM:
6862         fpcr_rounding = FPNegativeInfinity;
6863         break;
6864       case NEON_FRINTN:
6865         fpcr_rounding = FPTieEven;
6866         break;
6867       case NEON_FRINTP:
6868         fpcr_rounding = FPPositiveInfinity;
6869         break;
6870       case NEON_FRINTZ:
6871         fpcr_rounding = FPZero;
6872         break;
6873 
6874       case NEON_FCVTNS:
6875         fcvts(fpf, rd, rn, FPTieEven);
6876         return;
6877       case NEON_FCVTNU:
6878         fcvtu(fpf, rd, rn, FPTieEven);
6879         return;
6880       case NEON_FCVTPS:
6881         fcvts(fpf, rd, rn, FPPositiveInfinity);
6882         return;
6883       case NEON_FCVTPU:
6884         fcvtu(fpf, rd, rn, FPPositiveInfinity);
6885         return;
6886       case NEON_FCVTMS:
6887         fcvts(fpf, rd, rn, FPNegativeInfinity);
6888         return;
6889       case NEON_FCVTMU:
6890         fcvtu(fpf, rd, rn, FPNegativeInfinity);
6891         return;
6892       case NEON_FCVTZS:
6893         fcvts(fpf, rd, rn, FPZero);
6894         return;
6895       case NEON_FCVTZU:
6896         fcvtu(fpf, rd, rn, FPZero);
6897         return;
6898       case NEON_FCVTAS:
6899         fcvts(fpf, rd, rn, FPTieAway);
6900         return;
6901       case NEON_FCVTAU:
6902         fcvtu(fpf, rd, rn, FPTieAway);
6903         return;
6904       case NEON_SCVTF:
6905         scvtf(fpf, rd, rn, 0, fpcr_rounding);
6906         return;
6907       case NEON_UCVTF:
6908         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
6909         return;
6910       case NEON_URSQRTE:
6911         ursqrte(fpf, rd, rn);
6912         return;
6913       case NEON_URECPE:
6914         urecpe(fpf, rd, rn);
6915         return;
6916       case NEON_FRSQRTE:
6917         frsqrte(fpf, rd, rn);
6918         return;
6919       case NEON_FRECPE:
6920         frecpe(fpf, rd, rn, fpcr_rounding);
6921         return;
6922       case NEON_FCMGT_zero:
6923         fcmp_zero(fpf, rd, rn, gt);
6924         return;
6925       case NEON_FCMGE_zero:
6926         fcmp_zero(fpf, rd, rn, ge);
6927         return;
6928       case NEON_FCMEQ_zero:
6929         fcmp_zero(fpf, rd, rn, eq);
6930         return;
6931       case NEON_FCMLE_zero:
6932         fcmp_zero(fpf, rd, rn, le);
6933         return;
6934       case NEON_FCMLT_zero:
6935         fcmp_zero(fpf, rd, rn, lt);
6936         return;
6937       default:
6938         if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
6939             (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
6940           switch (instr->Mask(NEON2RegMiscMask)) {
6941             case NEON_XTN:
6942               xtn(vf, rd, rn);
6943               return;
6944             case NEON_SQXTN:
6945               sqxtn(vf, rd, rn);
6946               return;
6947             case NEON_UQXTN:
6948               uqxtn(vf, rd, rn);
6949               return;
6950             case NEON_SQXTUN:
6951               sqxtun(vf, rd, rn);
6952               return;
6953             case NEON_SHLL:
6954               vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
6955               if (instr->Mask(NEON_Q)) {
6956                 shll2(vf, rd, rn);
6957               } else {
6958                 shll(vf, rd, rn);
6959               }
6960               return;
6961             default:
6962               VIXL_UNIMPLEMENTED();
6963           }
6964         } else {
6965           VIXL_UNIMPLEMENTED();
6966         }
6967     }
6968 
6969     // Only FRINT* instructions fall through the switch above.
6970     frint(fpf, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
6971   }
6972 }
6973 
6974 
6975 void Simulator::VisitNEON2RegMiscFP16(const Instruction* instr) {
6976   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
6977   NEONFormatDecoder nfd(instr);
6978   VectorFormat fpf = nfd.GetVectorFormat(&map_half);
6979 
6980   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6981 
6982   SimVRegister& rd = ReadVRegister(instr->GetRd());
6983   SimVRegister& rn = ReadVRegister(instr->GetRn());
6984 
6985   switch (instr->Mask(NEON2RegMiscFP16Mask)) {
6986     case NEON_SCVTF_H:
6987       scvtf(fpf, rd, rn, 0, fpcr_rounding);
6988       return;
6989     case NEON_UCVTF_H:
6990       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
6991       return;
6992     case NEON_FCVTNS_H:
6993       fcvts(fpf, rd, rn, FPTieEven);
6994       return;
6995     case NEON_FCVTNU_H:
6996       fcvtu(fpf, rd, rn, FPTieEven);
6997       return;
6998     case NEON_FCVTPS_H:
6999       fcvts(fpf, rd, rn, FPPositiveInfinity);
7000       return;
7001     case NEON_FCVTPU_H:
7002       fcvtu(fpf, rd, rn, FPPositiveInfinity);
7003       return;
7004     case NEON_FCVTMS_H:
7005       fcvts(fpf, rd, rn, FPNegativeInfinity);
7006       return;
7007     case NEON_FCVTMU_H:
7008       fcvtu(fpf, rd, rn, FPNegativeInfinity);
7009       return;
7010     case NEON_FCVTZS_H:
7011       fcvts(fpf, rd, rn, FPZero);
7012       return;
7013     case NEON_FCVTZU_H:
7014       fcvtu(fpf, rd, rn, FPZero);
7015       return;
7016     case NEON_FCVTAS_H:
7017       fcvts(fpf, rd, rn, FPTieAway);
7018       return;
7019     case NEON_FCVTAU_H:
7020       fcvtu(fpf, rd, rn, FPTieAway);
7021       return;
7022     case NEON_FRINTI_H:
7023       frint(fpf, rd, rn, fpcr_rounding, false);
7024       return;
7025     case NEON_FRINTX_H:
7026       frint(fpf, rd, rn, fpcr_rounding, true);
7027       return;
7028     case NEON_FRINTA_H:
7029       frint(fpf, rd, rn, FPTieAway, false);
7030       return;
7031     case NEON_FRINTM_H:
7032       frint(fpf, rd, rn, FPNegativeInfinity, false);
7033       return;
7034     case NEON_FRINTN_H:
7035       frint(fpf, rd, rn, FPTieEven, false);
7036       return;
7037     case NEON_FRINTP_H:
7038       frint(fpf, rd, rn, FPPositiveInfinity, false);
7039       return;
7040     case NEON_FRINTZ_H:
7041       frint(fpf, rd, rn, FPZero, false);
7042       return;
7043     case NEON_FABS_H:
7044       fabs_(fpf, rd, rn);
7045       return;
7046     case NEON_FNEG_H:
7047       fneg(fpf, rd, rn);
7048       return;
7049     case NEON_FSQRT_H:
7050       fsqrt(fpf, rd, rn);
7051       return;
7052     case NEON_FRSQRTE_H:
7053       frsqrte(fpf, rd, rn);
7054       return;
7055     case NEON_FRECPE_H:
7056       frecpe(fpf, rd, rn, fpcr_rounding);
7057       return;
7058     case NEON_FCMGT_H_zero:
7059       fcmp_zero(fpf, rd, rn, gt);
7060       return;
7061     case NEON_FCMGE_H_zero:
7062       fcmp_zero(fpf, rd, rn, ge);
7063       return;
7064     case NEON_FCMEQ_H_zero:
7065       fcmp_zero(fpf, rd, rn, eq);
7066       return;
7067     case NEON_FCMLE_H_zero:
7068       fcmp_zero(fpf, rd, rn, le);
7069       return;
7070     case NEON_FCMLT_H_zero:
7071       fcmp_zero(fpf, rd, rn, lt);
7072       return;
7073     default:
7074       VIXL_UNIMPLEMENTED();
7075       return;
7076   }
7077 }
7078 
7079 
7080 void Simulator::VisitNEON3Same(const Instruction* instr) {
7081   NEONFormatDecoder nfd(instr);
7082   SimVRegister& rd = ReadVRegister(instr->GetRd());
7083   SimVRegister& rn = ReadVRegister(instr->GetRn());
7084   SimVRegister& rm = ReadVRegister(instr->GetRm());
7085 
7086   if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
7087     VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
7088     switch (instr->Mask(NEON3SameLogicalMask)) {
7089       case NEON_AND:
7090         and_(vf, rd, rn, rm);
7091         break;
7092       case NEON_ORR:
7093         orr(vf, rd, rn, rm);
7094         break;
7095       case NEON_ORN:
7096         orn(vf, rd, rn, rm);
7097         break;
7098       case NEON_EOR:
7099         eor(vf, rd, rn, rm);
7100         break;
7101       case NEON_BIC:
7102         bic(vf, rd, rn, rm);
7103         break;
7104       case NEON_BIF:
7105         bif(vf, rd, rn, rm);
7106         break;
7107       case NEON_BIT:
7108         bit(vf, rd, rn, rm);
7109         break;
7110       case NEON_BSL:
7111         bsl(vf, rd, rd, rn, rm);
7112         break;
7113       default:
7114         VIXL_UNIMPLEMENTED();
7115     }
7116   } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
7117     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7118     switch (instr->Mask(NEON3SameFPMask)) {
7119       case NEON_FADD:
7120         fadd(vf, rd, rn, rm);
7121         break;
7122       case NEON_FSUB:
7123         fsub(vf, rd, rn, rm);
7124         break;
7125       case NEON_FMUL:
7126         fmul(vf, rd, rn, rm);
7127         break;
7128       case NEON_FDIV:
7129         fdiv(vf, rd, rn, rm);
7130         break;
7131       case NEON_FMAX:
7132         fmax(vf, rd, rn, rm);
7133         break;
7134       case NEON_FMIN:
7135         fmin(vf, rd, rn, rm);
7136         break;
7137       case NEON_FMAXNM:
7138         fmaxnm(vf, rd, rn, rm);
7139         break;
7140       case NEON_FMINNM:
7141         fminnm(vf, rd, rn, rm);
7142         break;
7143       case NEON_FMLA:
7144         fmla(vf, rd, rd, rn, rm);
7145         break;
7146       case NEON_FMLS:
7147         fmls(vf, rd, rd, rn, rm);
7148         break;
7149       case NEON_FMULX:
7150         fmulx(vf, rd, rn, rm);
7151         break;
7152       case NEON_FACGE:
7153         fabscmp(vf, rd, rn, rm, ge);
7154         break;
7155       case NEON_FACGT:
7156         fabscmp(vf, rd, rn, rm, gt);
7157         break;
7158       case NEON_FCMEQ:
7159         fcmp(vf, rd, rn, rm, eq);
7160         break;
7161       case NEON_FCMGE:
7162         fcmp(vf, rd, rn, rm, ge);
7163         break;
7164       case NEON_FCMGT:
7165         fcmp(vf, rd, rn, rm, gt);
7166         break;
7167       case NEON_FRECPS:
7168         frecps(vf, rd, rn, rm);
7169         break;
7170       case NEON_FRSQRTS:
7171         frsqrts(vf, rd, rn, rm);
7172         break;
7173       case NEON_FABD:
7174         fabd(vf, rd, rn, rm);
7175         break;
7176       case NEON_FADDP:
7177         faddp(vf, rd, rn, rm);
7178         break;
7179       case NEON_FMAXP:
7180         fmaxp(vf, rd, rn, rm);
7181         break;
7182       case NEON_FMAXNMP:
7183         fmaxnmp(vf, rd, rn, rm);
7184         break;
7185       case NEON_FMINP:
7186         fminp(vf, rd, rn, rm);
7187         break;
7188       case NEON_FMINNMP:
7189         fminnmp(vf, rd, rn, rm);
7190         break;
7191       default:
7192         // FMLAL{2} and FMLSL{2} have special-case encodings.
7193         switch (instr->Mask(NEON3SameFHMMask)) {
7194           case NEON_FMLAL:
7195             fmlal(vf, rd, rn, rm);
7196             break;
7197           case NEON_FMLAL2:
7198             fmlal2(vf, rd, rn, rm);
7199             break;
7200           case NEON_FMLSL:
7201             fmlsl(vf, rd, rn, rm);
7202             break;
7203           case NEON_FMLSL2:
7204             fmlsl2(vf, rd, rn, rm);
7205             break;
7206           default:
7207             VIXL_UNIMPLEMENTED();
7208         }
7209     }
7210   } else {
7211     VectorFormat vf = nfd.GetVectorFormat();
7212     switch (instr->Mask(NEON3SameMask)) {
7213       case NEON_ADD:
7214         add(vf, rd, rn, rm);
7215         break;
7216       case NEON_ADDP:
7217         addp(vf, rd, rn, rm);
7218         break;
7219       case NEON_CMEQ:
7220         cmp(vf, rd, rn, rm, eq);
7221         break;
7222       case NEON_CMGE:
7223         cmp(vf, rd, rn, rm, ge);
7224         break;
7225       case NEON_CMGT:
7226         cmp(vf, rd, rn, rm, gt);
7227         break;
7228       case NEON_CMHI:
7229         cmp(vf, rd, rn, rm, hi);
7230         break;
7231       case NEON_CMHS:
7232         cmp(vf, rd, rn, rm, hs);
7233         break;
7234       case NEON_CMTST:
7235         cmptst(vf, rd, rn, rm);
7236         break;
7237       case NEON_MLS:
7238         mls(vf, rd, rd, rn, rm);
7239         break;
7240       case NEON_MLA:
7241         mla(vf, rd, rd, rn, rm);
7242         break;
7243       case NEON_MUL:
7244         mul(vf, rd, rn, rm);
7245         break;
7246       case NEON_PMUL:
7247         pmul(vf, rd, rn, rm);
7248         break;
7249       case NEON_SMAX:
7250         smax(vf, rd, rn, rm);
7251         break;
7252       case NEON_SMAXP:
7253         smaxp(vf, rd, rn, rm);
7254         break;
7255       case NEON_SMIN:
7256         smin(vf, rd, rn, rm);
7257         break;
7258       case NEON_SMINP:
7259         sminp(vf, rd, rn, rm);
7260         break;
7261       case NEON_SUB:
7262         sub(vf, rd, rn, rm);
7263         break;
7264       case NEON_UMAX:
7265         umax(vf, rd, rn, rm);
7266         break;
7267       case NEON_UMAXP:
7268         umaxp(vf, rd, rn, rm);
7269         break;
7270       case NEON_UMIN:
7271         umin(vf, rd, rn, rm);
7272         break;
7273       case NEON_UMINP:
7274         uminp(vf, rd, rn, rm);
7275         break;
7276       case NEON_SSHL:
7277         sshl(vf, rd, rn, rm);
7278         break;
7279       case NEON_USHL:
7280         ushl(vf, rd, rn, rm);
7281         break;
7282       case NEON_SABD:
7283         absdiff(vf, rd, rn, rm, true);
7284         break;
7285       case NEON_UABD:
7286         absdiff(vf, rd, rn, rm, false);
7287         break;
7288       case NEON_SABA:
7289         saba(vf, rd, rn, rm);
7290         break;
7291       case NEON_UABA:
7292         uaba(vf, rd, rn, rm);
7293         break;
7294       case NEON_UQADD:
7295         add(vf, rd, rn, rm).UnsignedSaturate(vf);
7296         break;
7297       case NEON_SQADD:
7298         add(vf, rd, rn, rm).SignedSaturate(vf);
7299         break;
7300       case NEON_UQSUB:
7301         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
7302         break;
7303       case NEON_SQSUB:
7304         sub(vf, rd, rn, rm).SignedSaturate(vf);
7305         break;
7306       case NEON_SQDMULH:
7307         sqdmulh(vf, rd, rn, rm);
7308         break;
7309       case NEON_SQRDMULH:
7310         sqrdmulh(vf, rd, rn, rm);
7311         break;
7312       case NEON_UQSHL:
7313         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
7314         break;
7315       case NEON_SQSHL:
7316         sshl(vf, rd, rn, rm).SignedSaturate(vf);
7317         break;
7318       case NEON_URSHL:
7319         ushl(vf, rd, rn, rm).Round(vf);
7320         break;
7321       case NEON_SRSHL:
7322         sshl(vf, rd, rn, rm).Round(vf);
7323         break;
7324       case NEON_UQRSHL:
7325         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
7326         break;
7327       case NEON_SQRSHL:
7328         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
7329         break;
7330       case NEON_UHADD:
7331         add(vf, rd, rn, rm).Uhalve(vf);
7332         break;
7333       case NEON_URHADD:
7334         add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
7335         break;
7336       case NEON_SHADD:
7337         add(vf, rd, rn, rm).Halve(vf);
7338         break;
7339       case NEON_SRHADD:
7340         add(vf, rd, rn, rm).Halve(vf).Round(vf);
7341         break;
7342       case NEON_UHSUB:
7343         sub(vf, rd, rn, rm).Uhalve(vf);
7344         break;
7345       case NEON_SHSUB:
7346         sub(vf, rd, rn, rm).Halve(vf);
7347         break;
7348       default:
7349         VIXL_UNIMPLEMENTED();
7350     }
7351   }
7352 }
7353 
7354 
7355 void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
7356   NEONFormatDecoder nfd(instr);
7357   SimVRegister& rd = ReadVRegister(instr->GetRd());
7358   SimVRegister& rn = ReadVRegister(instr->GetRn());
7359   SimVRegister& rm = ReadVRegister(instr->GetRm());
7360 
7361   VectorFormat vf = nfd.GetVectorFormat(nfd.FP16FormatMap());
7362   switch (instr->Mask(NEON3SameFP16Mask)) {
7363 #define SIM_FUNC(A, B) \
7364   case NEON_##A##_H:   \
7365     B(vf, rd, rn, rm); \
7366     break;
7367     SIM_FUNC(FMAXNM, fmaxnm);
7368     SIM_FUNC(FADD, fadd);
7369     SIM_FUNC(FMULX, fmulx);
7370     SIM_FUNC(FMAX, fmax);
7371     SIM_FUNC(FRECPS, frecps);
7372     SIM_FUNC(FMINNM, fminnm);
7373     SIM_FUNC(FSUB, fsub);
7374     SIM_FUNC(FMIN, fmin);
7375     SIM_FUNC(FRSQRTS, frsqrts);
7376     SIM_FUNC(FMAXNMP, fmaxnmp);
7377     SIM_FUNC(FADDP, faddp);
7378     SIM_FUNC(FMUL, fmul);
7379     SIM_FUNC(FMAXP, fmaxp);
7380     SIM_FUNC(FDIV, fdiv);
7381     SIM_FUNC(FMINNMP, fminnmp);
7382     SIM_FUNC(FABD, fabd);
7383     SIM_FUNC(FMINP, fminp);
7384 #undef SIM_FUNC
7385     case NEON_FMLA_H:
7386       fmla(vf, rd, rd, rn, rm);
7387       break;
7388     case NEON_FMLS_H:
7389       fmls(vf, rd, rd, rn, rm);
7390       break;
7391     case NEON_FCMEQ_H:
7392       fcmp(vf, rd, rn, rm, eq);
7393       break;
7394     case NEON_FCMGE_H:
7395       fcmp(vf, rd, rn, rm, ge);
7396       break;
7397     case NEON_FACGE_H:
7398       fabscmp(vf, rd, rn, rm, ge);
7399       break;
7400     case NEON_FCMGT_H:
7401       fcmp(vf, rd, rn, rm, gt);
7402       break;
7403     case NEON_FACGT_H:
7404       fabscmp(vf, rd, rn, rm, gt);
7405       break;
7406     default:
7407       VIXL_UNIMPLEMENTED();
7408       break;
7409   }
7410 }
7411 
7412 void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
7413   NEONFormatDecoder nfd(instr);
7414   SimVRegister& rd = ReadVRegister(instr->GetRd());
7415   SimVRegister& rn = ReadVRegister(instr->GetRn());
7416   SimVRegister& rm = ReadVRegister(instr->GetRm());
7417   int rot = 0;
7418   VectorFormat vf = nfd.GetVectorFormat();
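       // For FCMLA and FCADD, 'rot' holds the decoded rotation (a multiple of
       // 90 degrees) selecting the complex-arithmetic variant to apply.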
7419 
7420   switch (form_hash_) {
7421     case "fcmla_asimdsame2_c"_h:
7422       rot = instr->GetImmRotFcmlaVec();
7423       fcmla(vf, rd, rn, rm, rd, rot);
7424       break;
7425     case "fcadd_asimdsame2_c"_h:
7426       rot = instr->GetImmRotFcadd();
7427       fcadd(vf, rd, rn, rm, rot);
7428       break;
7429     case "sdot_asimdsame2_d"_h:
7430       sdot(vf, rd, rn, rm);
7431       break;
7432     case "udot_asimdsame2_d"_h:
7433       udot(vf, rd, rn, rm);
7434       break;
7435     case "usdot_asimdsame2_d"_h:
7436       usdot(vf, rd, rn, rm);
7437       break;
7438     case "sqrdmlah_asimdsame2_only"_h:
7439       sqrdmlah(vf, rd, rn, rm);
7440       break;
7441     case "sqrdmlsh_asimdsame2_only"_h:
7442       sqrdmlsh(vf, rd, rn, rm);
7443       break;
7444   }
7445 }
7446 
7447 
7448 void Simulator::VisitNEON3Different(const Instruction* instr) {
7449   NEONFormatDecoder nfd(instr);
7450   VectorFormat vf = nfd.GetVectorFormat();
7451   VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7452 
7453   SimVRegister& rd = ReadVRegister(instr->GetRd());
7454   SimVRegister& rn = ReadVRegister(instr->GetRn());
7455   SimVRegister& rm = ReadVRegister(instr->GetRm());
7456 
7457   switch (instr->Mask(NEON3DifferentMask)) {
7458     case NEON_PMULL:
7459       pmull(vf_l, rd, rn, rm);
7460       break;
7461     case NEON_PMULL2:
7462       pmull2(vf_l, rd, rn, rm);
7463       break;
7464     case NEON_UADDL:
7465       uaddl(vf_l, rd, rn, rm);
7466       break;
7467     case NEON_UADDL2:
7468       uaddl2(vf_l, rd, rn, rm);
7469       break;
7470     case NEON_SADDL:
7471       saddl(vf_l, rd, rn, rm);
7472       break;
7473     case NEON_SADDL2:
7474       saddl2(vf_l, rd, rn, rm);
7475       break;
7476     case NEON_USUBL:
7477       usubl(vf_l, rd, rn, rm);
7478       break;
7479     case NEON_USUBL2:
7480       usubl2(vf_l, rd, rn, rm);
7481       break;
7482     case NEON_SSUBL:
7483       ssubl(vf_l, rd, rn, rm);
7484       break;
7485     case NEON_SSUBL2:
7486       ssubl2(vf_l, rd, rn, rm);
7487       break;
7488     case NEON_SABAL:
7489       sabal(vf_l, rd, rn, rm);
7490       break;
7491     case NEON_SABAL2:
7492       sabal2(vf_l, rd, rn, rm);
7493       break;
7494     case NEON_UABAL:
7495       uabal(vf_l, rd, rn, rm);
7496       break;
7497     case NEON_UABAL2:
7498       uabal2(vf_l, rd, rn, rm);
7499       break;
7500     case NEON_SABDL:
7501       sabdl(vf_l, rd, rn, rm);
7502       break;
7503     case NEON_SABDL2:
7504       sabdl2(vf_l, rd, rn, rm);
7505       break;
7506     case NEON_UABDL:
7507       uabdl(vf_l, rd, rn, rm);
7508       break;
7509     case NEON_UABDL2:
7510       uabdl2(vf_l, rd, rn, rm);
7511       break;
7512     case NEON_SMLAL:
7513       smlal(vf_l, rd, rn, rm);
7514       break;
7515     case NEON_SMLAL2:
7516       smlal2(vf_l, rd, rn, rm);
7517       break;
7518     case NEON_UMLAL:
7519       umlal(vf_l, rd, rn, rm);
7520       break;
7521     case NEON_UMLAL2:
7522       umlal2(vf_l, rd, rn, rm);
7523       break;
7524     case NEON_SMLSL:
7525       smlsl(vf_l, rd, rn, rm);
7526       break;
7527     case NEON_SMLSL2:
7528       smlsl2(vf_l, rd, rn, rm);
7529       break;
7530     case NEON_UMLSL:
7531       umlsl(vf_l, rd, rn, rm);
7532       break;
7533     case NEON_UMLSL2:
7534       umlsl2(vf_l, rd, rn, rm);
7535       break;
7536     case NEON_SMULL:
7537       smull(vf_l, rd, rn, rm);
7538       break;
7539     case NEON_SMULL2:
7540       smull2(vf_l, rd, rn, rm);
7541       break;
7542     case NEON_UMULL:
7543       umull(vf_l, rd, rn, rm);
7544       break;
7545     case NEON_UMULL2:
7546       umull2(vf_l, rd, rn, rm);
7547       break;
7548     case NEON_SQDMLAL:
7549       sqdmlal(vf_l, rd, rn, rm);
7550       break;
7551     case NEON_SQDMLAL2:
7552       sqdmlal2(vf_l, rd, rn, rm);
7553       break;
7554     case NEON_SQDMLSL:
7555       sqdmlsl(vf_l, rd, rn, rm);
7556       break;
7557     case NEON_SQDMLSL2:
7558       sqdmlsl2(vf_l, rd, rn, rm);
7559       break;
7560     case NEON_SQDMULL:
7561       sqdmull(vf_l, rd, rn, rm);
7562       break;
7563     case NEON_SQDMULL2:
7564       sqdmull2(vf_l, rd, rn, rm);
7565       break;
7566     case NEON_UADDW:
7567       uaddw(vf_l, rd, rn, rm);
7568       break;
7569     case NEON_UADDW2:
7570       uaddw2(vf_l, rd, rn, rm);
7571       break;
7572     case NEON_SADDW:
7573       saddw(vf_l, rd, rn, rm);
7574       break;
7575     case NEON_SADDW2:
7576       saddw2(vf_l, rd, rn, rm);
7577       break;
7578     case NEON_USUBW:
7579       usubw(vf_l, rd, rn, rm);
7580       break;
7581     case NEON_USUBW2:
7582       usubw2(vf_l, rd, rn, rm);
7583       break;
7584     case NEON_SSUBW:
7585       ssubw(vf_l, rd, rn, rm);
7586       break;
7587     case NEON_SSUBW2:
7588       ssubw2(vf_l, rd, rn, rm);
7589       break;
7590     case NEON_ADDHN:
7591       addhn(vf, rd, rn, rm);
7592       break;
7593     case NEON_ADDHN2:
7594       addhn2(vf, rd, rn, rm);
7595       break;
7596     case NEON_RADDHN:
7597       raddhn(vf, rd, rn, rm);
7598       break;
7599     case NEON_RADDHN2:
7600       raddhn2(vf, rd, rn, rm);
7601       break;
7602     case NEON_SUBHN:
7603       subhn(vf, rd, rn, rm);
7604       break;
7605     case NEON_SUBHN2:
7606       subhn2(vf, rd, rn, rm);
7607       break;
7608     case NEON_RSUBHN:
7609       rsubhn(vf, rd, rn, rm);
7610       break;
7611     case NEON_RSUBHN2:
7612       rsubhn2(vf, rd, rn, rm);
7613       break;
7614     default:
7615       VIXL_UNIMPLEMENTED();
7616   }
7617 }
7618 
7619 
7620 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
7621   NEONFormatDecoder nfd(instr);
7622 
7623   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
7624 
7625   SimVRegister& rd = ReadVRegister(instr->GetRd());
7626   SimVRegister& rn = ReadVRegister(instr->GetRn());
7627 
7628   if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
7629     VectorFormat vf = nfd.GetVectorFormat(&map_half);
7630     switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
7631       case NEON_FMAXV_H:
7632         fmaxv(vf, rd, rn);
7633         break;
7634       case NEON_FMINV_H:
7635         fminv(vf, rd, rn);
7636         break;
7637       case NEON_FMAXNMV_H:
7638         fmaxnmv(vf, rd, rn);
7639         break;
7640       case NEON_FMINNMV_H:
7641         fminnmv(vf, rd, rn);
7642         break;
7643       default:
7644         VIXL_UNIMPLEMENTED();
7645     }
7646   } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
7647     // The input operand's VectorFormat is passed for these instructions.
7648     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7649 
7650     switch (instr->Mask(NEONAcrossLanesFPMask)) {
7651       case NEON_FMAXV:
7652         fmaxv(vf, rd, rn);
7653         break;
7654       case NEON_FMINV:
7655         fminv(vf, rd, rn);
7656         break;
7657       case NEON_FMAXNMV:
7658         fmaxnmv(vf, rd, rn);
7659         break;
7660       case NEON_FMINNMV:
7661         fminnmv(vf, rd, rn);
7662         break;
7663       default:
7664         VIXL_UNIMPLEMENTED();
7665     }
7666   } else {
7667     VectorFormat vf = nfd.GetVectorFormat();
7668 
7669     switch (instr->Mask(NEONAcrossLanesMask)) {
7670       case NEON_ADDV:
7671         addv(vf, rd, rn);
7672         break;
7673       case NEON_SMAXV:
7674         smaxv(vf, rd, rn);
7675         break;
7676       case NEON_SMINV:
7677         sminv(vf, rd, rn);
7678         break;
7679       case NEON_UMAXV:
7680         umaxv(vf, rd, rn);
7681         break;
7682       case NEON_UMINV:
7683         uminv(vf, rd, rn);
7684         break;
7685       case NEON_SADDLV:
7686         saddlv(vf, rd, rn);
7687         break;
7688       case NEON_UADDLV:
7689         uaddlv(vf, rd, rn);
7690         break;
7691       default:
7692         VIXL_UNIMPLEMENTED();
7693     }
7694   }
7695 }
7696 
7697 void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
7698   NEONFormatDecoder nfd(instr);
7699   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7700 
7701   SimVRegister& rd = ReadVRegister(instr->GetRd());
7702   SimVRegister& rn = ReadVRegister(instr->GetRn());
7703 
7704   int rm_reg = instr->GetRm();
7705   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7706   if (instr->GetNEONSize() == 1) {
7707     rm_reg = instr->GetRmLow16();
7708     index = (index << 1) | instr->GetNEONM();
7709   }
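       // With H-sized source elements the element index needs three bits
       // (H:L:M) and only a four-bit Rm field remains, so Rm is limited to
       // V0-V15; S-sized elements use just H:L and the full five-bit Rm field.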
7710   SimVRegister& rm = ReadVRegister(rm_reg);
7711 
7712   SimVRegister temp;
7713   VectorFormat indexform =
7714       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
7715   dup_element(indexform, temp, rm, index);
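       // The selected element is broadcast across a Q-sized temporary at the
       // source (narrow) lane width, so the plain vector helpers below can be
       // reused; the is_2 flag then picks the upper-half ("2") variant.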
7716 
7717   bool is_2 = (instr->Mask(NEON_Q) != 0);
7718 
7719   switch (form_hash_) {
7720     case "smull_asimdelem_l"_h:
7721       smull(vf, rd, rn, temp, is_2);
7722       break;
7723     case "umull_asimdelem_l"_h:
7724       umull(vf, rd, rn, temp, is_2);
7725       break;
7726     case "smlal_asimdelem_l"_h:
7727       smlal(vf, rd, rn, temp, is_2);
7728       break;
7729     case "umlal_asimdelem_l"_h:
7730       umlal(vf, rd, rn, temp, is_2);
7731       break;
7732     case "smlsl_asimdelem_l"_h:
7733       smlsl(vf, rd, rn, temp, is_2);
7734       break;
7735     case "umlsl_asimdelem_l"_h:
7736       umlsl(vf, rd, rn, temp, is_2);
7737       break;
7738     case "sqdmull_asimdelem_l"_h:
7739       sqdmull(vf, rd, rn, temp, is_2);
7740       break;
7741     case "sqdmlal_asimdelem_l"_h:
7742       sqdmlal(vf, rd, rn, temp, is_2);
7743       break;
7744     case "sqdmlsl_asimdelem_l"_h:
7745       sqdmlsl(vf, rd, rn, temp, is_2);
7746       break;
7747     default:
7748       VIXL_UNREACHABLE();
7749   }
7750 }
7751 
7752 void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) {
7753   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
7754   SimVRegister& rd = ReadVRegister(instr->GetRd());
7755   SimVRegister& rn = ReadVRegister(instr->GetRn());
7756   SimVRegister& rm = ReadVRegister(instr->GetRmLow16());
7757 
7758   int index =
7759       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
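       // FMLAL/FMLSL always multiply H-sized source elements, so the element
       // index is three bits (H:L:M) and Rm is restricted to V0-V15 above.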
7760 
7761   switch (form_hash_) {
7762     case "fmlal_asimdelem_lh"_h:
7763       fmlal(vform, rd, rn, rm, index);
7764       break;
7765     case "fmlal2_asimdelem_lh"_h:
7766       fmlal2(vform, rd, rn, rm, index);
7767       break;
7768     case "fmlsl_asimdelem_lh"_h:
7769       fmlsl(vform, rd, rn, rm, index);
7770       break;
7771     case "fmlsl2_asimdelem_lh"_h:
7772       fmlsl2(vform, rd, rn, rm, index);
7773       break;
7774     default:
7775       VIXL_UNREACHABLE();
7776   }
7777 }
7778 
7779 void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) {
7780   NEONFormatDecoder nfd(instr);
7781   static const NEONFormatMap map =
7782       {{23, 22, 30},
7783        {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
7784   VectorFormat vform = nfd.GetVectorFormat(&map);
7785 
7786   SimVRegister& rd = ReadVRegister(instr->GetRd());
7787   SimVRegister& rn = ReadVRegister(instr->GetRn());
7788 
7789   int rm_reg = instr->GetRm();
7790   int index =
7791       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
7792 
7793   if ((vform == kFormat4H) || (vform == kFormat8H)) {
7794     rm_reg &= 0xf;
7795   } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
7796     index >>= 1;
7797   } else {
7798     VIXL_ASSERT(vform == kFormat2D);
7799     VIXL_ASSERT(instr->GetNEONL() == 0);
7800     index >>= 2;
7801   }
7802 
7803   SimVRegister& rm = ReadVRegister(rm_reg);
7804 
7805   switch (form_hash_) {
7806     case "fmul_asimdelem_rh_h"_h:
7807     case "fmul_asimdelem_r_sd"_h:
7808       fmul(vform, rd, rn, rm, index);
7809       break;
7810     case "fmla_asimdelem_rh_h"_h:
7811     case "fmla_asimdelem_r_sd"_h:
7812       fmla(vform, rd, rn, rm, index);
7813       break;
7814     case "fmls_asimdelem_rh_h"_h:
7815     case "fmls_asimdelem_r_sd"_h:
7816       fmls(vform, rd, rn, rm, index);
7817       break;
7818     case "fmulx_asimdelem_rh_h"_h:
7819     case "fmulx_asimdelem_r_sd"_h:
7820       fmulx(vform, rd, rn, rm, index);
7821       break;
7822     default:
7823       VIXL_UNREACHABLE();
7824   }
7825 }
7826 
7827 void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) {
7828   VectorFormat vform = instr->GetNEONQ() ? kFormat8H : kFormat4H;
7829   SimVRegister& rd = ReadVRegister(instr->GetRd());
7830   SimVRegister& rn = ReadVRegister(instr->GetRn());
7831   SimVRegister& rm = ReadVRegister(instr->GetRm());
7832   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7833 
7834   switch (form_hash_) {
7835     case "fcmla_asimdelem_c_s"_h:
7836       vform = kFormat4S;
7837       index >>= 1;
7838       VIXL_FALLTHROUGH();
7839     case "fcmla_asimdelem_c_h"_h:
7840       fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca());
7841       break;
7842     default:
7843       VIXL_UNREACHABLE();
7844   }
7845 }
7846 
7847 void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) {
7848   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
7849 
7850   SimVRegister& rd = ReadVRegister(instr->GetRd());
7851   SimVRegister& rn = ReadVRegister(instr->GetRn());
7852   SimVRegister& rm = ReadVRegister(instr->GetRm());
7853   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7854 
7855   SimVRegister temp;
7856   // NEON indexed `dot` allows the index value to exceed the register size.
7857   // Promote the format to Q-sized vector format before the duplication.
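       // For example, "sdot v0.2s, v1.8b, v2.4b[3]" only reads the low 64 bits
       // of v1, but index 3 still selects bytes 12..15 of v2.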
7858   dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index);
7859 
7860   switch (form_hash_) {
7861     case "sdot_asimdelem_d"_h:
7862       sdot(vform, rd, rn, temp);
7863       break;
7864     case "udot_asimdelem_d"_h:
7865       udot(vform, rd, rn, temp);
7866       break;
7867     case "sudot_asimdelem_d"_h:
7868       usdot(vform, rd, temp, rn);
7869       break;
7870     case "usdot_asimdelem_d"_h:
7871       usdot(vform, rd, rn, temp);
7872       break;
7873   }
7874 }
7875 
7876 void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
7877   NEONFormatDecoder nfd(instr);
7878   VectorFormat vform = nfd.GetVectorFormat();
7879 
7880   SimVRegister& rd = ReadVRegister(instr->GetRd());
7881   SimVRegister& rn = ReadVRegister(instr->GetRn());
7882 
7883   int rm_reg = instr->GetRm();
7884   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7885 
7886   if ((vform == kFormat4H) || (vform == kFormat8H)) {
7887     rm_reg &= 0xf;
7888     index = (index << 1) | instr->GetNEONM();
7889   }
7890 
7891   SimVRegister& rm = ReadVRegister(rm_reg);
7892 
7893   switch (form_hash_) {
7894     case "mul_asimdelem_r"_h:
7895       mul(vform, rd, rn, rm, index);
7896       break;
7897     case "mla_asimdelem_r"_h:
7898       mla(vform, rd, rn, rm, index);
7899       break;
7900     case "mls_asimdelem_r"_h:
7901       mls(vform, rd, rn, rm, index);
7902       break;
7903     case "sqdmulh_asimdelem_r"_h:
7904       sqdmulh(vform, rd, rn, rm, index);
7905       break;
7906     case "sqrdmulh_asimdelem_r"_h:
7907       sqrdmulh(vform, rd, rn, rm, index);
7908       break;
7909     case "sqrdmlah_asimdelem_r"_h:
7910       sqrdmlah(vform, rd, rn, rm, index);
7911       break;
7912     case "sqrdmlsh_asimdelem_r"_h:
7913       sqrdmlsh(vform, rd, rn, rm, index);
7914       break;
7915   }
7916 }
7917 
7918 
7919 void Simulator::VisitNEONCopy(const Instruction* instr) {
7920   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
7921   VectorFormat vf = nfd.GetVectorFormat();
7922 
7923   SimVRegister& rd = ReadVRegister(instr->GetRd());
7924   SimVRegister& rn = ReadVRegister(instr->GetRn());
7925   int imm5 = instr->GetImmNEON5();
7926   int tz = CountTrailingZeros(imm5, 32);
7927   int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5);
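       // The lowest set bit of imm5 selects the element size and the bits
       // above it hold the element index; e.g. imm5 = 0b01010 is an H element
       // with index 2.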
7928 
7929   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
7930     int imm4 = instr->GetImmNEON4();
7931     int rn_index = ExtractSignedBitfield32(31, tz, imm4);
7932     ins_element(vf, rd, reg_index, rn, rn_index);
7933   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
7934     ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
7935   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
7936     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
7937     value &= MaxUintFromFormat(vf);
7938     WriteXRegister(instr->GetRd(), value);
7939   } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
7940     int64_t value = LogicVRegister(rn).Int(vf, reg_index);
7941     if (instr->GetNEONQ()) {
7942       WriteXRegister(instr->GetRd(), value);
7943     } else {
7944       WriteWRegister(instr->GetRd(), (int32_t)value);
7945     }
7946   } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
7947     dup_element(vf, rd, rn, reg_index);
7948   } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
7949     dup_immediate(vf, rd, ReadXRegister(instr->GetRn()));
7950   } else {
7951     VIXL_UNIMPLEMENTED();
7952   }
7953 }
7954 
7955 
7956 void Simulator::VisitNEONExtract(const Instruction* instr) {
7957   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
7958   VectorFormat vf = nfd.GetVectorFormat();
7959   SimVRegister& rd = ReadVRegister(instr->GetRd());
7960   SimVRegister& rn = ReadVRegister(instr->GetRn());
7961   SimVRegister& rm = ReadVRegister(instr->GetRm());
7962   if (instr->Mask(NEONExtractMask) == NEON_EXT) {
7963     int index = instr->GetImmNEONExt();
7964     ext(vf, rd, rn, rm, index);
7965   } else {
7966     VIXL_UNIMPLEMENTED();
7967   }
7968 }
7969 
7970 
7971 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
7972                                                AddrMode addr_mode) {
7973   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
7974   VectorFormat vf = nfd.GetVectorFormat();
7975 
7976   uint64_t addr_base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7977   int reg_size = RegisterSizeInBytesFromFormat(vf);
7978 
7979   int reg[4];
7980   uint64_t addr[4];
7981   for (int i = 0; i < 4; i++) {
7982     reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters;
7983     addr[i] = addr_base + (i * reg_size);
7984   }
7985   int struct_parts = 1;
7986   int reg_count = 1;
7987   bool log_read = true;
7988 
7989   // Bit 23 determines whether this is an offset or post-index addressing mode.
7990   // In offset mode, bits 20 to 16 should be zero; these bits encode the
7991   // register or immediate in post-index mode.
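       // For example, "ld1 {v0.16b}, [x0]" (offset form) must leave bits 20:16
       // zero, whereas "ld1 {v0.16b}, [x0], x2" (post-index form) encodes the
       // offset register there.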
7992   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
7993     VIXL_UNREACHABLE();
7994   }
7995 
7996   // We use the PostIndex mask here, as it works in this case for both Offset
7997   // and PostIndex addressing.
7998   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
7999     case NEON_LD1_4v:
8000     case NEON_LD1_4v_post:
8001       ld1(vf, ReadVRegister(reg[3]), addr[3]);
8002       reg_count++;
8003       VIXL_FALLTHROUGH();
8004     case NEON_LD1_3v:
8005     case NEON_LD1_3v_post:
8006       ld1(vf, ReadVRegister(reg[2]), addr[2]);
8007       reg_count++;
8008       VIXL_FALLTHROUGH();
8009     case NEON_LD1_2v:
8010     case NEON_LD1_2v_post:
8011       ld1(vf, ReadVRegister(reg[1]), addr[1]);
8012       reg_count++;
8013       VIXL_FALLTHROUGH();
8014     case NEON_LD1_1v:
8015     case NEON_LD1_1v_post:
8016       ld1(vf, ReadVRegister(reg[0]), addr[0]);
8017       break;
8018     case NEON_ST1_4v:
8019     case NEON_ST1_4v_post:
8020       st1(vf, ReadVRegister(reg[3]), addr[3]);
8021       reg_count++;
8022       VIXL_FALLTHROUGH();
8023     case NEON_ST1_3v:
8024     case NEON_ST1_3v_post:
8025       st1(vf, ReadVRegister(reg[2]), addr[2]);
8026       reg_count++;
8027       VIXL_FALLTHROUGH();
8028     case NEON_ST1_2v:
8029     case NEON_ST1_2v_post:
8030       st1(vf, ReadVRegister(reg[1]), addr[1]);
8031       reg_count++;
8032       VIXL_FALLTHROUGH();
8033     case NEON_ST1_1v:
8034     case NEON_ST1_1v_post:
8035       st1(vf, ReadVRegister(reg[0]), addr[0]);
8036       log_read = false;
8037       break;
8038     case NEON_LD2_post:
8039     case NEON_LD2:
8040       ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8041       struct_parts = 2;
8042       reg_count = 2;
8043       break;
8044     case NEON_ST2:
8045     case NEON_ST2_post:
8046       st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8047       struct_parts = 2;
8048       reg_count = 2;
8049       log_read = false;
8050       break;
8051     case NEON_LD3_post:
8052     case NEON_LD3:
8053       ld3(vf,
8054           ReadVRegister(reg[0]),
8055           ReadVRegister(reg[1]),
8056           ReadVRegister(reg[2]),
8057           addr[0]);
8058       struct_parts = 3;
8059       reg_count = 3;
8060       break;
8061     case NEON_ST3:
8062     case NEON_ST3_post:
8063       st3(vf,
8064           ReadVRegister(reg[0]),
8065           ReadVRegister(reg[1]),
8066           ReadVRegister(reg[2]),
8067           addr[0]);
8068       struct_parts = 3;
8069       reg_count = 3;
8070       log_read = false;
8071       break;
8072     case NEON_ST4:
8073     case NEON_ST4_post:
8074       st4(vf,
8075           ReadVRegister(reg[0]),
8076           ReadVRegister(reg[1]),
8077           ReadVRegister(reg[2]),
8078           ReadVRegister(reg[3]),
8079           addr[0]);
8080       struct_parts = 4;
8081       reg_count = 4;
8082       log_read = false;
8083       break;
8084     case NEON_LD4_post:
8085     case NEON_LD4:
8086       ld4(vf,
8087           ReadVRegister(reg[0]),
8088           ReadVRegister(reg[1]),
8089           ReadVRegister(reg[2]),
8090           ReadVRegister(reg[3]),
8091           addr[0]);
8092       struct_parts = 4;
8093       reg_count = 4;
8094       break;
8095     default:
8096       VIXL_UNIMPLEMENTED();
8097   }
8098 
8099   bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites();
8100   if (do_trace) {
8101     PrintRegisterFormat print_format =
8102         GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8103     const char* op;
8104     if (log_read) {
8105       op = "<-";
8106     } else {
8107       op = "->";
8108       // Stores don't represent a change to the source register's value, so only
8109       // print the relevant part of the value.
8110       print_format = GetPrintRegPartial(print_format);
8111     }
8112 
8113     VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1));
8114     for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) {
8115       uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf));
8116       PrintVStructAccess(reg[s], struct_parts, print_format, op, address);
8117     }
8118   }
8119 
8120   if (addr_mode == PostIndex) {
8121     int rm = instr->GetRm();
8122     // The immediate post-index addressing mode is indicated by rm == 31.
8123     // The immediate is implied by the number of vector registers used.
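         // For example, "ld1 {v0.16b, v1.16b}, [x0], #32" encodes rm == 31 and
         // advances the base by 16 bytes * 2 registers = 32.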
8124     addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
8125                             : ReadXRegister(rm);
8126     WriteXRegister(instr->GetRn(), addr_base);
8127   } else {
8128     VIXL_ASSERT(addr_mode == Offset);
8129   }
8130 }
8131 
8132 
8133 void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
8134   NEONLoadStoreMultiStructHelper(instr, Offset);
8135 }
8136 
8137 
8138 void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
8139     const Instruction* instr) {
8140   NEONLoadStoreMultiStructHelper(instr, PostIndex);
8141 }
8142 
8143 
8144 void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
8145                                                 AddrMode addr_mode) {
8146   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
8147   int rt = instr->GetRt();
8148 
8149   // Bit 23 determines whether this is an offset or post-index addressing mode.
8150   // In offset mode, bits 20 to 16 should be zero; these bits encode the
8151   // register or immediate in post-index mode.
8152   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
8153     VIXL_UNREACHABLE();
8154   }
8155 
8156   // We use the PostIndex mask here, as it works in this case for both Offset
8157   // and PostIndex addressing.
8158   bool do_load = false;
8159 
8160   bool replicating = false;
8161 
8162   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
8163   VectorFormat vf_t = nfd.GetVectorFormat();
8164 
8165   VectorFormat vf = kFormat16B;
8166   switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
8167     case NEON_LD1_b:
8168     case NEON_LD1_b_post:
8169     case NEON_LD2_b:
8170     case NEON_LD2_b_post:
8171     case NEON_LD3_b:
8172     case NEON_LD3_b_post:
8173     case NEON_LD4_b:
8174     case NEON_LD4_b_post:
8175       do_load = true;
8176       VIXL_FALLTHROUGH();
8177     case NEON_ST1_b:
8178     case NEON_ST1_b_post:
8179     case NEON_ST2_b:
8180     case NEON_ST2_b_post:
8181     case NEON_ST3_b:
8182     case NEON_ST3_b_post:
8183     case NEON_ST4_b:
8184     case NEON_ST4_b_post:
8185       break;
8186 
8187     case NEON_LD1_h:
8188     case NEON_LD1_h_post:
8189     case NEON_LD2_h:
8190     case NEON_LD2_h_post:
8191     case NEON_LD3_h:
8192     case NEON_LD3_h_post:
8193     case NEON_LD4_h:
8194     case NEON_LD4_h_post:
8195       do_load = true;
8196       VIXL_FALLTHROUGH();
8197     case NEON_ST1_h:
8198     case NEON_ST1_h_post:
8199     case NEON_ST2_h:
8200     case NEON_ST2_h_post:
8201     case NEON_ST3_h:
8202     case NEON_ST3_h_post:
8203     case NEON_ST4_h:
8204     case NEON_ST4_h_post:
8205       vf = kFormat8H;
8206       break;
8207     case NEON_LD1_s:
8208     case NEON_LD1_s_post:
8209     case NEON_LD2_s:
8210     case NEON_LD2_s_post:
8211     case NEON_LD3_s:
8212     case NEON_LD3_s_post:
8213     case NEON_LD4_s:
8214     case NEON_LD4_s_post:
8215       do_load = true;
8216       VIXL_FALLTHROUGH();
8217     case NEON_ST1_s:
8218     case NEON_ST1_s_post:
8219     case NEON_ST2_s:
8220     case NEON_ST2_s_post:
8221     case NEON_ST3_s:
8222     case NEON_ST3_s_post:
8223     case NEON_ST4_s:
8224     case NEON_ST4_s_post: {
8225       VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
8226       VIXL_STATIC_ASSERT((NEON_LD1_s_post | (1 << NEONLSSize_offset)) ==
8227                          NEON_LD1_d_post);
8228       VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
8229       VIXL_STATIC_ASSERT((NEON_ST1_s_post | (1 << NEONLSSize_offset)) ==
8230                          NEON_ST1_d_post);
8231       vf = ((instr->GetNEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
8232       break;
8233     }
8234 
8235     case NEON_LD1R:
8236     case NEON_LD1R_post:
8237     case NEON_LD2R:
8238     case NEON_LD2R_post:
8239     case NEON_LD3R:
8240     case NEON_LD3R_post:
8241     case NEON_LD4R:
8242     case NEON_LD4R_post:
8243       vf = vf_t;
8244       do_load = true;
8245       replicating = true;
8246       break;
8247 
8248     default:
8249       VIXL_UNIMPLEMENTED();
8250   }
8251 
8252   int index_shift = LaneSizeInBytesLog2FromFormat(vf);
8253   int lane = instr->GetNEONLSIndex(index_shift);
8254   int reg_count = 0;
8255   int rt2 = (rt + 1) % kNumberOfVRegisters;
8256   int rt3 = (rt2 + 1) % kNumberOfVRegisters;
8257   int rt4 = (rt3 + 1) % kNumberOfVRegisters;
8258   switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
8259     case NEONLoadStoreSingle1:
8260       reg_count = 1;
8261       if (replicating) {
8262         VIXL_ASSERT(do_load);
8263         ld1r(vf, ReadVRegister(rt), addr);
8264       } else if (do_load) {
8265         ld1(vf, ReadVRegister(rt), lane, addr);
8266       } else {
8267         st1(vf, ReadVRegister(rt), lane, addr);
8268       }
8269       break;
8270     case NEONLoadStoreSingle2:
8271       reg_count = 2;
8272       if (replicating) {
8273         VIXL_ASSERT(do_load);
8274         ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
8275       } else if (do_load) {
8276         ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8277       } else {
8278         st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8279       }
8280       break;
8281     case NEONLoadStoreSingle3:
8282       reg_count = 3;
8283       if (replicating) {
8284         VIXL_ASSERT(do_load);
8285         ld3r(vf,
8286              ReadVRegister(rt),
8287              ReadVRegister(rt2),
8288              ReadVRegister(rt3),
8289              addr);
8290       } else if (do_load) {
8291         ld3(vf,
8292             ReadVRegister(rt),
8293             ReadVRegister(rt2),
8294             ReadVRegister(rt3),
8295             lane,
8296             addr);
8297       } else {
8298         st3(vf,
8299             ReadVRegister(rt),
8300             ReadVRegister(rt2),
8301             ReadVRegister(rt3),
8302             lane,
8303             addr);
8304       }
8305       break;
8306     case NEONLoadStoreSingle4:
8307       reg_count = 4;
8308       if (replicating) {
8309         VIXL_ASSERT(do_load);
8310         ld4r(vf,
8311              ReadVRegister(rt),
8312              ReadVRegister(rt2),
8313              ReadVRegister(rt3),
8314              ReadVRegister(rt4),
8315              addr);
8316       } else if (do_load) {
8317         ld4(vf,
8318             ReadVRegister(rt),
8319             ReadVRegister(rt2),
8320             ReadVRegister(rt3),
8321             ReadVRegister(rt4),
8322             lane,
8323             addr);
8324       } else {
8325         st4(vf,
8326             ReadVRegister(rt),
8327             ReadVRegister(rt2),
8328             ReadVRegister(rt3),
8329             ReadVRegister(rt4),
8330             lane,
8331             addr);
8332       }
8333       break;
8334     default:
8335       VIXL_UNIMPLEMENTED();
8336   }
8337 
8338   // Trace registers and/or memory writes.
8339   PrintRegisterFormat print_format =
8340       GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8341   if (do_load) {
8342     if (ShouldTraceVRegs()) {
8343       if (replicating) {
8344         PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr);
8345       } else {
8346         PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr);
8347       }
8348     }
8349   } else {
8350     if (ShouldTraceWrites()) {
8351       // Stores don't represent a change to the source register's value, so only
8352       // print the relevant part of the value.
8353       print_format = GetPrintRegPartial(print_format);
8354       PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr);
8355     }
8356   }
8357 
8358   if (addr_mode == PostIndex) {
8359     int rm = instr->GetRm();
8360     int lane_size = LaneSizeInBytesFromFormat(vf);
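         // For example, "ld3 {v0.s, v1.s, v2.s}[1], [x0], #12" encodes
         // rm == 31 and advances the base by 3 registers * 4-byte lanes = 12.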
8361     WriteXRegister(instr->GetRn(),
8362                    addr + ((rm == 31) ? (reg_count * lane_size)
8363                                       : ReadXRegister(rm)));
8364   }
8365 }
8366 
8367 
8368 void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
8369   NEONLoadStoreSingleStructHelper(instr, Offset);
8370 }
8371 
8372 
8373 void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
8374     const Instruction* instr) {
8375   NEONLoadStoreSingleStructHelper(instr, PostIndex);
8376 }
8377 
8378 
8379 void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
8380   SimVRegister& rd = ReadVRegister(instr->GetRd());
8381   int cmode = instr->GetNEONCmode();
8382   int cmode_3_1 = (cmode >> 1) & 7;
8383   int cmode_3 = (cmode >> 3) & 1;
8384   int cmode_2 = (cmode >> 2) & 1;
8385   int cmode_1 = (cmode >> 1) & 1;
8386   int cmode_0 = cmode & 1;
8387   int half_enc = instr->ExtractBit(11);
8388   int q = instr->GetNEONQ();
8389   int op_bit = instr->GetNEONModImmOp();
8390   uint64_t imm8 = instr->GetImmNEONabcdefgh();
8391   // Find the format and immediate value
8392   uint64_t imm = 0;
8393   VectorFormat vform = kFormatUndefined;
8394   switch (cmode_3_1) {
8395     case 0x0:
8396     case 0x1:
8397     case 0x2:
8398     case 0x3:
8399       vform = (q == 1) ? kFormat4S : kFormat2S;
8400       imm = imm8 << (8 * cmode_3_1);
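           // For example, "movi v0.4s, #0xab, lsl #16" has cmode<3:1> = 2, so
           // each S lane gets 0x00ab0000.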
8401       break;
8402     case 0x4:
8403     case 0x5:
8404       vform = (q == 1) ? kFormat8H : kFormat4H;
8405       imm = imm8 << (8 * cmode_1);
8406       break;
8407     case 0x6:
8408       vform = (q == 1) ? kFormat4S : kFormat2S;
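           // This is the "shifting ones" (MSL) form; for example,
           // "movi v0.4s, #0xab, msl #8" gives 0x0000abff in each S lane.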
8409       if (cmode_0 == 0) {
8410         imm = imm8 << 8 | 0x000000ff;
8411       } else {
8412         imm = imm8 << 16 | 0x0000ffff;
8413       }
8414       break;
8415     case 0x7:
8416       if (cmode_0 == 0 && op_bit == 0) {
8417         vform = q ? kFormat16B : kFormat8B;
8418         imm = imm8;
8419       } else if (cmode_0 == 0 && op_bit == 1) {
8420         vform = q ? kFormat2D : kFormat1D;
8421         imm = 0;
8422         for (int i = 0; i < 8; ++i) {
8423           if (imm8 & (1 << i)) {
8424             imm |= (UINT64_C(0xff) << (8 * i));
8425           }
8426         }
8427       } else {  // cmode_0 == 1, cmode == 0xf.
8428         if (half_enc == 1) {
8429           vform = q ? kFormat8H : kFormat4H;
8430           imm = Float16ToRawbits(instr->GetImmNEONFP16());
8431         } else if (op_bit == 0) {
8432           vform = q ? kFormat4S : kFormat2S;
8433           imm = FloatToRawbits(instr->GetImmNEONFP32());
8434         } else if (q == 1) {
8435           vform = kFormat2D;
8436           imm = DoubleToRawbits(instr->GetImmNEONFP64());
8437         } else {
8438           VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
8439           VisitUnallocated(instr);
8440         }
8441       }
8442       break;
8443     default:
8444       VIXL_UNREACHABLE();
8445       break;
8446   }
8447 
8448   // Find the operation
8449   NEONModifiedImmediateOp op;
8450   if (cmode_3 == 0) {
8451     if (cmode_0 == 0) {
8452       op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8453     } else {  // cmode<0> == '1'
8454       op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8455     }
8456   } else {  // cmode<3> == '1'
8457     if (cmode_2 == 0) {
8458       if (cmode_0 == 0) {
8459         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8460       } else {  // cmode<0> == '1'
8461         op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8462       }
8463     } else {  // cmode<2> == '1'
8464       if (cmode_1 == 0) {
8465         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8466       } else {  // cmode<1> == '1'
8467         if (cmode_0 == 0) {
8468           op = NEONModifiedImmediate_MOVI;
8469         } else {  // cmode<0> == '1'
8470           op = NEONModifiedImmediate_MOVI;
8471         }
8472       }
8473     }
8474   }
8475 
8476   // Call the logic function
8477   if (op == NEONModifiedImmediate_ORR) {
8478     orr(vform, rd, rd, imm);
8479   } else if (op == NEONModifiedImmediate_BIC) {
8480     bic(vform, rd, rd, imm);
8481   } else if (op == NEONModifiedImmediate_MOVI) {
8482     movi(vform, rd, imm);
8483   } else if (op == NEONModifiedImmediate_MVNI) {
8484     mvni(vform, rd, imm);
8485   } else {
8486     VisitUnimplemented(instr);
8487   }
8488 }
8489 
8490 
8491 void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
8492   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8493   VectorFormat vf = nfd.GetVectorFormat();
8494 
8495   SimVRegister& rd = ReadVRegister(instr->GetRd());
8496   SimVRegister& rn = ReadVRegister(instr->GetRn());
8497 
8498   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
8499     // These instructions all use a two-bit size field, except NOT and RBIT,
8500     // which use the field to encode the operation.
8501     switch (instr->Mask(NEONScalar2RegMiscMask)) {
8502       case NEON_CMEQ_zero_scalar:
8503         cmp(vf, rd, rn, 0, eq);
8504         break;
8505       case NEON_CMGE_zero_scalar:
8506         cmp(vf, rd, rn, 0, ge);
8507         break;
8508       case NEON_CMGT_zero_scalar:
8509         cmp(vf, rd, rn, 0, gt);
8510         break;
8511       case NEON_CMLT_zero_scalar:
8512         cmp(vf, rd, rn, 0, lt);
8513         break;
8514       case NEON_CMLE_zero_scalar:
8515         cmp(vf, rd, rn, 0, le);
8516         break;
8517       case NEON_ABS_scalar:
8518         abs(vf, rd, rn);
8519         break;
8520       case NEON_SQABS_scalar:
8521         abs(vf, rd, rn).SignedSaturate(vf);
8522         break;
8523       case NEON_NEG_scalar:
8524         neg(vf, rd, rn);
8525         break;
8526       case NEON_SQNEG_scalar:
8527         neg(vf, rd, rn).SignedSaturate(vf);
8528         break;
8529       case NEON_SUQADD_scalar:
8530         suqadd(vf, rd, rd, rn);
8531         break;
8532       case NEON_USQADD_scalar:
8533         usqadd(vf, rd, rd, rn);
8534         break;
8535       default:
8536         VIXL_UNIMPLEMENTED();
8537         break;
8538     }
8539   } else {
8540     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8541     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8542 
8543     // These instructions all use a one-bit size field, except SQXTUN, SQXTN
8544     // and UQXTN, which use a two-bit size field.
8545     switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
8546       case NEON_FRECPE_scalar:
8547         frecpe(fpf, rd, rn, fpcr_rounding);
8548         break;
8549       case NEON_FRECPX_scalar:
8550         frecpx(fpf, rd, rn);
8551         break;
8552       case NEON_FRSQRTE_scalar:
8553         frsqrte(fpf, rd, rn);
8554         break;
8555       case NEON_FCMGT_zero_scalar:
8556         fcmp_zero(fpf, rd, rn, gt);
8557         break;
8558       case NEON_FCMGE_zero_scalar:
8559         fcmp_zero(fpf, rd, rn, ge);
8560         break;
8561       case NEON_FCMEQ_zero_scalar:
8562         fcmp_zero(fpf, rd, rn, eq);
8563         break;
8564       case NEON_FCMLE_zero_scalar:
8565         fcmp_zero(fpf, rd, rn, le);
8566         break;
8567       case NEON_FCMLT_zero_scalar:
8568         fcmp_zero(fpf, rd, rn, lt);
8569         break;
8570       case NEON_SCVTF_scalar:
8571         scvtf(fpf, rd, rn, 0, fpcr_rounding);
8572         break;
8573       case NEON_UCVTF_scalar:
8574         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8575         break;
8576       case NEON_FCVTNS_scalar:
8577         fcvts(fpf, rd, rn, FPTieEven);
8578         break;
8579       case NEON_FCVTNU_scalar:
8580         fcvtu(fpf, rd, rn, FPTieEven);
8581         break;
8582       case NEON_FCVTPS_scalar:
8583         fcvts(fpf, rd, rn, FPPositiveInfinity);
8584         break;
8585       case NEON_FCVTPU_scalar:
8586         fcvtu(fpf, rd, rn, FPPositiveInfinity);
8587         break;
8588       case NEON_FCVTMS_scalar:
8589         fcvts(fpf, rd, rn, FPNegativeInfinity);
8590         break;
8591       case NEON_FCVTMU_scalar:
8592         fcvtu(fpf, rd, rn, FPNegativeInfinity);
8593         break;
8594       case NEON_FCVTZS_scalar:
8595         fcvts(fpf, rd, rn, FPZero);
8596         break;
8597       case NEON_FCVTZU_scalar:
8598         fcvtu(fpf, rd, rn, FPZero);
8599         break;
8600       case NEON_FCVTAS_scalar:
8601         fcvts(fpf, rd, rn, FPTieAway);
8602         break;
8603       case NEON_FCVTAU_scalar:
8604         fcvtu(fpf, rd, rn, FPTieAway);
8605         break;
8606       case NEON_FCVTXN_scalar:
8607         // Unlike all of the other FP instructions above, fcvtxn encodes dest
8608         // size S as size<0>=1. There's only one case, so we ignore the form.
8609         VIXL_ASSERT(instr->ExtractBit(22) == 1);
8610         fcvtxn(kFormatS, rd, rn);
8611         break;
8612       default:
8613         switch (instr->Mask(NEONScalar2RegMiscMask)) {
8614           case NEON_SQXTN_scalar:
8615             sqxtn(vf, rd, rn);
8616             break;
8617           case NEON_UQXTN_scalar:
8618             uqxtn(vf, rd, rn);
8619             break;
8620           case NEON_SQXTUN_scalar:
8621             sqxtun(vf, rd, rn);
8622             break;
8623           default:
8624             VIXL_UNIMPLEMENTED();
8625         }
8626     }
8627   }
8628 }
8629 
8630 
8631 void Simulator::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
8632   VectorFormat fpf = kFormatH;
8633   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8634 
8635   SimVRegister& rd = ReadVRegister(instr->GetRd());
8636   SimVRegister& rn = ReadVRegister(instr->GetRn());
8637 
8638   switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
8639     case NEON_FRECPE_H_scalar:
8640       frecpe(fpf, rd, rn, fpcr_rounding);
8641       break;
8642     case NEON_FRECPX_H_scalar:
8643       frecpx(fpf, rd, rn);
8644       break;
8645     case NEON_FRSQRTE_H_scalar:
8646       frsqrte(fpf, rd, rn);
8647       break;
8648     case NEON_FCMGT_H_zero_scalar:
8649       fcmp_zero(fpf, rd, rn, gt);
8650       break;
8651     case NEON_FCMGE_H_zero_scalar:
8652       fcmp_zero(fpf, rd, rn, ge);
8653       break;
8654     case NEON_FCMEQ_H_zero_scalar:
8655       fcmp_zero(fpf, rd, rn, eq);
8656       break;
8657     case NEON_FCMLE_H_zero_scalar:
8658       fcmp_zero(fpf, rd, rn, le);
8659       break;
8660     case NEON_FCMLT_H_zero_scalar:
8661       fcmp_zero(fpf, rd, rn, lt);
8662       break;
8663     case NEON_SCVTF_H_scalar:
8664       scvtf(fpf, rd, rn, 0, fpcr_rounding);
8665       break;
8666     case NEON_UCVTF_H_scalar:
8667       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8668       break;
8669     case NEON_FCVTNS_H_scalar:
8670       fcvts(fpf, rd, rn, FPTieEven);
8671       break;
8672     case NEON_FCVTNU_H_scalar:
8673       fcvtu(fpf, rd, rn, FPTieEven);
8674       break;
8675     case NEON_FCVTPS_H_scalar:
8676       fcvts(fpf, rd, rn, FPPositiveInfinity);
8677       break;
8678     case NEON_FCVTPU_H_scalar:
8679       fcvtu(fpf, rd, rn, FPPositiveInfinity);
8680       break;
8681     case NEON_FCVTMS_H_scalar:
8682       fcvts(fpf, rd, rn, FPNegativeInfinity);
8683       break;
8684     case NEON_FCVTMU_H_scalar:
8685       fcvtu(fpf, rd, rn, FPNegativeInfinity);
8686       break;
8687     case NEON_FCVTZS_H_scalar:
8688       fcvts(fpf, rd, rn, FPZero);
8689       break;
8690     case NEON_FCVTZU_H_scalar:
8691       fcvtu(fpf, rd, rn, FPZero);
8692       break;
8693     case NEON_FCVTAS_H_scalar:
8694       fcvts(fpf, rd, rn, FPTieAway);
8695       break;
8696     case NEON_FCVTAU_H_scalar:
8697       fcvtu(fpf, rd, rn, FPTieAway);
8698       break;
8699   }
8700 }
8701 
8702 
8703 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
8704   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
8705   VectorFormat vf = nfd.GetVectorFormat();
8706 
8707   SimVRegister& rd = ReadVRegister(instr->GetRd());
8708   SimVRegister& rn = ReadVRegister(instr->GetRn());
8709   SimVRegister& rm = ReadVRegister(instr->GetRm());
8710   switch (instr->Mask(NEONScalar3DiffMask)) {
8711     case NEON_SQDMLAL_scalar:
8712       sqdmlal(vf, rd, rn, rm);
8713       break;
8714     case NEON_SQDMLSL_scalar:
8715       sqdmlsl(vf, rd, rn, rm);
8716       break;
8717     case NEON_SQDMULL_scalar:
8718       sqdmull(vf, rd, rn, rm);
8719       break;
8720     default:
8721       VIXL_UNIMPLEMENTED();
8722   }
8723 }
8724 
8725 
8726 void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
8727   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8728   VectorFormat vf = nfd.GetVectorFormat();
8729 
8730   SimVRegister& rd = ReadVRegister(instr->GetRd());
8731   SimVRegister& rn = ReadVRegister(instr->GetRn());
8732   SimVRegister& rm = ReadVRegister(instr->GetRm());
8733 
8734   if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
8735     vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8736     switch (instr->Mask(NEONScalar3SameFPMask)) {
8737       case NEON_FMULX_scalar:
8738         fmulx(vf, rd, rn, rm);
8739         break;
8740       case NEON_FACGE_scalar:
8741         fabscmp(vf, rd, rn, rm, ge);
8742         break;
8743       case NEON_FACGT_scalar:
8744         fabscmp(vf, rd, rn, rm, gt);
8745         break;
8746       case NEON_FCMEQ_scalar:
8747         fcmp(vf, rd, rn, rm, eq);
8748         break;
8749       case NEON_FCMGE_scalar:
8750         fcmp(vf, rd, rn, rm, ge);
8751         break;
8752       case NEON_FCMGT_scalar:
8753         fcmp(vf, rd, rn, rm, gt);
8754         break;
8755       case NEON_FRECPS_scalar:
8756         frecps(vf, rd, rn, rm);
8757         break;
8758       case NEON_FRSQRTS_scalar:
8759         frsqrts(vf, rd, rn, rm);
8760         break;
8761       case NEON_FABD_scalar:
8762         fabd(vf, rd, rn, rm);
8763         break;
8764       default:
8765         VIXL_UNIMPLEMENTED();
8766     }
8767   } else {
8768     switch (instr->Mask(NEONScalar3SameMask)) {
8769       case NEON_ADD_scalar:
8770         add(vf, rd, rn, rm);
8771         break;
8772       case NEON_SUB_scalar:
8773         sub(vf, rd, rn, rm);
8774         break;
8775       case NEON_CMEQ_scalar:
8776         cmp(vf, rd, rn, rm, eq);
8777         break;
8778       case NEON_CMGE_scalar:
8779         cmp(vf, rd, rn, rm, ge);
8780         break;
8781       case NEON_CMGT_scalar:
8782         cmp(vf, rd, rn, rm, gt);
8783         break;
8784       case NEON_CMHI_scalar:
8785         cmp(vf, rd, rn, rm, hi);
8786         break;
8787       case NEON_CMHS_scalar:
8788         cmp(vf, rd, rn, rm, hs);
8789         break;
8790       case NEON_CMTST_scalar:
8791         cmptst(vf, rd, rn, rm);
8792         break;
8793       case NEON_USHL_scalar:
8794         ushl(vf, rd, rn, rm);
8795         break;
8796       case NEON_SSHL_scalar:
8797         sshl(vf, rd, rn, rm);
8798         break;
8799       case NEON_SQDMULH_scalar:
8800         sqdmulh(vf, rd, rn, rm);
8801         break;
8802       case NEON_SQRDMULH_scalar:
8803         sqrdmulh(vf, rd, rn, rm);
8804         break;
8805       case NEON_UQADD_scalar:
8806         add(vf, rd, rn, rm).UnsignedSaturate(vf);
8807         break;
8808       case NEON_SQADD_scalar:
8809         add(vf, rd, rn, rm).SignedSaturate(vf);
8810         break;
8811       case NEON_UQSUB_scalar:
8812         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
8813         break;
8814       case NEON_SQSUB_scalar:
8815         sub(vf, rd, rn, rm).SignedSaturate(vf);
8816         break;
8817       case NEON_UQSHL_scalar:
8818         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
8819         break;
8820       case NEON_SQSHL_scalar:
8821         sshl(vf, rd, rn, rm).SignedSaturate(vf);
8822         break;
8823       case NEON_URSHL_scalar:
8824         ushl(vf, rd, rn, rm).Round(vf);
8825         break;
8826       case NEON_SRSHL_scalar:
8827         sshl(vf, rd, rn, rm).Round(vf);
8828         break;
8829       case NEON_UQRSHL_scalar:
8830         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
8831         break;
8832       case NEON_SQRSHL_scalar:
8833         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
8834         break;
8835       default:
8836         VIXL_UNIMPLEMENTED();
8837     }
8838   }
8839 }
8840 
8841 void Simulator::VisitNEONScalar3SameFP16(const Instruction* instr) {
8842   SimVRegister& rd = ReadVRegister(instr->GetRd());
8843   SimVRegister& rn = ReadVRegister(instr->GetRn());
8844   SimVRegister& rm = ReadVRegister(instr->GetRm());
8845 
8846   switch (instr->Mask(NEONScalar3SameFP16Mask)) {
8847     case NEON_FABD_H_scalar:
8848       fabd(kFormatH, rd, rn, rm);
8849       break;
8850     case NEON_FMULX_H_scalar:
8851       fmulx(kFormatH, rd, rn, rm);
8852       break;
8853     case NEON_FCMEQ_H_scalar:
8854       fcmp(kFormatH, rd, rn, rm, eq);
8855       break;
8856     case NEON_FCMGE_H_scalar:
8857       fcmp(kFormatH, rd, rn, rm, ge);
8858       break;
8859     case NEON_FCMGT_H_scalar:
8860       fcmp(kFormatH, rd, rn, rm, gt);
8861       break;
8862     case NEON_FACGE_H_scalar:
8863       fabscmp(kFormatH, rd, rn, rm, ge);
8864       break;
8865     case NEON_FACGT_H_scalar:
8866       fabscmp(kFormatH, rd, rn, rm, gt);
8867       break;
8868     case NEON_FRECPS_H_scalar:
8869       frecps(kFormatH, rd, rn, rm);
8870       break;
8871     case NEON_FRSQRTS_H_scalar:
8872       frsqrts(kFormatH, rd, rn, rm);
8873       break;
8874     default:
8875       VIXL_UNREACHABLE();
8876   }
8877 }
8878 
8879 
8880 void Simulator::VisitNEONScalar3SameExtra(const Instruction* instr) {
8881   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8882   VectorFormat vf = nfd.GetVectorFormat();
8883 
8884   SimVRegister& rd = ReadVRegister(instr->GetRd());
8885   SimVRegister& rn = ReadVRegister(instr->GetRn());
8886   SimVRegister& rm = ReadVRegister(instr->GetRm());
8887 
8888   switch (instr->Mask(NEONScalar3SameExtraMask)) {
8889     case NEON_SQRDMLAH_scalar:
8890       sqrdmlah(vf, rd, rn, rm);
8891       break;
8892     case NEON_SQRDMLSH_scalar:
8893       sqrdmlsh(vf, rd, rn, rm);
8894       break;
8895     default:
8896       VIXL_UNIMPLEMENTED();
8897   }
8898 }
8899 
8900 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
8901   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
8902   VectorFormat vf = nfd.GetVectorFormat();
8903   VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
8904 
8905   SimVRegister& rd = ReadVRegister(instr->GetRd());
8906   SimVRegister& rn = ReadVRegister(instr->GetRn());
8907   ByElementOp Op = NULL;
8908 
8909   int rm_reg = instr->GetRm();
8910   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8911   if (instr->GetNEONSize() == 1) {
8912     rm_reg &= 0xf;
8913     index = (index << 1) | instr->GetNEONM();
8914   }
8915 
8916   switch (instr->Mask(NEONScalarByIndexedElementMask)) {
8917     case NEON_SQDMULL_byelement_scalar:
8918       Op = &Simulator::sqdmull;
8919       break;
8920     case NEON_SQDMLAL_byelement_scalar:
8921       Op = &Simulator::sqdmlal;
8922       break;
8923     case NEON_SQDMLSL_byelement_scalar:
8924       Op = &Simulator::sqdmlsl;
8925       break;
8926     case NEON_SQDMULH_byelement_scalar:
8927       Op = &Simulator::sqdmulh;
8928       vf = vf_r;
8929       break;
8930     case NEON_SQRDMULH_byelement_scalar:
8931       Op = &Simulator::sqrdmulh;
8932       vf = vf_r;
8933       break;
8934     case NEON_SQRDMLAH_byelement_scalar:
8935       Op = &Simulator::sqrdmlah;
8936       vf = vf_r;
8937       break;
8938     case NEON_SQRDMLSH_byelement_scalar:
8939       Op = &Simulator::sqrdmlsh;
8940       vf = vf_r;
8941       break;
8942     default:
8943       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8944       index = instr->GetNEONH();
8945       if (instr->GetFPType() == 0) {
8946         index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
8947         rm_reg &= 0xf;
8948         vf = kFormatH;
8949       } else if ((instr->GetFPType() & 1) == 0) {
8950         index = (index << 1) | instr->GetNEONL();
8951       }
8952       switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
8953         case NEON_FMUL_H_byelement_scalar:
8954         case NEON_FMUL_byelement_scalar:
8955           Op = &Simulator::fmul;
8956           break;
8957         case NEON_FMLA_H_byelement_scalar:
8958         case NEON_FMLA_byelement_scalar:
8959           Op = &Simulator::fmla;
8960           break;
8961         case NEON_FMLS_H_byelement_scalar:
8962         case NEON_FMLS_byelement_scalar:
8963           Op = &Simulator::fmls;
8964           break;
8965         case NEON_FMULX_H_byelement_scalar:
8966         case NEON_FMULX_byelement_scalar:
8967           Op = &Simulator::fmulx;
8968           break;
8969         default:
8970           VIXL_UNIMPLEMENTED();
8971       }
8972   }
8973 
8974   (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
8975 }
8976 
8977 
8978 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
8979   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
8980   VectorFormat vf = nfd.GetVectorFormat();
8981 
8982   SimVRegister& rd = ReadVRegister(instr->GetRd());
8983   SimVRegister& rn = ReadVRegister(instr->GetRn());
8984 
8985   if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
8986     int imm5 = instr->GetImmNEON5();
8987     int tz = CountTrailingZeros(imm5, 32);
8988     int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5);
8989     dup_element(vf, rd, rn, rn_index);
8990   } else {
8991     VIXL_UNIMPLEMENTED();
8992   }
8993 }
8994 
8995 
8996 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
8997   NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarPairwiseFormatMap());
8998   VectorFormat vf = nfd.GetVectorFormat();
8999 
9000   SimVRegister& rd = ReadVRegister(instr->GetRd());
9001   SimVRegister& rn = ReadVRegister(instr->GetRn());
9002   switch (instr->Mask(NEONScalarPairwiseMask)) {
9003     case NEON_ADDP_scalar: {
9004       // All pairwise operations except ADDP use bit U to differentiate FP16
9005       // from FP32/FP64 variations.
9006       NEONFormatDecoder nfd_addp(instr, NEONFormatDecoder::FPScalarFormatMap());
9007       addp(nfd_addp.GetVectorFormat(), rd, rn);
9008       break;
9009     }
9010     case NEON_FADDP_h_scalar:
9011     case NEON_FADDP_scalar:
9012       faddp(vf, rd, rn);
9013       break;
9014     case NEON_FMAXP_h_scalar:
9015     case NEON_FMAXP_scalar:
9016       fmaxp(vf, rd, rn);
9017       break;
9018     case NEON_FMAXNMP_h_scalar:
9019     case NEON_FMAXNMP_scalar:
9020       fmaxnmp(vf, rd, rn);
9021       break;
9022     case NEON_FMINP_h_scalar:
9023     case NEON_FMINP_scalar:
9024       fminp(vf, rd, rn);
9025       break;
9026     case NEON_FMINNMP_h_scalar:
9027     case NEON_FMINNMP_scalar:
9028       fminnmp(vf, rd, rn);
9029       break;
9030     default:
9031       VIXL_UNIMPLEMENTED();
9032   }
9033 }
9034 
9035 
9036 void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
9037   SimVRegister& rd = ReadVRegister(instr->GetRd());
9038   SimVRegister& rn = ReadVRegister(instr->GetRn());
9039   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9040 
9041   static const NEONFormatMap map = {{22, 21, 20, 19},
9042                                     {NF_UNDEF,
9043                                      NF_B,
9044                                      NF_H,
9045                                      NF_H,
9046                                      NF_S,
9047                                      NF_S,
9048                                      NF_S,
9049                                      NF_S,
9050                                      NF_D,
9051                                      NF_D,
9052                                      NF_D,
9053                                      NF_D,
9054                                      NF_D,
9055                                      NF_D,
9056                                      NF_D,
9057                                      NF_D}};
9058   NEONFormatDecoder nfd(instr, &map);
9059   VectorFormat vf = nfd.GetVectorFormat();
9060 
9061   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9062   int immh_immb = instr->GetImmNEONImmhImmb();
9063   int right_shift = (16 << highest_set_bit) - immh_immb;
9064   int left_shift = immh_immb - (8 << highest_set_bit);
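       // immh:immb encodes both the lane size and the shift amount. For
       // example, immh = 0b0100 selects S lanes (highest set bit 2); with
       // immb = 0b001, immh_immb = 33, so right_shift = 64 - 33 = 31 and
       // left_shift = 33 - 32 = 1.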
9065   switch (instr->Mask(NEONScalarShiftImmediateMask)) {
9066     case NEON_SHL_scalar:
9067       shl(vf, rd, rn, left_shift);
9068       break;
9069     case NEON_SLI_scalar:
9070       sli(vf, rd, rn, left_shift);
9071       break;
9072     case NEON_SQSHL_imm_scalar:
9073       sqshl(vf, rd, rn, left_shift);
9074       break;
9075     case NEON_UQSHL_imm_scalar:
9076       uqshl(vf, rd, rn, left_shift);
9077       break;
9078     case NEON_SQSHLU_scalar:
9079       sqshlu(vf, rd, rn, left_shift);
9080       break;
9081     case NEON_SRI_scalar:
9082       sri(vf, rd, rn, right_shift);
9083       break;
9084     case NEON_SSHR_scalar:
9085       sshr(vf, rd, rn, right_shift);
9086       break;
9087     case NEON_USHR_scalar:
9088       ushr(vf, rd, rn, right_shift);
9089       break;
9090     case NEON_SRSHR_scalar:
9091       sshr(vf, rd, rn, right_shift).Round(vf);
9092       break;
9093     case NEON_URSHR_scalar:
9094       ushr(vf, rd, rn, right_shift).Round(vf);
9095       break;
9096     case NEON_SSRA_scalar:
9097       ssra(vf, rd, rn, right_shift);
9098       break;
9099     case NEON_USRA_scalar:
9100       usra(vf, rd, rn, right_shift);
9101       break;
9102     case NEON_SRSRA_scalar:
9103       srsra(vf, rd, rn, right_shift);
9104       break;
9105     case NEON_URSRA_scalar:
9106       ursra(vf, rd, rn, right_shift);
9107       break;
9108     case NEON_UQSHRN_scalar:
9109       uqshrn(vf, rd, rn, right_shift);
9110       break;
9111     case NEON_UQRSHRN_scalar:
9112       uqrshrn(vf, rd, rn, right_shift);
9113       break;
9114     case NEON_SQSHRN_scalar:
9115       sqshrn(vf, rd, rn, right_shift);
9116       break;
9117     case NEON_SQRSHRN_scalar:
9118       sqrshrn(vf, rd, rn, right_shift);
9119       break;
9120     case NEON_SQSHRUN_scalar:
9121       sqshrun(vf, rd, rn, right_shift);
9122       break;
9123     case NEON_SQRSHRUN_scalar:
9124       sqrshrun(vf, rd, rn, right_shift);
9125       break;
9126     case NEON_FCVTZS_imm_scalar:
9127       fcvts(vf, rd, rn, FPZero, right_shift);
9128       break;
9129     case NEON_FCVTZU_imm_scalar:
9130       fcvtu(vf, rd, rn, FPZero, right_shift);
9131       break;
9132     case NEON_SCVTF_imm_scalar:
9133       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9134       break;
9135     case NEON_UCVTF_imm_scalar:
9136       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9137       break;
9138     default:
9139       VIXL_UNIMPLEMENTED();
9140   }
9141 }
9142 
9143 
9144 void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
9145   SimVRegister& rd = ReadVRegister(instr->GetRd());
9146   SimVRegister& rn = ReadVRegister(instr->GetRn());
9147   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9148 
9149   // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
9150   // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
9151   static const NEONFormatMap map = {{22, 21, 20, 19, 30},
9152                                     {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B,
9153                                      NF_4H,    NF_8H,    NF_4H,    NF_8H,
9154                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9155                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9156                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9157                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9158                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9159                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D}};
9160   NEONFormatDecoder nfd(instr, &map);
9161   VectorFormat vf = nfd.GetVectorFormat();
9162 
9163   // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
9164   static const NEONFormatMap map_l =
9165       {{22, 21, 20, 19},
9166        {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
9167   VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
9168 
9169   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9170   int immh_immb = instr->GetImmNEONImmhImmb();
9171   int right_shift = (16 << highest_set_bit) - immh_immb;
9172   int left_shift = immh_immb - (8 << highest_set_bit);
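       // As above, immh:immb encodes the lane size and shift amount; e.g.
       // immh = 0b0010 selects H lanes, and immh_immb = 19 gives a left shift
       // of 19 - 16 = 3 or a right shift of 32 - 19 = 13.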
9173 
9174   switch (instr->Mask(NEONShiftImmediateMask)) {
9175     case NEON_SHL:
9176       shl(vf, rd, rn, left_shift);
9177       break;
9178     case NEON_SLI:
9179       sli(vf, rd, rn, left_shift);
9180       break;
9181     case NEON_SQSHLU:
9182       sqshlu(vf, rd, rn, left_shift);
9183       break;
9184     case NEON_SRI:
9185       sri(vf, rd, rn, right_shift);
9186       break;
9187     case NEON_SSHR:
9188       sshr(vf, rd, rn, right_shift);
9189       break;
9190     case NEON_USHR:
9191       ushr(vf, rd, rn, right_shift);
9192       break;
9193     case NEON_SRSHR:
9194       sshr(vf, rd, rn, right_shift).Round(vf);
9195       break;
9196     case NEON_URSHR:
9197       ushr(vf, rd, rn, right_shift).Round(vf);
9198       break;
9199     case NEON_SSRA:
9200       ssra(vf, rd, rn, right_shift);
9201       break;
9202     case NEON_USRA:
9203       usra(vf, rd, rn, right_shift);
9204       break;
9205     case NEON_SRSRA:
9206       srsra(vf, rd, rn, right_shift);
9207       break;
9208     case NEON_URSRA:
9209       ursra(vf, rd, rn, right_shift);
9210       break;
9211     case NEON_SQSHL_imm:
9212       sqshl(vf, rd, rn, left_shift);
9213       break;
9214     case NEON_UQSHL_imm:
9215       uqshl(vf, rd, rn, left_shift);
9216       break;
9217     case NEON_SCVTF_imm:
9218       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9219       break;
9220     case NEON_UCVTF_imm:
9221       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9222       break;
9223     case NEON_FCVTZS_imm:
9224       fcvts(vf, rd, rn, FPZero, right_shift);
9225       break;
9226     case NEON_FCVTZU_imm:
9227       fcvtu(vf, rd, rn, FPZero, right_shift);
9228       break;
9229     case NEON_SSHLL:
9230       vf = vf_l;
9231       if (instr->Mask(NEON_Q)) {
9232         sshll2(vf, rd, rn, left_shift);
9233       } else {
9234         sshll(vf, rd, rn, left_shift);
9235       }
9236       break;
9237     case NEON_USHLL:
9238       vf = vf_l;
9239       if (instr->Mask(NEON_Q)) {
9240         ushll2(vf, rd, rn, left_shift);
9241       } else {
9242         ushll(vf, rd, rn, left_shift);
9243       }
9244       break;
9245     case NEON_SHRN:
9246       if (instr->Mask(NEON_Q)) {
9247         shrn2(vf, rd, rn, right_shift);
9248       } else {
9249         shrn(vf, rd, rn, right_shift);
9250       }
9251       break;
9252     case NEON_RSHRN:
9253       if (instr->Mask(NEON_Q)) {
9254         rshrn2(vf, rd, rn, right_shift);
9255       } else {
9256         rshrn(vf, rd, rn, right_shift);
9257       }
9258       break;
9259     case NEON_UQSHRN:
9260       if (instr->Mask(NEON_Q)) {
9261         uqshrn2(vf, rd, rn, right_shift);
9262       } else {
9263         uqshrn(vf, rd, rn, right_shift);
9264       }
9265       break;
9266     case NEON_UQRSHRN:
9267       if (instr->Mask(NEON_Q)) {
9268         uqrshrn2(vf, rd, rn, right_shift);
9269       } else {
9270         uqrshrn(vf, rd, rn, right_shift);
9271       }
9272       break;
9273     case NEON_SQSHRN:
9274       if (instr->Mask(NEON_Q)) {
9275         sqshrn2(vf, rd, rn, right_shift);
9276       } else {
9277         sqshrn(vf, rd, rn, right_shift);
9278       }
9279       break;
9280     case NEON_SQRSHRN:
9281       if (instr->Mask(NEON_Q)) {
9282         sqrshrn2(vf, rd, rn, right_shift);
9283       } else {
9284         sqrshrn(vf, rd, rn, right_shift);
9285       }
9286       break;
9287     case NEON_SQSHRUN:
9288       if (instr->Mask(NEON_Q)) {
9289         sqshrun2(vf, rd, rn, right_shift);
9290       } else {
9291         sqshrun(vf, rd, rn, right_shift);
9292       }
9293       break;
9294     case NEON_SQRSHRUN:
9295       if (instr->Mask(NEON_Q)) {
9296         sqrshrun2(vf, rd, rn, right_shift);
9297       } else {
9298         sqrshrun(vf, rd, rn, right_shift);
9299       }
9300       break;
9301     default:
9302       VIXL_UNIMPLEMENTED();
9303   }
9304 }
9305 
9306 
9307 void Simulator::VisitNEONTable(const Instruction* instr) {
9308   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
9309   VectorFormat vf = nfd.GetVectorFormat();
9310 
9311   SimVRegister& rd = ReadVRegister(instr->GetRd());
9312   SimVRegister& rn = ReadVRegister(instr->GetRn());
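  // TBL and TBX use up to four consecutive table registers, wrapping around
  // from V31 to V0.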
9313   SimVRegister& rn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfVRegisters);
9314   SimVRegister& rn3 = ReadVRegister((instr->GetRn() + 2) % kNumberOfVRegisters);
9315   SimVRegister& rn4 = ReadVRegister((instr->GetRn() + 3) % kNumberOfVRegisters);
9316   SimVRegister& rm = ReadVRegister(instr->GetRm());
9317 
9318   switch (instr->Mask(NEONTableMask)) {
9319     case NEON_TBL_1v:
9320       tbl(vf, rd, rn, rm);
9321       break;
9322     case NEON_TBL_2v:
9323       tbl(vf, rd, rn, rn2, rm);
9324       break;
9325     case NEON_TBL_3v:
9326       tbl(vf, rd, rn, rn2, rn3, rm);
9327       break;
9328     case NEON_TBL_4v:
9329       tbl(vf, rd, rn, rn2, rn3, rn4, rm);
9330       break;
9331     case NEON_TBX_1v:
9332       tbx(vf, rd, rn, rm);
9333       break;
9334     case NEON_TBX_2v:
9335       tbx(vf, rd, rn, rn2, rm);
9336       break;
9337     case NEON_TBX_3v:
9338       tbx(vf, rd, rn, rn2, rn3, rm);
9339       break;
9340     case NEON_TBX_4v:
9341       tbx(vf, rd, rn, rn2, rn3, rn4, rm);
9342       break;
9343     default:
9344       VIXL_UNIMPLEMENTED();
9345   }
9346 }
9347 
9348 
9349 void Simulator::VisitNEONPerm(const Instruction* instr) {
9350   NEONFormatDecoder nfd(instr);
9351   VectorFormat vf = nfd.GetVectorFormat();
9352 
9353   SimVRegister& rd = ReadVRegister(instr->GetRd());
9354   SimVRegister& rn = ReadVRegister(instr->GetRn());
9355   SimVRegister& rm = ReadVRegister(instr->GetRm());
9356 
9357   switch (instr->Mask(NEONPermMask)) {
9358     case NEON_TRN1:
9359       trn1(vf, rd, rn, rm);
9360       break;
9361     case NEON_TRN2:
9362       trn2(vf, rd, rn, rm);
9363       break;
9364     case NEON_UZP1:
9365       uzp1(vf, rd, rn, rm);
9366       break;
9367     case NEON_UZP2:
9368       uzp2(vf, rd, rn, rm);
9369       break;
9370     case NEON_ZIP1:
9371       zip1(vf, rd, rn, rm);
9372       break;
9373     case NEON_ZIP2:
9374       zip2(vf, rd, rn, rm);
9375       break;
9376     default:
9377       VIXL_UNIMPLEMENTED();
9378   }
9379 }
9380 
9381 void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
9382   SimVRegister& zd = ReadVRegister(instr->GetRd());
9383   SimVRegister& zn = ReadVRegister(instr->GetRn());
9384   SimVRegister& zm = ReadVRegister(instr->GetRm());
9385   SimVRegister temp;
9386 
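  // ADR computes zd = zn + (zm << shift); for the 32-bit offset forms, the
  // offsets in zm are first sign- or zero-extended from 32 bits.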
9387   VectorFormat vform = kFormatVnD;
9388   mov(vform, temp, zm);
9389 
9390   switch (instr->Mask(SVEAddressGenerationMask)) {
9391     case ADR_z_az_d_s32_scaled:
9392       sxt(vform, temp, temp, kSRegSize);
9393       break;
9394     case ADR_z_az_d_u32_scaled:
9395       uxt(vform, temp, temp, kSRegSize);
9396       break;
9397     case ADR_z_az_s_same_scaled:
9398       vform = kFormatVnS;
9399       break;
9400     case ADR_z_az_d_same_scaled:
9401       // Nothing to do.
9402       break;
9403     default:
9404       VIXL_UNIMPLEMENTED();
9405       break;
9406   }
9407 
9408   int shift_amount = instr->ExtractBits(11, 10);
9409   shl(vform, temp, temp, shift_amount);
9410   add(vform, zd, zn, temp);
9411 }
9412 
9413 void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
9414     const Instruction* instr) {
9415   Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
9416   switch (op) {
9417     case AND_z_zi:
9418     case EOR_z_zi:
9419     case ORR_z_zi: {
9420       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9421       uint64_t imm = instr->GetSVEImmLogical();
9422       // A valid logical immediate must have at least one bit set.
9423       VIXL_ASSERT(imm != 0);
9424       SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
9425                               op),
9426                           SVEFormatFromLaneSizeInBytesLog2(lane_size),
9427                           ReadVRegister(instr->GetRd()),
9428                           imm);
9429       break;
9430     }
9431     default:
9432       VIXL_UNIMPLEMENTED();
9433       break;
9434   }
9435 }
9436 
9437 void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
9438   switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
9439     case DUPM_z_i: {
9440       // DUPM uses the same lane size and immediate encoding as bitwise logical
9441       // immediate instructions.
9442       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9443       uint64_t imm = instr->GetSVEImmLogical();
9444       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9445       dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
9446       break;
9447     }
9448     default:
9449       VIXL_UNIMPLEMENTED();
9450       break;
9451   }
9452 }
9453 
9454 void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
9455   SimVRegister& zd = ReadVRegister(instr->GetRd());
9456   SimVRegister& zn = ReadVRegister(instr->GetRn());
9457   SimVRegister& zm = ReadVRegister(instr->GetRm());
9458   Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
9459 
9460   LogicalOp logical_op = LogicalOpMask;
9461   switch (op) {
9462     case AND_z_zz:
9463       logical_op = AND;
9464       break;
9465     case BIC_z_zz:
9466       logical_op = BIC;
9467       break;
9468     case EOR_z_zz:
9469       logical_op = EOR;
9470       break;
9471     case ORR_z_zz:
9472       logical_op = ORR;
9473       break;
9474     default:
9475       VIXL_UNIMPLEMENTED();
9476       break;
9477   }
9478   // Lane size of registers is irrelevant to the bitwise operations, so perform
9479   // the operation on D-sized lanes.
9480   SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
9481 }
9482 
9483 void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
9484   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9485   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9486 
9487   SimVRegister scratch;
9488   SimVRegister result;
9489 
9490   bool for_division = false;
9491   Shift shift_op = NO_SHIFT;
9492   switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
9493     case ASRD_z_p_zi:
9494       shift_op = ASR;
9495       for_division = true;
9496       break;
9497     case ASR_z_p_zi:
9498       shift_op = ASR;
9499       break;
9500     case LSL_z_p_zi:
9501       shift_op = LSL;
9502       break;
9503     case LSR_z_p_zi:
9504       shift_op = LSR;
9505       break;
9506     default:
9507       VIXL_UNIMPLEMENTED();
9508       break;
9509   }
9510 
9511   std::pair<int, int> shift_and_lane_size =
9512       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
9513   unsigned lane_size = shift_and_lane_size.second;
9514   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9515   int shift_dist = shift_and_lane_size.first;
9516 
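  // ASRD rounds towards zero, matching a signed division by a power of two.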
9517   if ((shift_op == ASR) && for_division) {
9518     asrd(vform, result, zdn, shift_dist);
9519   } else {
9520     if (shift_op == LSL) {
9521       // Shift distance is computed differently for LSL. Convert the result.
9522       shift_dist = (8 << lane_size) - shift_dist;
9523     }
9524     dup_immediate(vform, scratch, shift_dist);
9525     SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false);
9526   }
9527   mov_merging(vform, zdn, pg, result);
9528 }
9529 
9530 void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
9531     const Instruction* instr) {
9532   VectorFormat vform = instr->GetSVEVectorFormat();
9533   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9534   SimVRegister& zm = ReadVRegister(instr->GetRn());
9535   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9536   SimVRegister result;
9537 
9538   // SVE uses the whole (saturated) lane for the shift amount.
9539   bool shift_in_ls_byte = false;
9540 
9541   switch (form_hash_) {
9542     case "asrr_z_p_zz"_h:
9543       sshr(vform, result, zm, zdn);
9544       break;
9545     case "asr_z_p_zz"_h:
9546       sshr(vform, result, zdn, zm);
9547       break;
9548     case "lslr_z_p_zz"_h:
9549       sshl(vform, result, zm, zdn, shift_in_ls_byte);
9550       break;
9551     case "lsl_z_p_zz"_h:
9552       sshl(vform, result, zdn, zm, shift_in_ls_byte);
9553       break;
9554     case "lsrr_z_p_zz"_h:
9555       ushr(vform, result, zm, zdn);
9556       break;
9557     case "lsr_z_p_zz"_h:
9558       ushr(vform, result, zdn, zm);
9559       break;
9560     case "sqrshl_z_p_zz"_h:
9561       sshl(vform, result, zdn, zm, shift_in_ls_byte)
9562           .Round(vform)
9563           .SignedSaturate(vform);
9564       break;
9565     case "sqrshlr_z_p_zz"_h:
9566       sshl(vform, result, zm, zdn, shift_in_ls_byte)
9567           .Round(vform)
9568           .SignedSaturate(vform);
9569       break;
9570     case "sqshl_z_p_zz"_h:
9571       sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform);
9572       break;
9573     case "sqshlr_z_p_zz"_h:
9574       sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform);
9575       break;
9576     case "srshl_z_p_zz"_h:
9577       sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9578       break;
9579     case "srshlr_z_p_zz"_h:
9580       sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9581       break;
9582     case "uqrshl_z_p_zz"_h:
9583       ushl(vform, result, zdn, zm, shift_in_ls_byte)
9584           .Round(vform)
9585           .UnsignedSaturate(vform);
9586       break;
9587     case "uqrshlr_z_p_zz"_h:
9588       ushl(vform, result, zm, zdn, shift_in_ls_byte)
9589           .Round(vform)
9590           .UnsignedSaturate(vform);
9591       break;
9592     case "uqshl_z_p_zz"_h:
9593       ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform);
9594       break;
9595     case "uqshlr_z_p_zz"_h:
9596       ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform);
9597       break;
9598     case "urshl_z_p_zz"_h:
9599       ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9600       break;
9601     case "urshlr_z_p_zz"_h:
9602       ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9603       break;
9604     default:
9605       VIXL_UNIMPLEMENTED();
9606       break;
9607   }
9608   mov_merging(vform, zdn, pg, result);
9609 }
9610 
9611 void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated(
9612     const Instruction* instr) {
9613   VectorFormat vform = instr->GetSVEVectorFormat();
9614   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9615   SimVRegister& zm = ReadVRegister(instr->GetRn());
9616   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9617 
9618   SimVRegister result;
9619   Shift shift_op = ASR;
9620 
9621   switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
9622     case ASR_z_p_zw:
9623       break;
9624     case LSL_z_p_zw:
9625       shift_op = LSL;
9626       break;
9627     case LSR_z_p_zw:
9628       shift_op = LSR;
9629       break;
9630     default:
9631       VIXL_UNIMPLEMENTED();
9632       break;
9633   }
9634   SVEBitwiseShiftHelper(shift_op,
9635                         vform,
9636                         result,
9637                         zdn,
9638                         zm,
9639                         /* is_wide_elements = */ true);
9640   mov_merging(vform, zdn, pg, result);
9641 }
9642 
9643 void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
9644   SimVRegister& zd = ReadVRegister(instr->GetRd());
9645   SimVRegister& zn = ReadVRegister(instr->GetRn());
9646 
9647   Shift shift_op = NO_SHIFT;
9648   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9649     case ASR_z_zi:
9650     case ASR_z_zw:
9651       shift_op = ASR;
9652       break;
9653     case LSL_z_zi:
9654     case LSL_z_zw:
9655       shift_op = LSL;
9656       break;
9657     case LSR_z_zi:
9658     case LSR_z_zw:
9659       shift_op = LSR;
9660       break;
9661     default:
9662       VIXL_UNIMPLEMENTED();
9663       break;
9664   }
9665 
9666   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9667     case ASR_z_zi:
9668     case LSL_z_zi:
9669     case LSR_z_zi: {
9670       SimVRegister scratch;
9671       std::pair<int, int> shift_and_lane_size =
9672           instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
9673       unsigned lane_size = shift_and_lane_size.second;
9674       VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
9675       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9676       int shift_dist = shift_and_lane_size.first;
9677       if (shift_op == LSL) {
9678         // Shift distance is computed differently for LSL. Convert the result.
9679         shift_dist = (8 << lane_size) - shift_dist;
9680       }
9681       dup_immediate(vform, scratch, shift_dist);
9682       SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false);
9683       break;
9684     }
9685     case ASR_z_zw:
9686     case LSL_z_zw:
9687     case LSR_z_zw:
9688       SVEBitwiseShiftHelper(shift_op,
9689                             instr->GetSVEVectorFormat(),
9690                             zd,
9691                             zn,
9692                             ReadVRegister(instr->GetRm()),
9693                             true);
9694       break;
9695     default:
9696       VIXL_UNIMPLEMENTED();
9697       break;
9698   }
9699 }
9700 
9701 void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) {
9702   // Although the instructions have a separate encoding class, the lane size is
9703   // encoded in the same way as most other SVE instructions.
9704   VectorFormat vform = instr->GetSVEVectorFormat();
9705 
9706   int pattern = instr->GetImmSVEPredicateConstraint();
9707   int count = GetPredicateConstraintLaneCount(vform, pattern);
9708   int multiplier = instr->ExtractBits(19, 16) + 1;
9709 
9710   switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
9711     case DECB_r_rs:
9712     case DECD_r_rs:
9713     case DECH_r_rs:
9714     case DECW_r_rs:
9715       count = -count;
9716       break;
9717     case INCB_r_rs:
9718     case INCD_r_rs:
9719     case INCH_r_rs:
9720     case INCW_r_rs:
9721       // Nothing to do.
9722       break;
9723     default:
9724       VIXL_UNIMPLEMENTED();
9725       return;
9726   }
9727 
9728   WriteXRegister(instr->GetRd(),
9729                  IncDecN(ReadXRegister(instr->GetRd()),
9730                          count * multiplier,
9731                          kXRegSize));
9732 }
9733 
9734 void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) {
9735   VectorFormat vform = instr->GetSVEVectorFormat();
9736   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9737     VIXL_UNIMPLEMENTED();
9738   }
9739 
9740   int pattern = instr->GetImmSVEPredicateConstraint();
9741   int count = GetPredicateConstraintLaneCount(vform, pattern);
9742   int multiplier = instr->ExtractBits(19, 16) + 1;
9743 
9744   switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
9745     case DECD_z_zs:
9746     case DECH_z_zs:
9747     case DECW_z_zs:
9748       count = -count;
9749       break;
9750     case INCD_z_zs:
9751     case INCH_z_zs:
9752     case INCW_z_zs:
9753       // Nothing to do.
9754       break;
9755     default:
9756       VIXL_UNIMPLEMENTED();
9757       break;
9758   }
9759 
9760   SimVRegister& zd = ReadVRegister(instr->GetRd());
9761   SimVRegister scratch;
9762   dup_immediate(vform,
9763                 scratch,
9764                 IncDecN(0,
9765                         count * multiplier,
9766                         LaneSizeInBitsFromFormat(vform)));
9767   add(vform, zd, zd, scratch);
9768 }
9769 
9770 void Simulator::VisitSVESaturatingIncDecRegisterByElementCount(
9771     const Instruction* instr) {
9772   // Although the instructions have a separate encoding class, the lane size is
9773   // encoded in the same way as most other SVE instructions.
9774   VectorFormat vform = instr->GetSVEVectorFormat();
9775 
9776   int pattern = instr->GetImmSVEPredicateConstraint();
9777   int count = GetPredicateConstraintLaneCount(vform, pattern);
9778   int multiplier = instr->ExtractBits(19, 16) + 1;
9779 
9780   unsigned width = kXRegSize;
9781   bool is_signed = false;
9782 
9783   switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
9784     case SQDECB_r_rs_sx:
9785     case SQDECD_r_rs_sx:
9786     case SQDECH_r_rs_sx:
9787     case SQDECW_r_rs_sx:
9788       width = kWRegSize;
9789       VIXL_FALLTHROUGH();
9790     case SQDECB_r_rs_x:
9791     case SQDECD_r_rs_x:
9792     case SQDECH_r_rs_x:
9793     case SQDECW_r_rs_x:
9794       is_signed = true;
9795       count = -count;
9796       break;
9797     case SQINCB_r_rs_sx:
9798     case SQINCD_r_rs_sx:
9799     case SQINCH_r_rs_sx:
9800     case SQINCW_r_rs_sx:
9801       width = kWRegSize;
9802       VIXL_FALLTHROUGH();
9803     case SQINCB_r_rs_x:
9804     case SQINCD_r_rs_x:
9805     case SQINCH_r_rs_x:
9806     case SQINCW_r_rs_x:
9807       is_signed = true;
9808       break;
9809     case UQDECB_r_rs_uw:
9810     case UQDECD_r_rs_uw:
9811     case UQDECH_r_rs_uw:
9812     case UQDECW_r_rs_uw:
9813       width = kWRegSize;
9814       VIXL_FALLTHROUGH();
9815     case UQDECB_r_rs_x:
9816     case UQDECD_r_rs_x:
9817     case UQDECH_r_rs_x:
9818     case UQDECW_r_rs_x:
9819       count = -count;
9820       break;
9821     case UQINCB_r_rs_uw:
9822     case UQINCD_r_rs_uw:
9823     case UQINCH_r_rs_uw:
9824     case UQINCW_r_rs_uw:
9825       width = kWRegSize;
9826       VIXL_FALLTHROUGH();
9827     case UQINCB_r_rs_x:
9828     case UQINCD_r_rs_x:
9829     case UQINCH_r_rs_x:
9830     case UQINCW_r_rs_x:
9831       // Nothing to do.
9832       break;
9833     default:
9834       VIXL_UNIMPLEMENTED();
9835       break;
9836   }
9837 
9838   WriteXRegister(instr->GetRd(),
9839                  IncDecN(ReadXRegister(instr->GetRd()),
9840                          count * multiplier,
9841                          width,
9842                          true,
9843                          is_signed));
9844 }
9845 
9846 void Simulator::VisitSVESaturatingIncDecVectorByElementCount(
9847     const Instruction* instr) {
9848   VectorFormat vform = instr->GetSVEVectorFormat();
9849   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9850     VIXL_UNIMPLEMENTED();
9851   }
9852 
9853   int pattern = instr->GetImmSVEPredicateConstraint();
9854   int count = GetPredicateConstraintLaneCount(vform, pattern);
9855   int multiplier = instr->ExtractBits(19, 16) + 1;
9856 
9857   SimVRegister& zd = ReadVRegister(instr->GetRd());
9858   SimVRegister scratch;
9859   dup_immediate(vform,
9860                 scratch,
9861                 IncDecN(0,
9862                         count * multiplier,
9863                         LaneSizeInBitsFromFormat(vform)));
9864 
9865   switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
9866     case SQDECD_z_zs:
9867     case SQDECH_z_zs:
9868     case SQDECW_z_zs:
9869       sub(vform, zd, zd, scratch).SignedSaturate(vform);
9870       break;
9871     case SQINCD_z_zs:
9872     case SQINCH_z_zs:
9873     case SQINCW_z_zs:
9874       add(vform, zd, zd, scratch).SignedSaturate(vform);
9875       break;
9876     case UQDECD_z_zs:
9877     case UQDECH_z_zs:
9878     case UQDECW_z_zs:
9879       sub(vform, zd, zd, scratch).UnsignedSaturate(vform);
9880       break;
9881     case UQINCD_z_zs:
9882     case UQINCH_z_zs:
9883     case UQINCW_z_zs:
9884       add(vform, zd, zd, scratch).UnsignedSaturate(vform);
9885       break;
9886     default:
9887       VIXL_UNIMPLEMENTED();
9888       break;
9889   }
9890 }
9891 
9892 void Simulator::VisitSVEElementCount(const Instruction* instr) {
9893   switch (instr->Mask(SVEElementCountMask)) {
9894     case CNTB_r_s:
9895     case CNTD_r_s:
9896     case CNTH_r_s:
9897     case CNTW_r_s:
9898       // All handled below.
9899       break;
9900     default:
9901       VIXL_UNIMPLEMENTED();
9902       break;
9903   }
9904 
9905   // Although these instructions have their own encoding class, the lane size
9906   // is encoded in the same way as for most other SVE instructions.
9907   VectorFormat vform = instr->GetSVEVectorFormat();
9908 
9909   int pattern = instr->GetImmSVEPredicateConstraint();
9910   int count = GetPredicateConstraintLaneCount(vform, pattern);
9911   int multiplier = instr->ExtractBits(19, 16) + 1;
9912   WriteXRegister(instr->GetRd(), count * multiplier);
9913 }
9914 
9915 void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
9916   VectorFormat vform = instr->GetSVEVectorFormat();
9917   SimVRegister& vdn = ReadVRegister(instr->GetRd());
9918   SimVRegister& zm = ReadVRegister(instr->GetRn());
9919   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9920 
9921   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
9922 
9923   switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
9924     case FADDA_v_p_z:
9925       fadda(vform, vdn, pg, zm);
9926       break;
9927     default:
9928       VIXL_UNIMPLEMENTED();
9929       break;
9930   }
9931 }
9932 
9933 void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
9934   VectorFormat vform = instr->GetSVEVectorFormat();
9935   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9936   SimVRegister& zm = ReadVRegister(instr->GetRn());
9937   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9938 
9939   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
9940 
9941   SimVRegister result;
9942   switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
9943     case FABD_z_p_zz:
9944       fabd(vform, result, zdn, zm);
9945       break;
9946     case FADD_z_p_zz:
9947       fadd(vform, result, zdn, zm);
9948       break;
9949     case FDIVR_z_p_zz:
9950       fdiv(vform, result, zm, zdn);
9951       break;
9952     case FDIV_z_p_zz:
9953       fdiv(vform, result, zdn, zm);
9954       break;
9955     case FMAXNM_z_p_zz:
9956       fmaxnm(vform, result, zdn, zm);
9957       break;
9958     case FMAX_z_p_zz:
9959       fmax(vform, result, zdn, zm);
9960       break;
9961     case FMINNM_z_p_zz:
9962       fminnm(vform, result, zdn, zm);
9963       break;
9964     case FMIN_z_p_zz:
9965       fmin(vform, result, zdn, zm);
9966       break;
9967     case FMULX_z_p_zz:
9968       fmulx(vform, result, zdn, zm);
9969       break;
9970     case FMUL_z_p_zz:
9971       fmul(vform, result, zdn, zm);
9972       break;
9973     case FSCALE_z_p_zz:
9974       fscale(vform, result, zdn, zm);
9975       break;
9976     case FSUBR_z_p_zz:
9977       fsub(vform, result, zm, zdn);
9978       break;
9979     case FSUB_z_p_zz:
9980       fsub(vform, result, zdn, zm);
9981       break;
9982     default:
9983       VIXL_UNIMPLEMENTED();
9984       break;
9985   }
9986   mov_merging(vform, zdn, pg, result);
9987 }
9988 
9989 void Simulator::VisitSVEFPArithmeticWithImm_Predicated(
9990     const Instruction* instr) {
9991   VectorFormat vform = instr->GetSVEVectorFormat();
9992   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9993     VIXL_UNIMPLEMENTED();
9994   }
9995 
9996   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9997   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9998   SimVRegister result;
9999 
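  // The single immediate bit selects one of two fixed operands per
  // instruction: 0.5 or 1.0 for the additive forms, 0.0 or 1.0 for the
  // min/max forms, and 0.5 or 2.0 for FMUL.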
10000   int i1 = instr->ExtractBit(5);
10001   SimVRegister add_sub_imm, min_max_imm, mul_imm;
10002   uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5);
10003   uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0);
10004   uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0);
10005   dup_immediate(vform, add_sub_imm, i1 ? one : half);
10006   dup_immediate(vform, min_max_imm, i1 ? one : 0);
10007   dup_immediate(vform, mul_imm, i1 ? two : half);
10008 
10009   switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
10010     case FADD_z_p_zs:
10011       fadd(vform, result, zdn, add_sub_imm);
10012       break;
10013     case FMAXNM_z_p_zs:
10014       fmaxnm(vform, result, zdn, min_max_imm);
10015       break;
10016     case FMAX_z_p_zs:
10017       fmax(vform, result, zdn, min_max_imm);
10018       break;
10019     case FMINNM_z_p_zs:
10020       fminnm(vform, result, zdn, min_max_imm);
10021       break;
10022     case FMIN_z_p_zs:
10023       fmin(vform, result, zdn, min_max_imm);
10024       break;
10025     case FMUL_z_p_zs:
10026       fmul(vform, result, zdn, mul_imm);
10027       break;
10028     case FSUBR_z_p_zs:
10029       fsub(vform, result, add_sub_imm, zdn);
10030       break;
10031     case FSUB_z_p_zs:
10032       fsub(vform, result, zdn, add_sub_imm);
10033       break;
10034     default:
10035       VIXL_UNIMPLEMENTED();
10036       break;
10037   }
10038   mov_merging(vform, zdn, pg, result);
10039 }
10040 
10041 void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
10042   VectorFormat vform = instr->GetSVEVectorFormat();
10043   SimVRegister& zd = ReadVRegister(instr->GetRd());
10044   SimVRegister& zm = ReadVRegister(instr->GetRn());
10045 
10046   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10047 
10048   switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
10049     case FTMAD_z_zzi:
10050       ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
10051       break;
10052     default:
10053       VIXL_UNIMPLEMENTED();
10054       break;
10055   }
10056 }
10057 
10058 void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
10059   VectorFormat vform = instr->GetSVEVectorFormat();
10060   SimVRegister& zd = ReadVRegister(instr->GetRd());
10061   SimVRegister& zn = ReadVRegister(instr->GetRn());
10062   SimVRegister& zm = ReadVRegister(instr->GetRm());
10063 
10064   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10065 
10066   switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
10067     case FADD_z_zz:
10068       fadd(vform, zd, zn, zm);
10069       break;
10070     case FMUL_z_zz:
10071       fmul(vform, zd, zn, zm);
10072       break;
10073     case FRECPS_z_zz:
10074       frecps(vform, zd, zn, zm);
10075       break;
10076     case FRSQRTS_z_zz:
10077       frsqrts(vform, zd, zn, zm);
10078       break;
10079     case FSUB_z_zz:
10080       fsub(vform, zd, zn, zm);
10081       break;
10082     case FTSMUL_z_zz:
10083       ftsmul(vform, zd, zn, zm);
10084       break;
10085     default:
10086       VIXL_UNIMPLEMENTED();
10087       break;
10088   }
10089 }
10090 
10091 void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
10092   SimPRegister& pd = ReadPRegister(instr->GetPd());
10093   SimVRegister& zn = ReadVRegister(instr->GetRn());
10094   SimVRegister& zm = ReadVRegister(instr->GetRm());
10095   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10096   VectorFormat vform = instr->GetSVEVectorFormat();
10097   SimVRegister result;
10098 
10099   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10100 
10101   switch (instr->Mask(SVEFPCompareVectorsMask)) {
10102     case FACGE_p_p_zz:
10103       fabscmp(vform, result, zn, zm, ge);
10104       break;
10105     case FACGT_p_p_zz:
10106       fabscmp(vform, result, zn, zm, gt);
10107       break;
10108     case FCMEQ_p_p_zz:
10109       fcmp(vform, result, zn, zm, eq);
10110       break;
10111     case FCMGE_p_p_zz:
10112       fcmp(vform, result, zn, zm, ge);
10113       break;
10114     case FCMGT_p_p_zz:
10115       fcmp(vform, result, zn, zm, gt);
10116       break;
10117     case FCMNE_p_p_zz:
10118       fcmp(vform, result, zn, zm, ne);
10119       break;
10120     case FCMUO_p_p_zz:
10121       fcmp(vform, result, zn, zm, uo);
10122       break;
10123     default:
10124       VIXL_UNIMPLEMENTED();
10125       break;
10126   }
10127 
10128   ExtractFromSimVRegister(vform, pd, result);
10129   mov_zeroing(pd, pg, pd);
10130 }
10131 
10132 void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
10133   SimPRegister& pd = ReadPRegister(instr->GetPd());
10134   SimVRegister& zn = ReadVRegister(instr->GetRn());
10135   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10136   VectorFormat vform = instr->GetSVEVectorFormat();
10137 
10138   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10139 
10140   SimVRegister result;
10141   SimVRegister zeros;
10142   dup_immediate(kFormatVnD, zeros, 0);
10143 
10144   switch (instr->Mask(SVEFPCompareWithZeroMask)) {
10145     case FCMEQ_p_p_z0:
10146       fcmp(vform, result, zn, zeros, eq);
10147       break;
10148     case FCMGE_p_p_z0:
10149       fcmp(vform, result, zn, zeros, ge);
10150       break;
10151     case FCMGT_p_p_z0:
10152       fcmp(vform, result, zn, zeros, gt);
10153       break;
10154     case FCMLE_p_p_z0:
10155       fcmp(vform, result, zn, zeros, le);
10156       break;
10157     case FCMLT_p_p_z0:
10158       fcmp(vform, result, zn, zeros, lt);
10159       break;
10160     case FCMNE_p_p_z0:
10161       fcmp(vform, result, zn, zeros, ne);
10162       break;
10163     default:
10164       VIXL_UNIMPLEMENTED();
10165       break;
10166   }
10167 
10168   ExtractFromSimVRegister(vform, pd, result);
10169   mov_zeroing(pd, pg, pd);
10170 }
10171 
10172 void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) {
10173   VectorFormat vform = instr->GetSVEVectorFormat();
10174 
10175   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10176     VIXL_UNIMPLEMENTED();
10177   }
10178 
10179   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10180   SimVRegister& zm = ReadVRegister(instr->GetRn());
10181   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10182   int rot = instr->ExtractBit(16);
10183 
10184   SimVRegister result;
10185 
10186   switch (instr->Mask(SVEFPComplexAdditionMask)) {
10187     case FCADD_z_p_zz:
10188       fcadd(vform, result, zdn, zm, rot);
10189       break;
10190     default:
10191       VIXL_UNIMPLEMENTED();
10192       break;
10193   }
10194   mov_merging(vform, zdn, pg, result);
10195 }
10196 
10197 void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) {
10198   VectorFormat vform = instr->GetSVEVectorFormat();
10199 
10200   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10201     VIXL_UNIMPLEMENTED();
10202   }
10203 
10204   SimVRegister& zda = ReadVRegister(instr->GetRd());
10205   SimVRegister& zn = ReadVRegister(instr->GetRn());
10206   SimVRegister& zm = ReadVRegister(instr->GetRm());
10207   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10208   int rot = instr->ExtractBits(14, 13);
10209 
10210   SimVRegister result;
10211 
10212   switch (instr->Mask(SVEFPComplexMulAddMask)) {
10213     case FCMLA_z_p_zzz:
10214       fcmla(vform, result, zn, zm, zda, rot);
10215       break;
10216     default:
10217       VIXL_UNIMPLEMENTED();
10218       break;
10219   }
10220   mov_merging(vform, zda, pg, result);
10221 }
10222 
10223 void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) {
10224   SimVRegister& zda = ReadVRegister(instr->GetRd());
10225   SimVRegister& zn = ReadVRegister(instr->GetRn());
10226   int rot = instr->ExtractBits(11, 10);
10227   unsigned zm_code = instr->GetRm();
10228   int index = -1;
10229   VectorFormat vform, vform_dup;
10230 
10231   switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
10232     case FCMLA_z_zzzi_h:
10233       vform = kFormatVnH;
10234       vform_dup = kFormatVnS;
10235       index = zm_code >> 3;
10236       zm_code &= 0x7;
10237       break;
10238     case FCMLA_z_zzzi_s:
10239       vform = kFormatVnS;
10240       vform_dup = kFormatVnD;
10241       index = zm_code >> 4;
10242       zm_code &= 0xf;
10243       break;
10244     default:
10245       VIXL_UNIMPLEMENTED();
10246       break;
10247   }
10248 
10249   if (index >= 0) {
10250     SimVRegister temp;
10251     dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index);
10252     fcmla(vform, zda, zn, temp, zda, rot);
10253   }
10254 }
10255 
10256 typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform,
10257                                                   LogicVRegister dst,
10258                                                   const LogicVRegister& src);
10259 
10260 void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
10261   VectorFormat vform = instr->GetSVEVectorFormat();
10262   SimVRegister& vd = ReadVRegister(instr->GetRd());
10263   SimVRegister& zn = ReadVRegister(instr->GetRn());
10264   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10265   int lane_size = LaneSizeInBitsFromFormat(vform);
10266 
10267   uint64_t inactive_value = 0;
10268   FastReduceFn fn = nullptr;
10269 
10270   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10271 
10272   switch (instr->Mask(SVEFPFastReductionMask)) {
10273     case FADDV_v_p_z:
10274       fn = &Simulator::faddv;
10275       break;
10276     case FMAXNMV_v_p_z:
10277       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10278       fn = &Simulator::fmaxnmv;
10279       break;
10280     case FMAXV_v_p_z:
10281       inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
10282       fn = &Simulator::fmaxv;
10283       break;
10284     case FMINNMV_v_p_z:
10285       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10286       fn = &Simulator::fminnmv;
10287       break;
10288     case FMINV_v_p_z:
10289       inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
10290       fn = &Simulator::fminv;
10291       break;
10292     default:
10293       VIXL_UNIMPLEMENTED();
10294       break;
10295   }
10296 
10297   SimVRegister scratch;
10298   dup_immediate(vform, scratch, inactive_value);
10299   mov_merging(vform, scratch, pg, zn);
10300   if (fn != nullptr) (this->*fn)(vform, vd, scratch);
10301 }
10302 
10303 void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
10304   VectorFormat vform = kFormatUndefined;
10305 
10306   switch (instr->Mask(SVEFPMulIndexMask)) {
10307     case FMUL_z_zzi_d:
10308       vform = kFormatVnD;
10309       break;
10310     case FMUL_z_zzi_h_i3h:
10311     case FMUL_z_zzi_h:
10312       vform = kFormatVnH;
10313       break;
10314     case FMUL_z_zzi_s:
10315       vform = kFormatVnS;
10316       break;
10317     default:
10318       VIXL_UNIMPLEMENTED();
10319       break;
10320   }
10321 
10322   SimVRegister& zd = ReadVRegister(instr->GetRd());
10323   SimVRegister& zn = ReadVRegister(instr->GetRn());
10324   SimVRegister temp;
10325 
10326   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10327   fmul(vform, zd, zn, temp);
10328 }
10329 
10330 void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
10331   VectorFormat vform = instr->GetSVEVectorFormat();
10332   SimVRegister& zd = ReadVRegister(instr->GetRd());
10333   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10334   SimVRegister result;
10335 
10336   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10337 
10338   if (instr->ExtractBit(15) == 0) {
10339     // Floating-point multiply-accumulate writing addend.
10340     SimVRegister& zm = ReadVRegister(instr->GetRm());
10341     SimVRegister& zn = ReadVRegister(instr->GetRn());
10342 
10343     switch (instr->Mask(SVEFPMulAddMask)) {
10344       // zda = zda + zn * zm
10345       case FMLA_z_p_zzz:
10346         fmla(vform, result, zd, zn, zm);
10347         break;
10348       // zda = -zda + -zn * zm
10349       case FNMLA_z_p_zzz:
10350         fneg(vform, result, zd);
10351         fmls(vform, result, result, zn, zm);
10352         break;
10353       // zda = zda + -zn * zm
10354       case FMLS_z_p_zzz:
10355         fmls(vform, result, zd, zn, zm);
10356         break;
10357       // zda = -zda + zn * zm
10358       case FNMLS_z_p_zzz:
10359         fneg(vform, result, zd);
10360         fmla(vform, result, result, zn, zm);
10361         break;
10362       default:
10363         VIXL_UNIMPLEMENTED();
10364         break;
10365     }
10366   } else {
10367     // Floating-point multiply-accumulate writing multiplicand.
10368     SimVRegister& za = ReadVRegister(instr->GetRm());
10369     SimVRegister& zm = ReadVRegister(instr->GetRn());
10370 
10371     switch (instr->Mask(SVEFPMulAddMask)) {
10372       // zdn = za + zdn * zm
10373       case FMAD_z_p_zzz:
10374         fmla(vform, result, za, zd, zm);
10375         break;
10376       // zdn = -za + -zdn * zm
10377       case FNMAD_z_p_zzz:
10378         fneg(vform, result, za);
10379         fmls(vform, result, result, zd, zm);
10380         break;
10381       // zdn = za + -zdn * zm
10382       case FMSB_z_p_zzz:
10383         fmls(vform, result, za, zd, zm);
10384         break;
10385       // zdn = -za + zdn * zm
10386       case FNMSB_z_p_zzz:
10387         fneg(vform, result, za);
10388         fmla(vform, result, result, zd, zm);
10389         break;
10390       default:
10391         VIXL_UNIMPLEMENTED();
10392         break;
10393     }
10394   }
10395 
10396   mov_merging(vform, zd, pg, result);
10397 }
10398 
10399 void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
10400   VectorFormat vform = kFormatUndefined;
10401 
10402   switch (instr->Mask(SVEFPMulAddIndexMask)) {
10403     case FMLA_z_zzzi_d:
10404     case FMLS_z_zzzi_d:
10405       vform = kFormatVnD;
10406       break;
10407     case FMLA_z_zzzi_s:
10408     case FMLS_z_zzzi_s:
10409       vform = kFormatVnS;
10410       break;
10411     case FMLA_z_zzzi_h:
10412     case FMLS_z_zzzi_h:
10413     case FMLA_z_zzzi_h_i3h:
10414     case FMLS_z_zzzi_h_i3h:
10415       vform = kFormatVnH;
10416       break;
10417     default:
10418       VIXL_UNIMPLEMENTED();
10419       break;
10420   }
10421 
10422   SimVRegister& zd = ReadVRegister(instr->GetRd());
10423   SimVRegister& zn = ReadVRegister(instr->GetRn());
10424   SimVRegister temp;
10425 
10426   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
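  // Bit 10 distinguishes the subtracting forms (FMLS) from the accumulating
  // forms (FMLA).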
10427   if (instr->ExtractBit(10) == 1) {
10428     fmls(vform, zd, zd, zn, temp);
10429   } else {
10430     fmla(vform, zd, zd, zn, temp);
10431   }
10432 }
10433 
10434 void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) {
10435   SimVRegister& zd = ReadVRegister(instr->GetRd());
10436   SimVRegister& zn = ReadVRegister(instr->GetRn());
10437   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10438   int dst_data_size;
10439   int src_data_size;
10440 
10441   switch (instr->Mask(SVEFPConvertToIntMask)) {
10442     case FCVTZS_z_p_z_d2w:
10443     case FCVTZU_z_p_z_d2w:
10444       dst_data_size = kSRegSize;
10445       src_data_size = kDRegSize;
10446       break;
10447     case FCVTZS_z_p_z_d2x:
10448     case FCVTZU_z_p_z_d2x:
10449       dst_data_size = kDRegSize;
10450       src_data_size = kDRegSize;
10451       break;
10452     case FCVTZS_z_p_z_fp162h:
10453     case FCVTZU_z_p_z_fp162h:
10454       dst_data_size = kHRegSize;
10455       src_data_size = kHRegSize;
10456       break;
10457     case FCVTZS_z_p_z_fp162w:
10458     case FCVTZU_z_p_z_fp162w:
10459       dst_data_size = kSRegSize;
10460       src_data_size = kHRegSize;
10461       break;
10462     case FCVTZS_z_p_z_fp162x:
10463     case FCVTZU_z_p_z_fp162x:
10464       dst_data_size = kDRegSize;
10465       src_data_size = kHRegSize;
10466       break;
10467     case FCVTZS_z_p_z_s2w:
10468     case FCVTZU_z_p_z_s2w:
10469       dst_data_size = kSRegSize;
10470       src_data_size = kSRegSize;
10471       break;
10472     case FCVTZS_z_p_z_s2x:
10473     case FCVTZU_z_p_z_s2x:
10474       dst_data_size = kDRegSize;
10475       src_data_size = kSRegSize;
10476       break;
10477     default:
10478       VIXL_UNIMPLEMENTED();
10479       dst_data_size = 0;
10480       src_data_size = 0;
10481       break;
10482   }
10483 
10484   VectorFormat vform =
10485       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10486 
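  // Bit 16 distinguishes the unsigned conversions (FCVTZU) from the signed
  // ones (FCVTZS).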
10487   if (instr->ExtractBit(16) == 0) {
10488     fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10489   } else {
10490     fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10491   }
10492 }
10493 
10494 void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
10495   SimVRegister& zd = ReadVRegister(instr->GetRd());
10496   SimVRegister& zn = ReadVRegister(instr->GetRn());
10497   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10498   VectorFormat dst_data_size = kFormatUndefined;
10499   VectorFormat src_data_size = kFormatUndefined;
10500 
10501   switch (instr->Mask(SVEFPConvertPrecisionMask)) {
10502     case FCVT_z_p_z_d2h:
10503       dst_data_size = kFormatVnH;
10504       src_data_size = kFormatVnD;
10505       break;
10506     case FCVT_z_p_z_d2s:
10507       dst_data_size = kFormatVnS;
10508       src_data_size = kFormatVnD;
10509       break;
10510     case FCVT_z_p_z_h2d:
10511       dst_data_size = kFormatVnD;
10512       src_data_size = kFormatVnH;
10513       break;
10514     case FCVT_z_p_z_h2s:
10515       dst_data_size = kFormatVnS;
10516       src_data_size = kFormatVnH;
10517       break;
10518     case FCVT_z_p_z_s2d:
10519       dst_data_size = kFormatVnD;
10520       src_data_size = kFormatVnS;
10521       break;
10522     case FCVT_z_p_z_s2h:
10523       dst_data_size = kFormatVnH;
10524       src_data_size = kFormatVnS;
10525       break;
10526     default:
10527       VIXL_UNIMPLEMENTED();
10528       break;
10529   }
10530 
10531   fcvt(dst_data_size, src_data_size, zd, pg, zn);
10532 }
10533 
10534 void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
10535   SimVRegister& zd = ReadVRegister(instr->GetRd());
10536   SimVRegister& zn = ReadVRegister(instr->GetRn());
10537   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10538   VectorFormat vform = instr->GetSVEVectorFormat();
10539   SimVRegister result;
10540 
10541   switch (instr->Mask(SVEFPUnaryOpMask)) {
10542     case FRECPX_z_p_z:
10543       frecpx(vform, result, zn);
10544       break;
10545     case FSQRT_z_p_z:
10546       fsqrt(vform, result, zn);
10547       break;
10548     default:
10549       VIXL_UNIMPLEMENTED();
10550       break;
10551   }
10552   mov_merging(vform, zd, pg, result);
10553 }
10554 
10555 void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
10556   SimVRegister& zd = ReadVRegister(instr->GetRd());
10557   SimVRegister& zn = ReadVRegister(instr->GetRn());
10558   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10559   VectorFormat vform = instr->GetSVEVectorFormat();
10560   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10561   bool exact_exception = false;
10562 
10563   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10564 
10565   switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
10566     case FRINTA_z_p_z:
10567       fpcr_rounding = FPTieAway;
10568       break;
10569     case FRINTI_z_p_z:
10570       break;  // Use FPCR rounding mode.
10571     case FRINTM_z_p_z:
10572       fpcr_rounding = FPNegativeInfinity;
10573       break;
10574     case FRINTN_z_p_z:
10575       fpcr_rounding = FPTieEven;
10576       break;
10577     case FRINTP_z_p_z:
10578       fpcr_rounding = FPPositiveInfinity;
10579       break;
10580     case FRINTX_z_p_z:
10581       exact_exception = true;
10582       break;
10583     case FRINTZ_z_p_z:
10584       fpcr_rounding = FPZero;
10585       break;
10586     default:
10587       VIXL_UNIMPLEMENTED();
10588       break;
10589   }
10590 
10591   SimVRegister result;
10592   frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger);
10593   mov_merging(vform, zd, pg, result);
10594 }
10595 
10596 void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) {
10597   SimVRegister& zd = ReadVRegister(instr->GetRd());
10598   SimVRegister& zn = ReadVRegister(instr->GetRn());
10599   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10600   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10601   int dst_data_size;
10602   int src_data_size;
10603 
10604   switch (instr->Mask(SVEIntConvertToFPMask)) {
10605     case SCVTF_z_p_z_h2fp16:
10606     case UCVTF_z_p_z_h2fp16:
10607       dst_data_size = kHRegSize;
10608       src_data_size = kHRegSize;
10609       break;
10610     case SCVTF_z_p_z_w2d:
10611     case UCVTF_z_p_z_w2d:
10612       dst_data_size = kDRegSize;
10613       src_data_size = kSRegSize;
10614       break;
10615     case SCVTF_z_p_z_w2fp16:
10616     case UCVTF_z_p_z_w2fp16:
10617       dst_data_size = kHRegSize;
10618       src_data_size = kSRegSize;
10619       break;
10620     case SCVTF_z_p_z_w2s:
10621     case UCVTF_z_p_z_w2s:
10622       dst_data_size = kSRegSize;
10623       src_data_size = kSRegSize;
10624       break;
10625     case SCVTF_z_p_z_x2d:
10626     case UCVTF_z_p_z_x2d:
10627       dst_data_size = kDRegSize;
10628       src_data_size = kDRegSize;
10629       break;
10630     case SCVTF_z_p_z_x2fp16:
10631     case UCVTF_z_p_z_x2fp16:
10632       dst_data_size = kHRegSize;
10633       src_data_size = kDRegSize;
10634       break;
10635     case SCVTF_z_p_z_x2s:
10636     case UCVTF_z_p_z_x2s:
10637       dst_data_size = kSRegSize;
10638       src_data_size = kDRegSize;
10639       break;
10640     default:
10641       VIXL_UNIMPLEMENTED();
10642       dst_data_size = 0;
10643       src_data_size = 0;
10644       break;
10645   }
10646 
10647   VectorFormat vform =
10648       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10649 
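  // As for FCVTZS/FCVTZU above, bit 16 distinguishes the unsigned conversions
  // (UCVTF) from the signed ones (SCVTF).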
10650   if (instr->ExtractBit(16) == 0) {
10651     scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10652   } else {
10653     ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10654   }
10655 }
10656 
10657 void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
10658   VectorFormat vform = instr->GetSVEVectorFormat();
10659   SimVRegister& zd = ReadVRegister(instr->GetRd());
10660   SimVRegister& zn = ReadVRegister(instr->GetRn());
10661   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10662 
10663   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10664 
10665   switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
10666     case FRECPE_z_z:
10667       frecpe(vform, zd, zn, fpcr_rounding);
10668       break;
10669     case FRSQRTE_z_z:
10670       frsqrte(vform, zd, zn);
10671       break;
10672     default:
10673       VIXL_UNIMPLEMENTED();
10674       break;
10675   }
10676 }
10677 
10678 void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) {
10679   VectorFormat vform = instr->GetSVEVectorFormat();
10680   SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5));
10681 
10682   int count = CountActiveLanes(vform, pg);
10683 
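  // Bit 11 selects between the vector forms (0), which update a Z register,
  // and the scalar forms (1), which update a general-purpose register.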
10684   if (instr->ExtractBit(11) == 0) {
10685     SimVRegister& zdn = ReadVRegister(instr->GetRd());
10686     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10687       case DECP_z_p_z:
10688         sub_uint(vform, zdn, zdn, count);
10689         break;
10690       case INCP_z_p_z:
10691         add_uint(vform, zdn, zdn, count);
10692         break;
10693       case SQDECP_z_p_z:
10694         sub_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10695         break;
10696       case SQINCP_z_p_z:
10697         add_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10698         break;
10699       case UQDECP_z_p_z:
10700         sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10701         break;
10702       case UQINCP_z_p_z:
10703         add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10704         break;
10705       default:
10706         VIXL_UNIMPLEMENTED();
10707         break;
10708     }
10709   } else {
10710     bool is_saturating = (instr->ExtractBit(18) == 0);
10711     bool decrement =
10712         is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16);
10713     bool is_signed = (instr->ExtractBit(16) == 0);
10714     bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true;
10715     unsigned width = sf ? kXRegSize : kWRegSize;
10716 
10717     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10718       case DECP_r_p_r:
10719       case INCP_r_p_r:
10720       case SQDECP_r_p_r_sx:
10721       case SQDECP_r_p_r_x:
10722       case SQINCP_r_p_r_sx:
10723       case SQINCP_r_p_r_x:
10724       case UQDECP_r_p_r_uw:
10725       case UQDECP_r_p_r_x:
10726       case UQINCP_r_p_r_uw:
10727       case UQINCP_r_p_r_x:
10728         WriteXRegister(instr->GetRd(),
10729                        IncDecN(ReadXRegister(instr->GetRd()),
10730                                decrement ? -count : count,
10731                                width,
10732                                is_saturating,
10733                                is_signed));
10734         break;
10735       default:
10736         VIXL_UNIMPLEMENTED();
10737         break;
10738     }
10739   }
10740 }
10741 
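// Add `delta` to the n-bit value in `acc`, optionally saturating to the
// signed or unsigned n-bit range, and return the result extended to 64 bits
// (sign-extended for negative signed results, zero-extended otherwise).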
10742 uint64_t Simulator::IncDecN(uint64_t acc,
10743                             int64_t delta,
10744                             unsigned n,
10745                             bool is_saturating,
10746                             bool is_signed) {
10747   VIXL_ASSERT(n <= 64);
10748   VIXL_ASSERT(IsIntN(n, delta));
10749 
10750   uint64_t sign_mask = UINT64_C(1) << (n - 1);
10751   uint64_t mask = GetUintMask(n);
10752 
10753   acc &= mask;  // Ignore initial accumulator high bits.
10754   uint64_t result = (acc + delta) & mask;
10755 
10756   bool result_negative = ((result & sign_mask) != 0);
10757 
10758   if (is_saturating) {
10759     if (is_signed) {
10760       bool acc_negative = ((acc & sign_mask) != 0);
10761       bool delta_negative = delta < 0;
10762 
10763       // If the signs of the operands are the same, but different from the
10764       // result, there was an overflow.
10765       if ((acc_negative == delta_negative) &&
10766           (acc_negative != result_negative)) {
10767         if (result_negative) {
10768           // Saturate to [..., INT<n>_MAX].
10769           result_negative = false;
10770           result = mask & ~sign_mask;  // E.g. 0x000000007fffffff
10771         } else {
10772           // Saturate to [INT<n>_MIN, ...].
10773           result_negative = true;
10774           result = ~mask | sign_mask;  // E.g. 0xffffffff80000000
10775         }
10776       }
10777     } else {
10778       if ((delta < 0) && (result > acc)) {
10779         // Saturate to [0, ...].
10780         result = 0;
10781       } else if ((delta > 0) && (result < acc)) {
10782         // Saturate to [..., UINT<n>_MAX].
10783         result = mask;
10784       }
10785     }
10786   }
10787 
10788   // Sign-extend if necessary.
10789   if (result_negative && is_signed) result |= ~mask;
10790 
10791   return result;
10792 }
10793 
10794 void Simulator::VisitSVEIndexGeneration(const Instruction* instr) {
10795   VectorFormat vform = instr->GetSVEVectorFormat();
10796   SimVRegister& zd = ReadVRegister(instr->GetRd());
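  // INDEX fills zd with lane[i] = start + i * step, where start and step are
  // taken from either a five-bit signed immediate or a general-purpose
  // register, selected by bits 10 and 11 respectively.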
10797   switch (instr->Mask(SVEIndexGenerationMask)) {
10798     case INDEX_z_ii:
10799     case INDEX_z_ir:
10800     case INDEX_z_ri:
10801     case INDEX_z_rr: {
10802       uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn())
10803                                              : instr->ExtractSignedBits(9, 5);
10804       uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm())
10805                                             : instr->ExtractSignedBits(20, 16);
10806       index(vform, zd, start, step);
10807       break;
10808     }
10809     default:
10810       VIXL_UNIMPLEMENTED();
10811       break;
10812   }
10813 }
10814 
10815 void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) {
10816   VectorFormat vform = instr->GetSVEVectorFormat();
10817   SimVRegister& zd = ReadVRegister(instr->GetRd());
10818   SimVRegister& zn = ReadVRegister(instr->GetRn());
10819   SimVRegister& zm = ReadVRegister(instr->GetRm());
10820   switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
10821     case ADD_z_zz:
10822       add(vform, zd, zn, zm);
10823       break;
10824     case SQADD_z_zz:
10825       add(vform, zd, zn, zm).SignedSaturate(vform);
10826       break;
10827     case SQSUB_z_zz:
10828       sub(vform, zd, zn, zm).SignedSaturate(vform);
10829       break;
10830     case SUB_z_zz:
10831       sub(vform, zd, zn, zm);
10832       break;
10833     case UQADD_z_zz:
10834       add(vform, zd, zn, zm).UnsignedSaturate(vform);
10835       break;
10836     case UQSUB_z_zz:
10837       sub(vform, zd, zn, zm).UnsignedSaturate(vform);
10838       break;
10839     default:
10840       VIXL_UNIMPLEMENTED();
10841       break;
10842   }
10843 }
10844 
10845 void Simulator::VisitSVEIntAddSubtractVectors_Predicated(
10846     const Instruction* instr) {
10847   VectorFormat vform = instr->GetSVEVectorFormat();
10848   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10849   SimVRegister& zm = ReadVRegister(instr->GetRn());
10850   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10851   SimVRegister result;
10852 
10853   switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
10854     case ADD_z_p_zz:
10855       add(vform, result, zdn, zm);
10856       break;
10857     case SUBR_z_p_zz:
10858       sub(vform, result, zm, zdn);
10859       break;
10860     case SUB_z_p_zz:
10861       sub(vform, result, zdn, zm);
10862       break;
10863     default:
10864       VIXL_UNIMPLEMENTED();
10865       break;
10866   }
10867   mov_merging(vform, zdn, pg, result);
10868 }
10869 
10870 void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) {
10871   VectorFormat vform = instr->GetSVEVectorFormat();
10872   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10873   SimVRegister& zm = ReadVRegister(instr->GetRn());
10874   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10875   SimVRegister result;
10876 
10877   switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
10878     case AND_z_p_zz:
10879       SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm);
10880       break;
10881     case BIC_z_p_zz:
10882       SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm);
10883       break;
10884     case EOR_z_p_zz:
10885       SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm);
10886       break;
10887     case ORR_z_p_zz:
10888       SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm);
10889       break;
10890     default:
10891       VIXL_UNIMPLEMENTED();
10892       break;
10893   }
10894   mov_merging(vform, zdn, pg, result);
10895 }
10896 
10897 void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) {
10898   VectorFormat vform = instr->GetSVEVectorFormat();
10899   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10900   SimVRegister& zm = ReadVRegister(instr->GetRn());
10901   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10902   SimVRegister result;
10903 
10904   switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
10905     case MUL_z_p_zz:
10906       mul(vform, result, zdn, zm);
10907       break;
10908     case SMULH_z_p_zz:
10909       smulh(vform, result, zdn, zm);
10910       break;
10911     case UMULH_z_p_zz:
10912       umulh(vform, result, zdn, zm);
10913       break;
10914     default:
10915       VIXL_UNIMPLEMENTED();
10916       break;
10917   }
10918   mov_merging(vform, zdn, pg, result);
10919 }
10920 
10921 void Simulator::VisitSVEIntMinMaxDifference_Predicated(
10922     const Instruction* instr) {
10923   VectorFormat vform = instr->GetSVEVectorFormat();
10924   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10925   SimVRegister& zm = ReadVRegister(instr->GetRn());
10926   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10927   SimVRegister result;
10928 
10929   switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
10930     case SABD_z_p_zz:
10931       absdiff(vform, result, zdn, zm, true);
10932       break;
10933     case SMAX_z_p_zz:
10934       smax(vform, result, zdn, zm);
10935       break;
10936     case SMIN_z_p_zz:
10937       smin(vform, result, zdn, zm);
10938       break;
10939     case UABD_z_p_zz:
10940       absdiff(vform, result, zdn, zm, false);
10941       break;
10942     case UMAX_z_p_zz:
10943       umax(vform, result, zdn, zm);
10944       break;
10945     case UMIN_z_p_zz:
10946       umin(vform, result, zdn, zm);
10947       break;
10948     default:
10949       VIXL_UNIMPLEMENTED();
10950       break;
10951   }
10952   mov_merging(vform, zdn, pg, result);
10953 }
10954 
10955 void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) {
10956   VectorFormat vform = instr->GetSVEVectorFormat();
10957   SimVRegister& zd = ReadVRegister(instr->GetRd());
10958   SimVRegister scratch;
10959 
10960   switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
10961     case MUL_z_zi:
10962       dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned());
10963       mul(vform, zd, zd, scratch);
10964       break;
10965     default:
10966       VIXL_UNIMPLEMENTED();
10967       break;
10968   }
10969 }
10970 
10971 void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) {
10972   VectorFormat vform = instr->GetSVEVectorFormat();
10973   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10974   SimVRegister& zm = ReadVRegister(instr->GetRn());
10975   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10976   SimVRegister result;
10977 
10978   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
10979 
10980   switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
10981     case SDIVR_z_p_zz:
10982       sdiv(vform, result, zm, zdn);
10983       break;
10984     case SDIV_z_p_zz:
10985       sdiv(vform, result, zdn, zm);
10986       break;
10987     case UDIVR_z_p_zz:
10988       udiv(vform, result, zm, zdn);
10989       break;
10990     case UDIV_z_p_zz:
10991       udiv(vform, result, zdn, zm);
10992       break;
10993     default:
10994       VIXL_UNIMPLEMENTED();
10995       break;
10996   }
10997   mov_merging(vform, zdn, pg, result);
10998 }
10999 
11000 void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) {
11001   VectorFormat vform = instr->GetSVEVectorFormat();
11002   SimVRegister& zd = ReadVRegister(instr->GetRd());
11003   SimVRegister scratch;
11004 
11005   uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned();
11006   int64_t signed_imm = instr->GetImmSVEIntWideSigned();
11007 
11008   switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
11009     case SMAX_z_zi:
11010       dup_immediate(vform, scratch, signed_imm);
11011       smax(vform, zd, zd, scratch);
11012       break;
11013     case SMIN_z_zi:
11014       dup_immediate(vform, scratch, signed_imm);
11015       smin(vform, zd, zd, scratch);
11016       break;
11017     case UMAX_z_zi:
11018       dup_immediate(vform, scratch, unsigned_imm);
11019       umax(vform, zd, zd, scratch);
11020       break;
11021     case UMIN_z_zi:
11022       dup_immediate(vform, scratch, unsigned_imm);
11023       umin(vform, zd, zd, scratch);
11024       break;
11025     default:
11026       VIXL_UNIMPLEMENTED();
11027       break;
11028   }
11029 }
11030 
11031 void Simulator::VisitSVEIntCompareScalarCountAndLimit(
11032     const Instruction* instr) {
11033   unsigned rn_code = instr->GetRn();
11034   unsigned rm_code = instr->GetRm();
11035   SimPRegister& pd = ReadPRegister(instr->GetPd());
11036   VectorFormat vform = instr->GetSVEVectorFormat();
11037 
11038   bool is_64_bit = instr->ExtractBit(12) == 1;
11039   int rsize = is_64_bit ? kXRegSize : kWRegSize;
11040   uint64_t mask = is_64_bit ? kXRegMask : kWRegMask;
11041 
11042   uint64_t usrc1 = ReadXRegister(rn_code);
11043   int64_t ssrc2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11044   uint64_t usrc2 = ssrc2 & mask;
11045 
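  // The WHILEGE/WHILEGT/WHILEHI/WHILEHS forms decrement the first operand, so
  // the result lanes are populated from the highest lane downwards.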
11046   bool reverse = (form_hash_ == "whilege_p_p_rr"_h) ||
11047                  (form_hash_ == "whilegt_p_p_rr"_h) ||
11048                  (form_hash_ == "whilehi_p_p_rr"_h) ||
11049                  (form_hash_ == "whilehs_p_p_rr"_h);
11050 
11051   int lane_count = LaneCountFromFormat(vform);
11052   bool last = true;
11053   for (int i = 0; i < lane_count; i++) {
11054     usrc1 &= mask;
11055     int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1);
11056 
11057     bool cond = false;
11058     switch (form_hash_) {
11059       case "whilele_p_p_rr"_h:
11060         cond = ssrc1 <= ssrc2;
11061         break;
11062       case "whilelo_p_p_rr"_h:
11063         cond = usrc1 < usrc2;
11064         break;
11065       case "whilels_p_p_rr"_h:
11066         cond = usrc1 <= usrc2;
11067         break;
11068       case "whilelt_p_p_rr"_h:
11069         cond = ssrc1 < ssrc2;
11070         break;
11071       case "whilege_p_p_rr"_h:
11072         cond = ssrc1 >= ssrc2;
11073         break;
11074       case "whilegt_p_p_rr"_h:
11075         cond = ssrc1 > ssrc2;
11076         break;
11077       case "whilehi_p_p_rr"_h:
11078         cond = usrc1 > usrc2;
11079         break;
11080       case "whilehs_p_p_rr"_h:
11081         cond = usrc1 >= usrc2;
11082         break;
11083       default:
11084         VIXL_UNIMPLEMENTED();
11085         break;
11086     }
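    // A lane is active only if this and all previous comparisons have passed.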
11087     last = last && cond;
11088     LogicPRegister dst(pd);
11089     int lane = reverse ? ((lane_count - 1) - i) : i;
11090     dst.SetActive(vform, lane, last);
11091     usrc1 += reverse ? -1 : 1;
11092   }
11093 
11094   PredTest(vform, GetPTrue(), pd);
11095   LogSystemRegister(NZCV);
11096 }
11097 
11098 void Simulator::VisitSVEConditionallyTerminateScalars(
11099     const Instruction* instr) {
11100   unsigned rn_code = instr->GetRn();
11101   unsigned rm_code = instr->GetRm();
11102   bool is_64_bit = instr->ExtractBit(22) == 1;
11103   uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
11104   uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11105   bool term = false;
11106   switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
11107     case CTERMEQ_rr:
11108       term = src1 == src2;
11109       break;
11110     case CTERMNE_rr:
11111       term = src1 != src2;
11112       break;
11113     default:
11114       VIXL_UNIMPLEMENTED();
11115       break;
11116   }
11117   ReadNzcv().SetN(term ? 1 : 0);
11118   ReadNzcv().SetV(term ? 0 : !ReadC());
11119   LogSystemRegister(NZCV);
11120 }
11121 
11122 void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
11123   bool commute_inputs = false;
11124   Condition cond = al;
11125   switch (instr->Mask(SVEIntCompareSignedImmMask)) {
11126     case CMPEQ_p_p_zi:
11127       cond = eq;
11128       break;
11129     case CMPGE_p_p_zi:
11130       cond = ge;
11131       break;
11132     case CMPGT_p_p_zi:
11133       cond = gt;
11134       break;
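    // CMPLE and CMPLT are evaluated as GE and GT with the operands swapped.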
11135     case CMPLE_p_p_zi:
11136       cond = ge;
11137       commute_inputs = true;
11138       break;
11139     case CMPLT_p_p_zi:
11140       cond = gt;
11141       commute_inputs = true;
11142       break;
11143     case CMPNE_p_p_zi:
11144       cond = ne;
11145       break;
11146     default:
11147       VIXL_UNIMPLEMENTED();
11148       break;
11149   }
11150 
11151   VectorFormat vform = instr->GetSVEVectorFormat();
11152   SimVRegister src2;
11153   dup_immediate(vform,
11154                 src2,
11155                 ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16)));
11156   SVEIntCompareVectorsHelper(cond,
11157                              vform,
11158                              ReadPRegister(instr->GetPd()),
11159                              ReadPRegister(instr->GetPgLow8()),
11160                              commute_inputs ? src2
11161                                             : ReadVRegister(instr->GetRn()),
11162                              commute_inputs ? ReadVRegister(instr->GetRn())
11163                                             : src2);
11164 }
11165 
11166 void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
11167   bool commute_inputs = false;
11168   Condition cond = al;
11169   switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
11170     case CMPHI_p_p_zi:
11171       cond = hi;
11172       break;
11173     case CMPHS_p_p_zi:
11174       cond = hs;
11175       break;
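    // CMPLO and CMPLS are evaluated as HI and HS with the operands swapped.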
11176     case CMPLO_p_p_zi:
11177       cond = hi;
11178       commute_inputs = true;
11179       break;
11180     case CMPLS_p_p_zi:
11181       cond = hs;
11182       commute_inputs = true;
11183       break;
11184     default:
11185       VIXL_UNIMPLEMENTED();
11186       break;
11187   }
11188 
11189   VectorFormat vform = instr->GetSVEVectorFormat();
11190   SimVRegister src2;
11191   dup_immediate(vform, src2, instr->ExtractBits(20, 14));
11192   SVEIntCompareVectorsHelper(cond,
11193                              vform,
11194                              ReadPRegister(instr->GetPd()),
11195                              ReadPRegister(instr->GetPgLow8()),
11196                              commute_inputs ? src2
11197                                             : ReadVRegister(instr->GetRn()),
11198                              commute_inputs ? ReadVRegister(instr->GetRn())
11199                                             : src2);
11200 }
11201 
11202 void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) {
11203   Instr op = instr->Mask(SVEIntCompareVectorsMask);
11204   bool is_wide_elements = false;
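  // The '_zw' forms compare each element of zn with the overlapping 64-bit
  // (doubleword) element of zm.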
11205   switch (op) {
11206     case CMPEQ_p_p_zw:
11207     case CMPGE_p_p_zw:
11208     case CMPGT_p_p_zw:
11209     case CMPHI_p_p_zw:
11210     case CMPHS_p_p_zw:
11211     case CMPLE_p_p_zw:
11212     case CMPLO_p_p_zw:
11213     case CMPLS_p_p_zw:
11214     case CMPLT_p_p_zw:
11215     case CMPNE_p_p_zw:
11216       is_wide_elements = true;
11217       break;
11218   }
11219 
11220   Condition cond;
11221   switch (op) {
11222     case CMPEQ_p_p_zw:
11223     case CMPEQ_p_p_zz:
11224       cond = eq;
11225       break;
11226     case CMPGE_p_p_zw:
11227     case CMPGE_p_p_zz:
11228       cond = ge;
11229       break;
11230     case CMPGT_p_p_zw:
11231     case CMPGT_p_p_zz:
11232       cond = gt;
11233       break;
11234     case CMPHI_p_p_zw:
11235     case CMPHI_p_p_zz:
11236       cond = hi;
11237       break;
11238     case CMPHS_p_p_zw:
11239     case CMPHS_p_p_zz:
11240       cond = hs;
11241       break;
11242     case CMPNE_p_p_zw:
11243     case CMPNE_p_p_zz:
11244       cond = ne;
11245       break;
11246     case CMPLE_p_p_zw:
11247       cond = le;
11248       break;
11249     case CMPLO_p_p_zw:
11250       cond = lo;
11251       break;
11252     case CMPLS_p_p_zw:
11253       cond = ls;
11254       break;
11255     case CMPLT_p_p_zw:
11256       cond = lt;
11257       break;
11258     default:
11259       VIXL_UNIMPLEMENTED();
11260       cond = al;
11261       break;
11262   }
11263 
11264   SVEIntCompareVectorsHelper(cond,
11265                              instr->GetSVEVectorFormat(),
11266                              ReadPRegister(instr->GetPd()),
11267                              ReadPRegister(instr->GetPgLow8()),
11268                              ReadVRegister(instr->GetRn()),
11269                              ReadVRegister(instr->GetRm()),
11270                              is_wide_elements);
11271 }
11272 
11273 void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) {
11274   VectorFormat vform = instr->GetSVEVectorFormat();
11275   SimVRegister& zd = ReadVRegister(instr->GetRd());
11276   SimVRegister& zn = ReadVRegister(instr->GetRn());
11277 
11278   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11279               (vform == kFormatVnD));
11280 
11281   switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
11282     case FEXPA_z_z:
11283       fexpa(vform, zd, zn);
11284       break;
11285     default:
11286       VIXL_UNIMPLEMENTED();
11287       break;
11288   }
11289 }
11290 
11291 void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) {
11292   VectorFormat vform = instr->GetSVEVectorFormat();
11293   SimVRegister& zd = ReadVRegister(instr->GetRd());
11294   SimVRegister& zn = ReadVRegister(instr->GetRn());
11295   SimVRegister& zm = ReadVRegister(instr->GetRm());
11296 
11297   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11298               (vform == kFormatVnD));
11299 
11300   switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
11301     case FTSSEL_z_zz:
11302       ftssel(vform, zd, zn, zm);
11303       break;
11304     default:
11305       VIXL_UNIMPLEMENTED();
11306       break;
11307   }
11308 }
11309 
11310 void Simulator::VisitSVEConstructivePrefix_Unpredicated(
11311     const Instruction* instr) {
11312   SimVRegister& zd = ReadVRegister(instr->GetRd());
11313   SimVRegister& zn = ReadVRegister(instr->GetRn());
11314 
11315   switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
11316     case MOVPRFX_z_z:
11317       mov(kFormatVnD, zd, zn);  // The lane size is arbitrary.
11318       break;
11319     default:
11320       VIXL_UNIMPLEMENTED();
11321       break;
11322   }
11323 }
11324 
11325 void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) {
11326   VectorFormat vform = instr->GetSVEVectorFormat();
11327 
11328   SimVRegister& zd = ReadVRegister(instr->GetRd());
11329   SimVRegister& zm = ReadVRegister(instr->GetRm());
11330 
11331   SimVRegister result;
11332   switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
11333     case MLA_z_p_zzz:
11334       mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11335       break;
11336     case MLS_z_p_zzz:
11337       mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11338       break;
11339     case MAD_z_p_zzz:
11340       // 'za' is encoded in 'Rn'.
11341       mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11342       break;
11343     case MSB_z_p_zzz: {
11344       // 'za' is encoded in 'Rn'.
11345       mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11346       break;
11347     }
11348     default:
11349       VIXL_UNIMPLEMENTED();
11350       break;
11351   }
11352   mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result);
11353 }
11354 
11355 void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
11356   VectorFormat vform = instr->GetSVEVectorFormat();
11357   SimVRegister& zda = ReadVRegister(instr->GetRd());
11358   SimVRegister& zn = ReadVRegister(instr->GetRn());
11359   SimVRegister& zm = ReadVRegister(instr->GetRm());
11360 
11361   switch (form_hash_) {
11362     case "sdot_z_zzz"_h:
11363       sdot(vform, zda, zn, zm);
11364       break;
11365     case "udot_z_zzz"_h:
11366       udot(vform, zda, zn, zm);
11367       break;
11368     case "usdot_z_zzz_s"_h:
11369       usdot(vform, zda, zn, zm);
11370       break;
11371     default:
11372       VIXL_UNIMPLEMENTED();
11373       break;
11374   }
11375 }
11376 
11377 void Simulator::VisitSVEMovprfx(const Instruction* instr) {
11378   VectorFormat vform = instr->GetSVEVectorFormat();
11379   SimVRegister& zn = ReadVRegister(instr->GetRn());
11380   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11381   SimVRegister& zd = ReadVRegister(instr->GetRd());
11382 
11383   switch (instr->Mask(SVEMovprfxMask)) {
11384     case MOVPRFX_z_p_z:
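      // Bit 16 ('M') selects merging (1) or zeroing (0) predication.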
11385       if (instr->ExtractBit(16)) {
11386         mov_merging(vform, zd, pg, zn);
11387       } else {
11388         mov_zeroing(vform, zd, pg, zn);
11389       }
11390       break;
11391     default:
11392       VIXL_UNIMPLEMENTED();
11393       break;
11394   }
11395 }
11396 
11397 void Simulator::VisitSVEIntReduction(const Instruction* instr) {
11398   VectorFormat vform = instr->GetSVEVectorFormat();
11399   SimVRegister& vd = ReadVRegister(instr->GetRd());
11400   SimVRegister& zn = ReadVRegister(instr->GetRn());
11401   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11402 
11403   if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
11404     switch (instr->Mask(SVEIntReductionLogicalMask)) {
11405       case ANDV_r_p_z:
11406         andv(vform, vd, pg, zn);
11407         break;
11408       case EORV_r_p_z:
11409         eorv(vform, vd, pg, zn);
11410         break;
11411       case ORV_r_p_z:
11412         orv(vform, vd, pg, zn);
11413         break;
11414       default:
11415         VIXL_UNIMPLEMENTED();
11416         break;
11417     }
11418   } else {
11419     switch (instr->Mask(SVEIntReductionMask)) {
11420       case SADDV_r_p_z:
11421         saddv(vform, vd, pg, zn);
11422         break;
11423       case SMAXV_r_p_z:
11424         smaxv(vform, vd, pg, zn);
11425         break;
11426       case SMINV_r_p_z:
11427         sminv(vform, vd, pg, zn);
11428         break;
11429       case UADDV_r_p_z:
11430         uaddv(vform, vd, pg, zn);
11431         break;
11432       case UMAXV_r_p_z:
11433         umaxv(vform, vd, pg, zn);
11434         break;
11435       case UMINV_r_p_z:
11436         uminv(vform, vd, pg, zn);
11437         break;
11438       default:
11439         VIXL_UNIMPLEMENTED();
11440         break;
11441     }
11442   }
11443 }
11444 
11445 void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) {
11446   VectorFormat vform = instr->GetSVEVectorFormat();
11447   SimVRegister& zn = ReadVRegister(instr->GetRn());
11448 
11449   SimVRegister result;
11450   switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
11451     case ABS_z_p_z:
11452       abs(vform, result, zn);
11453       break;
11454     case CLS_z_p_z:
11455       cls(vform, result, zn);
11456       break;
11457     case CLZ_z_p_z:
11458       clz(vform, result, zn);
11459       break;
11460     case CNOT_z_p_z:
11461       cnot(vform, result, zn);
11462       break;
11463     case CNT_z_p_z:
11464       cnt(vform, result, zn);
11465       break;
11466     case FABS_z_p_z:
11467       fabs_(vform, result, zn);
11468       break;
11469     case FNEG_z_p_z:
11470       fneg(vform, result, zn);
11471       break;
11472     case NEG_z_p_z:
11473       neg(vform, result, zn);
11474       break;
11475     case NOT_z_p_z:
11476       not_(vform, result, zn);
11477       break;
11478     case SXTB_z_p_z:
11479     case SXTH_z_p_z:
11480     case SXTW_z_p_z:
11481       sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11482       break;
11483     case UXTB_z_p_z:
11484     case UXTH_z_p_z:
11485     case UXTW_z_p_z:
11486       uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11487       break;
11488     default:
11489       VIXL_UNIMPLEMENTED();
11490       break;
11491   }
11492 
11493   SimVRegister& zd = ReadVRegister(instr->GetRd());
11494   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11495   mov_merging(vform, zd, pg, result);
11496 }
11497 
11498 void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
11499   // There is only one instruction in this group.
11500   VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i);
11501 
11502   VectorFormat vform = instr->GetSVEVectorFormat();
11503   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
11504   SimVRegister& zd = ReadVRegister(instr->GetRd());
11505 
11506   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
11507 
11508   SimVRegister result;
11509   switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
11510     case FCPY_z_p_i: {
11511       int imm8 = instr->ExtractBits(12, 5);
11512       uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform),
11513                                            Instruction::Imm8ToFP64(imm8));
11514       dup_immediate(vform, result, value);
11515       break;
11516     }
11517     default:
11518       VIXL_UNIMPLEMENTED();
11519       break;
11520   }
11521   mov_merging(vform, zd, pg, result);
11522 }
11523 
11524 void Simulator::VisitSVEIntAddSubtractImm_Unpredicated(
11525     const Instruction* instr) {
11526   VectorFormat vform = instr->GetSVEVectorFormat();
11527   SimVRegister& zd = ReadVRegister(instr->GetRd());
11528   SimVRegister scratch;
11529 
11530   uint64_t imm = instr->GetImmSVEIntWideUnsigned();
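  // Bit 13 ('sh') optionally shifts the immediate left by one byte.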
11531   imm <<= instr->ExtractBit(13) * 8;
11532 
11533   switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
11534     case ADD_z_zi:
11535       add_uint(vform, zd, zd, imm);
11536       break;
11537     case SQADD_z_zi:
11538       add_uint(vform, zd, zd, imm).SignedSaturate(vform);
11539       break;
11540     case SQSUB_z_zi:
11541       sub_uint(vform, zd, zd, imm).SignedSaturate(vform);
11542       break;
11543     case SUBR_z_zi:
11544       dup_immediate(vform, scratch, imm);
11545       sub(vform, zd, scratch, zd);
11546       break;
11547     case SUB_z_zi:
11548       sub_uint(vform, zd, zd, imm);
11549       break;
11550     case UQADD_z_zi:
11551       add_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11552       break;
11553     case UQSUB_z_zi:
11554       sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11555       break;
11556     default:
      VIXL_UNIMPLEMENTED();
11557       break;
11558   }
11559 }
11560 
11561 void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) {
11562   SimVRegister& zd = ReadVRegister(instr->GetRd());
11563 
11564   VectorFormat format = instr->GetSVEVectorFormat();
11565   int64_t imm = instr->GetImmSVEIntWideSigned();
11566   int shift = instr->ExtractBit(13) * 8;
11567   imm *= 1 << shift;
11568 
11569   switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
11570     case DUP_z_i:
11571       // The encoding of byte-sized lanes with lsl #8 is undefined.
11572       if ((format == kFormatVnB) && (shift == 8)) {
11573         VIXL_UNIMPLEMENTED();
11574       } else {
11575         dup_immediate(format, zd, imm);
11576       }
11577       break;
11578     default:
11579       VIXL_UNIMPLEMENTED();
11580       break;
11581   }
11582 }
11583 
11584 void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) {
11585   VectorFormat vform = instr->GetSVEVectorFormat();
11586   SimVRegister& zd = ReadVRegister(instr->GetRd());
11587 
11588   switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
11589     case FDUP_z_i:
11590       switch (vform) {
11591         case kFormatVnH:
11592           dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16()));
11593           break;
11594         case kFormatVnS:
11595           dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32()));
11596           break;
11597         case kFormatVnD:
11598           dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64()));
11599           break;
11600         default:
11601           VIXL_UNIMPLEMENTED();
11602       }
11603       break;
11604     default:
11605       VIXL_UNIMPLEMENTED();
11606       break;
11607   }
11608 }
11609 
11610 void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
11611     const Instruction* instr) {
11612   switch (instr->Mask(
11613       SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
11614     case LD1H_z_p_bz_s_x32_scaled:
11615     case LD1SH_z_p_bz_s_x32_scaled:
11616     case LDFF1H_z_p_bz_s_x32_scaled:
11617     case LDFF1SH_z_p_bz_s_x32_scaled:
11618       break;
11619     default:
11620       VIXL_UNIMPLEMENTED();
11621       break;
11622   }
11623 
11624   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11625   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11626 }
11627 
11628 void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
11629     const Instruction* instr) {
11630   switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
11631     case LD1B_z_p_bz_s_x32_unscaled:
11632     case LD1H_z_p_bz_s_x32_unscaled:
11633     case LD1SB_z_p_bz_s_x32_unscaled:
11634     case LD1SH_z_p_bz_s_x32_unscaled:
11635     case LD1W_z_p_bz_s_x32_unscaled:
11636     case LDFF1B_z_p_bz_s_x32_unscaled:
11637     case LDFF1H_z_p_bz_s_x32_unscaled:
11638     case LDFF1SB_z_p_bz_s_x32_unscaled:
11639     case LDFF1SH_z_p_bz_s_x32_unscaled:
11640     case LDFF1W_z_p_bz_s_x32_unscaled:
11641       break;
11642     default:
11643       VIXL_UNIMPLEMENTED();
11644       break;
11645   }
11646 
11647   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11648   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11649 }
11650 
11651 void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm(
11652     const Instruction* instr) {
11653   switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
11654     case LD1B_z_p_ai_s:
11655       VIXL_UNIMPLEMENTED();
11656       break;
11657     case LD1H_z_p_ai_s:
11658       VIXL_UNIMPLEMENTED();
11659       break;
11660     case LD1SB_z_p_ai_s:
11661       VIXL_UNIMPLEMENTED();
11662       break;
11663     case LD1SH_z_p_ai_s:
11664       VIXL_UNIMPLEMENTED();
11665       break;
11666     case LD1W_z_p_ai_s:
11667       VIXL_UNIMPLEMENTED();
11668       break;
11669     case LDFF1B_z_p_ai_s:
11670       VIXL_UNIMPLEMENTED();
11671       break;
11672     case LDFF1H_z_p_ai_s:
11673       VIXL_UNIMPLEMENTED();
11674       break;
11675     case LDFF1SB_z_p_ai_s:
11676       VIXL_UNIMPLEMENTED();
11677       break;
11678     case LDFF1SH_z_p_ai_s:
11679       VIXL_UNIMPLEMENTED();
11680       break;
11681     case LDFF1W_z_p_ai_s:
11682       VIXL_UNIMPLEMENTED();
11683       break;
11684     default:
11685       VIXL_UNIMPLEMENTED();
11686       break;
11687   }
11688 }
11689 
11690 void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
11691     const Instruction* instr) {
11692   switch (
11693       instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
11694     case LD1W_z_p_bz_s_x32_scaled:
11695     case LDFF1W_z_p_bz_s_x32_scaled:
11696       break;
11697     default:
11698       VIXL_UNIMPLEMENTED();
11699       break;
11700   }
11701 
11702   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11703   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11704 }
11705 
11706 void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
11707     const Instruction* instr) {
11708   switch (
11709       instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
11710     // Ignore prefetch hint instructions.
11711     case PRFB_i_p_bz_s_x32_scaled:
11712     case PRFD_i_p_bz_s_x32_scaled:
11713     case PRFH_i_p_bz_s_x32_scaled:
11714     case PRFW_i_p_bz_s_x32_scaled:
11715       break;
11716     default:
11717       VIXL_UNIMPLEMENTED();
11718       break;
11719   }
11720 }
11721 
11722 void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm(
11723     const Instruction* instr) {
11724   switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
11725     // Ignore prefetch hint instructions.
11726     case PRFB_i_p_ai_s:
11727     case PRFD_i_p_ai_s:
11728     case PRFH_i_p_ai_s:
11729     case PRFW_i_p_ai_s:
11730       break;
11731     default:
11732       VIXL_UNIMPLEMENTED();
11733       break;
11734   }
11735 }
11736 
11737 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm(
11738     const Instruction* instr) {
11739   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
11740     // Ignore prefetch hint instructions.
11741     case PRFB_i_p_bi_s:
11742     case PRFD_i_p_bi_s:
11743     case PRFH_i_p_bi_s:
11744     case PRFW_i_p_bi_s:
11745       break;
11746     default:
11747       VIXL_UNIMPLEMENTED();
11748       break;
11749   }
11750 }
11751 
11752 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar(
11753     const Instruction* instr) {
11754   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
11755     // Ignore prefetch hint instructions.
11756     case PRFB_i_p_br_s:
11757     case PRFD_i_p_br_s:
11758     case PRFH_i_p_br_s:
11759     case PRFW_i_p_br_s:
11760       if (instr->GetRm() == kZeroRegCode) {
11761         VIXL_UNIMPLEMENTED();
11762       }
11763       break;
11764     default:
11765       VIXL_UNIMPLEMENTED();
11766       break;
11767   }
11768 }
11769 
11770 void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) {
11771   bool is_signed;
11772   switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
11773     case LD1RB_z_p_bi_u8:
11774     case LD1RB_z_p_bi_u16:
11775     case LD1RB_z_p_bi_u32:
11776     case LD1RB_z_p_bi_u64:
11777     case LD1RH_z_p_bi_u16:
11778     case LD1RH_z_p_bi_u32:
11779     case LD1RH_z_p_bi_u64:
11780     case LD1RW_z_p_bi_u32:
11781     case LD1RW_z_p_bi_u64:
11782     case LD1RD_z_p_bi_u64:
11783       is_signed = false;
11784       break;
11785     case LD1RSB_z_p_bi_s16:
11786     case LD1RSB_z_p_bi_s32:
11787     case LD1RSB_z_p_bi_s64:
11788     case LD1RSH_z_p_bi_s32:
11789     case LD1RSH_z_p_bi_s64:
11790     case LD1RSW_z_p_bi_s64:
11791       is_signed = true;
11792       break;
11793     default:
11794       // This encoding group is complete, so no other values should be possible.
11795       VIXL_UNREACHABLE();
11796       is_signed = false;
11797       break;
11798   }
11799 
11800   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
11801   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13);
11802   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
11803   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
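  // The unsigned six-bit immediate is scaled by the memory access size.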
11804   uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
11805   uint64_t base = ReadXRegister(instr->GetRn()) + offset;
11806   VectorFormat unpack_vform =
11807       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
11808   SimVRegister temp;
11809   ld1r(vform, unpack_vform, temp, base, is_signed);
11810   mov_zeroing(vform,
11811               ReadVRegister(instr->GetRt()),
11812               ReadPRegister(instr->GetPgLow8()),
11813               temp);
11814 }
11815 
11816 void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) {
11817   switch (instr->Mask(SVELoadPredicateRegisterMask)) {
11818     case LDR_p_bi: {
11819       SimPRegister& pt = ReadPRegister(instr->GetPt());
11820       int pl = GetPredicateLengthInBytes();
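      // The nine-bit signed immediate (imm6:imm3) is a multiple of the
      // predicate length in bytes.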
11821       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
11822       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
11823       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
11824       for (int i = 0; i < pl; i++) {
11825         pt.Insert(i, MemRead<uint8_t>(address + i));
11826       }
11827       LogPRead(instr->GetPt(), address);
11828       break;
11829     }
11830     default:
11831       VIXL_UNIMPLEMENTED();
11832       break;
11833   }
11834 }
11835 
11836 void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
11837   switch (instr->Mask(SVELoadVectorRegisterMask)) {
11838     case LDR_z_bi: {
11839       SimVRegister& zt = ReadVRegister(instr->GetRt());
11840       int vl = GetVectorLengthInBytes();
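      // The nine-bit signed immediate (imm6:imm3) is a multiple of the
      // vector length in bytes.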
11841       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
11842       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
11843       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
11844       for (int i = 0; i < vl; i++) {
11845         zt.Insert(i, MemRead<uint8_t>(address + i));
11846       }
11847       LogZRead(instr->GetRt(), address);
11848       break;
11849     }
11850     default:
11851       VIXL_UNIMPLEMENTED();
11852       break;
11853   }
11854 }
11855 
11856 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
11857     const Instruction* instr) {
11858   switch (instr->Mask(
11859       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
11860     case LD1D_z_p_bz_d_x32_scaled:
11861     case LD1H_z_p_bz_d_x32_scaled:
11862     case LD1SH_z_p_bz_d_x32_scaled:
11863     case LD1SW_z_p_bz_d_x32_scaled:
11864     case LD1W_z_p_bz_d_x32_scaled:
11865     case LDFF1H_z_p_bz_d_x32_scaled:
11866     case LDFF1W_z_p_bz_d_x32_scaled:
11867     case LDFF1D_z_p_bz_d_x32_scaled:
11868     case LDFF1SH_z_p_bz_d_x32_scaled:
11869     case LDFF1SW_z_p_bz_d_x32_scaled:
11870       break;
11871     default:
11872       VIXL_UNIMPLEMENTED();
11873       break;
11874   }
11875 
11876   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11877   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
11878 }
11879 
11880 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
11881     const Instruction* instr) {
11882   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
11883     case LD1D_z_p_bz_d_64_scaled:
11884     case LD1H_z_p_bz_d_64_scaled:
11885     case LD1SH_z_p_bz_d_64_scaled:
11886     case LD1SW_z_p_bz_d_64_scaled:
11887     case LD1W_z_p_bz_d_64_scaled:
11888     case LDFF1H_z_p_bz_d_64_scaled:
11889     case LDFF1W_z_p_bz_d_64_scaled:
11890     case LDFF1D_z_p_bz_d_64_scaled:
11891     case LDFF1SH_z_p_bz_d_64_scaled:
11892     case LDFF1SW_z_p_bz_d_64_scaled:
11893       break;
11894     default:
11895       VIXL_UNIMPLEMENTED();
11896       break;
11897   }
11898 
11899   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL);
11900 }
11901 
11902 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
11903     const Instruction* instr) {
11904   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
11905     case LD1B_z_p_bz_d_64_unscaled:
11906     case LD1D_z_p_bz_d_64_unscaled:
11907     case LD1H_z_p_bz_d_64_unscaled:
11908     case LD1SB_z_p_bz_d_64_unscaled:
11909     case LD1SH_z_p_bz_d_64_unscaled:
11910     case LD1SW_z_p_bz_d_64_unscaled:
11911     case LD1W_z_p_bz_d_64_unscaled:
11912     case LDFF1B_z_p_bz_d_64_unscaled:
11913     case LDFF1D_z_p_bz_d_64_unscaled:
11914     case LDFF1H_z_p_bz_d_64_unscaled:
11915     case LDFF1SB_z_p_bz_d_64_unscaled:
11916     case LDFF1SH_z_p_bz_d_64_unscaled:
11917     case LDFF1SW_z_p_bz_d_64_unscaled:
11918     case LDFF1W_z_p_bz_d_64_unscaled:
11919       break;
11920     default:
11921       VIXL_UNIMPLEMENTED();
11922       break;
11923   }
11924 
11925   SVEGatherLoadScalarPlusVectorHelper(instr,
11926                                       kFormatVnD,
11927                                       NO_SVE_OFFSET_MODIFIER);
11928 }
11929 
11930 void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
11931     const Instruction* instr) {
11932   switch (instr->Mask(
11933       SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
11934     case LD1B_z_p_bz_d_x32_unscaled:
11935     case LD1D_z_p_bz_d_x32_unscaled:
11936     case LD1H_z_p_bz_d_x32_unscaled:
11937     case LD1SB_z_p_bz_d_x32_unscaled:
11938     case LD1SH_z_p_bz_d_x32_unscaled:
11939     case LD1SW_z_p_bz_d_x32_unscaled:
11940     case LD1W_z_p_bz_d_x32_unscaled:
11941     case LDFF1B_z_p_bz_d_x32_unscaled:
11942     case LDFF1H_z_p_bz_d_x32_unscaled:
11943     case LDFF1W_z_p_bz_d_x32_unscaled:
11944     case LDFF1D_z_p_bz_d_x32_unscaled:
11945     case LDFF1SB_z_p_bz_d_x32_unscaled:
11946     case LDFF1SH_z_p_bz_d_x32_unscaled:
11947     case LDFF1SW_z_p_bz_d_x32_unscaled:
11948       break;
11949     default:
11950       VIXL_UNIMPLEMENTED();
11951       break;
11952   }
11953 
11954   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11955   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
11956 }
11957 
11958 void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm(
11959     const Instruction* instr) {
11960   switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
11961     case LD1B_z_p_ai_d:
11962     case LD1D_z_p_ai_d:
11963     case LD1H_z_p_ai_d:
11964     case LD1SB_z_p_ai_d:
11965     case LD1SH_z_p_ai_d:
11966     case LD1SW_z_p_ai_d:
11967     case LD1W_z_p_ai_d:
11968     case LDFF1B_z_p_ai_d:
11969     case LDFF1D_z_p_ai_d:
11970     case LDFF1H_z_p_ai_d:
11971     case LDFF1SB_z_p_ai_d:
11972     case LDFF1SH_z_p_ai_d:
11973     case LDFF1SW_z_p_ai_d:
11974     case LDFF1W_z_p_ai_d:
11975       break;
11976     default:
11977       VIXL_UNIMPLEMENTED();
11978       break;
11979   }
11980   bool is_signed = instr->ExtractBit(14) == 0;
11981   bool is_ff = instr->ExtractBit(13) == 1;
11982   // Note that these instructions don't use the Dtype encoding.
11983   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
11984   uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2;
11985   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
11986   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
11987   if (is_ff) {
11988     VIXL_UNIMPLEMENTED();
11989   } else {
11990     SVEStructuredLoadHelper(kFormatVnD,
11991                             ReadPRegister(instr->GetPgLow8()),
11992                             instr->GetRt(),
11993                             addr,
11994                             is_signed);
11995   }
11996 }
11997 
11998 void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
11999     const Instruction* instr) {
12000   switch (
12001       instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
12002     // Ignore prefetch hint instructions.
12003     case PRFB_i_p_bz_d_64_scaled:
12004     case PRFD_i_p_bz_d_64_scaled:
12005     case PRFH_i_p_bz_d_64_scaled:
12006     case PRFW_i_p_bz_d_64_scaled:
12007       break;
12008     default:
12009       VIXL_UNIMPLEMENTED();
12010       break;
12011   }
12012 }
12013 
12014 void Simulator::
12015     VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
12016         const Instruction* instr) {
12017   switch (instr->Mask(
12018       SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12019     // Ignore prefetch hint instructions.
12020     case PRFB_i_p_bz_d_x32_scaled:
12021     case PRFD_i_p_bz_d_x32_scaled:
12022     case PRFH_i_p_bz_d_x32_scaled:
12023     case PRFW_i_p_bz_d_x32_scaled:
12024       break;
12025     default:
12026       VIXL_UNIMPLEMENTED();
12027       break;
12028   }
12029 }
12030 
12031 void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm(
12032     const Instruction* instr) {
12033   switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
12034     // Ignore prefetch hint instructions.
12035     case PRFB_i_p_ai_d:
12036     case PRFD_i_p_ai_d:
12037     case PRFH_i_p_ai_d:
12038     case PRFW_i_p_ai_d:
12039       break;
12040     default:
12041       VIXL_UNIMPLEMENTED();
12042       break;
12043   }
12044 }
12045 
12046 void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
12047     const Instruction* instr) {
12048   bool is_signed;
12049   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
12050     case LDFF1B_z_p_br_u8:
12051     case LDFF1B_z_p_br_u16:
12052     case LDFF1B_z_p_br_u32:
12053     case LDFF1B_z_p_br_u64:
12054     case LDFF1H_z_p_br_u16:
12055     case LDFF1H_z_p_br_u32:
12056     case LDFF1H_z_p_br_u64:
12057     case LDFF1W_z_p_br_u32:
12058     case LDFF1W_z_p_br_u64:
12059     case LDFF1D_z_p_br_u64:
12060       is_signed = false;
12061       break;
12062     case LDFF1SB_z_p_br_s16:
12063     case LDFF1SB_z_p_br_s32:
12064     case LDFF1SB_z_p_br_s64:
12065     case LDFF1SH_z_p_br_s32:
12066     case LDFF1SH_z_p_br_s64:
12067     case LDFF1SW_z_p_br_s64:
12068       is_signed = true;
12069       break;
12070     default:
12071       // This encoding group is complete, so no other values should be possible.
12072       VIXL_UNREACHABLE();
12073       is_signed = false;
12074       break;
12075   }
12076 
12077   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12078   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12079   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12080   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12081   uint64_t offset = ReadXRegister(instr->GetRm());
12082   offset <<= msize_in_bytes_log2;
12083   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12084   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12085   SVEFaultTolerantLoadHelper(vform,
12086                              ReadPRegister(instr->GetPgLow8()),
12087                              instr->GetRt(),
12088                              addr,
12089                              kSVEFirstFaultLoad,
12090                              is_signed);
12091 }
12092 
12093 void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
12094     const Instruction* instr) {
12095   bool is_signed = false;
12096   switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
12097     case LDNF1B_z_p_bi_u16:
12098     case LDNF1B_z_p_bi_u32:
12099     case LDNF1B_z_p_bi_u64:
12100     case LDNF1B_z_p_bi_u8:
12101     case LDNF1D_z_p_bi_u64:
12102     case LDNF1H_z_p_bi_u16:
12103     case LDNF1H_z_p_bi_u32:
12104     case LDNF1H_z_p_bi_u64:
12105     case LDNF1W_z_p_bi_u32:
12106     case LDNF1W_z_p_bi_u64:
12107       break;
12108     case LDNF1SB_z_p_bi_s16:
12109     case LDNF1SB_z_p_bi_s32:
12110     case LDNF1SB_z_p_bi_s64:
12111     case LDNF1SH_z_p_bi_s32:
12112     case LDNF1SH_z_p_bi_s64:
12113     case LDNF1SW_z_p_bi_s64:
12114       is_signed = true;
12115       break;
12116     default:
12117       VIXL_UNIMPLEMENTED();
12118       break;
12119   }
12120   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12121   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12122   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12123   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12124   int vl = GetVectorLengthInBytes();
12125   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
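  // The signed four-bit immediate is a multiple of the memory footprint of
  // one register: VL bytes divided by the element-to-memory size ratio.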
12126   uint64_t offset =
12127       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12128   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12129   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12130   SVEFaultTolerantLoadHelper(vform,
12131                              ReadPRegister(instr->GetPgLow8()),
12132                              instr->GetRt(),
12133                              addr,
12134                              kSVENonFaultLoad,
12135                              is_signed);
12136 }
12137 
12138 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
12139     const Instruction* instr) {
12140   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12141   VectorFormat vform = kFormatUndefined;
12142 
12143   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
12144     case LDNT1B_z_p_bi_contiguous:
12145       vform = kFormatVnB;
12146       break;
12147     case LDNT1D_z_p_bi_contiguous:
12148       vform = kFormatVnD;
12149       break;
12150     case LDNT1H_z_p_bi_contiguous:
12151       vform = kFormatVnH;
12152       break;
12153     case LDNT1W_z_p_bi_contiguous:
12154       vform = kFormatVnS;
12155       break;
12156     default:
12157       VIXL_UNIMPLEMENTED();
12158       break;
12159   }
12160   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12161   int vl = GetVectorLengthInBytes();
12162   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12163   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12164   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12165   SVEStructuredLoadHelper(vform,
12166                           pg,
12167                           instr->GetRt(),
12168                           addr,
12169                           /* is_signed = */ false);
12170 }
12171 
12172 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
12173     const Instruction* instr) {
12174   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12175   VectorFormat vform = kFormatUndefined;
12176 
12177   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
12178     case LDNT1B_z_p_br_contiguous:
12179       vform = kFormatVnB;
12180       break;
12181     case LDNT1D_z_p_br_contiguous:
12182       vform = kFormatVnD;
12183       break;
12184     case LDNT1H_z_p_br_contiguous:
12185       vform = kFormatVnH;
12186       break;
12187     case LDNT1W_z_p_br_contiguous:
12188       vform = kFormatVnS;
12189       break;
12190     default:
12191       VIXL_UNIMPLEMENTED();
12192       break;
12193   }
12194   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12195   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12196   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12197   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12198   SVEStructuredLoadHelper(vform,
12199                           pg,
12200                           instr->GetRt(),
12201                           addr,
12202                           /* is_signed = */ false);
12203 }
12204 
12205 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
12206     const Instruction* instr) {
12207   SimVRegister& zt = ReadVRegister(instr->GetRt());
12208   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12209 
12210   uint64_t dwords = 2;
12211   VectorFormat vform_dst = kFormatVnQ;
12212   if ((form_hash_ == "ld1rob_z_p_bi_u8"_h) ||
12213       (form_hash_ == "ld1roh_z_p_bi_u16"_h) ||
12214       (form_hash_ == "ld1row_z_p_bi_u32"_h) ||
12215       (form_hash_ == "ld1rod_z_p_bi_u64"_h)) {
12216     dwords = 4;
12217     vform_dst = kFormatVnO;
12218   }
12219 
12220   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12221   uint64_t offset =
12222       instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes;
12223   int msz = instr->ExtractBits(24, 23);
12224   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12225 
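  // Load two (LD1RQ*) or four (LD1RO*) doublewords, apply the governing
  // predicate, then replicate the resulting segment across the register.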
12226   for (unsigned i = 0; i < dwords; i++) {
12227     ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes));
12228   }
12229   mov_zeroing(vform, zt, pg, zt);
12230   dup_element(vform_dst, zt, zt, 0);
12231 }
12232 
12233 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
12234     const Instruction* instr) {
12235   SimVRegister& zt = ReadVRegister(instr->GetRt());
12236   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12237 
12238   uint64_t bytes = 16;
12239   VectorFormat vform_dst = kFormatVnQ;
12240   if ((form_hash_ == "ld1rob_z_p_br_contiguous"_h) ||
12241       (form_hash_ == "ld1roh_z_p_br_contiguous"_h) ||
12242       (form_hash_ == "ld1row_z_p_br_contiguous"_h) ||
12243       (form_hash_ == "ld1rod_z_p_br_contiguous"_h)) {
12244     bytes = 32;
12245     vform_dst = kFormatVnO;
12246   }
12247 
12248   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12249   uint64_t offset = ReadXRegister(instr->GetRm());
12250   int msz = instr->ExtractBits(24, 23);
12251   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12252   offset <<= msz;
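  // Load the 16- or 32-byte segment, apply the governing predicate, then
  // replicate it across the register.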
12253   for (unsigned i = 0; i < bytes; i++) {
12254     ld1(kFormatVnB, zt, i, addr + offset + i);
12255   }
12256   mov_zeroing(vform, zt, pg, zt);
12257   dup_element(vform_dst, zt, zt, 0);
12258 }
12259 
12260 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
12261     const Instruction* instr) {
12262   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
12263     case LD2B_z_p_bi_contiguous:
12264     case LD2D_z_p_bi_contiguous:
12265     case LD2H_z_p_bi_contiguous:
12266     case LD2W_z_p_bi_contiguous:
12267     case LD3B_z_p_bi_contiguous:
12268     case LD3D_z_p_bi_contiguous:
12269     case LD3H_z_p_bi_contiguous:
12270     case LD3W_z_p_bi_contiguous:
12271     case LD4B_z_p_bi_contiguous:
12272     case LD4D_z_p_bi_contiguous:
12273     case LD4H_z_p_bi_contiguous:
12274     case LD4W_z_p_bi_contiguous: {
12275       int vl = GetVectorLengthInBytes();
12276       int msz = instr->ExtractBits(24, 23);
12277       int reg_count = instr->ExtractBits(22, 21) + 1;
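      // The signed four-bit immediate is a multiple of the full transfer
      // size: reg_count registers of VL bytes each.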
12278       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12279       LogicSVEAddressVector addr(
12280           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12281       addr.SetMsizeInBytesLog2(msz);
12282       addr.SetRegCount(reg_count);
12283       SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12284                               ReadPRegister(instr->GetPgLow8()),
12285                               instr->GetRt(),
12286                               addr);
12287       break;
12288     }
12289     default:
12290       VIXL_UNIMPLEMENTED();
12291       break;
12292   }
12293 }
12294 
12295 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar(
12296     const Instruction* instr) {
12297   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
12298     case LD2B_z_p_br_contiguous:
12299     case LD2D_z_p_br_contiguous:
12300     case LD2H_z_p_br_contiguous:
12301     case LD2W_z_p_br_contiguous:
12302     case LD3B_z_p_br_contiguous:
12303     case LD3D_z_p_br_contiguous:
12304     case LD3H_z_p_br_contiguous:
12305     case LD3W_z_p_br_contiguous:
12306     case LD4B_z_p_br_contiguous:
12307     case LD4D_z_p_br_contiguous:
12308     case LD4H_z_p_br_contiguous:
12309     case LD4W_z_p_br_contiguous: {
12310       int msz = instr->ExtractBits(24, 23);
12311       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
12312       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12313       LogicSVEAddressVector addr(
12314           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12315       addr.SetMsizeInBytesLog2(msz);
12316       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
12317       SVEStructuredLoadHelper(vform,
12318                               ReadPRegister(instr->GetPgLow8()),
12319                               instr->GetRt(),
12320                               addr,
12321                               false);
12322       break;
12323     }
12324     default:
12325       VIXL_UNIMPLEMENTED();
12326       break;
12327   }
12328 }
12329 
12330 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
12331     const Instruction* instr) {
12332   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
12333     case ST1H_z_p_bz_s_x32_scaled:
12334     case ST1W_z_p_bz_s_x32_scaled: {
12335       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12336       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12337       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12338       uint64_t base = ReadXRegister(instr->GetRn());
12339       SVEOffsetModifier mod =
12340           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12341       LogicSVEAddressVector addr(base,
12342                                  &ReadVRegister(instr->GetRm()),
12343                                  kFormatVnS,
12344                                  mod,
12345                                  scale);
12346       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12347       SVEStructuredStoreHelper(kFormatVnS,
12348                                ReadPRegister(instr->GetPgLow8()),
12349                                instr->GetRt(),
12350                                addr);
12351       break;
12352     }
12353     default:
12354       VIXL_UNIMPLEMENTED();
12355       break;
12356   }
12357 }
12358 
12359 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
12360     const Instruction* instr) {
12361   switch (
12362       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
12363     case ST1B_z_p_bz_s_x32_unscaled:
12364     case ST1H_z_p_bz_s_x32_unscaled:
12365     case ST1W_z_p_bz_s_x32_unscaled: {
12366       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12367       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12368       uint64_t base = ReadXRegister(instr->GetRn());
12369       SVEOffsetModifier mod =
12370           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12371       LogicSVEAddressVector addr(base,
12372                                  &ReadVRegister(instr->GetRm()),
12373                                  kFormatVnS,
12374                                  mod);
12375       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12376       SVEStructuredStoreHelper(kFormatVnS,
12377                                ReadPRegister(instr->GetPgLow8()),
12378                                instr->GetRt(),
12379                                addr);
12380       break;
12381     }
12382     default:
12383       VIXL_UNIMPLEMENTED();
12384       break;
12385   }
12386 }
12387 
12388 void Simulator::VisitSVE32BitScatterStore_VectorPlusImm(
12389     const Instruction* instr) {
12390   int msz = 0;
12391   switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
12392     case ST1B_z_p_ai_s:
12393       msz = 0;
12394       break;
12395     case ST1H_z_p_ai_s:
12396       msz = 1;
12397       break;
12398     case ST1W_z_p_ai_s:
12399       msz = 2;
12400       break;
12401     default:
12402       VIXL_UNIMPLEMENTED();
12403       break;
12404   }
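  // The five-bit immediate is scaled by the memory access size.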
12405   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12406   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS);
12407   addr.SetMsizeInBytesLog2(msz);
12408   SVEStructuredStoreHelper(kFormatVnS,
12409                            ReadPRegister(instr->GetPgLow8()),
12410                            instr->GetRt(),
12411                            addr);
12412 }
12413 
12414 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
12415     const Instruction* instr) {
12416   switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
12417     case ST1D_z_p_bz_d_64_scaled:
12418     case ST1H_z_p_bz_d_64_scaled:
12419     case ST1W_z_p_bz_d_64_scaled: {
12420       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12421       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12422       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12423       uint64_t base = ReadXRegister(instr->GetRn());
12424       LogicSVEAddressVector addr(base,
12425                                  &ReadVRegister(instr->GetRm()),
12426                                  kFormatVnD,
12427                                  SVE_LSL,
12428                                  scale);
12429       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12430       SVEStructuredStoreHelper(kFormatVnD,
12431                                ReadPRegister(instr->GetPgLow8()),
12432                                instr->GetRt(),
12433                                addr);
12434       break;
12435     }
12436     default:
12437       VIXL_UNIMPLEMENTED();
12438       break;
12439   }
12440 }
12441 
12442 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
12443     const Instruction* instr) {
12444   switch (
12445       instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
12446     case ST1B_z_p_bz_d_64_unscaled:
12447     case ST1D_z_p_bz_d_64_unscaled:
12448     case ST1H_z_p_bz_d_64_unscaled:
12449     case ST1W_z_p_bz_d_64_unscaled: {
12450       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12451       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12452       uint64_t base = ReadXRegister(instr->GetRn());
12453       LogicSVEAddressVector addr(base,
12454                                  &ReadVRegister(instr->GetRm()),
12455                                  kFormatVnD,
12456                                  NO_SVE_OFFSET_MODIFIER);
12457       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12458       SVEStructuredStoreHelper(kFormatVnD,
12459                                ReadPRegister(instr->GetPgLow8()),
12460                                instr->GetRt(),
12461                                addr);
12462       break;
12463     }
12464     default:
12465       VIXL_UNIMPLEMENTED();
12466       break;
12467   }
12468 }
12469 
12470 void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
12471     const Instruction* instr) {
12472   switch (instr->Mask(
12473       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12474     case ST1D_z_p_bz_d_x32_scaled:
12475     case ST1H_z_p_bz_d_x32_scaled:
12476     case ST1W_z_p_bz_d_x32_scaled: {
12477       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12478       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12479       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12480       uint64_t base = ReadXRegister(instr->GetRn());
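      // Bit 14 selects sign extension (SXTW) or zero extension (UXTW) of the
      // 32-bit offsets held in the vector operand.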
12481       SVEOffsetModifier mod =
12482           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12483       LogicSVEAddressVector addr(base,
12484                                  &ReadVRegister(instr->GetRm()),
12485                                  kFormatVnD,
12486                                  mod,
12487                                  scale);
12488       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12489       SVEStructuredStoreHelper(kFormatVnD,
12490                                ReadPRegister(instr->GetPgLow8()),
12491                                instr->GetRt(),
12492                                addr);
12493       break;
12494     }
12495     default:
12496       VIXL_UNIMPLEMENTED();
12497       break;
12498   }
12499 }
12500 
12501 void Simulator::
12502     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
12503         const Instruction* instr) {
12504   switch (instr->Mask(
12505       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
12506     case ST1B_z_p_bz_d_x32_unscaled:
12507     case ST1D_z_p_bz_d_x32_unscaled:
12508     case ST1H_z_p_bz_d_x32_unscaled:
12509     case ST1W_z_p_bz_d_x32_unscaled: {
12510       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12511       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12512       uint64_t base = ReadXRegister(instr->GetRn());
12513       SVEOffsetModifier mod =
12514           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12515       LogicSVEAddressVector addr(base,
12516                                  &ReadVRegister(instr->GetRm()),
12517                                  kFormatVnD,
12518                                  mod);
12519       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12520       SVEStructuredStoreHelper(kFormatVnD,
12521                                ReadPRegister(instr->GetPgLow8()),
12522                                instr->GetRt(),
12523                                addr);
12524       break;
12525     }
12526     default:
12527       VIXL_UNIMPLEMENTED();
12528       break;
12529   }
12530 }
12531 
12532 void Simulator::VisitSVE64BitScatterStore_VectorPlusImm(
12533     const Instruction* instr) {
12534   int msz = 0;
12535   switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
12536     case ST1B_z_p_ai_d:
12537       msz = 0;
12538       break;
12539     case ST1D_z_p_ai_d:
12540       msz = 3;
12541       break;
12542     case ST1H_z_p_ai_d:
12543       msz = 1;
12544       break;
12545     case ST1W_z_p_ai_d:
12546       msz = 2;
12547       break;
12548     default:
12549       VIXL_UNIMPLEMENTED();
12550       break;
12551   }
12552   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12553   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
12554   addr.SetMsizeInBytesLog2(msz);
12555   SVEStructuredStoreHelper(kFormatVnD,
12556                            ReadPRegister(instr->GetPgLow8()),
12557                            instr->GetRt(),
12558                            addr);
12559 }
12560 
12561 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
12562     const Instruction* instr) {
12563   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12564   VectorFormat vform = kFormatUndefined;
12565 
12566   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
12567     case STNT1B_z_p_bi_contiguous:
12568       vform = kFormatVnB;
12569       break;
12570     case STNT1D_z_p_bi_contiguous:
12571       vform = kFormatVnD;
12572       break;
12573     case STNT1H_z_p_bi_contiguous:
12574       vform = kFormatVnH;
12575       break;
12576     case STNT1W_z_p_bi_contiguous:
12577       vform = kFormatVnS;
12578       break;
12579     default:
12580       VIXL_UNIMPLEMENTED();
12581       break;
12582   }
12583   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12584   int vl = GetVectorLengthInBytes();
12585   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12586   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12587   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12588   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12589 }
12590 
12591 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
12592     const Instruction* instr) {
12593   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12594   VectorFormat vform = kFormatUndefined;
12595 
12596   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
12597     case STNT1B_z_p_br_contiguous:
12598       vform = kFormatVnB;
12599       break;
12600     case STNT1D_z_p_br_contiguous:
12601       vform = kFormatVnD;
12602       break;
12603     case STNT1H_z_p_br_contiguous:
12604       vform = kFormatVnH;
12605       break;
12606     case STNT1W_z_p_br_contiguous:
12607       vform = kFormatVnS;
12608       break;
12609     default:
12610       VIXL_UNIMPLEMENTED();
12611       break;
12612   }
12613   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12614   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12615   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12616   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12617   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12618 }
12619 
12620 void Simulator::VisitSVEContiguousStore_ScalarPlusImm(
12621     const Instruction* instr) {
12622   switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
12623     case ST1B_z_p_bi:
12624     case ST1D_z_p_bi:
12625     case ST1H_z_p_bi:
12626     case ST1W_z_p_bi: {
12627       int vl = GetVectorLengthInBytes();
12628       int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12629       int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
12630       VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
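      // The signed immediate is a multiple of the bytes stored per vector.
      // For narrowing stores (memory elements smaller than register
      // elements), that is less than the full vector length, hence the
      // divisor below.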
12631       int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12632       uint64_t offset =
12633           (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12634       VectorFormat vform =
12635           SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12636       LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12637       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12638       SVEStructuredStoreHelper(vform,
12639                                ReadPRegister(instr->GetPgLow8()),
12640                                instr->GetRt(),
12641                                addr);
12642       break;
12643     }
12644     default:
12645       VIXL_UNIMPLEMENTED();
12646       break;
12647   }
12648 }
12649 
12650 void Simulator::VisitSVEContiguousStore_ScalarPlusScalar(
12651     const Instruction* instr) {
12652   switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
12653     case ST1B_z_p_br:
12654     case ST1D_z_p_br:
12655     case ST1H_z_p_br:
12656     case ST1W_z_p_br: {
12657       uint64_t offset = ReadXRegister(instr->GetRm());
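      // Bits 24:23 hold log2 of the memory element size, used to scale the
      // scalar offset; bits 22:21 hold log2 of the register element size.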
12658       offset <<= instr->ExtractBits(24, 23);
12659       VectorFormat vform =
12660           SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
12661       LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12662       addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
12663       SVEStructuredStoreHelper(vform,
12664                                ReadPRegister(instr->GetPgLow8()),
12665                                instr->GetRt(),
12666                                addr);
12667       break;
12668     }
12669     default:
12670       VIXL_UNIMPLEMENTED();
12671       break;
12672   }
12673 }
12674 
12675 void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
12676     const Instruction* instr) {
12677   VectorFormat vform = instr->GetSVEVectorFormat();
12678   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12679   SimVRegister z_result;
12680 
12681   switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
12682     case CPY_z_p_v:
12683       dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0);
12684       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
12685       break;
12686     default:
12687       VIXL_UNIMPLEMENTED();
12688       break;
12689   }
12690 }
12691 
12692 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm(
12693     const Instruction* instr) {
12694   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
12695     case ST2B_z_p_bi_contiguous:
12696     case ST2D_z_p_bi_contiguous:
12697     case ST2H_z_p_bi_contiguous:
12698     case ST2W_z_p_bi_contiguous:
12699     case ST3B_z_p_bi_contiguous:
12700     case ST3D_z_p_bi_contiguous:
12701     case ST3H_z_p_bi_contiguous:
12702     case ST3W_z_p_bi_contiguous:
12703     case ST4B_z_p_bi_contiguous:
12704     case ST4D_z_p_bi_contiguous:
12705     case ST4H_z_p_bi_contiguous:
12706     case ST4W_z_p_bi_contiguous: {
12707       int vl = GetVectorLengthInBytes();
12708       int msz = instr->ExtractBits(24, 23);
12709       int reg_count = instr->ExtractBits(22, 21) + 1;
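      // The signed immediate is a multiple of the total transfer size, i.e.
      // the vector length times the number of registers in the group.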
12710       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12711       LogicSVEAddressVector addr(
12712           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12713       addr.SetMsizeInBytesLog2(msz);
12714       addr.SetRegCount(reg_count);
12715       SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12716                                ReadPRegister(instr->GetPgLow8()),
12717                                instr->GetRt(),
12718                                addr);
12719       break;
12720     }
12721     default:
12722       VIXL_UNIMPLEMENTED();
12723       break;
12724   }
12725 }
12726 
12727 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
12728     const Instruction* instr) {
12729   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
12730     case ST2B_z_p_br_contiguous:
12731     case ST2D_z_p_br_contiguous:
12732     case ST2H_z_p_br_contiguous:
12733     case ST2W_z_p_br_contiguous:
12734     case ST3B_z_p_br_contiguous:
12735     case ST3D_z_p_br_contiguous:
12736     case ST3H_z_p_br_contiguous:
12737     case ST3W_z_p_br_contiguous:
12738     case ST4B_z_p_br_contiguous:
12739     case ST4D_z_p_br_contiguous:
12740     case ST4H_z_p_br_contiguous:
12741     case ST4W_z_p_br_contiguous: {
12742       int msz = instr->ExtractBits(24, 23);
12743       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
12744       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12745       LogicSVEAddressVector addr(
12746           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12747       addr.SetMsizeInBytesLog2(msz);
12748       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
12749       SVEStructuredStoreHelper(vform,
12750                                ReadPRegister(instr->GetPgLow8()),
12751                                instr->GetRt(),
12752                                addr);
12753       break;
12754     }
12755     default:
12756       VIXL_UNIMPLEMENTED();
12757       break;
12758   }
12759 }
12760 
12761 void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
12762   switch (instr->Mask(SVEStorePredicateRegisterMask)) {
12763     case STR_p_bi: {
12764       SimPRegister& pt = ReadPRegister(instr->GetPt());
12765       int pl = GetPredicateLengthInBytes();
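      // The nine-bit signed immediate is split across bits 21:16 and 12:10 of
      // the encoding, and is a multiple of the predicate length.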
12766       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12767       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12768       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
12769       for (int i = 0; i < pl; i++) {
12770         MemWrite(address + i, pt.GetLane<uint8_t>(i));
12771       }
12772       LogPWrite(instr->GetPt(), address);
12773       break;
12774     }
12775     default:
12776       VIXL_UNIMPLEMENTED();
12777       break;
12778   }
12779 }
12780 
12781 void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
12782   switch (instr->Mask(SVEStoreVectorRegisterMask)) {
12783     case STR_z_bi: {
12784       SimVRegister& zt = ReadVRegister(instr->GetRt());
12785       int vl = GetVectorLengthInBytes();
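      // As for STR (predicate), the split nine-bit immediate is a signed
      // multiple of the vector length.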
12786       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12787       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12788       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
12789       for (int i = 0; i < vl; i++) {
12790         MemWrite(address + i, zt.GetLane<uint8_t>(i));
12791       }
12792       LogZWrite(instr->GetRt(), address);
12793       break;
12794     }
12795     default:
12796       VIXL_UNIMPLEMENTED();
12797       break;
12798   }
12799 }
12800 
12801 void Simulator::VisitSVEMulIndex(const Instruction* instr) {
12802   VectorFormat vform = instr->GetSVEVectorFormat();
12803   SimVRegister& zda = ReadVRegister(instr->GetRd());
12804   SimVRegister& zn = ReadVRegister(instr->GetRn());
12805   std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex();
12806   SimVRegister zm = ReadVRegister(zm_and_index.first);
12807   int index = zm_and_index.second;
12808 
12809   SimVRegister temp;
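  // Duplicate the indexed element of Zm within each 128-bit segment; the
  // indexed dot product forms operate on a per-segment basis.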
12810   dup_elements_to_segments(vform, temp, zm, index);
12811 
12812   switch (form_hash_) {
12813     case "sdot_z_zzzi_d"_h:
12814     case "sdot_z_zzzi_s"_h:
12815       sdot(vform, zda, zn, temp);
12816       break;
12817     case "udot_z_zzzi_d"_h:
12818     case "udot_z_zzzi_s"_h:
12819       udot(vform, zda, zn, temp);
12820       break;
12821     case "sudot_z_zzzi_s"_h:
12822       usdot(vform, zda, temp, zn);
12823       break;
12824     case "usdot_z_zzzi_s"_h:
12825       usdot(vform, zda, zn, temp);
12826       break;
12827     default:
12828       VIXL_UNIMPLEMENTED();
12829       break;
12830   }
12831 }
12832 
12833 void Simulator::SimulateMatrixMul(const Instruction* instr) {
12834   VectorFormat vform = kFormatVnS;
12835   SimVRegister& dn = ReadVRegister(instr->GetRd());
12836   SimVRegister& n = ReadVRegister(instr->GetRn());
12837   SimVRegister& m = ReadVRegister(instr->GetRm());
12838 
12839   bool n_signed = false;
12840   bool m_signed = false;
12841   switch (form_hash_) {
12842     case "smmla_asimdsame2_g"_h:
12843       vform = kFormat4S;
12844       VIXL_FALLTHROUGH();
12845     case "smmla_z_zzz"_h:
12846       n_signed = m_signed = true;
12847       break;
12848     case "ummla_asimdsame2_g"_h:
12849       vform = kFormat4S;
12850       VIXL_FALLTHROUGH();
12851     case "ummla_z_zzz"_h:
12852       // Nothing to do.
12853       break;
12854     case "usmmla_asimdsame2_g"_h:
12855       vform = kFormat4S;
12856       VIXL_FALLTHROUGH();
12857     case "usmmla_z_zzz"_h:
12858       m_signed = true;
12859       break;
12860     default:
12861       VIXL_UNIMPLEMENTED();
12862       break;
12863   }
12864   matmul(vform, dn, n, m, n_signed, m_signed);
12865 }
12866 
12867 void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
12868   VectorFormat vform = instr->GetSVEVectorFormat();
12869   SimVRegister& zdn = ReadVRegister(instr->GetRd());
12870   SimVRegister& zn = ReadVRegister(instr->GetRn());
12871   SimVRegister& zm = ReadVRegister(instr->GetRm());
12872 
12873   switch (form_hash_) {
12874     case "fmmla_z_zzz_s"_h:
12875     case "fmmla_z_zzz_d"_h:
12876       fmatmul(vform, zdn, zn, zm);
12877       break;
12878     default:
12879       VIXL_UNIMPLEMENTED();
12880       break;
12881   }
12882 }
12883 
12884 void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) {
12885   SimPRegister& pd = ReadPRegister(instr->GetPd());
12886   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
12887   SimPRegister& pn = ReadPRegister(instr->GetPn());
12888   SimPRegister result;
12889 
12890   switch (instr->Mask(SVEPartitionBreakConditionMask)) {
12891     case BRKAS_p_p_p_z:
12892     case BRKA_p_p_p:
12893       brka(result, pg, pn);
12894       break;
12895     case BRKBS_p_p_p_z:
12896     case BRKB_p_p_p:
12897       brkb(result, pg, pn);
12898       break;
12899     default:
12900       VIXL_UNIMPLEMENTED();
12901       break;
12902   }
12903 
12904   if (instr->ExtractBit(4) == 1) {
12905     mov_merging(pd, pg, result);
12906   } else {
12907     mov_zeroing(pd, pg, result);
12908   }
12909 
12910   // Set flags if needed.
12911   if (instr->ExtractBit(22) == 1) {
12912     PredTest(kFormatVnB, pg, pd);
12913   }
12914 }
12915 
12916 void Simulator::VisitSVEPropagateBreakToNextPartition(
12917     const Instruction* instr) {
12918   SimPRegister& pdm = ReadPRegister(instr->GetPd());
12919   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
12920   SimPRegister& pn = ReadPRegister(instr->GetPn());
12921 
12922   switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
12923     case BRKNS_p_p_pp:
12924     case BRKN_p_p_pp:
12925       brkn(pdm, pg, pn);
12926       break;
12927     default:
12928       VIXL_UNIMPLEMENTED();
12929       break;
12930   }
12931 
12932   // Set flags if needed.
12933   if (instr->ExtractBit(22) == 1) {
12934     // Note that this ignores `pg`.
12935     PredTest(kFormatVnB, GetPTrue(), pdm);
12936   }
12937 }
12938 
12939 void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) {
12940   SimPRegister& pd = ReadPRegister(instr->GetPd());
12941   SimPRegister& pn = ReadPRegister(instr->GetPn());
12942 
12943   SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
12944   SimVRegister zero;
12945   dup_immediate(kFormatVnB, zero, 0);
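  // Model the unpack by expanding the predicate into a vector, interleaving
  // it with zero using the vector zip helpers, then packing the result back
  // into Pd.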
12946 
12947   switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
12948     case PUNPKHI_p_p:
12949       zip2(kFormatVnB, temp, temp, zero);
12950       break;
12951     case PUNPKLO_p_p:
12952       zip1(kFormatVnB, temp, temp, zero);
12953       break;
12954     default:
12955       VIXL_UNIMPLEMENTED();
12956       break;
12957   }
12958   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
12959 }
12960 
12961 void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) {
12962   VectorFormat vform = instr->GetSVEVectorFormat();
12963   SimPRegister& pd = ReadPRegister(instr->GetPd());
12964   SimPRegister& pn = ReadPRegister(instr->GetPn());
12965   SimPRegister& pm = ReadPRegister(instr->GetPm());
12966 
12967   SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn);
12968   SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm);
12969 
12970   switch (instr->Mask(SVEPermutePredicateElementsMask)) {
12971     case TRN1_p_pp:
12972       trn1(vform, temp0, temp0, temp1);
12973       break;
12974     case TRN2_p_pp:
12975       trn2(vform, temp0, temp0, temp1);
12976       break;
12977     case UZP1_p_pp:
12978       uzp1(vform, temp0, temp0, temp1);
12979       break;
12980     case UZP2_p_pp:
12981       uzp2(vform, temp0, temp0, temp1);
12982       break;
12983     case ZIP1_p_pp:
12984       zip1(vform, temp0, temp0, temp1);
12985       break;
12986     case ZIP2_p_pp:
12987       zip2(vform, temp0, temp0, temp1);
12988       break;
12989     default:
12990       VIXL_UNIMPLEMENTED();
12991       break;
12992   }
12993   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0);
12994 }
12995 
12996 void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) {
12997   switch (instr->Mask(SVEReversePredicateElementsMask)) {
12998     case REV_p_p: {
12999       VectorFormat vform = instr->GetSVEVectorFormat();
13000       SimPRegister& pn = ReadPRegister(instr->GetPn());
13001       SimPRegister& pd = ReadPRegister(instr->GetPd());
13002       SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
13003       rev(vform, temp, temp);
13004       Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
13005       break;
13006     }
13007     default:
13008       VIXL_UNIMPLEMENTED();
13009       break;
13010   }
13011 }
13012 
13013 void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
13014   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13015   // Second source register "Zm" is encoded where "Zn" would usually be.
13016   SimVRegister& zm = ReadVRegister(instr->GetRn());
13017 
13018   int index = instr->GetSVEExtractImmediate();
13019   int vl = GetVectorLengthInBytes();
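  // An index at or beyond the current vector length is treated as zero.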
13020   index = (index >= vl) ? 0 : index;
13021 
13022   switch (instr->Mask(SVEPermuteVectorExtractMask)) {
13023     case EXT_z_zi_des:
13024       ext(kFormatVnB, zdn, zdn, zm, index);
13025       break;
13026     default:
13027       VIXL_UNIMPLEMENTED();
13028       break;
13029   }
13030 }
13031 
13032 void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) {
13033   VectorFormat vform = instr->GetSVEVectorFormat();
13034   SimVRegister& zd = ReadVRegister(instr->GetRd());
13035   SimVRegister& zn = ReadVRegister(instr->GetRn());
13036   SimVRegister& zm = ReadVRegister(instr->GetRm());
13037 
13038   switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
13039     case TRN1_z_zz:
13040       trn1(vform, zd, zn, zm);
13041       break;
13042     case TRN2_z_zz:
13043       trn2(vform, zd, zn, zm);
13044       break;
13045     case UZP1_z_zz:
13046       uzp1(vform, zd, zn, zm);
13047       break;
13048     case UZP2_z_zz:
13049       uzp2(vform, zd, zn, zm);
13050       break;
13051     case ZIP1_z_zz:
13052       zip1(vform, zd, zn, zm);
13053       break;
13054     case ZIP2_z_zz:
13055       zip2(vform, zd, zn, zm);
13056       break;
13057     default:
13058       VIXL_UNIMPLEMENTED();
13059       break;
13060   }
13061 }
13062 
13063 void Simulator::VisitSVEConditionallyBroadcastElementToVector(
13064     const Instruction* instr) {
13065   VectorFormat vform = instr->GetSVEVectorFormat();
13066   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13067   SimVRegister& zm = ReadVRegister(instr->GetRn());
13068   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13069 
13070   int active_offset = -1;
13071   switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
13072     case CLASTA_z_p_zz:
13073       active_offset = 1;
13074       break;
13075     case CLASTB_z_p_zz:
13076       active_offset = 0;
13077       break;
13078     default:
13079       VIXL_UNIMPLEMENTED();
13080       break;
13081   }
13082 
13083   if (active_offset >= 0) {
13084     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13085     if (value.first) {
13086       dup_immediate(vform, zdn, value.second);
13087     } else {
13088       // Trigger a line of trace for the operation, even though it doesn't
13089       // change the register value.
13090       mov(vform, zdn, zdn);
13091     }
13092   }
13093 }
13094 
13095 void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar(
13096     const Instruction* instr) {
13097   VectorFormat vform = instr->GetSVEVectorFormat();
13098   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13099   SimVRegister& zm = ReadVRegister(instr->GetRn());
13100   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13101 
13102   int active_offset = -1;
13103   switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
13104     case CLASTA_v_p_z:
13105       active_offset = 1;
13106       break;
13107     case CLASTB_v_p_z:
13108       active_offset = 0;
13109       break;
13110     default:
13111       VIXL_UNIMPLEMENTED();
13112       break;
13113   }
13114 
13115   if (active_offset >= 0) {
13116     LogicVRegister dst(vdn);
13117     uint64_t src1_value = dst.Uint(vform, 0);
13118     std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset);
13119     dup_immediate(vform, vdn, 0);
13120     dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value);
13121   }
13122 }
13123 
13124 void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister(
13125     const Instruction* instr) {
13126   VectorFormat vform = instr->GetSVEVectorFormat();
13127   SimVRegister& zm = ReadVRegister(instr->GetRn());
13128   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13129 
13130   int active_offset = -1;
13131   switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
13132     case CLASTA_r_p_z:
13133       active_offset = 1;
13134       break;
13135     case CLASTB_r_p_z:
13136       active_offset = 0;
13137       break;
13138     default:
13139       VIXL_UNIMPLEMENTED();
13140       break;
13141   }
13142 
13143   if (active_offset >= 0) {
13144     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13145     uint64_t masked_src = ReadXRegister(instr->GetRd()) &
13146                           GetUintMask(LaneSizeInBitsFromFormat(vform));
13147     WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src);
13148   }
13149 }
13150 
13151 void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister(
13152     const Instruction* instr) {
13153   VectorFormat vform = instr->GetSVEVectorFormat();
13154   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13155   SimVRegister& zm = ReadVRegister(instr->GetRn());
13156   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13157 
13158   int active_offset = -1;
13159   switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
13160     case LASTA_v_p_z:
13161       active_offset = 1;
13162       break;
13163     case LASTB_v_p_z:
13164       active_offset = 0;
13165       break;
13166     default:
13167       VIXL_UNIMPLEMENTED();
13168       break;
13169   }
13170 
13171   if (active_offset >= 0) {
13172     LogicVRegister dst(vdn);
13173     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13174     dup_immediate(vform, vdn, 0);
13175     dst.SetUint(vform, 0, value.second);
13176   }
13177 }
13178 
13179 void Simulator::VisitSVEExtractElementToGeneralRegister(
13180     const Instruction* instr) {
13181   VectorFormat vform = instr->GetSVEVectorFormat();
13182   SimVRegister& zm = ReadVRegister(instr->GetRn());
13183   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13184 
13185   int active_offset = -1;
13186   switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
13187     case LASTA_r_p_z:
13188       active_offset = 1;
13189       break;
13190     case LASTB_r_p_z:
13191       active_offset = 0;
13192       break;
13193     default:
13194       VIXL_UNIMPLEMENTED();
13195       break;
13196   }
13197 
13198   if (active_offset >= 0) {
13199     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13200     WriteXRegister(instr->GetRd(), value.second);
13201   }
13202 }
13203 
13204 void Simulator::VisitSVECompressActiveElements(const Instruction* instr) {
13205   VectorFormat vform = instr->GetSVEVectorFormat();
13206   SimVRegister& zd = ReadVRegister(instr->GetRd());
13207   SimVRegister& zn = ReadVRegister(instr->GetRn());
13208   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13209 
13210   switch (instr->Mask(SVECompressActiveElementsMask)) {
13211     case COMPACT_z_p_z:
13212       compact(vform, zd, pg, zn);
13213       break;
13214     default:
13215       VIXL_UNIMPLEMENTED();
13216       break;
13217   }
13218 }
13219 
13220 void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated(
13221     const Instruction* instr) {
13222   VectorFormat vform = instr->GetSVEVectorFormat();
13223   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13224   SimVRegister z_result;
13225 
13226   switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
13227     case CPY_z_p_r:
13228       dup_immediate(vform,
13229                     z_result,
13230                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13231       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
13232       break;
13233     default:
13234       VIXL_UNIMPLEMENTED();
13235       break;
13236   }
13237 }
13238 
13239 void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) {
13240   VectorFormat vform = instr->GetSVEVectorFormat();
13241   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
13242   SimVRegister& zd = ReadVRegister(instr->GetRd());
13243 
13244   SimVRegister result;
13245   switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
13246     case CPY_z_p_i: {
13247       // Use unsigned arithmetic to avoid undefined behaviour during the shift.
13248       uint64_t imm8 = instr->GetImmSVEIntWideSigned();
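      // Bit 13 selects an optional left shift by eight bits (the "LSL #8"
      // form of the immediate).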
13249       dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
13250       break;
13251     }
13252     default:
13253       VIXL_UNIMPLEMENTED();
13254       break;
13255   }
13256 
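  // Bit 14 distinguishes the merging form of the predicated copy from the
  // zeroing form.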
13257   if (instr->ExtractBit(14) != 0) {
13258     mov_merging(vform, zd, pg, result);
13259   } else {
13260     mov_zeroing(vform, zd, pg, result);
13261   }
13262 }
13263 
13264 void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
13265   SimVRegister& zd = ReadVRegister(instr->GetRd());
13266   SimVRegister& zn = ReadVRegister(instr->GetRn());
13267   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13268   SimVRegister result;
13269 
13270   // In NEON, the chunk size in which elements are REVersed is given by the
13271   // instruction mnemonic, and the element size is attached to the register.
13272   // SVE swaps these roles; the mapping to logic functions below accounts
13273   // for this.
13274   VectorFormat chunk_form = instr->GetSVEVectorFormat();
13275   VectorFormat element_form = kFormatUndefined;
13276 
13277   switch (instr->Mask(SVEReverseWithinElementsMask)) {
13278     case RBIT_z_p_z:
13279       rbit(chunk_form, result, zn);
13280       break;
13281     case REVB_z_z:
13282       VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
13283                   (chunk_form == kFormatVnD));
13284       element_form = kFormatVnB;
13285       break;
13286     case REVH_z_z:
13287       VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
13288       element_form = kFormatVnH;
13289       break;
13290     case REVW_z_z:
13291       VIXL_ASSERT(chunk_form == kFormatVnD);
13292       element_form = kFormatVnS;
13293       break;
13294     default:
13295       VIXL_UNIMPLEMENTED();
13296       break;
13297   }
13298 
13299   if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
13300     VIXL_ASSERT(element_form != kFormatUndefined);
13301     switch (chunk_form) {
13302       case kFormatVnH:
13303         rev16(element_form, result, zn);
13304         break;
13305       case kFormatVnS:
13306         rev32(element_form, result, zn);
13307         break;
13308       case kFormatVnD:
13309         rev64(element_form, result, zn);
13310         break;
13311       default:
13312         VIXL_UNIMPLEMENTED();
13313     }
13314   }
13315 
13316   mov_merging(chunk_form, zd, pg, result);
13317 }
13318 
13319 void Simulator::VisitSVEVectorSplice(const Instruction* instr) {
13320   VectorFormat vform = instr->GetSVEVectorFormat();
13321   SimVRegister& zd = ReadVRegister(instr->GetRd());
13322   SimVRegister& zn = ReadVRegister(instr->GetRn());
13323   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13324   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13325 
13326   switch (form_hash_) {
13327     case "splice_z_p_zz_des"_h:
13328       splice(vform, zd, pg, zd, zn);
13329       break;
13330     case "splice_z_p_zz_con"_h:
13331       splice(vform, zd, pg, zn, zn2);
13332       break;
13333     default:
13334       VIXL_UNIMPLEMENTED();
13335       break;
13336   }
13337 }
13338 
13339 void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
13340   SimVRegister& zd = ReadVRegister(instr->GetRd());
13341   switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
13342     case DUP_z_r:
13343       dup_immediate(instr->GetSVEVectorFormat(),
13344                     zd,
13345                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13346       break;
13347     default:
13348       VIXL_UNIMPLEMENTED();
13349       break;
13350   }
13351 }
13352 
13353 void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
13354   SimVRegister& zd = ReadVRegister(instr->GetRd());
13355   VectorFormat vform = instr->GetSVEVectorFormat();
13356   switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
13357     case INSR_z_v:
13358       insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
13359       break;
13360     default:
13361       VIXL_UNIMPLEMENTED();
13362       break;
13363   }
13364 }
13365 
13366 void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
13367   SimVRegister& zd = ReadVRegister(instr->GetRd());
13368   VectorFormat vform = instr->GetSVEVectorFormat();
13369   switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
13370     case INSR_z_r:
13371       insr(vform, zd, ReadXRegister(instr->GetRn()));
13372       break;
13373     default:
13374       VIXL_UNIMPLEMENTED();
13375       break;
13376   }
13377 }
13378 
13379 void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
13380   SimVRegister& zd = ReadVRegister(instr->GetRd());
13381   switch (instr->Mask(SVEBroadcastIndexElementMask)) {
13382     case DUP_z_zi: {
13383       std::pair<int, int> index_and_lane_size =
13384           instr->GetSVEPermuteIndexAndLaneSizeLog2();
13385       int index = index_and_lane_size.first;
13386       int lane_size_in_bytes_log_2 = index_and_lane_size.second;
13387       VectorFormat vform =
13388           SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2);
13389       if ((index < 0) || (index >= LaneCountFromFormat(vform))) {
13390         // Out of bounds; set the destination register to zero.
13391         dup_immediate(kFormatVnD, zd, 0);
13392       } else {
13393         dup_element(vform, zd, ReadVRegister(instr->GetRn()), index);
13394       }
13395       return;
13396     }
13397     default:
13398       VIXL_UNIMPLEMENTED();
13399       break;
13400   }
13401 }
13402 
13403 void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) {
13404   SimVRegister& zd = ReadVRegister(instr->GetRd());
13405   VectorFormat vform = instr->GetSVEVectorFormat();
13406   switch (instr->Mask(SVEReverseVectorElementsMask)) {
13407     case REV_z_z:
13408       rev(vform, zd, ReadVRegister(instr->GetRn()));
13409       break;
13410     default:
13411       VIXL_UNIMPLEMENTED();
13412       break;
13413   }
13414 }
13415 
13416 void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
13417   SimVRegister& zd = ReadVRegister(instr->GetRd());
13418   VectorFormat vform = instr->GetSVEVectorFormat();
13419   switch (instr->Mask(SVEUnpackVectorElementsMask)) {
13420     case SUNPKHI_z_z:
13421       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend);
13422       break;
13423     case SUNPKLO_z_z:
13424       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend);
13425       break;
13426     case UUNPKHI_z_z:
13427       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend);
13428       break;
13429     case UUNPKLO_z_z:
13430       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend);
13431       break;
13432     default:
13433       VIXL_UNIMPLEMENTED();
13434       break;
13435   }
13436 }
13437 
13438 void Simulator::VisitSVETableLookup(const Instruction* instr) {
13439   VectorFormat vform = instr->GetSVEVectorFormat();
13440   SimVRegister& zd = ReadVRegister(instr->GetRd());
13441   SimVRegister& zn = ReadVRegister(instr->GetRn());
13442   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13443   SimVRegister& zm = ReadVRegister(instr->GetRm());
13444 
13445   switch (form_hash_) {
13446     case "tbl_z_zz_1"_h:
13447       tbl(vform, zd, zn, zm);
13448       break;
13449     case "tbl_z_zz_2"_h:
13450       tbl(vform, zd, zn, zn2, zm);
13451       break;
13452     case "tbx_z_zz"_h:
13453       tbx(vform, zd, zn, zm);
13454       break;
13455     default:
13456       VIXL_UNIMPLEMENTED();
13457       break;
13458   }
13459 }
13460 
13461 void Simulator::VisitSVEPredicateCount(const Instruction* instr) {
13462   VectorFormat vform = instr->GetSVEVectorFormat();
13463   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13464   SimPRegister& pn = ReadPRegister(instr->GetPn());
13465 
13466   switch (instr->Mask(SVEPredicateCountMask)) {
13467     case CNTP_r_p_p: {
13468       WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn));
13469       break;
13470     }
13471     default:
13472       VIXL_UNIMPLEMENTED();
13473       break;
13474   }
13475 }
13476 
13477 void Simulator::VisitSVEPredicateLogical(const Instruction* instr) {
13478   Instr op = instr->Mask(SVEPredicateLogicalMask);
13479   SimPRegister& pd = ReadPRegister(instr->GetPd());
13480   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13481   SimPRegister& pn = ReadPRegister(instr->GetPn());
13482   SimPRegister& pm = ReadPRegister(instr->GetPm());
13483   SimPRegister result;
13484   switch (op) {
13485     case ANDS_p_p_pp_z:
13486     case AND_p_p_pp_z:
13487     case BICS_p_p_pp_z:
13488     case BIC_p_p_pp_z:
13489     case EORS_p_p_pp_z:
13490     case EOR_p_p_pp_z:
13491     case NANDS_p_p_pp_z:
13492     case NAND_p_p_pp_z:
13493     case NORS_p_p_pp_z:
13494     case NOR_p_p_pp_z:
13495     case ORNS_p_p_pp_z:
13496     case ORN_p_p_pp_z:
13497     case ORRS_p_p_pp_z:
13498     case ORR_p_p_pp_z:
13499       SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op),
13500                                 result,
13501                                 pn,
13502                                 pm);
13503       break;
13504     case SEL_p_p_pp:
13505       sel(pd, pg, pn, pm);
13506       return;
13507     default:
13508       VIXL_UNIMPLEMENTED();
13509       break;
13510   }
13511 
13512   mov_zeroing(pd, pg, result);
13513   if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) {
13514     PredTest(kFormatVnB, pg, pd);
13515   }
13516 }
13517 
13518 void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) {
13519   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13520   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13521   switch (instr->Mask(SVEPredicateFirstActiveMask)) {
13522     case PFIRST_p_p_p:
13523       pfirst(pdn, pg, pdn);
13524       // TODO: Is this broken when pg == pdn?
13525       PredTest(kFormatVnB, pg, pdn);
13526       break;
13527     default:
13528       VIXL_UNIMPLEMENTED();
13529       break;
13530   }
13531 }
13532 
13533 void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) {
13534   // This group only contains PTRUE{S}, and there are no unallocated encodings.
13535   VIXL_STATIC_ASSERT(
13536       SVEPredicateInitializeMask ==
13537       (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
13538   VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
13539               (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
13540 
13541   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13542   VectorFormat vform = instr->GetSVEVectorFormat();
13543 
13544   ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint());
13545   if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn);
13546 }
13547 
13548 void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) {
13549   // This group only contains PNEXT, and there are no unallocated encodings.
13550   VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
13551   VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
13552 
13553   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13554   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13555   VectorFormat vform = instr->GetSVEVectorFormat();
13556 
13557   pnext(vform, pdn, pg, pdn);
13558   // TODO: Is this broken when pg == pdn?
13559   PredTest(vform, pg, pdn);
13560 }
13561 
13562 void Simulator::VisitSVEPredicateReadFromFFR_Predicated(
13563     const Instruction* instr) {
13564   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13565   LogicPRegister pg(ReadPRegister(instr->GetPn()));
13566   FlagsUpdate flags = LeaveFlags;
13567   switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
13568     case RDFFR_p_p_f:
13569       // Do nothing.
13570       break;
13571     case RDFFRS_p_p_f:
13572       flags = SetFlags;
13573       break;
13574     default:
13575       VIXL_UNIMPLEMENTED();
13576       break;
13577   }
13578 
13579   LogicPRegister ffr(ReadFFR());
13580   mov_zeroing(pd, pg, ffr);
13581 
13582   if (flags == SetFlags) {
13583     PredTest(kFormatVnB, pg, pd);
13584   }
13585 }
13586 
13587 void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated(
13588     const Instruction* instr) {
13589   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13590   LogicPRegister ffr(ReadFFR());
13591   switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
13592     case RDFFR_p_f:
13593       mov(pd, ffr);
13594       break;
13595     default:
13596       VIXL_UNIMPLEMENTED();
13597       break;
13598   }
13599 }
13600 
13601 void Simulator::VisitSVEPredicateTest(const Instruction* instr) {
13602   switch (instr->Mask(SVEPredicateTestMask)) {
13603     case PTEST_p_p:
13604       PredTest(kFormatVnB,
13605                ReadPRegister(instr->ExtractBits(13, 10)),
13606                ReadPRegister(instr->GetPn()));
13607       break;
13608     default:
13609       VIXL_UNIMPLEMENTED();
13610       break;
13611   }
13612 }
13613 
13614 void Simulator::VisitSVEPredicateZero(const Instruction* instr) {
13615   switch (instr->Mask(SVEPredicateZeroMask)) {
13616     case PFALSE_p:
13617       pfalse(ReadPRegister(instr->GetPd()));
13618       break;
13619     default:
13620       VIXL_UNIMPLEMENTED();
13621       break;
13622   }
13623 }
13624 
13625 void Simulator::VisitSVEPropagateBreak(const Instruction* instr) {
13626   SimPRegister& pd = ReadPRegister(instr->GetPd());
13627   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13628   SimPRegister& pn = ReadPRegister(instr->GetPn());
13629   SimPRegister& pm = ReadPRegister(instr->GetPm());
13630 
13631   bool set_flags = false;
13632   switch (instr->Mask(SVEPropagateBreakMask)) {
13633     case BRKPAS_p_p_pp:
13634       set_flags = true;
13635       VIXL_FALLTHROUGH();
13636     case BRKPA_p_p_pp:
13637       brkpa(pd, pg, pn, pm);
13638       break;
13639     case BRKPBS_p_p_pp:
13640       set_flags = true;
13641       VIXL_FALLTHROUGH();
13642     case BRKPB_p_p_pp:
13643       brkpb(pd, pg, pn, pm);
13644       break;
13645     default:
13646       VIXL_UNIMPLEMENTED();
13647       break;
13648   }
13649 
13650   if (set_flags) {
13651     PredTest(kFormatVnB, pg, pd);
13652   }
13653 }
13654 
13655 void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) {
13656   uint64_t length = 0;
13657   switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
13658     case ADDPL_r_ri:
13659       length = GetPredicateLengthInBytes();
13660       break;
13661     case ADDVL_r_ri:
13662       length = GetVectorLengthInBytes();
13663       break;
13664     default:
13665       VIXL_UNIMPLEMENTED();
13666   }
13667   uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
13668   WriteXRegister(instr->GetRd(),
13669                  base + (length * instr->GetImmSVEVLScale()),
13670                  LogRegWrites,
13671                  Reg31IsStackPointer);
13672 }
13673 
13674 void Simulator::VisitSVEStackFrameSize(const Instruction* instr) {
13675   int64_t scale = instr->GetImmSVEVLScale();
13676 
13677   switch (instr->Mask(SVEStackFrameSizeMask)) {
13678     case RDVL_r_i:
13679       WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale);
13680       break;
13681     default:
13682       VIXL_UNIMPLEMENTED();
13683   }
13684 }
13685 
13686 void Simulator::VisitSVEVectorSelect(const Instruction* instr) {
13687   // The only instruction in this group is `sel`, and there are no unused
13688   // encodings.
13689   VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz);
13690 
13691   VectorFormat vform = instr->GetSVEVectorFormat();
13692   SimVRegister& zd = ReadVRegister(instr->GetRd());
13693   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13694   SimVRegister& zn = ReadVRegister(instr->GetRn());
13695   SimVRegister& zm = ReadVRegister(instr->GetRm());
13696 
13697   sel(vform, zd, pg, zn, zm);
13698 }
13699 
13700 void Simulator::VisitSVEFFRInitialise(const Instruction* instr) {
13701   switch (instr->Mask(SVEFFRInitialiseMask)) {
13702     case SETFFR_f: {
13703       LogicPRegister ffr(ReadFFR());
13704       ffr.SetAllBits();
13705       break;
13706     }
13707     default:
13708       VIXL_UNIMPLEMENTED();
13709       break;
13710   }
13711 }
13712 
13713 void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) {
13714   switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
13715     case WRFFR_f_p: {
13716       SimPRegister pn(ReadPRegister(instr->GetPn()));
13717       bool last_active = true;
13718       for (unsigned i = 0; i < pn.GetSizeInBits(); i++) {
13719         bool active = pn.GetBit(i);
13720         if (active && !last_active) {
13721           // `pn` is non-monotonic. This is UNPREDICTABLE.
13722           VIXL_ABORT();
13723         }
13724         last_active = active;
13725       }
13726       mov(ReadFFR(), pn);
13727       break;
13728     }
13729     default:
13730       VIXL_UNIMPLEMENTED();
13731       break;
13732   }
13733 }
13734 
13735 void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) {
13736   bool is_signed;
13737   switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
13738     case LD1B_z_p_bi_u8:
13739     case LD1B_z_p_bi_u16:
13740     case LD1B_z_p_bi_u32:
13741     case LD1B_z_p_bi_u64:
13742     case LD1H_z_p_bi_u16:
13743     case LD1H_z_p_bi_u32:
13744     case LD1H_z_p_bi_u64:
13745     case LD1W_z_p_bi_u32:
13746     case LD1W_z_p_bi_u64:
13747     case LD1D_z_p_bi_u64:
13748       is_signed = false;
13749       break;
13750     case LD1SB_z_p_bi_s16:
13751     case LD1SB_z_p_bi_s32:
13752     case LD1SB_z_p_bi_s64:
13753     case LD1SH_z_p_bi_s32:
13754     case LD1SH_z_p_bi_s64:
13755     case LD1SW_z_p_bi_s64:
13756       is_signed = true;
13757       break;
13758     default:
13759       // This encoding group is complete, so no other values should be possible.
13760       VIXL_UNREACHABLE();
13761       is_signed = false;
13762       break;
13763   }
13764 
13765   int vl = GetVectorLengthInBytes();
13766   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
13767   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
13768   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
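  // As for the contiguous stores, the signed immediate is a multiple of the
  // bytes accessed per vector, which is reduced for extending loads.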
13769   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
13770   uint64_t offset =
13771       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
13772   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
13773   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
13774   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
13775   SVEStructuredLoadHelper(vform,
13776                           ReadPRegister(instr->GetPgLow8()),
13777                           instr->GetRt(),
13778                           addr,
13779                           is_signed);
13780 }
13781 
13782 void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar(
13783     const Instruction* instr) {
13784   bool is_signed;
13785   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
13786     case LD1B_z_p_br_u8:
13787     case LD1B_z_p_br_u16:
13788     case LD1B_z_p_br_u32:
13789     case LD1B_z_p_br_u64:
13790     case LD1H_z_p_br_u16:
13791     case LD1H_z_p_br_u32:
13792     case LD1H_z_p_br_u64:
13793     case LD1W_z_p_br_u32:
13794     case LD1W_z_p_br_u64:
13795     case LD1D_z_p_br_u64:
13796       is_signed = false;
13797       break;
13798     case LD1SB_z_p_br_s16:
13799     case LD1SB_z_p_br_s32:
13800     case LD1SB_z_p_br_s64:
13801     case LD1SH_z_p_br_s32:
13802     case LD1SH_z_p_br_s64:
13803     case LD1SW_z_p_br_s64:
13804       is_signed = true;
13805       break;
13806     default:
13807       // This encoding group is complete, so no other values should be possible.
13808       VIXL_UNREACHABLE();
13809       is_signed = false;
13810       break;
13811   }
13812 
13813   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
13814   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
13815   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
13816   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
13817   uint64_t offset = ReadXRegister(instr->GetRm());
13818   offset <<= msize_in_bytes_log2;
13819   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
13820   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
13821   SVEStructuredLoadHelper(vform,
13822                           ReadPRegister(instr->GetPgLow8()),
13823                           instr->GetRt(),
13824                           addr,
13825                           is_signed);
13826 }
13827 
13828 void Simulator::DoUnreachable(const Instruction* instr) {
13829   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13830               (instr->GetImmException() == kUnreachableOpcode));
13831 
13832   fprintf(stream_,
13833           "Hit UNREACHABLE marker at pc=%p.\n",
13834           reinterpret_cast<const void*>(instr));
13835   abort();
13836 }
13837 
13838 
13839 void Simulator::DoTrace(const Instruction* instr) {
13840   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13841               (instr->GetImmException() == kTraceOpcode));
13842 
13843   // Read the arguments encoded inline in the instruction stream.
13844   uint32_t parameters;
13845   uint32_t command;
13846 
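  // sizeof(Instruction) is one byte, so arithmetic on `instr` is in bytes.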
13847   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
13848   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
13849   memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
13850 
13851   switch (command) {
13852     case TRACE_ENABLE:
13853       SetTraceParameters(GetTraceParameters() | parameters);
13854       break;
13855     case TRACE_DISABLE:
13856       SetTraceParameters(GetTraceParameters() & ~parameters);
13857       break;
13858     default:
13859       VIXL_UNREACHABLE();
13860   }
13861 
13862   WritePc(instr->GetInstructionAtOffset(kTraceLength));
13863 }
13864 
13865 
13866 void Simulator::DoLog(const Instruction* instr) {
13867   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13868               (instr->GetImmException() == kLogOpcode));
13869 
13870   // Read the arguments encoded inline in the instruction stream.
13871   uint32_t parameters;
13872 
13873   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
13874   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
13875 
13876   // We don't support a one-shot LOG_DISASM.
13877   VIXL_ASSERT((parameters & LOG_DISASM) == 0);
13878   // Print the requested information.
13879   if (parameters & LOG_SYSREGS) PrintSystemRegisters();
13880   if (parameters & LOG_REGS) PrintRegisters();
13881   if (parameters & LOG_VREGS) PrintVRegisters();
13882 
13883   WritePc(instr->GetInstructionAtOffset(kLogLength));
13884 }
13885 
13886 
13887 void Simulator::DoPrintf(const Instruction* instr) {
13888   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13889               (instr->GetImmException() == kPrintfOpcode));
13890 
13891   // Read the arguments encoded inline in the instruction stream.
13892   uint32_t arg_count;
13893   uint32_t arg_pattern_list;
13894   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
13895   memcpy(&arg_count, instr + kPrintfArgCountOffset, sizeof(arg_count));
13896   memcpy(&arg_pattern_list,
13897          instr + kPrintfArgPatternListOffset,
13898          sizeof(arg_pattern_list));
13899 
13900   VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
13901   VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
13902 
13903   // We need to call the host printf function with a set of arguments defined by
13904   // arg_pattern_list. Because we don't know the types and sizes of the
13905   // arguments, this is very difficult to do in a robust and portable way. To
13906   // work around the problem, we pick apart the format string, and print one
13907   // format placeholder at a time.
13908 
13909   // Allocate space for the format string. We take a copy, so we can modify it.
13910   // Leave enough space for one extra character per expected argument (plus the
13911   // '\0' termination).
13912   const char* format_base = ReadRegister<const char*>(0);
13913   VIXL_ASSERT(format_base != NULL);
13914   size_t length = strlen(format_base) + 1;
13915   char* const format = allocator_.New<char[]>(length + arg_count);
13916   // A list of chunks, each with exactly one format placeholder.
13917   const char* chunks[kPrintfMaxArgCount];
13918 
13919   // Copy the format string and search for format placeholders.
13920   uint32_t placeholder_count = 0;
13921   char* format_scratch = format;
13922   for (size_t i = 0; i < length; i++) {
13923     if (format_base[i] != '%') {
13924       *format_scratch++ = format_base[i];
13925     } else {
13926       if (format_base[i + 1] == '%') {
13927         // Ignore explicit "%%" sequences.
13928         *format_scratch++ = format_base[i];
13929         i++;
13930         // Chunks after the first are passed as format strings to printf, so we
13931         // need to escape '%' characters in those chunks.
13932         if (placeholder_count > 0) *format_scratch++ = format_base[i];
13933       } else {
13934         VIXL_CHECK(placeholder_count < arg_count);
13935         // Insert '\0' before placeholders, and store their locations.
13936         *format_scratch++ = '\0';
13937         chunks[placeholder_count++] = format_scratch;
13938         *format_scratch++ = format_base[i];
13939       }
13940     }
13941   }
13942   VIXL_CHECK(placeholder_count == arg_count);
13943 
13944   // Finally, call printf with each chunk, passing the appropriate register
13945   // argument. Normally, printf returns the number of bytes transmitted, so we
13946   // can emulate a single printf call by adding the result from each chunk. If
13947   // any call returns a negative (error) value, though, just return that value.
13948 
13949   printf("%s", clr_printf);
13950 
13951   // Because '\0' is inserted before each placeholder, the first string in
13952   // 'format' contains no format placeholders and should be printed literally.
13953   int result = printf("%s", format);
13954   int pcs_r = 1;  // Start at x1. x0 holds the format string.
13955   int pcs_f = 0;  // Start at d0.
13956   if (result >= 0) {
13957     for (uint32_t i = 0; i < placeholder_count; i++) {
13958       int part_result = -1;
13959 
13960       uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
13961       arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
13962       switch (arg_pattern) {
13963         case kPrintfArgW:
13964           part_result = printf(chunks[i], ReadWRegister(pcs_r++));
13965           break;
13966         case kPrintfArgX:
13967           part_result = printf(chunks[i], ReadXRegister(pcs_r++));
13968           break;
13969         case kPrintfArgD:
13970           part_result = printf(chunks[i], ReadDRegister(pcs_f++));
13971           break;
13972         default:
13973           VIXL_UNREACHABLE();
13974       }
13975 
13976       if (part_result < 0) {
13977         // Handle error values.
13978         result = part_result;
13979         break;
13980       }
13981 
13982       result += part_result;
13983     }
13984   }
13985 
13986   printf("%s", clr_normal);
13987 
13988   // Printf returns its result in x0 (just like the C library's printf).
13989   WriteXRegister(0, result);
13990 
13991   // The printf parameters are inlined in the code, so skip them.
13992   WritePc(instr->GetInstructionAtOffset(kPrintfLength));
13993 
13994   // Set LR as if we'd just called a native printf function.
13995   WriteLr(ReadPc());
13996   allocator_.DeleteArray(format);
13997 }
13998 
13999 
14000 #ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
14001 void Simulator::DoRuntimeCall(const Instruction* instr) {
14002   VIXL_STATIC_ASSERT(kRuntimeCallAddressSize == sizeof(uintptr_t));
14003   // The appropriate `Simulator::SimulateRuntimeCall()` wrapper and the function
14004   // to call are passed inline in the instruction stream.
14005   uintptr_t call_wrapper_address =
14006       MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset);
14007   uintptr_t function_address =
14008       MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset);
14009   RuntimeCallType call_type = static_cast<RuntimeCallType>(
14010       MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
14011   auto runtime_call_wrapper =
14012       reinterpret_cast<void (*)(Simulator*, uintptr_t)>(call_wrapper_address);
14013 
14014   if (call_type == kCallRuntime) {
14015     WriteRegister(kLinkRegCode,
14016                   instr->GetInstructionAtOffset(kRuntimeCallLength));
14017   }
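  // For the other call type (a tail call), the link register is left
  // untouched, so the pc write below returns directly to the original caller.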
14018   runtime_call_wrapper(this, function_address);
14019   // Read the return address from `lr` and write it into `pc`.
14020   WritePc(ReadRegister<Instruction*>(kLinkRegCode));
14021 }
14022 #else
14023 void Simulator::DoRuntimeCall(const Instruction* instr) {
14024   USE(instr);
14025   VIXL_UNREACHABLE();
14026 }
14027 #endif
14028 
14029 
14030 void Simulator::DoConfigureCPUFeatures(const Instruction* instr) {
14031   VIXL_ASSERT(instr->Mask(ExceptionMask) == HLT);
14032 
14033   typedef ConfigureCPUFeaturesElementType ElementType;
14034   VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <
14035               std::numeric_limits<ElementType>::max());
14036 
14037   // k{Set,Enable,Disable}CPUFeatures have the same parameter encoding.
14038 
14039   size_t element_size = sizeof(ElementType);
14040   size_t offset = kConfigureCPUFeaturesListOffset;
14041 
14042   // Read the kNone-terminated list of features.
14043   CPUFeatures parameters;
14044   while (true) {
14045     ElementType feature = MemRead<ElementType>(instr + offset);
14046     offset += element_size;
14047     if (feature == static_cast<ElementType>(CPUFeatures::kNone)) break;
14048     parameters.Combine(static_cast<CPUFeatures::Feature>(feature));
14049   }
14050 
14051   switch (instr->GetImmException()) {
14052     case kSetCPUFeaturesOpcode:
14053       SetCPUFeatures(parameters);
14054       break;
14055     case kEnableCPUFeaturesOpcode:
14056       GetCPUFeatures()->Combine(parameters);
14057       break;
14058     case kDisableCPUFeaturesOpcode:
14059       GetCPUFeatures()->Remove(parameters);
14060       break;
14061     default:
14062       VIXL_UNREACHABLE();
14063       break;
14064   }
14065 
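  // Skip over the inlined feature list, rounding up to an instruction
  // boundary.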
14066   WritePc(instr->GetInstructionAtOffset(AlignUp(offset, kInstructionSize)));
14067 }
14068 
14069 
14070 void Simulator::DoSaveCPUFeatures(const Instruction* instr) {
14071   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14072               (instr->GetImmException() == kSaveCPUFeaturesOpcode));
14073   USE(instr);
14074 
14075   saved_cpu_features_.push_back(*GetCPUFeatures());
14076 }
14077 
14078 
14079 void Simulator::DoRestoreCPUFeatures(const Instruction* instr) {
14080   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
14081               (instr->GetImmException() == kRestoreCPUFeaturesOpcode));
14082   USE(instr);
14083 
14084   SetCPUFeatures(saved_cpu_features_.back());
14085   saved_cpu_features_.pop_back();
14086 }
14087 
14088 
14089 }  // namespace aarch64
14090 }  // namespace vixl
14091 
14092 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
14093