// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

#include "simulator-aarch64.h"

#include <cmath>
#include <cstring>
#include <errno.h>
#include <limits>
#include <sys/mman.h>
#include <unistd.h>

namespace vixl {
namespace aarch64 {

using vixl::internal::SimFloat16;

const Instruction* Simulator::kEndOfSimAddress = NULL;

bool MetaDataDepot::MetaDataMTE::is_active = false;

void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
  int width = msb - lsb + 1;
  VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));

  bits <<= lsb;
  uint32_t mask = ((1 << width) - 1) << lsb;
  VIXL_ASSERT((mask & write_ignore_mask_) == 0);

  value_ = (value_ & ~mask) | (bits & mask);
}


SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
  switch (id) {
    case NZCV:
      return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
    case FPCR:
      return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
    default:
      VIXL_UNREACHABLE();
      return SimSystemRegister();
  }
}

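// Map instruction forms (hashed via the "_h" literal suffix) to the simulator
// visitor functions that implement them, in addition to the defaults provided
// by DEFAULT_FORM_TO_VISITOR_MAP.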
const Simulator::FormToVisitorFnMap Simulator::FORM_TO_VISITOR = {
    DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
    SIM_AUD_VISITOR_MAP(Simulator),
    {"smlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"smlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"smull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"sqdmlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"sqdmlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"sqdmull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"umlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"umlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"umull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    {"fcmla_asimdelem_c_h"_h, &Simulator::SimulateNEONComplexMulByElement},
    {"fcmla_asimdelem_c_s"_h, &Simulator::SimulateNEONComplexMulByElement},
    {"fmlal2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    {"fmlal_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    {"fmlsl2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    {"fmlsl_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    {"fmla_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmls_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmulx_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmul_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmla_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmls_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmulx_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    {"fmul_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    {"sdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    {"udot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    {"adclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    {"adclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    {"addhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"addhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"addp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    {"bcax_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    {"bdep_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"bext_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"bgrp_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"bsl1n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    {"bsl2n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    {"bsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    {"cadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
    {"cdot_z_zzz"_h, &Simulator::SimulateSVEComplexDotProduct},
    {"cdot_z_zzzi_d"_h, &Simulator::SimulateSVEComplexDotProduct},
    {"cdot_z_zzzi_s"_h, &Simulator::SimulateSVEComplexDotProduct},
    {"cmla_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    {"cmla_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    {"cmla_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    {"eor3_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    {"eorbt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"eortb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"ext_z_zi_con"_h, &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm},
    {"faddp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    {"fcvtlt_z_p_z_h2s"_h, &Simulator::SimulateSVEFPConvertLong},
    {"fcvtlt_z_p_z_s2d"_h, &Simulator::SimulateSVEFPConvertLong},
    {"fcvtnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
    {"fcvtnt_z_p_z_s2h"_h, &Simulator::Simulate_ZdH_PgM_ZnS},
    {"fcvtx_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
    {"fcvtxnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
    {"flogb_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
    {"fmaxnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    {"fmaxp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    {"fminnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    {"fminp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    {"fmlalb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    {"fmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"fmlalt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    {"fmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"fmlslb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    {"fmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"fmlslt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    {"fmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"histcnt_z_p_zz"_h, &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT},
    {"histseg_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
    {"ldnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    {"ldnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    {"ldnt1sb_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    {"ldnt1sh_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    {"ldnt1sw_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    {"ldnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    {"match_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
    {"mla_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
    {"mla_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
    {"mla_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
    {"mls_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
    {"mls_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
    {"mls_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
    {"mul_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"mul_z_zzi_d"_h, &Simulator::SimulateSVEMulIndex},
    {"mul_z_zzi_h"_h, &Simulator::SimulateSVEMulIndex},
    {"mul_z_zzi_s"_h, &Simulator::SimulateSVEMulIndex},
    {"nbsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    {"nmatch_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
    {"pmul_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
    {"pmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"pmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"raddhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"raddhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"rshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"rshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"rsubhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"rsubhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"saba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
    {"sabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"sabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"sabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"sabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"sadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
    {"saddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"saddlbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"saddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"saddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"saddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"sbclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    {"sbclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    {"shadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"shrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"shrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"shsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"shsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"sli_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
    {"smaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    {"sminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    {"smlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"smlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"smlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"smlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"smlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"smullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"smullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"smullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"smullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"splice_z_p_zz_con"_h, &Simulator::VisitSVEVectorSplice},
    {"sqabs_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
    {"sqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"sqcadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
    {"sqdmlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"sqdmlalb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    {"sqdmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"sqdmlalbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"sqdmlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"sqdmlalt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    {"sqdmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"sqdmlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"sqdmlslb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    {"sqdmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"sqdmlslbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"sqdmlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"sqdmlslt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    {"sqdmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    {"sqdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"sqdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    {"sqdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    {"sqdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    {"sqdmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"sqdmullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"sqdmullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"sqdmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"sqdmullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"sqdmullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"sqneg_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
    {"sqrdcmlah_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    {"sqrdcmlah_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    {"sqrdcmlah_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    {"sqrdmlah_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlah_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlah_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlah_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlsh_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlsh_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlsh_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmlsh_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    {"sqrdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"sqrdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    {"sqrdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    {"sqrdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    {"sqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"sqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"sqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqrshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqrshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    {"sqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"sqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"sqshlu_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    {"sqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"sqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"sqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"sqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
    {"sqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
    {"sqxtunb_z_zz"_h, &Simulator::SimulateSVENarrow},
    {"sqxtunt_z_zz"_h, &Simulator::SimulateSVENarrow},
    {"srhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"sri_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
    {"srshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"srshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"srshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    {"srsra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    {"sshllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    {"sshllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    {"ssra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    {"ssublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"ssublbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"ssublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"ssubltb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"ssubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"ssubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"stnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    {"stnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
    {"stnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    {"stnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    {"stnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
    {"stnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    {"stnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
    {"subhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"subhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    {"suqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"tbl_z_zz_2"_h, &Simulator::VisitSVETableLookup},
    {"tbx_z_zz"_h, &Simulator::VisitSVETableLookup},
    {"uaba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
    {"uabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"uabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"uabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"uabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"uadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
    {"uaddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"uaddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"uaddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"uaddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"uhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"uhsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"uhsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"umaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    {"uminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    {"umlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"umlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"umlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"umlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    {"umlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    {"umullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"umullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    {"umullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"umullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    {"uqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"uqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"uqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"uqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"uqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"uqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    {"uqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"uqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"uqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"uqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    {"uqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"uqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"uqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
    {"uqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
    {"urecpe_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
    {"urhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    {"urshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"urshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    {"urshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    {"ursqrte_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
    {"ursra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    {"ushllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    {"ushllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    {"usqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    {"usra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    {"usublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"usublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    {"usubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"usubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    {"whilege_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    {"whilegt_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    {"whilehi_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    {"whilehs_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    {"whilerw_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
    {"whilewr_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
    {"xar_z_zzi"_h, &Simulator::SimulateSVEExclusiveOrRotate},
    {"smmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
    {"ummla_z_zzz"_h, &Simulator::SimulateMatrixMul},
    {"usmmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
    {"smmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
    {"ummla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
    {"usmmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
    {"fmmla_z_zzz_s"_h, &Simulator::SimulateSVEFPMatrixMul},
    {"fmmla_z_zzz_d"_h, &Simulator::SimulateSVEFPMatrixMul},
    {"ld1row_z_p_bi_u32"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    {"ld1row_z_p_br_contiguous"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    {"ld1rod_z_p_bi_u64"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    {"ld1rod_z_p_br_contiguous"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    {"ld1rob_z_p_bi_u8"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    {"ld1rob_z_p_br_contiguous"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    {"ld1roh_z_p_bi_u16"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    {"ld1roh_z_p_br_contiguous"_h,
     &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    {"usdot_z_zzz_s"_h, &Simulator::VisitSVEIntMulAddUnpredicated},
    {"sudot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
    {"usdot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
    {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra},
    {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    {"addg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
    {"gmi_64g_dp_2src"_h, &Simulator::SimulateMTETagMaskInsert},
    {"irg_64i_dp_2src"_h, &Simulator::Simulate_XdSP_XnSP_Xm},
    {"ldg_64loffset_ldsttags"_h, &Simulator::SimulateMTELoadTag},
    {"st2g_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"st2g_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"st2g_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stgp_64_ldstpair_off"_h, &Simulator::SimulateMTEStoreTagPair},
    {"stgp_64_ldstpair_post"_h, &Simulator::SimulateMTEStoreTagPair},
    {"stgp_64_ldstpair_pre"_h, &Simulator::SimulateMTEStoreTagPair},
    {"stg_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stg_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stg_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stz2g_64soffset_ldsttags"_h,
     &Simulator::Simulator::SimulateMTEStoreTag},
    {"stz2g_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stz2g_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stzg_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stzg_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"stzg_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    {"subg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
    {"subps_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
    {"subp_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
    {"cpyen_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyern_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpye_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyfen_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyfern_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyfewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyfe_cpy_memcms"_h, &Simulator::SimulateCpyE},
    {"cpyfmn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpyfmrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpyfmwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpyfm_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpyfpn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    {"cpyfprn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    {"cpyfpwn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    {"cpyfp_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    {"cpymn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpymrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpymwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpym_cpy_memcms"_h, &Simulator::SimulateCpyM},
    {"cpypn_cpy_memcms"_h, &Simulator::SimulateCpyP},
    {"cpyprn_cpy_memcms"_h, &Simulator::SimulateCpyP},
    {"cpypwn_cpy_memcms"_h, &Simulator::SimulateCpyP},
    {"cpyp_cpy_memcms"_h, &Simulator::SimulateCpyP},
    {"setp_set_memcms"_h, &Simulator::SimulateSetP},
    {"setpn_set_memcms"_h, &Simulator::SimulateSetP},
    {"setgp_set_memcms"_h, &Simulator::SimulateSetGP},
    {"setgpn_set_memcms"_h, &Simulator::SimulateSetGP},
    {"setm_set_memcms"_h, &Simulator::SimulateSetM},
    {"setmn_set_memcms"_h, &Simulator::SimulateSetM},
    {"setgm_set_memcms"_h, &Simulator::SimulateSetGM},
    {"setgmn_set_memcms"_h, &Simulator::SimulateSetGM},
    {"sete_set_memcms"_h, &Simulator::SimulateSetE},
    {"seten_set_memcms"_h, &Simulator::SimulateSetE},
    {"setge_set_memcms"_h, &Simulator::SimulateSetE},
    {"setgen_set_memcms"_h, &Simulator::SimulateSetE},
    {"abs_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    {"abs_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    {"cnt_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    {"cnt_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    {"ctz_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    {"ctz_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    {"smax_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    {"smax_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    {"smin_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    {"smin_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    {"smax_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    {"smax_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    {"smin_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    {"smin_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    {"umax_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    {"umax_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    {"umin_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    {"umin_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    {"umax_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    {"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    {"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    {"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
};

const Simulator::FormToVisitorFnMap*
Simulator::GetFormToVisitorFnMap() {
  return &FORM_TO_VISITOR;
}

#ifndef PANDA_BUILD
Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
    : memory_(std::move(stack)),
      last_instr_(NULL),
      cpu_features_auditor_(decoder, CPUFeatures::All()) {
#else
Simulator::Simulator(PandaAllocator* allocator, Decoder* decoder, SimStack::Allocated stack, FILE* stream)
    : memory_(std::move(stack)),
      last_instr_(NULL),
      allocator_(allocator),
      cpu_features_auditor_(decoder, CPUFeatures::All()),
      saved_cpu_features_(allocator_.Adapter()) {
#endif
  // Ensure that shift operations act as the simulator expects.
  VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
  VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);

  // Set up a placeholder pipe for CanReadMemory.
  VIXL_CHECK(pipe(placeholder_pipe_fd_) == 0);

  // Set up the decoder.
  decoder_ = decoder;
  decoder_->AppendVisitor(this);

  stream_ = stream;

#ifndef PANDA_BUILD
  print_disasm_ = new PrintDisassembler(stream_);
#else
  print_disasm_ = allocator_.New<PrintDisassembler>(allocator, stream_);
#endif

  memory_.AppendMetaData(&meta_data_);

  // The Simulator and Disassembler share the same available list, held by the
  // auditor. The Disassembler only annotates instructions with features that
  // are _not_ available, so registering the auditor should have no effect
  // unless the simulator is about to abort (due to missing features). In
  // practice, this means that with trace enabled, the simulator will crash just
  // after the disassembler prints the instruction, with the missing features
  // enumerated.
  print_disasm_->RegisterCPUFeaturesAuditor(&cpu_features_auditor_);

  SetColouredTrace(false);
  trace_parameters_ = LOG_NONE;

  // We have to configure the SVE vector register length before calling
  // ResetState().
  SetVectorLengthInBits(kZRegMinSize);

  ResetState();

  // Print a warning about exclusive-access instructions, but only the first
  // time they are encountered. This warning can be silenced using
  // SilenceExclusiveAccessWarning().
  print_exclusive_access_warning_ = true;

  guard_pages_ = false;

  // Initialize the common state of RNDR and RNDRRS.
  uint16_t seed[3] = {11, 22, 33};
  VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
  memcpy(rand_state_, seed, sizeof(rand_state_));

  // Initialize all bits of pseudo predicate register to true.
  LogicPRegister ones(pregister_all_true_);
  ones.SetAllBits();

  // Initialize the debugger but disable it by default.
  SetDebuggerEnabled(false);
#ifndef PANDA_BUILD
  debugger_ = std::make_unique<Debugger>(this);
#else
  debugger_ = allocator_.New<Debugger>(this);
#endif
}

void Simulator::ResetSystemRegisters() {
  // Reset the system registers.
  nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
  fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
  ResetFFR();
}

void Simulator::ResetRegisters() {
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    WriteXRegister(i, 0xbadbeef);
  }
  // Returning to address 0 exits the Simulator.
  WriteLr(kEndOfSimAddress);
}

void Simulator::ResetVRegisters() {
  // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
  VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0);
  int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes;
  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes());
    vregisters_[i].NotifyAccessAsZ();
    for (int lane = 0; lane < lane_count; lane++) {
      // Encode the register number and (D-sized) lane into each NaN, to
      // make them easier to trace.
      uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) |
                          (0x0000000000000001 * lane);
      VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
      VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
      vregisters_[i].Insert(lane, nan_bits);
    }
  }
}

void Simulator::ResetPRegisters() {
  VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
  int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes;
  // Ensure the register configuration fits in this bit encoding.
  VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX);
  VIXL_ASSERT(lane_count <= UINT8_MAX);
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes());
    for (int lane = 0; lane < lane_count; lane++) {
      // Encode the register number and (H-sized) lane into each lane slot.
      uint16_t bits = (0x0100 * lane) | i;
      pregisters_[i].Insert(lane, bits);
    }
  }
}

void Simulator::ResetFFR() {
  VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
  int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes;
  ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes)));
}

void Simulator::ResetState() {
  ResetSystemRegisters();
  ResetRegisters();
  ResetVRegisters();
  ResetPRegisters();

  WriteSp(memory_.GetStack().GetBase());

  pc_ = NULL;
  pc_modified_ = false;

  // BTI state.
  btype_ = DefaultBType;
  next_btype_ = DefaultBType;

  meta_data_.ResetState();
}

void Simulator::SetVectorLengthInBits(unsigned vector_length) {
  VIXL_ASSERT((vector_length >= kZRegMinSize) &&
              (vector_length <= kZRegMaxSize));
  VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
  vector_length_ = vector_length;

  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes());
  }
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes());
  }

  ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes());

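  // Resizing invalidates the current register contents, so reinitialise them
  // to their reset values.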
  ResetVRegisters();
  ResetPRegisters();
  ResetFFR();
}

Simulator::~Simulator() {
  // The decoder may outlive the simulator.
  decoder_->RemoveVisitor(print_disasm_);
#ifdef PANDA_BUILD
  allocator_.DeleteObject(print_disasm_);
  allocator_.DeleteObject(debugger_);
#endif
  close(placeholder_pipe_fd_[0]);
  close(placeholder_pipe_fd_[1]);
}


void Simulator::Run() {
  // Flush any written registers before executing anything, so that
  // manually-set registers are logged _before_ the first instruction.
  LogAllWrittenRegisters();

  if (debugger_enabled_) {
    // Slow path to check for breakpoints only if the debugger is enabled.
    Debugger* debugger = GetDebugger();
    while (!IsSimulationFinished()) {
      if (debugger->IsAtBreakpoint()) {
        fprintf(stream_, "Debugger hit breakpoint, breaking...\n");
        debugger->Debug();
      } else {
        ExecuteInstruction();
      }
    }
  } else {
    while (!IsSimulationFinished()) {
      ExecuteInstruction();
    }
  }
}


void Simulator::RunFrom(const Instruction* first) {
  WritePc(first, NoBranchLog);
  Run();
}


// clang-format off
const char* Simulator::xreg_names[] = {"x0",  "x1",  "x2",  "x3",  "x4",  "x5",
                                       "x6",  "x7",  "x8",  "x9",  "x10", "x11",
                                       "x12", "x13", "x14", "x15", "x16", "x17",
                                       "x18", "x19", "x20", "x21", "x22", "x23",
                                       "x24", "x25", "x26", "x27", "x28", "x29",
                                       "lr",  "xzr", "sp"};

const char* Simulator::wreg_names[] = {"w0",  "w1",  "w2",  "w3",  "w4",  "w5",
                                       "w6",  "w7",  "w8",  "w9",  "w10", "w11",
                                       "w12", "w13", "w14", "w15", "w16", "w17",
                                       "w18", "w19", "w20", "w21", "w22", "w23",
                                       "w24", "w25", "w26", "w27", "w28", "w29",
                                       "w30", "wzr", "wsp"};

const char* Simulator::breg_names[] = {"b0",  "b1",  "b2",  "b3",  "b4",  "b5",
                                       "b6",  "b7",  "b8",  "b9",  "b10", "b11",
                                       "b12", "b13", "b14", "b15", "b16", "b17",
                                       "b18", "b19", "b20", "b21", "b22", "b23",
                                       "b24", "b25", "b26", "b27", "b28", "b29",
                                       "b30", "b31"};

const char* Simulator::hreg_names[] = {"h0",  "h1",  "h2",  "h3",  "h4",  "h5",
                                       "h6",  "h7",  "h8",  "h9",  "h10", "h11",
                                       "h12", "h13", "h14", "h15", "h16", "h17",
                                       "h18", "h19", "h20", "h21", "h22", "h23",
                                       "h24", "h25", "h26", "h27", "h28", "h29",
                                       "h30", "h31"};

const char* Simulator::sreg_names[] = {"s0",  "s1",  "s2",  "s3",  "s4",  "s5",
                                       "s6",  "s7",  "s8",  "s9",  "s10", "s11",
                                       "s12", "s13", "s14", "s15", "s16", "s17",
                                       "s18", "s19", "s20", "s21", "s22", "s23",
                                       "s24", "s25", "s26", "s27", "s28", "s29",
                                       "s30", "s31"};

const char* Simulator::dreg_names[] = {"d0",  "d1",  "d2",  "d3",  "d4",  "d5",
                                       "d6",  "d7",  "d8",  "d9",  "d10", "d11",
                                       "d12", "d13", "d14", "d15", "d16", "d17",
                                       "d18", "d19", "d20", "d21", "d22", "d23",
                                       "d24", "d25", "d26", "d27", "d28", "d29",
                                       "d30", "d31"};

const char* Simulator::vreg_names[] = {"v0",  "v1",  "v2",  "v3",  "v4",  "v5",
                                       "v6",  "v7",  "v8",  "v9",  "v10", "v11",
                                       "v12", "v13", "v14", "v15", "v16", "v17",
                                       "v18", "v19", "v20", "v21", "v22", "v23",
                                       "v24", "v25", "v26", "v27", "v28", "v29",
                                       "v30", "v31"};

const char* Simulator::zreg_names[] = {"z0",  "z1",  "z2",  "z3",  "z4",  "z5",
                                       "z6",  "z7",  "z8",  "z9",  "z10", "z11",
                                       "z12", "z13", "z14", "z15", "z16", "z17",
                                       "z18", "z19", "z20", "z21", "z22", "z23",
                                       "z24", "z25", "z26", "z27", "z28", "z29",
                                       "z30", "z31"};

const char* Simulator::preg_names[] = {"p0",  "p1",  "p2",  "p3",  "p4",  "p5",
                                       "p6",  "p7",  "p8",  "p9",  "p10", "p11",
                                       "p12", "p13", "p14", "p15"};
// clang-format on


const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
  // If the code represents the stack pointer, index the name after zr.
  if ((code == kSPRegInternalCode) ||
      ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
    code = kZeroRegCode + 1;
  }
  VIXL_ASSERT(code < ArrayLength(wreg_names));
  return wreg_names[code];
}


const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
  // If the code represents the stack pointer, index the name after zr.
  if ((code == kSPRegInternalCode) ||
      ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
    code = kZeroRegCode + 1;
  }
  VIXL_ASSERT(code < ArrayLength(xreg_names));
  return xreg_names[code];
}


const char* Simulator::BRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return breg_names[code];
}


const char* Simulator::HRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return hreg_names[code];
}


const char* Simulator::SRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return sreg_names[code];
}


const char* Simulator::DRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return dreg_names[code];
}


const char* Simulator::VRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfVRegisters);
  return vreg_names[code];
}


const char* Simulator::ZRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfZRegisters);
  return zreg_names[code];
}


const char* Simulator::PRegNameForCode(unsigned code) {
  VIXL_ASSERT(code < kNumberOfPRegisters);
  return preg_names[code];
}

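// Expand a predicate register into a byte-granular vector register: active
// lanes become 0xff, inactive lanes become zero.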
SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) {
  SimVRegister ones, result;
  dup_immediate(kFormatVnB, ones, 0xff);
  mov_zeroing(kFormatVnB, result, pg, ones);
  return result;
}

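// Reconstruct a predicate register from a vector register: lanes that compare
// not-equal to zero become active.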
void Simulator::ExtractFromSimVRegister(VectorFormat vform,
                                        SimPRegister& pd,
                                        SimVRegister vreg) {
  SimVRegister zero;
  dup_immediate(kFormatVnB, zero, 0);
  SVEIntCompareVectorsHelper(ne,
                             vform,
                             pd,
                             GetPTrue(),
                             vreg,
                             zero,
                             false,
                             LeaveFlags);
}

#define COLOUR(colour_code) "\033[0;" colour_code "m"
#define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
#define COLOUR_HIGHLIGHT "\033[43m"
#define NORMAL ""
#define GREY "30"
#define RED "31"
#define GREEN "32"
#define YELLOW "33"
#define BLUE "34"
#define MAGENTA "35"
#define CYAN "36"
#define WHITE "37"
void Simulator::SetColouredTrace(bool value) {
  coloured_trace_ = value;

  clr_normal = value ? COLOUR(NORMAL) : "";
  clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
  clr_flag_value = value ? COLOUR(NORMAL) : "";
  clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
  clr_reg_value = value ? COLOUR(CYAN) : "";
  clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
  clr_vreg_value = value ? COLOUR(MAGENTA) : "";
  clr_preg_name = value ? COLOUR_BOLD(GREEN) : "";
  clr_preg_value = value ? COLOUR(GREEN) : "";
  clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
  clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
  clr_warning_message = value ? COLOUR(YELLOW) : "";
  clr_printf = value ? COLOUR(GREEN) : "";
  clr_branch_marker = value ? COLOUR(GREY) COLOUR_HIGHLIGHT : "";

  if (value) {
    print_disasm_->SetCPUFeaturesPrefix("// Needs: " COLOUR_BOLD(RED));
    print_disasm_->SetCPUFeaturesSuffix(COLOUR(NORMAL));
  } else {
    print_disasm_->SetCPUFeaturesPrefix("// Needs: ");
    print_disasm_->SetCPUFeaturesSuffix("");
  }
}


void Simulator::SetTraceParameters(int parameters) {
  bool disasm_before = trace_parameters_ & LOG_DISASM;
  trace_parameters_ = parameters;
  bool disasm_after = trace_parameters_ & LOG_DISASM;

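  // Register or deregister the disassembling visitor as the LOG_DISASM bit
  // changes.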
  if (disasm_before != disasm_after) {
    if (disasm_after) {
      decoder_->InsertVisitorBefore(print_disasm_, this);
    } else {
      decoder_->RemoveVisitor(print_disasm_);
    }
  }
}

// Helpers ---------------------------------------------------------------------
uint64_t Simulator::AddWithCarry(unsigned reg_size,
                                 bool set_flags,
                                 uint64_t left,
                                 uint64_t right,
                                 int carry_in) {
  std::pair<uint64_t, uint8_t> result_and_flags =
      AddWithCarry(reg_size, left, right, carry_in);
  if (set_flags) {
    uint8_t flags = result_and_flags.second;
    ReadNzcv().SetN((flags >> 3) & 1);
    ReadNzcv().SetZ((flags >> 2) & 1);
    ReadNzcv().SetC((flags >> 1) & 1);
    ReadNzcv().SetV((flags >> 0) & 1);
    LogSystemRegister(NZCV);
  }
  return result_and_flags.first;
}

std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size,
                                                     uint64_t left,
                                                     uint64_t right,
                                                     int carry_in) {
  VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
  VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));

  uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
  uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
  uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;

  left &= reg_mask;
  right &= reg_mask;
  uint64_t result = (left + right + carry_in) & reg_mask;
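  // For example, with reg_size == kWRegSize, 0x80000000 + 0x80000000 wraps to
  // zero: Z and C are set (unsigned carry out), and V is set because two
  // negative inputs produced a non-negative result.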

  // NZCV bits, ordered N in bit 3 to V in bit 0.
  uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0;
  nzcv |= CalcZFlag(result) ? 4 : 0;

  // Compute the C flag by comparing the result to the max unsigned integer.
  uint64_t max_uint_2op = max_uint - carry_in;
  bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
  nzcv |= C ? 2 : 0;

  // Overflow iff the sign bit is the same for the two inputs and different
  // for the result.
  uint64_t left_sign = left & sign_mask;
  uint64_t right_sign = right & sign_mask;
  uint64_t result_sign = result & sign_mask;
  bool V = (left_sign == right_sign) && (left_sign != result_sign);
  nzcv |= V ? 1 : 0;

  return std::make_pair(result, nzcv);
}

using vixl_uint128_t = std::pair<uint64_t, uint64_t>;

vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) {
  std::pair<uint64_t, uint8_t> sum_lo =
      AddWithCarry(kXRegSize, x.second, y.second, 0);
  int carry_in = (sum_lo.second & 0x2) >> 1;  // C flag in NZCV result.
  std::pair<uint64_t, uint8_t> sum_hi =
      AddWithCarry(kXRegSize, x.first, y.first, carry_in);
  return std::make_pair(sum_hi.first, sum_lo.first);
}

vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
  // Negate the integer value. Throw an assertion when the input is INT128_MIN.
  VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
  x.first = ~x.first;
  x.second = ~x.second;
  return Add128(x, {0, 1});
}

vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
  bool neg_result = false;
  if ((x >> 63) == 1) {
    x = -x;
    neg_result = !neg_result;
  }
  if ((y >> 63) == 1) {
    y = -y;
    neg_result = !neg_result;
  }

  uint64_t x_lo = x & 0xffffffff;
  uint64_t x_hi = x >> 32;
  uint64_t y_lo = y & 0xffffffff;
  uint64_t y_hi = y >> 32;

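  // Accumulate the 128-bit product from 64-bit partial products:
  //   x * y = ((x_hi * y_hi) << 64) + ((x_hi * y_lo + x_lo * y_hi) << 32)
  //           + (x_lo * y_lo)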
  uint64_t t1 = x_lo * y_hi;
  uint64_t t2 = x_hi * y_lo;
  vixl_uint128_t a = std::make_pair(0, x_lo * y_lo);
  vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32);
  vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32);
  vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0);

  vixl_uint128_t result = Add128(a, b);
  result = Add128(result, c);
  result = Add128(result, d);
  return neg_result ? std::make_pair(-result.first - 1, -result.second)
                    : result;
}

int64_t Simulator::ShiftOperand(unsigned reg_size,
                                uint64_t uvalue,
                                Shift shift_type,
                                unsigned amount) const {
  VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
              (reg_size == kSRegSize) || (reg_size == kDRegSize));
  if (amount > 0) {
    uint64_t mask = GetUintMask(reg_size);
    bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
    // The behavior is undefined in C++ if the shift amount is greater than or
    // equal to the register lane size. Work out the shifted result based on
    // architectural behavior before performing the C++ shift operations.
1027     switch (shift_type) {
1028       case LSL:
1029         if (amount >= reg_size) {
1030           return UINT64_C(0);
1031         }
1032         uvalue <<= amount;
1033         break;
1034       case LSR:
1035         if (amount >= reg_size) {
1036           return UINT64_C(0);
1037         }
1038         uvalue >>= amount;
1039         break;
1040       case ASR:
1041         if (amount >= reg_size) {
1042           return is_negative ? ~UINT64_C(0) : UINT64_C(0);
1043         }
1044         uvalue >>= amount;
1045         if (is_negative) {
1046           // Simulate sign-extension to 64 bits.
1047           uvalue |= ~UINT64_C(0) << (reg_size - amount);
1048         }
1049         break;
1050       case ROR: {
1051         uvalue = RotateRight(uvalue, amount, reg_size);
1052         break;
1053       }
1054       default:
1055         VIXL_UNIMPLEMENTED();
1056         return 0;
1057     }
1058     uvalue &= mask;
1059   }
1060 
1061   int64_t result;
1062   memcpy(&result, &uvalue, sizeof(result));
1063   return result;
1064 }
1065 
1066 
1067 int64_t Simulator::ExtendValue(unsigned reg_size,
1068                                int64_t value,
1069                                Extend extend_type,
1070                                unsigned left_shift) const {
1071   switch (extend_type) {
1072     case UXTB:
1073       value &= kByteMask;
1074       break;
1075     case UXTH:
1076       value &= kHalfWordMask;
1077       break;
1078     case UXTW:
1079       value &= kWordMask;
1080       break;
1081     case SXTB:
1082       value &= kByteMask;
1083       if ((value & 0x80) != 0) {
1084         value |= ~UINT64_C(0) << 8;
1085       }
1086       break;
1087     case SXTH:
1088       value &= kHalfWordMask;
1089       if ((value & 0x8000) != 0) {
1090         value |= ~UINT64_C(0) << 16;
1091       }
1092       break;
1093     case SXTW:
1094       value &= kWordMask;
1095       if ((value & 0x80000000) != 0) {
1096         value |= ~UINT64_C(0) << 32;
1097       }
1098       break;
1099     case UXTX:
1100     case SXTX:
1101       break;
1102     default:
1103       VIXL_UNREACHABLE();
1104   }
1105   return ShiftOperand(reg_size, value, LSL, left_shift);
1106 }
1107 
1108 
1109 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
1110   AssertSupportedFPCR();
1111 
1112   // TODO: This assumes that the C++ implementation handles comparisons in the
1113   // way that we expect (as per AssertSupportedFPCR()).
1114   bool process_exception = false;
1115   if ((IsNaN(val0) != 0) || (IsNaN(val1) != 0)) {
1116     ReadNzcv().SetRawValue(FPUnorderedFlag);
1117     if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
1118         (trap == EnableTrap)) {
1119       process_exception = true;
1120     }
1121   } else if (val0 < val1) {
1122     ReadNzcv().SetRawValue(FPLessThanFlag);
1123   } else if (val0 > val1) {
1124     ReadNzcv().SetRawValue(FPGreaterThanFlag);
1125   } else if (val0 == val1) {
1126     ReadNzcv().SetRawValue(FPEqualFlag);
1127   } else {
1128     VIXL_UNREACHABLE();
1129   }
1130   LogSystemRegister(NZCV);
1131   if (process_exception) FPProcessException();
1132 }
1133 
1134 
1135 uint64_t Simulator::ComputeMemOperandAddress(const MemOperand& mem_op) const {
1136   VIXL_ASSERT(mem_op.IsValid());
1137   int64_t base = ReadRegister<int64_t>(mem_op.GetBaseRegister());
1138   if (mem_op.IsImmediateOffset()) {
1139     return base + mem_op.GetOffset();
1140   } else {
1141     VIXL_ASSERT(mem_op.GetRegisterOffset().IsValid());
1142     int64_t offset = ReadRegister<int64_t>(mem_op.GetRegisterOffset());
1143     unsigned shift_amount = mem_op.GetShiftAmount();
1144     if (mem_op.GetShift() != NO_SHIFT) {
1145       offset = ShiftOperand(kXRegSize, offset, mem_op.GetShift(), shift_amount);
1146     }
1147     if (mem_op.GetExtend() != NO_EXTEND) {
1148       offset = ExtendValue(kXRegSize, offset, mem_op.GetExtend(), shift_amount);
1149     }
1150     return static_cast<uint64_t>(base + offset);
1151   }
1152 }
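// Illustrative example (hypothetical operand): for [x0, w1, UXTW #2] with
// x0 == 0x1000 and w1 == 0x10, the extend step zero-extends w1 and applies the
// scaling shift, so the computed address is 0x1000 + (0x10 << 2) == 0x1040.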
1153 
1154 
1155 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
1156     unsigned reg_size, unsigned lane_size) {
1157   VIXL_ASSERT(reg_size >= lane_size);
1158 
1159   uint32_t format = 0;
1160   if (reg_size != lane_size) {
1161     switch (reg_size) {
1162       default:
1163         VIXL_UNREACHABLE();
1164         break;
1165       case kQRegSizeInBytes:
1166         format = kPrintRegAsQVector;
1167         break;
1168       case kDRegSizeInBytes:
1169         format = kPrintRegAsDVector;
1170         break;
1171     }
1172   }
1173 
1174   switch (lane_size) {
1175     default:
1176       VIXL_UNREACHABLE();
1177       break;
1178     case kQRegSizeInBytes:
1179       format |= kPrintReg1Q;
1180       break;
1181     case kDRegSizeInBytes:
1182       format |= kPrintReg1D;
1183       break;
1184     case kSRegSizeInBytes:
1185       format |= kPrintReg1S;
1186       break;
1187     case kHRegSizeInBytes:
1188       format |= kPrintReg1H;
1189       break;
1190     case kBRegSizeInBytes:
1191       format |= kPrintReg1B;
1192       break;
1193   }
1194   // These sizes would be duplicate case labels.
1195   VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
1196   VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
1197   VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
1198   VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
1199 
1200   return static_cast<PrintRegisterFormat>(format);
1201 }
1202 
1203 
1204 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
1205     VectorFormat vform) {
1206   switch (vform) {
1207     default:
1208       VIXL_UNREACHABLE();
1209       return kPrintReg16B;
1210     case kFormat16B:
1211       return kPrintReg16B;
1212     case kFormat8B:
1213       return kPrintReg8B;
1214     case kFormat8H:
1215       return kPrintReg8H;
1216     case kFormat4H:
1217       return kPrintReg4H;
1218     case kFormat4S:
1219       return kPrintReg4S;
1220     case kFormat2S:
1221       return kPrintReg2S;
1222     case kFormat2D:
1223       return kPrintReg2D;
1224     case kFormat1D:
1225       return kPrintReg1D;
1226 
1227     case kFormatB:
1228       return kPrintReg1B;
1229     case kFormatH:
1230       return kPrintReg1H;
1231     case kFormatS:
1232       return kPrintReg1S;
1233     case kFormatD:
1234       return kPrintReg1D;
1235 
1236     case kFormatVnB:
1237       return kPrintRegVnB;
1238     case kFormatVnH:
1239       return kPrintRegVnH;
1240     case kFormatVnS:
1241       return kPrintRegVnS;
1242     case kFormatVnD:
1243       return kPrintRegVnD;
1244   }
1245 }
1246 
1247 
1248 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
1249     VectorFormat vform) {
1250   switch (vform) {
1251     default:
1252       VIXL_UNREACHABLE();
1253       return kPrintReg16B;
1254     case kFormat8H:
1255       return kPrintReg8HFP;
1256     case kFormat4H:
1257       return kPrintReg4HFP;
1258     case kFormat4S:
1259       return kPrintReg4SFP;
1260     case kFormat2S:
1261       return kPrintReg2SFP;
1262     case kFormat2D:
1263       return kPrintReg2DFP;
1264     case kFormat1D:
1265       return kPrintReg1DFP;
1266     case kFormatH:
1267       return kPrintReg1HFP;
1268     case kFormatS:
1269       return kPrintReg1SFP;
1270     case kFormatD:
1271       return kPrintReg1DFP;
1272   }
1273 }
1274 
1275 void Simulator::PrintRegisters() {
1276   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1277     if (i == kSpRegCode) i = kSPRegInternalCode;
1278     PrintRegister(i);
1279   }
1280 }
1281 
1282 void Simulator::PrintVRegisters() {
1283   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1284     PrintVRegister(i);
1285   }
1286 }
1287 
1288 void Simulator::PrintZRegisters() {
1289   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
1290     PrintZRegister(i);
1291   }
1292 }
1293 
1294 void Simulator::PrintWrittenRegisters() {
1295   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1296     if (registers_[i].WrittenSinceLastLog()) {
1297       if (i == kSpRegCode) i = kSPRegInternalCode;
1298       PrintRegister(i);
1299     }
1300   }
1301 }
1302 
1303 void Simulator::PrintWrittenVRegisters() {
1304   bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
1305   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1306     if (vregisters_[i].WrittenSinceLastLog()) {
1307       // Z registers are initialised in the constructor before the user can
1308       // configure the CPU features, so we must also check for SVE here.
1309       if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
1310         PrintZRegister(i);
1311       } else {
1312         PrintVRegister(i);
1313       }
1314     }
1315   }
1316 }
1317 
1318 void Simulator::PrintWrittenPRegisters() {
1319   // P registers are initialised in the constructor before the user can
1320   // configure the CPU features, so we must check for SVE here.
1321   if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return;
1322   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
1323     if (pregisters_[i].WrittenSinceLastLog()) {
1324       PrintPRegister(i);
1325     }
1326   }
1327   if (ReadFFR().WrittenSinceLastLog()) PrintFFR();
1328 }
1329 
1330 void Simulator::PrintSystemRegisters() {
1331   PrintSystemRegister(NZCV);
1332   PrintSystemRegister(FPCR);
1333 }
1334 
1335 void Simulator::PrintRegisterValue(const uint8_t* value,
1336                                    int value_size,
1337                                    PrintRegisterFormat format) {
1338   int print_width = GetPrintRegSizeInBytes(format);
1339   VIXL_ASSERT(print_width <= value_size);
1340   for (int i = value_size - 1; i >= print_width; i--) {
1341     // Pad with spaces so that values align vertically.
1342     fprintf(stream_, "  ");
1343     // If we aren't explicitly printing a partial value, ensure that the
1344     // unprinted bits are zero.
1345     VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0));
1346   }
1347   fprintf(stream_, "0x");
1348   for (int i = print_width - 1; i >= 0; i--) {
1349     fprintf(stream_, "%02x", value[i]);
1350   }
1351 }
1352 
1353 void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value,
1354                                                 uint16_t lane_mask,
1355                                                 PrintRegisterFormat format) {
1356   VIXL_ASSERT((format & kPrintRegAsFP) != 0);
1357   int lane_size = GetPrintRegLaneSizeInBytes(format);
1358   fprintf(stream_, " (");
1359   bool last_inactive = false;
1360   const char* sep = "";
1361   for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") {
1362     bool access = (lane_mask & (1 << (i * lane_size))) != 0;
1363     if (access) {
1364       // Read the lane as a double, so we can format all FP types in the same
1365       // way. We squash NaNs, and a double can exactly represent any other value
1366       // that the smaller types can represent, so this is lossless.
1367       double element;
1368       switch (lane_size) {
1369         case kHRegSizeInBytes: {
1370           Float16 element_fp16;
1371           VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes);
1372           memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16));
1373           element = FPToDouble(element_fp16, kUseDefaultNaN);
1374           break;
1375         }
1376         case kSRegSizeInBytes: {
1377           float element_fp32;
1378           memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32));
1379           element = static_cast<double>(element_fp32);
1380           break;
1381         }
1382         case kDRegSizeInBytes: {
1383           memcpy(&element, &value[i * lane_size], sizeof(element));
1384           break;
1385         }
1386         default:
1387           VIXL_UNREACHABLE();
1388           fprintf(stream_, "{UnknownFPValue}");
1389           continue;
1390       }
1391       if (IsNaN(element)) {
1392         // The fprintf behaviour for NaNs is implementation-defined. Always
1393         // print "nan", so that traces are consistent.
1394         fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal);
1395       } else {
1396         fprintf(stream_,
1397                 "%s%s%#.4g%s",
1398                 sep,
1399                 clr_vreg_value,
1400                 element,
1401                 clr_normal);
1402       }
1403       last_inactive = false;
1404     } else if (!last_inactive) {
1405       // Replace each contiguous sequence of inactive lanes with "...".
1406       fprintf(stream_, "%s...", sep);
1407       last_inactive = true;
1408     }
1409   }
1410   fprintf(stream_, ")");
1411 }
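// The memcpy calls above reinterpret raw lane bytes as FP values without
// violating strict aliasing. A minimal stand-alone equivalent (hypothetical
// buffer contents):
//
//   uint8_t lane[4] = {0x00, 0x00, 0x80, 0x3f};  // 1.0f, little-endian
//   float f;
//   memcpy(&f, lane, sizeof(f));
//   double element = static_cast<double>(f);     // element == 1.0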
1412 
1413 void Simulator::PrintRegister(int code,
1414                               PrintRegisterFormat format,
1415                               const char* suffix) {
1416   VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) ||
1417               (static_cast<unsigned>(code) == kSPRegInternalCode));
1418   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar);
1419   VIXL_ASSERT((format & kPrintRegAsFP) == 0);
1420 
1421   SimRegister* reg;
1422   SimRegister zero;
1423   if (code == kZeroRegCode) {
1424     reg = &zero;
1425   } else {
1426     // registers_[31] holds the SP.
1427     VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
1428     reg = &registers_[code % kNumberOfRegisters];
1429   }
1430 
1431   // We trace register writes as whole register values, implying that any
1432   // unprinted bits are all zero:
1433   //   "#       x{code}: 0x{-----value----}"
1434   //   "#       w{code}:         0x{-value}"
1435   // Stores trace partial register values, implying nothing about the unprinted
1436   // bits:
1437   //   "# x{code}<63:0>: 0x{-----value----}"
1438   //   "# x{code}<31:0>:         0x{-value}"
1439   //   "# x{code}<15:0>:             0x{--}"
1440   //   "#  x{code}<7:0>:               0x{}"
1441 
1442   bool is_partial = (format & kPrintRegPartial) != 0;
1443   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1444   std::stringstream name;
1445   if (is_partial) {
1446     name << XRegNameForCode(code) << GetPartialRegSuffix(format);
1447   } else {
1448     // Notify the register that it has been logged, but only if we're printing
1449     // all of it.
1450     reg->NotifyRegisterLogged();
1451     switch (print_reg_size) {
1452       case kWRegSize:
1453         name << WRegNameForCode(code);
1454         break;
1455       case kXRegSize:
1456         name << XRegNameForCode(code);
1457         break;
1458       default:
1459         VIXL_UNREACHABLE();
1460         return;
1461     }
1462   }
1463 
1464   fprintf(stream_,
1465           "# %s%*s: %s",
1466           clr_reg_name,
1467           kPrintRegisterNameFieldWidth,
1468           name.str().c_str(),
1469           clr_reg_value);
1470   PrintRegisterValue(*reg, format);
1471   fprintf(stream_, "%s%s", clr_normal, suffix);
1472 }
1473 
1474 void Simulator::PrintVRegister(int code,
1475                                PrintRegisterFormat format,
1476                                const char* suffix) {
1477   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters);
1478   VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) ||
1479               ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) ||
1480               ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector));
1481 
1482   // We trace register writes as whole register values, implying that any
1483   // unprinted bits are all zero:
1484   //   "#        v{code}: 0x{-------------value------------}"
1485   //   "#        d{code}:                 0x{-----value----}"
1486   //   "#        s{code}:                         0x{-value}"
1487   //   "#        h{code}:                             0x{--}"
1488   //   "#        b{code}:                               0x{}"
1489   // Stores trace partial register values, implying nothing about the unprinted
1490   // bits:
1491   //   "# v{code}<127:0>: 0x{-------------value------------}"
1492   //   "#  v{code}<63:0>:                 0x{-----value----}"
1493   //   "#  v{code}<31:0>:                         0x{-value}"
1494   //   "#  v{code}<15:0>:                             0x{--}"
1495   //   "#   v{code}<7:0>:                               0x{}"
1496 
1497   bool is_partial = ((format & kPrintRegPartial) != 0);
1498   std::stringstream name;
1499   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1500   if (is_partial) {
1501     name << VRegNameForCode(code) << GetPartialRegSuffix(format);
1502   } else {
1503     // Notify the register that it has been logged, but only if we're printing
1504     // all of it.
1505     vregisters_[code].NotifyRegisterLogged();
1506     switch (print_reg_size) {
1507       case kBRegSize:
1508         name << BRegNameForCode(code);
1509         break;
1510       case kHRegSize:
1511         name << HRegNameForCode(code);
1512         break;
1513       case kSRegSize:
1514         name << SRegNameForCode(code);
1515         break;
1516       case kDRegSize:
1517         name << DRegNameForCode(code);
1518         break;
1519       case kQRegSize:
1520         name << VRegNameForCode(code);
1521         break;
1522       default:
1523         VIXL_UNREACHABLE();
1524         return;
1525     }
1526   }
1527 
1528   fprintf(stream_,
1529           "# %s%*s: %s",
1530           clr_vreg_name,
1531           kPrintRegisterNameFieldWidth,
1532           name.str().c_str(),
1533           clr_vreg_value);
1534   PrintRegisterValue(vregisters_[code], format);
1535   fprintf(stream_, "%s", clr_normal);
1536   if ((format & kPrintRegAsFP) != 0) {
1537     PrintRegisterValueFPAnnotations(vregisters_[code], format);
1538   }
1539   fprintf(stream_, "%s", suffix);
1540 }
1541 
1542 void Simulator::PrintVRegistersForStructuredAccess(int rt_code,
1543                                                    int reg_count,
1544                                                    uint16_t focus_mask,
1545                                                    PrintRegisterFormat format) {
1546   bool print_fp = (format & kPrintRegAsFP) != 0;
1547   // Suppress FP formatting, so we can specify the lanes we're interested in.
1548   PrintRegisterFormat format_no_fp =
1549       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1550 
1551   for (int r = 0; r < reg_count; r++) {
1552     int code = (rt_code + r) % kNumberOfVRegisters;
1553     PrintVRegister(code, format_no_fp, "");
1554     if (print_fp) {
1555       PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format);
1556     }
1557     fprintf(stream_, "\n");
1558   }
1559 }
1560 
1561 void Simulator::PrintZRegistersForStructuredAccess(int rt_code,
1562                                                    int q_index,
1563                                                    int reg_count,
1564                                                    uint16_t focus_mask,
1565                                                    PrintRegisterFormat format) {
1566   bool print_fp = (format & kPrintRegAsFP) != 0;
1567   // Suppress FP formatting, so we can specify the lanes we're interested in.
1568   PrintRegisterFormat format_no_fp =
1569       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1570 
1571   PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format);
1572 
1573   const unsigned size = kQRegSizeInBytes;
1574   unsigned byte_index = q_index * size;
1575   const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index;
1576   VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes());
1577 
1578   for (int r = 0; r < reg_count; r++) {
1579     int code = (rt_code + r) % kNumberOfZRegisters;
1580     PrintPartialZRegister(code, q_index, format_no_fp, "");
1581     if (print_fp) {
1582       PrintRegisterValueFPAnnotations(value, focus_mask, format_q);
1583     }
1584     fprintf(stream_, "\n");
1585   }
1586 }
1587 
1588 void Simulator::PrintZRegister(int code, PrintRegisterFormat format) {
1589   // We're going to print the register in parts, so force a partial format.
1590   format = GetPrintRegPartial(format);
1591   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1592   int vl = GetVectorLengthInBits();
1593   VIXL_ASSERT((vl % kQRegSize) == 0);
1594   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1595     PrintPartialZRegister(code, i, format);
1596   }
1597   vregisters_[code].NotifyRegisterLogged();
1598 }
1599 
1600 void Simulator::PrintPRegister(int code, PrintRegisterFormat format) {
1601   // We're going to print the register in parts, so force a partial format.
1602   format = GetPrintRegPartial(format);
1603   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1604   int vl = GetVectorLengthInBits();
1605   VIXL_ASSERT((vl % kQRegSize) == 0);
1606   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1607     PrintPartialPRegister(code, i, format);
1608   }
1609   pregisters_[code].NotifyRegisterLogged();
1610 }
1611 
1612 void Simulator::PrintFFR(PrintRegisterFormat format) {
1613   // We're going to print the register in parts, so force a partial format.
1614   format = GetPrintRegPartial(format);
1615   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1616   int vl = GetVectorLengthInBits();
1617   VIXL_ASSERT((vl % kQRegSize) == 0);
1618   SimPRegister& ffr = ReadFFR();
1619   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1620     PrintPartialPRegister("FFR", ffr, i, format);
1621   }
1622   ffr.NotifyRegisterLogged();
1623 }
1624 
1625 void Simulator::PrintPartialZRegister(int code,
1626                                       int q_index,
1627                                       PrintRegisterFormat format,
1628                                       const char* suffix) {
1629   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters);
1630   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1631   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1632   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1633 
1634   // We _only_ trace partial Z register values in Q-sized chunks, because
1635   // they're often too large to reasonably fit on a single line. Each line
1636   // implies nothing about the unprinted bits.
1637   //   "# z{code}<127:0>: 0x{-------------value------------}"
1638 
1639   format = GetPrintRegAsQChunkOfSVE(format);
1640 
1641   const unsigned size = kQRegSizeInBytes;
1642   unsigned byte_index = q_index * size;
1643   const uint8_t* value = vregisters_[code].GetBytes() + byte_index;
1644   VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes());
1645 
1646   int lsb = q_index * kQRegSize;
1647   int msb = lsb + kQRegSize - 1;
1648   std::stringstream name;
1649   name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>';
1650 
1651   fprintf(stream_,
1652           "# %s%*s: %s",
1653           clr_vreg_name,
1654           kPrintRegisterNameFieldWidth,
1655           name.str().c_str(),
1656           clr_vreg_value);
1657   PrintRegisterValue(value, size, format);
1658   fprintf(stream_, "%s", clr_normal);
1659   if ((format & kPrintRegAsFP) != 0) {
1660     PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format);
1661   }
1662   fprintf(stream_, "%s", suffix);
1663 }
1664 
1665 void Simulator::PrintPartialPRegister(const char* name,
1666                                       const SimPRegister& reg,
1667                                       int q_index,
1668                                       PrintRegisterFormat format,
1669                                       const char* suffix) {
1670   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1671   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1672   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1673 
1674   // We don't currently use the format for anything here.
1675   USE(format);
1676 
1677   // We _only_ trace partial P register values, because they're often too large
1678   // to reasonably fit on a single line. Each line implies nothing about the
1679   // unprinted bits.
1680   //
1681   // We print values in binary, with spaces between each bit, in order for the
1682   // bits to align with the Z register bytes that they predicate.
1683   //   "# {name}<15:0>: 0b{-------------value------------}"
1684 
1685   int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit;
1686   int lsb = q_index * print_size_in_bits;
1687   int msb = lsb + print_size_in_bits - 1;
1688   std::stringstream prefix;
1689   prefix << name << '<' << msb << ':' << lsb << '>';
1690 
1691   fprintf(stream_,
1692           "# %s%*s: %s0b",
1693           clr_preg_name,
1694           kPrintRegisterNameFieldWidth,
1695           prefix.str().c_str(),
1696           clr_preg_value);
1697   for (int i = msb; i >= lsb; i--) {
1698     fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0');
1699   }
1700   fprintf(stream_, "%s%s", clr_normal, suffix);
1701 }
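// Note: each chunk printed above covers one Q-sized slice of the corresponding
// Z register, so kQRegSize / kZRegBitsPerPRegBit (128 / 8 == 16) predicate
// bits appear per line, one for each Z register byte they predicate.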
1702 
1703 void Simulator::PrintPartialPRegister(int code,
1704                                       int q_index,
1705                                       PrintRegisterFormat format,
1706                                       const char* suffix) {
1707   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters);
1708   PrintPartialPRegister(PRegNameForCode(code),
1709                         pregisters_[code],
1710                         q_index,
1711                         format,
1712                         suffix);
1713 }
1714 
1715 void Simulator::PrintSystemRegister(SystemRegister id) {
1716   switch (id) {
1717     case NZCV:
1718       fprintf(stream_,
1719               "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
1720               clr_flag_name,
1721               clr_flag_value,
1722               ReadNzcv().GetN(),
1723               ReadNzcv().GetZ(),
1724               ReadNzcv().GetC(),
1725               ReadNzcv().GetV(),
1726               clr_normal);
1727       break;
1728     case FPCR: {
1729       static const char* rmode[] = {"0b00 (Round to Nearest)",
1730                                     "0b01 (Round towards Plus Infinity)",
1731                                     "0b10 (Round towards Minus Infinity)",
1732                                     "0b11 (Round towards Zero)"};
1733       VIXL_ASSERT(ReadFpcr().GetRMode() < ArrayLength(rmode));
1734       fprintf(stream_,
1735               "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
1736               clr_flag_name,
1737               clr_flag_value,
1738               ReadFpcr().GetAHP(),
1739               ReadFpcr().GetDN(),
1740               ReadFpcr().GetFZ(),
1741               rmode[ReadFpcr().GetRMode()],
1742               clr_normal);
1743       break;
1744     }
1745     default:
1746       VIXL_UNREACHABLE();
1747   }
1748 }
1749 
1750 uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
1751                                        uint16_t future_access_mask,
1752                                        int struct_element_count,
1753                                        int lane_size_in_bytes,
1754                                        const char* op,
1755                                        uintptr_t address,
1756                                        int reg_size_in_bytes) {
1757   // We assume that at least one lane will be accessed.
1758   VIXL_ASSERT(access_mask != 0);
1759   VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) ||
1760               (reg_size_in_bytes == kQRegSizeInBytes));
1761   bool started_annotation = false;
1762   // Indent to match the register field, the fixed formatting, and the value
1763   // prefix ("0x"): "# {name}: 0x"
1764   fprintf(stream_, "# %*s    ", kPrintRegisterNameFieldWidth, "");
1765   // First, annotate the lanes (byte by byte).
1766   for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
1767     bool access = (access_mask & (1 << lane)) != 0;
1768     bool future = (future_access_mask & (1 << lane)) != 0;
1769     if (started_annotation) {
1770       // If we've started an annotation, draw a horizontal line in addition to
1771       // any other symbols.
1772       if (access) {
1773         fprintf(stream_, "─╨");
1774       } else if (future) {
1775         fprintf(stream_, "─║");
1776       } else {
1777         fprintf(stream_, "──");
1778       }
1779     } else {
1780       if (access) {
1781         started_annotation = true;
1782         fprintf(stream_, " ╙");
1783       } else if (future) {
1784         fprintf(stream_, " ║");
1785       } else {
1786         fprintf(stream_, "  ");
1787       }
1788     }
1789   }
1790   VIXL_ASSERT(started_annotation);
1791   fprintf(stream_, "─ 0x");
1792   int lane_size_in_nibbles = lane_size_in_bytes * 2;
1793   // Print the most-significant struct element first.
1794   const char* sep = "";
1795   for (int i = struct_element_count - 1; i >= 0; i--) {
1796     int offset = lane_size_in_bytes * i;
1797     uint64_t nibble = MemReadUint(lane_size_in_bytes, address + offset);
1798     fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, nibble);
1799     sep = "'";
1800   }
1801   fprintf(stream_,
1802           " %s %s0x%016" PRIxPTR "%s\n",
1803           op,
1804           clr_memory_address,
1805           address,
1806           clr_normal);
1807   return future_access_mask & ~access_mask;
1808 }
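// Note on the masks used above: bit n of access_mask and future_access_mask
// corresponds to the lane whose least significant byte sits at byte offset n
// of the register. For example, the mask selecting 32-bit lane 1 would be:
//
//   uint16_t mask = 1 << (1 * kSRegSizeInBytes);  // 0x0010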
1809 
1810 void Simulator::PrintAccess(int code,
1811                             PrintRegisterFormat format,
1812                             const char* op,
1813                             uintptr_t address) {
1814   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1815   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1816   if ((format & kPrintRegPartial) == 0) {
1817     if (code != kZeroRegCode) {
1818       registers_[code].NotifyRegisterLogged();
1819     }
1820   }
1821   // Scalar-format accesses use a simple format:
1822   //   "# {reg}: 0x{value} -> {address}"
1823 
1824   // Suppress the newline, so the access annotation goes on the same line.
1825   PrintRegister(code, format, "");
1826   fprintf(stream_,
1827           " %s %s0x%016" PRIxPTR "%s\n",
1828           op,
1829           clr_memory_address,
1830           address,
1831           clr_normal);
1832 }
1833 
1834 void Simulator::PrintVAccess(int code,
1835                              PrintRegisterFormat format,
1836                              const char* op,
1837                              uintptr_t address) {
1838   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1839 
1840   // Scalar-format accesses use a simple format:
1841   //   "# v{code}: 0x{value} -> {address}"
1842 
1843   // Suppress the newline, so the access annotation goes on the same line.
1844   PrintVRegister(code, format, "");
1845   fprintf(stream_,
1846           " %s %s0x%016" PRIxPTR "%s\n",
1847           op,
1848           clr_memory_address,
1849           address,
1850           clr_normal);
1851 }
1852 
1853 void Simulator::PrintVStructAccess(int rt_code,
1854                                    int reg_count,
1855                                    PrintRegisterFormat format,
1856                                    const char* op,
1857                                    uintptr_t address) {
1858   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1859 
1860   // For example:
1861   //   "# v{code}: 0x{value}"
1862   //   "#     ...: 0x{value}"
1863   //   "#              ║   ╙─ {struct_value} -> {lowest_address}"
1864   //   "#              ╙───── {struct_value} -> {highest_address}"
1865 
1866   uint16_t lane_mask = GetPrintRegLaneMask(format);
1867   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1868 
1869   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
1870   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1871   for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) {
1872     uint16_t access_mask = 1 << i;
1873     VIXL_ASSERT((lane_mask & access_mask) != 0);
1874     lane_mask = PrintPartialAccess(access_mask,
1875                                    lane_mask,
1876                                    reg_count,
1877                                    lane_size_in_bytes,
1878                                    op,
1879                                    address + (i * reg_count));
1880   }
1881 }
1882 
1883 void Simulator::PrintVSingleStructAccess(int rt_code,
1884                                          int reg_count,
1885                                          int lane,
1886                                          PrintRegisterFormat format,
1887                                          const char* op,
1888                                          uintptr_t address) {
1889   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1890 
1891   // For example:
1892   //   "# v{code}: 0x{value}"
1893   //   "#     ...: 0x{value}"
1894   //   "#              ╙───── {struct_value} -> {address}"
1895 
1896   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1897   uint16_t lane_mask = 1 << (lane * lane_size_in_bytes);
1898   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1899   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1900 }
1901 
1902 void Simulator::PrintVReplicatingStructAccess(int rt_code,
1903                                               int reg_count,
1904                                               PrintRegisterFormat format,
1905                                               const char* op,
1906                                               uintptr_t address) {
1907   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1908 
1909   // For example:
1910   //   "# v{code}: 0x{value}"
1911   //   "#     ...: 0x{value}"
1912   //   "#            ╙─╨─╨─╨─ {struct_value} -> {address}"
1913 
1914   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1915   uint16_t lane_mask = GetPrintRegLaneMask(format);
1916   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1917   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1918 }
1919 
1920 void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) {
1921   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1922 
1923   // Scalar-format accesses are split into separate chunks, each of which uses a
1924   // simple format:
1925   //   "#   z{code}<127:0>: 0x{value} -> {address}"
1926   //   "# z{code}<255:128>: 0x{value} -> {address + 16}"
1927   //   "# z{code}<383:256>: 0x{value} -> {address + 32}"
1928   // etc
1929 
1930   int vl = GetVectorLengthInBits();
1931   VIXL_ASSERT((vl % kQRegSize) == 0);
1932   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1933     // Suppress the newline, so the access annotation goes on the same line.
1934     PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, "");
1935     fprintf(stream_,
1936             " %s %s0x%016" PRIxPTR "%s\n",
1937             op,
1938             clr_memory_address,
1939             address,
1940             clr_normal);
1941     address += kQRegSizeInBytes;
1942   }
1943 }
1944 
1945 void Simulator::PrintZStructAccess(int rt_code,
1946                                    int reg_count,
1947                                    const LogicPRegister& pg,
1948                                    PrintRegisterFormat format,
1949                                    int msize_in_bytes,
1950                                    const char* op,
1951                                    const LogicSVEAddressVector& addr) {
1952   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1953 
1954   // For example:
1955   //   "# z{code}<255:128>: 0x{value}"
1956   //   "#     ...<255:128>: 0x{value}"
1957   //   "#                       ║   ╙─ {struct_value} -> {first_address}"
1958   //   "#                       ╙───── {struct_value} -> {last_address}"
1959 
1960   // We're going to print the register in parts, so force a partial format.
1961   bool skip_inactive_chunks = (format & kPrintRegPartial) != 0;
1962   format = GetPrintRegPartial(format);
1963 
1964   int esize_in_bytes = GetPrintRegLaneSizeInBytes(format);
1965   int vl = GetVectorLengthInBits();
1966   VIXL_ASSERT((vl % kQRegSize) == 0);
1967   int lanes_per_q = kQRegSizeInBytes / esize_in_bytes;
1968   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1969     uint16_t pred =
1970         pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format);
1971     if ((pred == 0) && skip_inactive_chunks) continue;
1972 
1973     PrintZRegistersForStructuredAccess(rt_code,
1974                                        q_index,
1975                                        reg_count,
1976                                        pred,
1977                                        format);
1978     if (pred == 0) {
1979       // This register chunk has no active lanes. The loop below would print
1980       // nothing, so leave a blank line to keep structures grouped together.
1981       fprintf(stream_, "#\n");
1982       continue;
1983     }
1984     for (int i = 0; i < lanes_per_q; i++) {
1985       uint16_t access = 1 << (i * esize_in_bytes);
1986       int lane = (q_index * lanes_per_q) + i;
1987       // Skip inactive lanes.
1988       if ((pred & access) == 0) continue;
1989       pred = PrintPartialAccess(access,
1990                                 pred,
1991                                 reg_count,
1992                                 msize_in_bytes,
1993                                 op,
1994                                 addr.GetStructAddress(lane));
1995     }
1996   }
1997 
1998   // We print the whole register, even for stores.
1999   for (int i = 0; i < reg_count; i++) {
2000     vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged();
2001   }
2002 }
2003 
2004 void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) {
2005   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
2006 
2007   // Scalar-format accesses are split into separate chunks, each of which uses a
2008   // simple format:
2009   //   "#  p{code}<15:0>: 0b{value} -> {address}"
2010   //   "# p{code}<31:16>: 0b{value} -> {address + 2}"
2011   //   "# p{code}<47:32>: 0b{value} -> {address + 4}"
2012   // etc
2013 
2014   int vl = GetVectorLengthInBits();
2015   VIXL_ASSERT((vl % kQRegSize) == 0);
2016   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
2017     // Suppress the newline, so the access annotation goes on the same line.
2018     PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, "");
2019     fprintf(stream_,
2020             " %s %s0x%016" PRIxPTR "%s\n",
2021             op,
2022             clr_memory_address,
2023             address,
2024             clr_normal);
2025     address += kQRegSizeInBytes;
2026   }
2027 }
2028 
2029 void Simulator::PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
2030   fprintf(stream_,
2031           "#               %s: %s0x%016" PRIxPTR " %s<- %s0x%02x%s",
2032           clr_reg_name,
2033           clr_memory_address,
2034           dst,
2035           clr_normal,
2036           clr_reg_value,
2037           value,
2038           clr_normal);
2039 
2040   fprintf(stream_,
2041           " <- %s0x%016" PRIxPTR "%s\n",
2042           clr_memory_address,
2043           src,
2044           clr_normal);
2045 }
2046 
2047 void Simulator::PrintRead(int rt_code,
2048                           PrintRegisterFormat format,
2049                           uintptr_t address) {
2050   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2051   if (rt_code != kZeroRegCode) {
2052     registers_[rt_code].NotifyRegisterLogged();
2053   }
2054   PrintAccess(rt_code, format, "<-", address);
2055 }
2056 
2057 void Simulator::PrintExtendingRead(int rt_code,
2058                                    PrintRegisterFormat format,
2059                                    int access_size_in_bytes,
2060                                    uintptr_t address) {
2061   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
2062   if (access_size_in_bytes == reg_size_in_bytes) {
2063     // There is no extension here, so print a simple load.
2064     PrintRead(rt_code, format, address);
2065     return;
2066   }
2067   VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes);
2068 
2069   // For sign- and zero-extension, make it clear that the resulting register
2070   // value is different from what is loaded from memory.
2071   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2072   if (rt_code != kZeroRegCode) {
2073     registers_[rt_code].NotifyRegisterLogged();
2074   }
2075   PrintRegister(rt_code, format);
2076   PrintPartialAccess(1,
2077                      0,
2078                      1,
2079                      access_size_in_bytes,
2080                      "<-",
2081                      address,
2082                      kXRegSizeInBytes);
2083 }
2084 
2085 void Simulator::PrintVRead(int rt_code,
2086                            PrintRegisterFormat format,
2087                            uintptr_t address) {
2088   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2089   vregisters_[rt_code].NotifyRegisterLogged();
2090   PrintVAccess(rt_code, format, "<-", address);
2091 }
2092 
2093 void Simulator::PrintWrite(int rt_code,
2094                            PrintRegisterFormat format,
2095                            uintptr_t address) {
2096   // Because this trace doesn't represent a change to the source register's
2097   // value, only print the relevant part of the value.
2098   format = GetPrintRegPartial(format);
2099   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2100   if (rt_code != kZeroRegCode) {
2101     registers_[rt_code].NotifyRegisterLogged();
2102   }
2103   PrintAccess(rt_code, format, "->", address);
2104 }
2105 
2106 void Simulator::PrintVWrite(int rt_code,
2107                             PrintRegisterFormat format,
2108                             uintptr_t address) {
2109   // Because this trace doesn't represent a change to the source register's
2110   // value, only print the relevant part of the value.
2111   format = GetPrintRegPartial(format);
2112   // It only makes sense to write scalar values here. Vectors are handled by
2113   // PrintVStructAccess.
2114   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
2115   PrintVAccess(rt_code, format, "->", address);
2116 }
2117 
2118 void Simulator::PrintTakenBranch(const Instruction* target) {
2119   fprintf(stream_,
2120           "# %sBranch%s to 0x%016" PRIx64 ".\n",
2121           clr_branch_marker,
2122           clr_normal,
2123           reinterpret_cast<uint64_t>(target));
2124 }
2125 
2126 // Visitors---------------------------------------------------------------------
2127 
2128 
2129 void Simulator::Visit(Metadata* metadata, const Instruction* instr) {
2130   VIXL_ASSERT(metadata->count("form") > 0);
2131   const std::string& form = (*metadata)["form"];
2132   form_hash_ = Hash(form.c_str());
2133   const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap();
2134   FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
2135   if (it == fv->end()) {
2136     VisitUnimplemented(instr);
2137   } else {
2138     (it->second)(this, instr);
2139   }
2140 }
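// Dispatch sketch (hypothetical form string): the decoder stores the
// instruction's form name in the metadata, and the "..."_h literal used when
// building FORM_TO_VISITOR applies the same hash as Hash(), so for
// form == "mul_z_zz" the map lookup finds the visitor registered under
// "mul_z_zz"_h; a miss falls back to VisitUnimplemented.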
2141 
2142 void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2143   VectorFormat vform = instr->GetSVEVectorFormat();
2144   SimPRegister& pd = ReadPRegister(instr->GetPd());
2145   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2146   SimVRegister& zm = ReadVRegister(instr->GetRm());
2147   SimVRegister& zn = ReadVRegister(instr->GetRn());
2148 
2149   switch (form_hash_) {
2150     case "match_p_p_zz"_h:
2151       match(vform, pd, zn, zm, /* negate_match = */ false);
2152       break;
2153     case "nmatch_p_p_zz"_h:
2154       match(vform, pd, zn, zm, /* negate_match = */ true);
2155       break;
2156     default:
2157       VIXL_UNIMPLEMENTED();
2158   }
2159   mov_zeroing(pd, pg, pd);
2160   PredTest(vform, pg, pd);
2161 }
2162 
2163 void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) {
2164   VectorFormat vform = instr->GetSVEVectorFormat();
2165   SimPRegister& pd = ReadPRegister(instr->GetPd());
2166   uint64_t src1 = ReadXRegister(instr->GetRn());
2167   uint64_t src2 = ReadXRegister(instr->GetRm());
2168 
2169   uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1);
2170   absdiff >>= LaneSizeInBytesLog2FromFormat(vform);
2171 
2172   bool no_conflict = false;
2173   switch (form_hash_) {
2174     case "whilerw_p_rr"_h:
2175       no_conflict = (absdiff == 0);
2176       break;
2177     case "whilewr_p_rr"_h:
2178       no_conflict = (absdiff == 0) || (src2 <= src1);
2179       break;
2180     default:
2181       VIXL_UNIMPLEMENTED();
2182   }
2183 
2184   LogicPRegister dst(pd);
2185   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2186     dst.SetActive(vform,
2187                   i,
2188                   no_conflict || (static_cast<uint64_t>(i) < absdiff));
2189   }
2190 
2191   PredTest(vform, GetPTrue(), pd);
2192 }
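// Worked example (hypothetical register values): for the "whilewr" form with
// S-sized lanes, src1 == 0x1000 and src2 == 0x1008 give
// absdiff == (0x1008 - 0x1000) >> 2 == 2 and src2 > src1, so no_conflict is
// false and only lanes 0 and 1 of pd become active.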
2193 
2194 void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) {
2195   VIXL_ASSERT(form_hash_ == "ext_z_zi_con"_h);
2196 
2197   SimVRegister& zd = ReadVRegister(instr->GetRd());
2198   SimVRegister& zn = ReadVRegister(instr->GetRn());
2199   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
2200 
2201   int index = instr->GetSVEExtractImmediate();
2202   int vl = GetVectorLengthInBytes();
2203   index = (index >= vl) ? 0 : index;
2204 
2205   ext(kFormatVnB, zd, zn, zn2, index);
2206 }
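// Illustration (hypothetical 128-bit vector length): an extract immediate of 3
// yields bytes 3..15 of zn followed by bytes 0..2 of zn2. An immediate that is
// >= VL is treated as 0 above, which simply copies zn.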
2207 
2208 void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) {
2209   SimVRegister& zd = ReadVRegister(instr->GetRd());
2210   SimVRegister& zm = ReadVRegister(instr->GetRm());
2211   SimVRegister& zn = ReadVRegister(instr->GetRn());
2212 
2213   switch (form_hash_) {
2214     case "histseg_z_zz"_h:
2215       if (instr->GetSVEVectorFormat() == kFormatVnB) {
2216         histogram(kFormatVnB,
2217                   zd,
2218                   GetPTrue(),
2219                   zn,
2220                   zm,
2221                   /* do_segmented = */ true);
2222       } else {
2223         VIXL_UNIMPLEMENTED();
2224       }
2225       break;
2226     case "pmul_z_zz"_h:
2227       pmul(kFormatVnB, zd, zn, zm);
2228       break;
2229     default:
2230       VIXL_UNIMPLEMENTED();
2231   }
2232 }
2233 
2234 void Simulator::SimulateSVEMulIndex(const Instruction* instr) {
2235   VectorFormat vform = instr->GetSVEVectorFormat();
2236   SimVRegister& zd = ReadVRegister(instr->GetRd());
2237   SimVRegister& zn = ReadVRegister(instr->GetRn());
2238 
2239   // The encoding for B- and H-sized lanes is redefined to carry the most
2240   // significant bit of the index for H-sized lanes. B-sized lanes are not
2241   // supported.
2242   if (vform == kFormatVnB) vform = kFormatVnH;
2243 
2244   VIXL_ASSERT((form_hash_ == "mul_z_zzi_d"_h) ||
2245               (form_hash_ == "mul_z_zzi_h"_h) ||
2246               (form_hash_ == "mul_z_zzi_s"_h));
2247 
2248   SimVRegister temp;
2249   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2250   mul(vform, zd, zn, temp);
2251 }
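// The indexed forms here (and in the helpers below) reuse the plain vector
// logic: the selected zm element is broadcast within each 128-bit segment and
// the result is then multiplied lane-wise. Sketch for S-sized lanes with
// index 1 (hypothetical lane values per segment):
//
//   zm segment:      { a3, a2, a1, a0 }
//   temp after dup:  { a1, a1, a1, a1 }
//   zd = zn * temp   // matches the by-element form's per-segment indexing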
2252 
2253 void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) {
2254   VectorFormat vform = instr->GetSVEVectorFormat();
2255   SimVRegister& zda = ReadVRegister(instr->GetRd());
2256   SimVRegister& zn = ReadVRegister(instr->GetRn());
2257 
2258   // The encoding for B- and H-sized lanes is redefined to carry the most
2259   // significant bit of the index for H-sized lanes. B-sized lanes are not
2260   // supported.
2261   if (vform == kFormatVnB) vform = kFormatVnH;
2262 
2263   VIXL_ASSERT(
2264       (form_hash_ == "mla_z_zzzi_d"_h) || (form_hash_ == "mla_z_zzzi_h"_h) ||
2265       (form_hash_ == "mla_z_zzzi_s"_h) || (form_hash_ == "mls_z_zzzi_d"_h) ||
2266       (form_hash_ == "mls_z_zzzi_h"_h) || (form_hash_ == "mls_z_zzzi_s"_h));
2267 
2268   SimVRegister temp;
2269   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2270   if (instr->ExtractBit(10) == 0) {
2271     mla(vform, zda, zda, zn, temp);
2272   } else {
2273     mls(vform, zda, zda, zn, temp);
2274   }
2275 }
2276 
2277 void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) {
2278   VectorFormat vform = instr->GetSVEVectorFormat();
2279   SimVRegister& zd = ReadVRegister(instr->GetRd());
2280   SimVRegister& zn = ReadVRegister(instr->GetRn());
2281 
2282   // The encoding for B- and H-sized lanes is redefined to carry the most
2283   // significant bit of the index for H-sized lanes. B-sized lanes are not
2284   // supported.
2285   if (vform == kFormatVnB) {
2286     vform = kFormatVnH;
2287   }
2288 
2289   SimVRegister temp;
2290   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2291   switch (form_hash_) {
2292     case "sqdmulh_z_zzi_h"_h:
2293     case "sqdmulh_z_zzi_s"_h:
2294     case "sqdmulh_z_zzi_d"_h:
2295       sqdmulh(vform, zd, zn, temp);
2296       break;
2297     case "sqrdmulh_z_zzi_h"_h:
2298     case "sqrdmulh_z_zzi_s"_h:
2299     case "sqrdmulh_z_zzi_d"_h:
2300       sqrdmulh(vform, zd, zn, temp);
2301       break;
2302     default:
2303       VIXL_UNIMPLEMENTED();
2304   }
2305 }
2306 
2307 void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
2308   VectorFormat vform = instr->GetSVEVectorFormat();
2309   SimVRegister& zd = ReadVRegister(instr->GetRd());
2310   SimVRegister& zn = ReadVRegister(instr->GetRn());
2311 
2312   SimVRegister temp, zm_idx, zn_b, zn_t;
2313   // Instead of calling the indexed form of the instruction logic, we call the
2314   // vector form, which can reuse existing function logic without modification.
2315   // Select the specified elements based on the index input, then pack them
2316   // into the corresponding positions.
2317   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2318   dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
2319   pack_even_elements(vform_half, zm_idx, temp);
2320 
2321   pack_even_elements(vform_half, zn_b, zn);
2322   pack_odd_elements(vform_half, zn_t, zn);
2323 
2324   switch (form_hash_) {
2325     case "smullb_z_zzi_s"_h:
2326     case "smullb_z_zzi_d"_h:
2327       smull(vform, zd, zn_b, zm_idx);
2328       break;
2329     case "smullt_z_zzi_s"_h:
2330     case "smullt_z_zzi_d"_h:
2331       smull(vform, zd, zn_t, zm_idx);
2332       break;
2333     case "sqdmullb_z_zzi_d"_h:
2334       sqdmull(vform, zd, zn_b, zm_idx);
2335       break;
2336     case "sqdmullt_z_zzi_d"_h:
2337       sqdmull(vform, zd, zn_t, zm_idx);
2338       break;
2339     case "umullb_z_zzi_s"_h:
2340     case "umullb_z_zzi_d"_h:
2341       umull(vform, zd, zn_b, zm_idx);
2342       break;
2343     case "umullt_z_zzi_s"_h:
2344     case "umullt_z_zzi_d"_h:
2345       umull(vform, zd, zn_t, zm_idx);
2346       break;
2347     case "sqdmullb_z_zzi_s"_h:
2348       sqdmull(vform, zd, zn_b, zm_idx);
2349       break;
2350     case "sqdmullt_z_zzi_s"_h:
2351       sqdmull(vform, zd, zn_t, zm_idx);
2352       break;
2353     case "smlalb_z_zzzi_s"_h:
2354     case "smlalb_z_zzzi_d"_h:
2355       smlal(vform, zd, zn_b, zm_idx);
2356       break;
2357     case "smlalt_z_zzzi_s"_h:
2358     case "smlalt_z_zzzi_d"_h:
2359       smlal(vform, zd, zn_t, zm_idx);
2360       break;
2361     case "smlslb_z_zzzi_s"_h:
2362     case "smlslb_z_zzzi_d"_h:
2363       smlsl(vform, zd, zn_b, zm_idx);
2364       break;
2365     case "smlslt_z_zzzi_s"_h:
2366     case "smlslt_z_zzzi_d"_h:
2367       smlsl(vform, zd, zn_t, zm_idx);
2368       break;
2369     case "umlalb_z_zzzi_s"_h:
2370     case "umlalb_z_zzzi_d"_h:
2371       umlal(vform, zd, zn_b, zm_idx);
2372       break;
2373     case "umlalt_z_zzzi_s"_h:
2374     case "umlalt_z_zzzi_d"_h:
2375       umlal(vform, zd, zn_t, zm_idx);
2376       break;
2377     case "umlslb_z_zzzi_s"_h:
2378     case "umlslb_z_zzzi_d"_h:
2379       umlsl(vform, zd, zn_b, zm_idx);
2380       break;
2381     case "umlslt_z_zzzi_s"_h:
2382     case "umlslt_z_zzzi_d"_h:
2383       umlsl(vform, zd, zn_t, zm_idx);
2384       break;
2385     default:
2386       VIXL_UNIMPLEMENTED();
2387   }
2388 }
2389 
2390 void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) {
2391   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2392   SimVRegister& zd = ReadVRegister(instr->GetRd());
2393   SimVRegister& zn = ReadVRegister(instr->GetRn());
2394   SimVRegister result, zd_b;
2395 
2396   pack_even_elements(kFormatVnH, zd_b, zd);
2397 
2398   switch (form_hash_) {
2399     case "fcvtnt_z_p_z_s2h"_h:
2400       fcvt(kFormatVnH, kFormatVnS, result, pg, zn);
2401       pack_even_elements(kFormatVnH, result, result);
2402       zip1(kFormatVnH, result, zd_b, result);
2403       break;
2404     default:
2405       VIXL_UNIMPLEMENTED();
2406   }
2407   mov_merging(kFormatVnS, zd, pg, result);
2408 }
2409 
2410 void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) {
2411   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2412   SimVRegister& zd = ReadVRegister(instr->GetRd());
2413   SimVRegister& zn = ReadVRegister(instr->GetRn());
2414   SimVRegister result, zero, zd_b;
2415 
2416   zero.Clear();
2417   pack_even_elements(kFormatVnS, zd_b, zd);
2418 
2419   switch (form_hash_) {
2420     case "fcvtnt_z_p_z_d2s"_h:
2421       fcvt(kFormatVnS, kFormatVnD, result, pg, zn);
2422       pack_even_elements(kFormatVnS, result, result);
2423       zip1(kFormatVnS, result, zd_b, result);
2424       break;
2425     case "fcvtx_z_p_z_d2s"_h:
2426       fcvtxn(kFormatVnS, result, zn);
2427       zip1(kFormatVnS, result, result, zero);
2428       break;
2429     case "fcvtxnt_z_p_z_d2s"_h:
2430       fcvtxn(kFormatVnS, result, zn);
2431       zip1(kFormatVnS, result, zd_b, result);
2432       break;
2433     default:
2434       VIXL_UNIMPLEMENTED();
2435   }
2436   mov_merging(kFormatVnD, zd, pg, result);
2437 }
2438 
2439 void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) {
2440   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2441   SimVRegister& zd = ReadVRegister(instr->GetRd());
2442   SimVRegister& zn = ReadVRegister(instr->GetRn());
2443   SimVRegister result;
2444 
2445   switch (form_hash_) {
2446     case "fcvtlt_z_p_z_h2s"_h:
2447       ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes);
2448       fcvt(kFormatVnS, kFormatVnH, zd, pg, result);
2449       break;
2450     case "fcvtlt_z_p_z_s2d"_h:
2451       ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes);
2452       fcvt(kFormatVnD, kFormatVnS, zd, pg, result);
2453       break;
2454     default:
2455       VIXL_UNIMPLEMENTED();
2456   }
2457 }
2458 
2459 void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
2460   VectorFormat vform = instr->GetSVEVectorFormat();
2461   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2462   SimVRegister& zd = ReadVRegister(instr->GetRd());
2463   SimVRegister& zn = ReadVRegister(instr->GetRn());
2464   SimVRegister result;
2465 
2466   if (vform != kFormatVnS) {
2467     VIXL_UNIMPLEMENTED();
2468   }
2469 
2470   switch (form_hash_) {
2471     case "urecpe_z_p_z"_h:
2472       urecpe(vform, result, zn);
2473       break;
2474     case "ursqrte_z_p_z"_h:
2475       ursqrte(vform, result, zn);
2476       break;
2477     default:
2478       VIXL_UNIMPLEMENTED();
2479   }
2480   mov_merging(vform, zd, pg, result);
2481 }
2482 
2483 void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) {
2484   VectorFormat vform = instr->GetSVEVectorFormat();
2485   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2486   SimVRegister& zd = ReadVRegister(instr->GetRd());
2487   SimVRegister& zn = ReadVRegister(instr->GetRn());
2488   SimVRegister result;
2489 
2490   switch (form_hash_) {
2491     case "flogb_z_p_z"_h:
2492       vform = instr->GetSVEVectorFormat(17);
2493       flogb(vform, result, zn);
2494       break;
2495     case "sqabs_z_p_z"_h:
2496       abs(vform, result, zn).SignedSaturate(vform);
2497       break;
2498     case "sqneg_z_p_z"_h:
2499       neg(vform, result, zn).SignedSaturate(vform);
2500       break;
2501     default:
2502       VIXL_UNIMPLEMENTED();
2503   }
2504   mov_merging(vform, zd, pg, result);
2505 }
2506 
2507 void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2508   VectorFormat vform = instr->GetSVEVectorFormat();
2509   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2510   SimVRegister& zd = ReadVRegister(instr->GetRd());
2511   SimVRegister& zm = ReadVRegister(instr->GetRm());
2512   SimVRegister& zn = ReadVRegister(instr->GetRn());
2513   SimVRegister result;
2514 
2515   VIXL_ASSERT(form_hash_ == "histcnt_z_p_zz"_h);
2516   if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
2517     histogram(vform, result, pg, zn, zm);
2518     mov_zeroing(vform, zd, pg, result);
2519   } else {
2520     VIXL_UNIMPLEMENTED();
2521   }
2522 }
2523 
2524 void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
2525   VectorFormat vform = instr->GetSVEVectorFormat();
2526   SimVRegister& zd = ReadVRegister(instr->GetRd());
2527   SimVRegister& zm = ReadVRegister(instr->GetRm());
2528   SimVRegister& zn = ReadVRegister(instr->GetRn());
2529   SimVRegister result;
2530   bool do_bext = false;
2531 
2532   switch (form_hash_) {
2533     case "bdep_z_zz"_h:
2534       bdep(vform, zd, zn, zm);
2535       break;
2536     case "bext_z_zz"_h:
2537       do_bext = true;
2538       VIXL_FALLTHROUGH();
2539     case "bgrp_z_zz"_h:
2540       bgrp(vform, zd, zn, zm, do_bext);
2541       break;
2542     case "eorbt_z_zz"_h:
2543       rotate_elements_right(vform, result, zm, 1);
2544       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2545       mov_alternating(vform, zd, result, 0);
2546       break;
2547     case "eortb_z_zz"_h:
2548       rotate_elements_right(vform, result, zm, -1);
2549       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2550       mov_alternating(vform, zd, result, 1);
2551       break;
2552     case "mul_z_zz"_h:
2553       mul(vform, zd, zn, zm);
2554       break;
2555     case "smulh_z_zz"_h:
2556       smulh(vform, zd, zn, zm);
2557       break;
2558     case "sqdmulh_z_zz"_h:
2559       sqdmulh(vform, zd, zn, zm);
2560       break;
2561     case "sqrdmulh_z_zz"_h:
2562       sqrdmulh(vform, zd, zn, zm);
2563       break;
2564     case "umulh_z_zz"_h:
2565       umulh(vform, zd, zn, zm);
2566       break;
2567     default:
2568       VIXL_UNIMPLEMENTED();
2569   }
2570 }
2571 
2572 void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) {
2573   VectorFormat vform = instr->GetSVEVectorFormat();
2574   SimVRegister& zd = ReadVRegister(instr->GetRd());
2575   SimVRegister& zm = ReadVRegister(instr->GetRm());
2576   SimVRegister& zn = ReadVRegister(instr->GetRn());
2577 
2578   SimVRegister zm_b, zm_t;
2579   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2580   pack_even_elements(vform_half, zm_b, zm);
2581   pack_odd_elements(vform_half, zm_t, zm);
2582 
2583   switch (form_hash_) {
2584     case "saddwb_z_zz"_h:
2585       saddw(vform, zd, zn, zm_b);
2586       break;
2587     case "saddwt_z_zz"_h:
2588       saddw(vform, zd, zn, zm_t);
2589       break;
2590     case "ssubwb_z_zz"_h:
2591       ssubw(vform, zd, zn, zm_b);
2592       break;
2593     case "ssubwt_z_zz"_h:
2594       ssubw(vform, zd, zn, zm_t);
2595       break;
2596     case "uaddwb_z_zz"_h:
2597       uaddw(vform, zd, zn, zm_b);
2598       break;
2599     case "uaddwt_z_zz"_h:
2600       uaddw(vform, zd, zn, zm_t);
2601       break;
2602     case "usubwb_z_zz"_h:
2603       usubw(vform, zd, zn, zm_b);
2604       break;
2605     case "usubwt_z_zz"_h:
2606       usubw(vform, zd, zn, zm_t);
2607       break;
2608     default:
2609       VIXL_UNIMPLEMENTED();
2610   }
2611 }
2612 
2613 void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) {
2614   SimVRegister& zd = ReadVRegister(instr->GetRd());
2615   SimVRegister& zn = ReadVRegister(instr->GetRn());
2616 
2617   std::pair<int, int> shift_and_lane_size =
2618       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2619   int lane_size = shift_and_lane_size.second;
2620   VIXL_ASSERT((lane_size >= 0) &&
2621               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2622   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2623   int shift_dist = shift_and_lane_size.first;
2624 
2625   switch (form_hash_) {
2626     case "sli_z_zzi"_h:
2627       // Shift distance is computed differently for left shifts. Convert the
2628       // result.
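      // For example, with H-sized lanes (lane_size == 1), a decoded distance
      // of 3 becomes a left shift of (8 << 1) - 3 = 13.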
2629       shift_dist = (8 << lane_size) - shift_dist;
2630       sli(vform, zd, zn, shift_dist);
2631       break;
2632     case "sri_z_zzi"_h:
2633       sri(vform, zd, zn, shift_dist);
2634       break;
2635     default:
2636       VIXL_UNIMPLEMENTED();
2637   }
2638 }
2639 
2640 void Simulator::SimulateSVENarrow(const Instruction* instr) {
2641   SimVRegister& zd = ReadVRegister(instr->GetRd());
2642   SimVRegister& zn = ReadVRegister(instr->GetRn());
2643   SimVRegister result;
2644 
2645   std::pair<int, int> shift_and_lane_size =
2646       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2647   int lane_size = shift_and_lane_size.second;
2648   VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
2649               (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)));
2650   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2651   int right_shift_dist = shift_and_lane_size.first;
2652   bool top = false;
2653 
2654   switch (form_hash_) {
2655     case "sqxtnt_z_zz"_h:
2656       top = true;
2657       VIXL_FALLTHROUGH();
2658     case "sqxtnb_z_zz"_h:
2659       sqxtn(vform, result, zn);
2660       break;
2661     case "sqxtunt_z_zz"_h:
2662       top = true;
2663       VIXL_FALLTHROUGH();
2664     case "sqxtunb_z_zz"_h:
2665       sqxtun(vform, result, zn);
2666       break;
2667     case "uqxtnt_z_zz"_h:
2668       top = true;
2669       VIXL_FALLTHROUGH();
2670     case "uqxtnb_z_zz"_h:
2671       uqxtn(vform, result, zn);
2672       break;
2673     case "rshrnt_z_zi"_h:
2674       top = true;
2675       VIXL_FALLTHROUGH();
2676     case "rshrnb_z_zi"_h:
2677       rshrn(vform, result, zn, right_shift_dist);
2678       break;
2679     case "shrnt_z_zi"_h:
2680       top = true;
2681       VIXL_FALLTHROUGH();
2682     case "shrnb_z_zi"_h:
2683       shrn(vform, result, zn, right_shift_dist);
2684       break;
2685     case "sqrshrnt_z_zi"_h:
2686       top = true;
2687       VIXL_FALLTHROUGH();
2688     case "sqrshrnb_z_zi"_h:
2689       sqrshrn(vform, result, zn, right_shift_dist);
2690       break;
2691     case "sqrshrunt_z_zi"_h:
2692       top = true;
2693       VIXL_FALLTHROUGH();
2694     case "sqrshrunb_z_zi"_h:
2695       sqrshrun(vform, result, zn, right_shift_dist);
2696       break;
2697     case "sqshrnt_z_zi"_h:
2698       top = true;
2699       VIXL_FALLTHROUGH();
2700     case "sqshrnb_z_zi"_h:
2701       sqshrn(vform, result, zn, right_shift_dist);
2702       break;
2703     case "sqshrunt_z_zi"_h:
2704       top = true;
2705       VIXL_FALLTHROUGH();
2706     case "sqshrunb_z_zi"_h:
2707       sqshrun(vform, result, zn, right_shift_dist);
2708       break;
2709     case "uqrshrnt_z_zi"_h:
2710       top = true;
2711       VIXL_FALLTHROUGH();
2712     case "uqrshrnb_z_zi"_h:
2713       uqrshrn(vform, result, zn, right_shift_dist);
2714       break;
2715     case "uqshrnt_z_zi"_h:
2716       top = true;
2717       VIXL_FALLTHROUGH();
2718     case "uqshrnb_z_zi"_h:
2719       uqshrn(vform, result, zn, right_shift_dist);
2720       break;
2721     default:
2722       VIXL_UNIMPLEMENTED();
2723   }
2724 
2725   if (top) {
2726     // Keep even elements, replace odd elements with the results.
2727     xtn(vform, zd, zd);
2728     zip1(vform, zd, zd, result);
2729   } else {
2730     // Zero odd elements, replace even elements with the results.
2731     SimVRegister zero;
2732     zero.Clear();
2733     zip1(vform, zd, result, zero);
2734   }
2735 }
2736 
2737 void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) {
2738   VectorFormat vform = instr->GetSVEVectorFormat();
2739   SimVRegister& zd = ReadVRegister(instr->GetRd());
2740   SimVRegister& zm = ReadVRegister(instr->GetRm());
2741   SimVRegister& zn = ReadVRegister(instr->GetRn());
2742   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2743 
2744   // Construct temporary registers containing the even (bottom) and odd (top)
2745   // elements.
2746   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2747   pack_even_elements(vform_half, zn_b, zn);
2748   pack_even_elements(vform_half, zm_b, zm);
2749   pack_odd_elements(vform_half, zn_t, zn);
2750   pack_odd_elements(vform_half, zm_t, zm);
2751 
2752   switch (form_hash_) {
2753     case "sabdlb_z_zz"_h:
2754       sabdl(vform, zd, zn_b, zm_b);
2755       break;
2756     case "sabdlt_z_zz"_h:
2757       sabdl(vform, zd, zn_t, zm_t);
2758       break;
2759     case "saddlb_z_zz"_h:
2760       saddl(vform, zd, zn_b, zm_b);
2761       break;
2762     case "saddlbt_z_zz"_h:
2763       saddl(vform, zd, zn_b, zm_t);
2764       break;
2765     case "saddlt_z_zz"_h:
2766       saddl(vform, zd, zn_t, zm_t);
2767       break;
2768     case "ssublb_z_zz"_h:
2769       ssubl(vform, zd, zn_b, zm_b);
2770       break;
2771     case "ssublbt_z_zz"_h:
2772       ssubl(vform, zd, zn_b, zm_t);
2773       break;
2774     case "ssublt_z_zz"_h:
2775       ssubl(vform, zd, zn_t, zm_t);
2776       break;
2777     case "ssubltb_z_zz"_h:
2778       ssubl(vform, zd, zn_t, zm_b);
2779       break;
2780     case "uabdlb_z_zz"_h:
2781       uabdl(vform, zd, zn_b, zm_b);
2782       break;
2783     case "uabdlt_z_zz"_h:
2784       uabdl(vform, zd, zn_t, zm_t);
2785       break;
2786     case "uaddlb_z_zz"_h:
2787       uaddl(vform, zd, zn_b, zm_b);
2788       break;
2789     case "uaddlt_z_zz"_h:
2790       uaddl(vform, zd, zn_t, zm_t);
2791       break;
2792     case "usublb_z_zz"_h:
2793       usubl(vform, zd, zn_b, zm_b);
2794       break;
2795     case "usublt_z_zz"_h:
2796       usubl(vform, zd, zn_t, zm_t);
2797       break;
2798     case "sabalb_z_zzz"_h:
2799       sabal(vform, zd, zn_b, zm_b);
2800       break;
2801     case "sabalt_z_zzz"_h:
2802       sabal(vform, zd, zn_t, zm_t);
2803       break;
2804     case "uabalb_z_zzz"_h:
2805       uabal(vform, zd, zn_b, zm_b);
2806       break;
2807     case "uabalt_z_zzz"_h:
2808       uabal(vform, zd, zn_t, zm_t);
2809       break;
2810     default:
2811       VIXL_UNIMPLEMENTED();
2812   }
2813 }
2814 
2815 void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
2816   VectorFormat vform = instr->GetSVEVectorFormat();
2817   SimVRegister& zd = ReadVRegister(instr->GetRd());
2818   SimVRegister& zm = ReadVRegister(instr->GetRm());
2819   SimVRegister& zn = ReadVRegister(instr->GetRn());
2820   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
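  // Construct temporary registers containing the even (bottom) and odd (top)
  // elements of the source registers.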
2821   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2822   pack_even_elements(vform_half, zn_b, zn);
2823   pack_even_elements(vform_half, zm_b, zm);
2824   pack_odd_elements(vform_half, zn_t, zn);
2825   pack_odd_elements(vform_half, zm_t, zm);
2826 
2827   switch (form_hash_) {
2828     case "pmullb_z_zz"_h:
2829       // '00' is reserved for Q-sized lane.
2830       if (vform == kFormatVnB) {
2831         VIXL_UNIMPLEMENTED();
2832       }
2833       pmull(vform, zd, zn_b, zm_b);
2834       break;
2835     case "pmullt_z_zz"_h:
2836       // '00' is reserved for Q-sized lane.
2837       if (vform == kFormatVnB) {
2838         VIXL_UNIMPLEMENTED();
2839       }
2840       pmull(vform, zd, zn_t, zm_t);
2841       break;
2842     case "smullb_z_zz"_h:
2843       smull(vform, zd, zn_b, zm_b);
2844       break;
2845     case "smullt_z_zz"_h:
2846       smull(vform, zd, zn_t, zm_t);
2847       break;
2848     case "sqdmullb_z_zz"_h:
2849       sqdmull(vform, zd, zn_b, zm_b);
2850       break;
2851     case "sqdmullt_z_zz"_h:
2852       sqdmull(vform, zd, zn_t, zm_t);
2853       break;
2854     case "umullb_z_zz"_h:
2855       umull(vform, zd, zn_b, zm_b);
2856       break;
2857     case "umullt_z_zz"_h:
2858       umull(vform, zd, zn_t, zm_t);
2859       break;
2860     default:
2861       VIXL_UNIMPLEMENTED();
2862   }
2863 }
2864 
2865 void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) {
2866   SimVRegister& zd = ReadVRegister(instr->GetRd());
2867   SimVRegister& zm = ReadVRegister(instr->GetRm());
2868   SimVRegister& zn = ReadVRegister(instr->GetRn());
2869   SimVRegister result;
2870   bool top = false;
2871 
2872   VectorFormat vform_src = instr->GetSVEVectorFormat();
2873   if (vform_src == kFormatVnB) {
2874     VIXL_UNIMPLEMENTED();
2875   }
2876   VectorFormat vform = VectorFormatHalfWidth(vform_src);
2877 
2878   switch (form_hash_) {
2879     case "addhnt_z_zz"_h:
2880       top = true;
2881       VIXL_FALLTHROUGH();
2882     case "addhnb_z_zz"_h:
2883       addhn(vform, result, zn, zm);
2884       break;
2885     case "raddhnt_z_zz"_h:
2886       top = true;
2887       VIXL_FALLTHROUGH();
2888     case "raddhnb_z_zz"_h:
2889       raddhn(vform, result, zn, zm);
2890       break;
2891     case "rsubhnt_z_zz"_h:
2892       top = true;
2893       VIXL_FALLTHROUGH();
2894     case "rsubhnb_z_zz"_h:
2895       rsubhn(vform, result, zn, zm);
2896       break;
2897     case "subhnt_z_zz"_h:
2898       top = true;
2899       VIXL_FALLTHROUGH();
2900     case "subhnb_z_zz"_h:
2901       subhn(vform, result, zn, zm);
2902       break;
2903     default:
2904       VIXL_UNIMPLEMENTED();
2905   }
2906 
2907   if (top) {
2908     // Keep even elements, replace odd elements with the results.
2909     xtn(vform, zd, zd);
2910     zip1(vform, zd, zd, result);
2911   } else {
2912     // Zero odd elements, replace even elements with the results.
2913     SimVRegister zero;
2914     zero.Clear();
2915     zip1(vform, zd, result, zero);
2916   }
2917 }
2918 
2919 void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) {
2920   SimVRegister& zd = ReadVRegister(instr->GetRd());
2921   SimVRegister& zn = ReadVRegister(instr->GetRn());
2922   SimVRegister zn_b, zn_t;
2923 
2924   std::pair<int, int> shift_and_lane_size =
2925       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2926   int lane_size = shift_and_lane_size.second;
2927   VIXL_ASSERT((lane_size >= 0) &&
2928               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2929   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1);
2930   int right_shift_dist = shift_and_lane_size.first;
2931   int left_shift_dist = (8 << lane_size) - right_shift_dist;
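  // The destination lanes are twice the width of the source lanes (hence
  // lane_size + 1), and the encoded right-shift distance is converted into
  // the equivalent left shift.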
2932 
2933   // Construct temporary registers containing the even (bottom) and odd (top)
2934   // elements.
2935   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2936   pack_even_elements(vform_half, zn_b, zn);
2937   pack_odd_elements(vform_half, zn_t, zn);
2938 
2939   switch (form_hash_) {
2940     case "sshllb_z_zi"_h:
2941       sshll(vform, zd, zn_b, left_shift_dist);
2942       break;
2943     case "sshllt_z_zi"_h:
2944       sshll(vform, zd, zn_t, left_shift_dist);
2945       break;
2946     case "ushllb_z_zi"_h:
2947       ushll(vform, zd, zn_b, left_shift_dist);
2948       break;
2949     case "ushllt_z_zi"_h:
2950       ushll(vform, zd, zn_t, left_shift_dist);
2951       break;
2952     default:
2953       VIXL_UNIMPLEMENTED();
2954   }
2955 }
2956 
2957 void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) {
2958   VectorFormat vform = instr->GetSVEVectorFormat();
2959   SimVRegister& zda = ReadVRegister(instr->GetRd());
2960   SimVRegister& zn = ReadVRegister(instr->GetRn());
2961   unsigned zm_code = instr->GetRm();
2962   int index = -1;
2963   bool is_mla = false;
2964 
2965   switch (form_hash_) {
2966     case "sqrdmlah_z_zzz"_h:
2967       is_mla = true;
2968       VIXL_FALLTHROUGH();
2969     case "sqrdmlsh_z_zzz"_h:
2970       // Nothing to do.
2971       break;
2972     case "sqrdmlah_z_zzzi_h"_h:
2973       is_mla = true;
2974       VIXL_FALLTHROUGH();
2975     case "sqrdmlsh_z_zzzi_h"_h:
2976       vform = kFormatVnH;
2977       index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
2978       zm_code = instr->ExtractBits(18, 16);
2979       break;
2980     case "sqrdmlah_z_zzzi_s"_h:
2981       is_mla = true;
2982       VIXL_FALLTHROUGH();
2983     case "sqrdmlsh_z_zzzi_s"_h:
2984       vform = kFormatVnS;
2985       index = instr->ExtractBits(20, 19);
2986       zm_code = instr->ExtractBits(18, 16);
2987       break;
2988     case "sqrdmlah_z_zzzi_d"_h:
2989       is_mla = true;
2990       VIXL_FALLTHROUGH();
2991     case "sqrdmlsh_z_zzzi_d"_h:
2992       vform = kFormatVnD;
2993       index = instr->ExtractBit(20);
2994       zm_code = instr->ExtractBits(19, 16);
2995       break;
2996     default:
2997       VIXL_UNIMPLEMENTED();
2998   }
2999 
3000   SimVRegister& zm = ReadVRegister(zm_code);
3001   SimVRegister zm_idx;
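  // For the indexed forms, the selected zm element is duplicated across each
  // 128-bit segment before the multiply-accumulate.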
3002   if (index >= 0) {
3003     dup_elements_to_segments(vform, zm_idx, zm, index);
3004   }
3005 
3006   if (is_mla) {
3007     sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm);
3008   } else {
3009     sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm);
3010   }
3011 }
3012 
3013 void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
3014   SimVRegister& zda = ReadVRegister(instr->GetRd());
3015   SimVRegister& zn = ReadVRegister(instr->GetRn());
3016   SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
3017 
3018   SimVRegister temp, zm_idx, zn_b, zn_t;
3019   Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
3020   dup_elements_to_segments(kFormatVnS, temp, zm, index);
3021   pack_even_elements(kFormatVnS, zm_idx, temp);
3022   pack_even_elements(kFormatVnS, zn_b, zn);
3023   pack_odd_elements(kFormatVnS, zn_t, zn);
3024 
3025   switch (form_hash_) {
3026     case "sqdmlalb_z_zzzi_d"_h:
3027       sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
3028       break;
3029     case "sqdmlalt_z_zzzi_d"_h:
3030       sqdmlal(kFormatVnD, zda, zn_t, zm_idx);
3031       break;
3032     case "sqdmlslb_z_zzzi_d"_h:
3033       sqdmlsl(kFormatVnD, zda, zn_b, zm_idx);
3034       break;
3035     case "sqdmlslt_z_zzzi_d"_h:
3036       sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
3037       break;
3038     default:
3039       VIXL_UNIMPLEMENTED();
3040   }
3041 }
3042 
3043 void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
3044   SimVRegister& zda = ReadVRegister(instr->GetRd());
3045   SimVRegister& zm = ReadVRegister(instr->GetRm());
3046   SimVRegister& zn = ReadVRegister(instr->GetRn());
3047 
3048   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
3049   pack_even_elements(kFormatVnH, zn_b, zn);
3050   pack_even_elements(kFormatVnH, zm_b, zm);
3051   pack_odd_elements(kFormatVnH, zn_t, zn);
3052   pack_odd_elements(kFormatVnH, zm_t, zm);
3053 
3054   switch (form_hash_) {
3055     case "fmlalb_z_zzz"_h:
3056       fmlal(kFormatVnS, zda, zn_b, zm_b);
3057       break;
3058     case "fmlalt_z_zzz"_h:
3059       fmlal(kFormatVnS, zda, zn_t, zm_t);
3060       break;
3061     case "fmlslb_z_zzz"_h:
3062       fmlsl(kFormatVnS, zda, zn_b, zm_b);
3063       break;
3064     case "fmlslt_z_zzz"_h:
3065       fmlsl(kFormatVnS, zda, zn_t, zm_t);
3066       break;
3067     default:
3068       VIXL_UNIMPLEMENTED();
3069   }
3070 }
3071 
3072 void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
3073   SimVRegister& zda = ReadVRegister(instr->GetRd());
3074   SimVRegister& zn = ReadVRegister(instr->GetRn());
3075   SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
3076 
3077   SimVRegister temp, zm_idx, zn_b, zn_t;
3078   Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
3079   dup_elements_to_segments(kFormatVnH, temp, zm, index);
3080   pack_even_elements(kFormatVnH, zm_idx, temp);
3081   pack_even_elements(kFormatVnH, zn_b, zn);
3082   pack_odd_elements(kFormatVnH, zn_t, zn);
3083 
3084   switch (form_hash_) {
3085     case "fmlalb_z_zzzi_s"_h:
3086       fmlal(kFormatVnS, zda, zn_b, zm_idx);
3087       break;
3088     case "fmlalt_z_zzzi_s"_h:
3089       fmlal(kFormatVnS, zda, zn_t, zm_idx);
3090       break;
3091     case "fmlslb_z_zzzi_s"_h:
3092       fmlsl(kFormatVnS, zda, zn_b, zm_idx);
3093       break;
3094     case "fmlslt_z_zzzi_s"_h:
3095       fmlsl(kFormatVnS, zda, zn_t, zm_idx);
3096       break;
3097     case "sqdmlalb_z_zzzi_s"_h:
3098       sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
3099       break;
3100     case "sqdmlalt_z_zzzi_s"_h:
3101       sqdmlal(kFormatVnS, zda, zn_t, zm_idx);
3102       break;
3103     case "sqdmlslb_z_zzzi_s"_h:
3104       sqdmlsl(kFormatVnS, zda, zn_b, zm_idx);
3105       break;
3106     case "sqdmlslt_z_zzzi_s"_h:
3107       sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
3108       break;
3109     default:
3110       VIXL_UNIMPLEMENTED();
3111   }
3112 }
3113 
3114 void Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) {
3115   VectorFormat vform = instr->GetSVEVectorFormat();
3116   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3117   SimVRegister& zda = ReadVRegister(instr->GetRd());
3118   SimVRegister& zn = ReadVRegister(instr->GetRn());
3119   SimVRegister result;
3120 
3121   switch (form_hash_) {
3122     case "sadalp_z_p_z"_h:
3123       sadalp(vform, result, zn);
3124       break;
3125     case "uadalp_z_p_z"_h:
3126       uadalp(vform, result, zn);
3127       break;
3128     default:
3129       VIXL_UNIMPLEMENTED();
3130   }
3131   mov_merging(vform, zda, pg, result);
3132 }
3133 
3134 void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) {
3135   VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD;
3136   SimVRegister& zda = ReadVRegister(instr->GetRd());
3137   SimVRegister& zm = ReadVRegister(instr->GetRm());
3138   SimVRegister& zn = ReadVRegister(instr->GetRn());
3139 
3140   SimVRegister not_zn;
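  // sbclb/sbclt subtract with carry; they are implemented as adclb/adclt
  // using the one's complement of zn.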
3141   not_(vform, not_zn, zn);
3142 
3143   switch (form_hash_) {
3144     case "adclb_z_zzz"_h:
3145       adcl(vform, zda, zn, zm, /* top = */ false);
3146       break;
3147     case "adclt_z_zzz"_h:
3148       adcl(vform, zda, zn, zm, /* top = */ true);
3149       break;
3150     case "sbclb_z_zzz"_h:
3151       adcl(vform, zda, not_zn, zm, /* top = */ false);
3152       break;
3153     case "sbclt_z_zzz"_h:
3154       adcl(vform, zda, not_zn, zm, /* top = */ true);
3155       break;
3156     default:
3157       VIXL_UNIMPLEMENTED();
3158   }
3159 }
3160 
3161 void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) {
3162   VectorFormat vform = instr->GetSVEVectorFormat();
3163   SimVRegister& zda = ReadVRegister(instr->GetRd());
3164   SimVRegister& zm = ReadVRegister(instr->GetRm());
3165   SimVRegister& zn = ReadVRegister(instr->GetRn());
3166 
3167   switch (form_hash_) {
3168     case "saba_z_zzz"_h:
3169       saba(vform, zda, zn, zm);
3170       break;
3171     case "uaba_z_zzz"_h:
3172       uaba(vform, zda, zn, zm);
3173       break;
3174     default:
3175       VIXL_UNIMPLEMENTED();
3176   }
3177 }
3178 
3179 void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) {
3180   SimVRegister& zda = ReadVRegister(instr->GetRd());
3181   SimVRegister& zn = ReadVRegister(instr->GetRn());
3182   int rot = instr->ExtractBits(11, 10) * 90;
3183   // vform and zm are only valid for the vector form of the instruction.
3184   VectorFormat vform = instr->GetSVEVectorFormat();
3185   SimVRegister& zm = ReadVRegister(instr->GetRm());
3186 
3187   // Inputs for the indexed form of the instruction.
3188   SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16));
3189   SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16));
3190   int idx_h = instr->ExtractBits(20, 19);
3191   int idx_s = instr->ExtractBit(20);
3192 
3193   switch (form_hash_) {
3194     case "cmla_z_zzz"_h:
3195       cmla(vform, zda, zda, zn, zm, rot);
3196       break;
3197     case "cmla_z_zzzi_h"_h:
3198       cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3199       break;
3200     case "cmla_z_zzzi_s"_h:
3201       cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3202       break;
3203     case "sqrdcmlah_z_zzz"_h:
3204       sqrdcmlah(vform, zda, zda, zn, zm, rot);
3205       break;
3206     case "sqrdcmlah_z_zzzi_h"_h:
3207       sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3208       break;
3209     case "sqrdcmlah_z_zzzi_s"_h:
3210       sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3211       break;
3212     default:
3213       VIXL_UNIMPLEMENTED();
3214   }
3215 }
3216 
3217 void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) {
3218   SimVRegister& zd = ReadVRegister(instr->GetRd());
3219   SimVRegister& zn = ReadVRegister(instr->GetRn());
3220 
3221   std::pair<int, int> shift_and_lane_size =
3222       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3223   int lane_size = shift_and_lane_size.second;
3224   VIXL_ASSERT((lane_size >= 0) &&
3225               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
3226   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3227   int shift_dist = shift_and_lane_size.first;
3228 
3229   switch (form_hash_) {
3230     case "srsra_z_zi"_h:
3231       srsra(vform, zd, zn, shift_dist);
3232       break;
3233     case "ssra_z_zi"_h:
3234       ssra(vform, zd, zn, shift_dist);
3235       break;
3236     case "ursra_z_zi"_h:
3237       ursra(vform, zd, zn, shift_dist);
3238       break;
3239     case "usra_z_zi"_h:
3240       usra(vform, zd, zn, shift_dist);
3241       break;
3242     default:
3243       VIXL_UNIMPLEMENTED();
3244   }
3245 }
3246 
3247 void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
3248   VectorFormat vform = instr->GetSVEVectorFormat();
3249   SimVRegister& zda = ReadVRegister(instr->GetRd());
3250   SimVRegister& zm = ReadVRegister(instr->GetRm());
3251   SimVRegister& zn = ReadVRegister(instr->GetRn());
3252 
3253   SimVRegister zero, zn_b, zm_b, zn_t, zm_t;
3254   zero.Clear();
3255 
3256   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3257   uzp1(vform_half, zn_b, zn, zero);
3258   uzp1(vform_half, zm_b, zm, zero);
3259   uzp2(vform_half, zn_t, zn, zero);
3260   uzp2(vform_half, zm_t, zm, zero);
3261 
3262   switch (form_hash_) {
3263     case "smlalb_z_zzz"_h:
3264       smlal(vform, zda, zn_b, zm_b);
3265       break;
3266     case "smlalt_z_zzz"_h:
3267       smlal(vform, zda, zn_t, zm_t);
3268       break;
3269     case "smlslb_z_zzz"_h:
3270       smlsl(vform, zda, zn_b, zm_b);
3271       break;
3272     case "smlslt_z_zzz"_h:
3273       smlsl(vform, zda, zn_t, zm_t);
3274       break;
3275     case "sqdmlalb_z_zzz"_h:
3276       sqdmlal(vform, zda, zn_b, zm_b);
3277       break;
3278     case "sqdmlalbt_z_zzz"_h:
3279       sqdmlal(vform, zda, zn_b, zm_t);
3280       break;
3281     case "sqdmlalt_z_zzz"_h:
3282       sqdmlal(vform, zda, zn_t, zm_t);
3283       break;
3284     case "sqdmlslb_z_zzz"_h:
3285       sqdmlsl(vform, zda, zn_b, zm_b);
3286       break;
3287     case "sqdmlslbt_z_zzz"_h:
3288       sqdmlsl(vform, zda, zn_b, zm_t);
3289       break;
3290     case "sqdmlslt_z_zzz"_h:
3291       sqdmlsl(vform, zda, zn_t, zm_t);
3292       break;
3293     case "umlalb_z_zzz"_h:
3294       umlal(vform, zda, zn_b, zm_b);
3295       break;
3296     case "umlalt_z_zzz"_h:
3297       umlal(vform, zda, zn_t, zm_t);
3298       break;
3299     case "umlslb_z_zzz"_h:
3300       umlsl(vform, zda, zn_b, zm_b);
3301       break;
3302     case "umlslt_z_zzz"_h:
3303       umlsl(vform, zda, zn_t, zm_t);
3304       break;
3305     default:
3306       VIXL_UNIMPLEMENTED();
3307   }
3308 }
3309 
3310 void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
3311   VectorFormat vform = instr->GetSVEVectorFormat();
3312   SimVRegister& zda = ReadVRegister(instr->GetRd());
3313   SimVRegister& zn = ReadVRegister(instr->GetRn());
3314   int rot = instr->ExtractBits(11, 10) * 90;
3315   unsigned zm_code = instr->GetRm();
3316   int index = -1;
3317 
3318   switch (form_hash_) {
3319     case "cdot_z_zzz"_h:
3320       // Nothing to do.
3321       break;
3322     case "cdot_z_zzzi_s"_h:
3323       index = zm_code >> 3;
3324       zm_code &= 0x7;
3325       break;
3326     case "cdot_z_zzzi_d"_h:
3327       index = zm_code >> 4;
3328       zm_code &= 0xf;
3329       break;
3330     default:
3331       VIXL_UNIMPLEMENTED();
3332   }
3333 
3334   SimVRegister temp;
3335   SimVRegister& zm = ReadVRegister(zm_code);
3336   if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
3337   cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
3338 }
3339 
3340 void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
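  // All of these forms are purely bitwise, so the lane size is irrelevant;
  // the operands are treated as vectors of 64-bit lanes.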
3341   VectorFormat vform = kFormatVnD;
3342   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3343   SimVRegister& zm = ReadVRegister(instr->GetRm());
3344   SimVRegister& zk = ReadVRegister(instr->GetRn());
3345   SimVRegister temp;
3346 
3347   switch (form_hash_) {
3348     case "bcax_z_zzz"_h:
3349       bic(vform, temp, zm, zk);
3350       eor(vform, zdn, temp, zdn);
3351       break;
3352     case "bsl1n_z_zzz"_h:
3353       not_(vform, temp, zdn);
3354       bsl(vform, zdn, zk, temp, zm);
3355       break;
3356     case "bsl2n_z_zzz"_h:
3357       not_(vform, temp, zm);
3358       bsl(vform, zdn, zk, zdn, temp);
3359       break;
3360     case "bsl_z_zzz"_h:
3361       bsl(vform, zdn, zk, zdn, zm);
3362       break;
3363     case "eor3_z_zzz"_h:
3364       eor(vform, temp, zdn, zm);
3365       eor(vform, zdn, temp, zk);
3366       break;
3367     case "nbsl_z_zzz"_h:
3368       bsl(vform, zdn, zk, zdn, zm);
3369       not_(vform, zdn, zdn);
3370       break;
3371     default:
3372       VIXL_UNIMPLEMENTED();
3373   }
3374 }
3375 
3376 void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) {
3377   VectorFormat vform = instr->GetSVEVectorFormat();
3378   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3379   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3380   SimVRegister& zm = ReadVRegister(instr->GetRn());
3381   SimVRegister result;
3382 
3383   switch (form_hash_) {
3384     case "shadd_z_p_zz"_h:
3385       add(vform, result, zdn, zm).Halve(vform);
3386       break;
3387     case "shsub_z_p_zz"_h:
3388       sub(vform, result, zdn, zm).Halve(vform);
3389       break;
3390     case "shsubr_z_p_zz"_h:
3391       sub(vform, result, zm, zdn).Halve(vform);
3392       break;
3393     case "srhadd_z_p_zz"_h:
3394       add(vform, result, zdn, zm).Halve(vform).Round(vform);
3395       break;
3396     case "uhadd_z_p_zz"_h:
3397       add(vform, result, zdn, zm).Uhalve(vform);
3398       break;
3399     case "uhsub_z_p_zz"_h:
3400       sub(vform, result, zdn, zm).Uhalve(vform);
3401       break;
3402     case "uhsubr_z_p_zz"_h:
3403       sub(vform, result, zm, zdn).Uhalve(vform);
3404       break;
3405     case "urhadd_z_p_zz"_h:
3406       add(vform, result, zdn, zm).Uhalve(vform).Round(vform);
3407       break;
3408     default:
3409       VIXL_UNIMPLEMENTED();
3410       break;
3411   }
3412   mov_merging(vform, zdn, pg, result);
3413 }
3414 
3415 void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) {
3416   VectorFormat vform = instr->GetSVEVectorFormat();
3417   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3418   SimVRegister& zm = ReadVRegister(instr->GetRn());
3419   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3420   SimVRegister result;
3421 
3422   switch (form_hash_) {
3423     case "sqadd_z_p_zz"_h:
3424       add(vform, result, zdn, zm).SignedSaturate(vform);
3425       break;
3426     case "sqsub_z_p_zz"_h:
3427       sub(vform, result, zdn, zm).SignedSaturate(vform);
3428       break;
3429     case "sqsubr_z_p_zz"_h:
3430       sub(vform, result, zm, zdn).SignedSaturate(vform);
3431       break;
3432     case "suqadd_z_p_zz"_h:
3433       suqadd(vform, result, zdn, zm);
3434       break;
3435     case "uqadd_z_p_zz"_h:
3436       add(vform, result, zdn, zm).UnsignedSaturate(vform);
3437       break;
3438     case "uqsub_z_p_zz"_h:
3439       sub(vform, result, zdn, zm).UnsignedSaturate(vform);
3440       break;
3441     case "uqsubr_z_p_zz"_h:
3442       sub(vform, result, zm, zdn).UnsignedSaturate(vform);
3443       break;
3444     case "usqadd_z_p_zz"_h:
3445       usqadd(vform, result, zdn, zm);
3446       break;
3447     default:
3448       VIXL_UNIMPLEMENTED();
3449       break;
3450   }
3451   mov_merging(vform, zdn, pg, result);
3452 }
3453 
3454 void Simulator::SimulateSVEIntArithPair(const Instruction* instr) {
3455   VectorFormat vform = instr->GetSVEVectorFormat();
3456   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3457   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3458   SimVRegister& zm = ReadVRegister(instr->GetRn());
3459   SimVRegister result;
3460 
3461   switch (form_hash_) {
3462     case "addp_z_p_zz"_h:
3463       addp(vform, result, zdn, zm);
3464       break;
3465     case "smaxp_z_p_zz"_h:
3466       smaxp(vform, result, zdn, zm);
3467       break;
3468     case "sminp_z_p_zz"_h:
3469       sminp(vform, result, zdn, zm);
3470       break;
3471     case "umaxp_z_p_zz"_h:
3472       umaxp(vform, result, zdn, zm);
3473       break;
3474     case "uminp_z_p_zz"_h:
3475       uminp(vform, result, zdn, zm);
3476       break;
3477     default:
3478       VIXL_UNIMPLEMENTED();
3479       break;
3480   }
3481   mov_merging(vform, zdn, pg, result);
3482 }
3483 
3484 void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) {
3485   VectorFormat vform = instr->GetSVEVectorFormat();
3486   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3487   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3488   SimVRegister& zm = ReadVRegister(instr->GetRn());
3489   SimVRegister result;
3490 
3491   switch (form_hash_) {
3492     case "faddp_z_p_zz"_h:
3493       faddp(vform, result, zdn, zm);
3494       break;
3495     case "fmaxnmp_z_p_zz"_h:
3496       fmaxnmp(vform, result, zdn, zm);
3497       break;
3498     case "fmaxp_z_p_zz"_h:
3499       fmaxp(vform, result, zdn, zm);
3500       break;
3501     case "fminnmp_z_p_zz"_h:
3502       fminnmp(vform, result, zdn, zm);
3503       break;
3504     case "fminp_z_p_zz"_h:
3505       fminp(vform, result, zdn, zm);
3506       break;
3507     default:
3508       VIXL_UNIMPLEMENTED();
3509   }
3510   mov_merging(vform, zdn, pg, result);
3511 }
3512 
3513 void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) {
3514   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3515   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3516 
3517   std::pair<int, int> shift_and_lane_size =
3518       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
3519   unsigned lane_size = shift_and_lane_size.second;
3520   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3521   int right_shift_dist = shift_and_lane_size.first;
3522   int left_shift_dist = (8 << lane_size) - right_shift_dist;
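  // sqshl, sqshlu and uqshl take a left shift, so the encoded right-shift
  // distance is converted; srshr and urshr use it directly.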
3523   SimVRegister result;
3524 
3525   switch (form_hash_) {
3526     case "sqshl_z_p_zi"_h:
3527       sqshl(vform, result, zdn, left_shift_dist);
3528       break;
3529     case "sqshlu_z_p_zi"_h:
3530       sqshlu(vform, result, zdn, left_shift_dist);
3531       break;
3532     case "srshr_z_p_zi"_h:
3533       sshr(vform, result, zdn, right_shift_dist).Round(vform);
3534       break;
3535     case "uqshl_z_p_zi"_h:
3536       uqshl(vform, result, zdn, left_shift_dist);
3537       break;
3538     case "urshr_z_p_zi"_h:
3539       ushr(vform, result, zdn, right_shift_dist).Round(vform);
3540       break;
3541     default:
3542       VIXL_UNIMPLEMENTED();
3543   }
3544   mov_merging(vform, zdn, pg, result);
3545 }
3546 
3547 void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) {
3548   VIXL_ASSERT(form_hash_ == "xar_z_zzi"_h);
3549 
3550   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3551   SimVRegister& zm = ReadVRegister(instr->GetRn());
3552 
3553   std::pair<int, int> shift_and_lane_size =
3554       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3555   unsigned lane_size = shift_and_lane_size.second;
3556   VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
3557   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3558   int shift_dist = shift_and_lane_size.first;
3559   eor(vform, zdn, zdn, zm);
3560   ror(vform, zdn, zdn, shift_dist);
3561 }
3562 
3563 void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) {
3564   VectorFormat vform = instr->GetSVEVectorFormat();
3565   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3566   SimVRegister& zm = ReadVRegister(instr->GetRn());
3567   int rot = (instr->ExtractBit(10) == 0) ? 90 : 270;
3568 
3569   switch (form_hash_) {
3570     case "cadd_z_zz"_h:
3571       cadd(vform, zdn, zdn, zm, rot);
3572       break;
3573     case "sqcadd_z_zz"_h:
3574       cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true);
3575       break;
3576     default:
3577       VIXL_UNIMPLEMENTED();
3578   }
3579 }
3580 
3581 void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) {
3582   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3583   SimVRegister& zn = ReadVRegister(instr->GetRn());
3584   uint64_t xm = ReadXRegister(instr->GetRm());
3585 
3586   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3587   int msize = -1;
3588   bool is_signed = false;
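  // msize is log2 of the memory element size in bytes: 0 for byte, 1 for
  // halfword, 2 for word and 3 for doubleword accesses.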
3589 
3590   switch (form_hash_) {
3591     case "ldnt1b_z_p_ar_d_64_unscaled"_h:
3592       msize = 0;
3593       break;
3594     case "ldnt1d_z_p_ar_d_64_unscaled"_h:
3595       msize = 3;
3596       break;
3597     case "ldnt1h_z_p_ar_d_64_unscaled"_h:
3598       msize = 1;
3599       break;
3600     case "ldnt1sb_z_p_ar_d_64_unscaled"_h:
3601       msize = 0;
3602       is_signed = true;
3603       break;
3604     case "ldnt1sh_z_p_ar_d_64_unscaled"_h:
3605       msize = 1;
3606       is_signed = true;
3607       break;
3608     case "ldnt1sw_z_p_ar_d_64_unscaled"_h:
3609       msize = 2;
3610       is_signed = true;
3611       break;
3612     case "ldnt1w_z_p_ar_d_64_unscaled"_h:
3613       msize = 2;
3614       break;
3615     default:
3616       VIXL_UNIMPLEMENTED();
3617   }
3618   addr.SetMsizeInBytesLog2(msize);
3619   SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed);
3620 }
3621 
3622 void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) {
3623   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3624   SimVRegister& zn = ReadVRegister(instr->GetRn());
3625   uint64_t xm = ReadXRegister(instr->GetRm());
3626 
3627   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3628   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_d_64_unscaled"_h) ||
3629               (form_hash_ == "stnt1d_z_p_ar_d_64_unscaled"_h) ||
3630               (form_hash_ == "stnt1h_z_p_ar_d_64_unscaled"_h) ||
3631               (form_hash_ == "stnt1w_z_p_ar_d_64_unscaled"_h));
3632 
3633   addr.SetMsizeInBytesLog2(
3634       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3635   SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr);
3636 }
3637 
3638 void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) {
3639   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3640   SimVRegister& zn = ReadVRegister(instr->GetRn());
3641   uint64_t xm = ReadXRegister(instr->GetRm());
3642 
3643   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3644   int msize = -1;
3645   bool is_signed = false;
3646 
3647   switch (form_hash_) {
3648     case "ldnt1b_z_p_ar_s_x32_unscaled"_h:
3649       msize = 0;
3650       break;
3651     case "ldnt1h_z_p_ar_s_x32_unscaled"_h:
3652       msize = 1;
3653       break;
3654     case "ldnt1sb_z_p_ar_s_x32_unscaled"_h:
3655       msize = 0;
3656       is_signed = true;
3657       break;
3658     case "ldnt1sh_z_p_ar_s_x32_unscaled"_h:
3659       msize = 1;
3660       is_signed = true;
3661       break;
3662     case "ldnt1w_z_p_ar_s_x32_unscaled"_h:
3663       msize = 2;
3664       break;
3665     default:
3666       VIXL_UNIMPLEMENTED();
3667   }
3668   addr.SetMsizeInBytesLog2(msize);
3669   SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed);
3670 }
3671 
3672 void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) {
3673   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3674   SimVRegister& zn = ReadVRegister(instr->GetRn());
3675   uint64_t xm = ReadXRegister(instr->GetRm());
3676 
3677   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3678   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_s_x32_unscaled"_h) ||
3679               (form_hash_ == "stnt1h_z_p_ar_s_x32_unscaled"_h) ||
3680               (form_hash_ == "stnt1w_z_p_ar_s_x32_unscaled"_h));
3681 
3682   addr.SetMsizeInBytesLog2(
3683       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3684   SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr);
3685 }
3686 
3687 void Simulator::VisitReserved(const Instruction* instr) {
3688   // UDF is the only instruction in this group, and the Decoder is precise here.
3689   VIXL_ASSERT(instr->Mask(ReservedMask) == UDF);
3690 
3691   printf("UDF (permanently undefined) instruction at %p: 0x%08" PRIx32 "\n",
3692          reinterpret_cast<const void*>(instr),
3693          instr->GetInstructionBits());
3694   VIXL_ABORT_WITH_MSG("UNDEFINED (UDF)\n");
3695 }
3696 
3697 
3698 void Simulator::VisitUnimplemented(const Instruction* instr) {
3699   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
3700          reinterpret_cast<const void*>(instr),
3701          instr->GetInstructionBits());
3702   VIXL_UNIMPLEMENTED();
3703 }
3704 
3705 
3706 void Simulator::VisitUnallocated(const Instruction* instr) {
3707   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
3708          reinterpret_cast<const void*>(instr),
3709          instr->GetInstructionBits());
3710   VIXL_UNIMPLEMENTED();
3711 }
3712 
3713 
3714 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
3715   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
3716               (instr->Mask(PCRelAddressingMask) == ADRP));
3717 
3718   WriteRegister(instr->GetRd(), instr->GetImmPCOffsetTarget());
3719 }
3720 
3721 
3722 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
3723   switch (instr->Mask(UnconditionalBranchMask)) {
3724     case BL:
3725       WriteLr(instr->GetNextInstruction());
3726       VIXL_FALLTHROUGH();
3727     case B:
3728       WritePc(instr->GetImmPCOffsetTarget());
3729       break;
3730     default:
3731       VIXL_UNREACHABLE();
3732   }
3733 }
3734 
3735 
3736 void Simulator::VisitConditionalBranch(const Instruction* instr) {
3737   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
3738   if (ConditionPassed(instr->GetConditionBranch())) {
3739     WritePc(instr->GetImmPCOffsetTarget());
3740   }
3741 }
3742 
3743 BType Simulator::GetBTypeFromInstruction(const Instruction* instr) const {
3744   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3745     case BLR:
3746     case BLRAA:
3747     case BLRAB:
3748     case BLRAAZ:
3749     case BLRABZ:
3750       return BranchAndLink;
3751     case BR:
3752     case BRAA:
3753     case BRAB:
3754     case BRAAZ:
3755     case BRABZ:
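      // x16 and x17 are the intra-procedure-call registers (IP0 and IP1).
      // Branches through them, or from unguarded pages, are classified
      // separately for BTI purposes.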
3756       if ((instr->GetRn() == 16) || (instr->GetRn() == 17) ||
3757           !PcIsInGuardedPage()) {
3758         return BranchFromUnguardedOrToIP;
3759       }
3760       return BranchFromGuardedNotToIP;
3761   }
3762   return DefaultBType;
3763 }
3764 
3765 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
3766   bool authenticate = false;
3767   bool link = false;
3768   bool ret = false;
3769   uint64_t addr = ReadXRegister(instr->GetRn());
3770   uint64_t context = 0;
3771 
3772   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3773     case BLR:
3774       link = true;
3775       VIXL_FALLTHROUGH();
3776     case BR:
3777       break;
3778 
3779     case BLRAAZ:
3780     case BLRABZ:
3781       link = true;
3782       VIXL_FALLTHROUGH();
3783     case BRAAZ:
3784     case BRABZ:
3785       authenticate = true;
3786       break;
3787 
3788     case BLRAA:
3789     case BLRAB:
3790       link = true;
3791       VIXL_FALLTHROUGH();
3792     case BRAA:
3793     case BRAB:
3794       authenticate = true;
3795       context = ReadXRegister(instr->GetRd());
3796       break;
3797 
3798     case RETAA:
3799     case RETAB:
3800       authenticate = true;
3801       addr = ReadXRegister(kLinkRegCode);
3802       context = ReadXRegister(31, Reg31IsStackPointer);
3803       VIXL_FALLTHROUGH();
3804     case RET:
3805       ret = true;
3806       break;
3807     default:
3808       VIXL_UNREACHABLE();
3809   }
3810 
3811   if (link) {
3812     WriteLr(instr->GetNextInstruction());
3813   }
3814 
3815   if (authenticate) {
3816     PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
3817     addr = AuthPAC(addr, context, key, kInstructionPointer);
3818 
3819     int error_lsb = GetTopPACBit(addr, kInstructionPointer) - 2;
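    // A failed authentication leaves a non-zero value in the two bits below
    // the top PAC bit, which the check below detects.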
3820     if (((addr >> error_lsb) & 0x3) != 0x0) {
3821       VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
3822     }
3823   }
3824 
3825   if (!ret) {
3826     // Check for interceptions to the target address; if one is found, call it.
3827     MetaDataDepot::BranchInterceptionAbstract* interception =
3828         meta_data_.FindBranchInterception(addr);
3829 
3830     if (interception != nullptr) {
3831       // Instead of writing the address of the function to the PC, call the
3832       // function's interception directly. We change the address that will be
3833       // branched to so that afterwards we continue execution from
3834       // the address in the LR. Note: the interception may modify the LR, so
3835       // save it before calling the interception.
3836       addr = ReadRegister<uint64_t>(kLinkRegCode);
3837       (*interception)(this);
3838     }
3839   }
3840 
3841   WriteNextBType(GetBTypeFromInstruction(instr));
3842   WritePc(Instruction::Cast(addr));
3843 }
3844 
3845 
3846 void Simulator::VisitTestBranch(const Instruction* instr) {
3847   unsigned bit_pos =
3848       (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40();
3849   bool bit_zero = ((ReadXRegister(instr->GetRt()) >> bit_pos) & 1) == 0;
3850   bool take_branch = false;
3851   switch (instr->Mask(TestBranchMask)) {
3852     case TBZ:
3853       take_branch = bit_zero;
3854       break;
3855     case TBNZ:
3856       take_branch = !bit_zero;
3857       break;
3858     default:
3859       VIXL_UNIMPLEMENTED();
3860   }
3861   if (take_branch) {
3862     WritePc(instr->GetImmPCOffsetTarget());
3863   }
3864 }
3865 
3866 
3867 void Simulator::VisitCompareBranch(const Instruction* instr) {
3868   unsigned rt = instr->GetRt();
3869   bool take_branch = false;
3870   switch (instr->Mask(CompareBranchMask)) {
3871     case CBZ_w:
3872       take_branch = (ReadWRegister(rt) == 0);
3873       break;
3874     case CBZ_x:
3875       take_branch = (ReadXRegister(rt) == 0);
3876       break;
3877     case CBNZ_w:
3878       take_branch = (ReadWRegister(rt) != 0);
3879       break;
3880     case CBNZ_x:
3881       take_branch = (ReadXRegister(rt) != 0);
3882       break;
3883     default:
3884       VIXL_UNIMPLEMENTED();
3885   }
3886   if (take_branch) {
3887     WritePc(instr->GetImmPCOffsetTarget());
3888   }
3889 }
3890 
3891 
3892 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
3893   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3894   bool set_flags = instr->GetFlagsUpdate();
3895   int64_t new_val = 0;
3896   Instr operation = instr->Mask(AddSubOpMask);
3897 
3898   switch (operation) {
3899     case ADD:
3900     case ADDS: {
3901       new_val = AddWithCarry(reg_size,
3902                              set_flags,
3903                              ReadRegister(reg_size,
3904                                           instr->GetRn(),
3905                                           instr->GetRnMode()),
3906                              op2);
3907       break;
3908     }
3909     case SUB:
3910     case SUBS: {
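      // Subtraction is performed by adding the one's complement of op2 with
      // a carry-in of 1, i.e. the two's complement of op2.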
3911       new_val = AddWithCarry(reg_size,
3912                              set_flags,
3913                              ReadRegister(reg_size,
3914                                           instr->GetRn(),
3915                                           instr->GetRnMode()),
3916                              ~op2,
3917                              1);
3918       break;
3919     }
3920     default:
3921       VIXL_UNREACHABLE();
3922   }
3923 
3924   WriteRegister(reg_size,
3925                 instr->GetRd(),
3926                 new_val,
3927                 LogRegWrites,
3928                 instr->GetRdMode());
3929 }
3930 
3931 
3932 void Simulator::VisitAddSubShifted(const Instruction* instr) {
3933   // Add/sub/adds/subs don't allow ROR as a shift mode.
3934   VIXL_ASSERT(instr->GetShiftDP() != ROR);
3935 
3936   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3937   int64_t op2 = ShiftOperand(reg_size,
3938                              ReadRegister(reg_size, instr->GetRm()),
3939                              static_cast<Shift>(instr->GetShiftDP()),
3940                              instr->GetImmDPShift());
3941   AddSubHelper(instr, op2);
3942 }
3943 
3944 
3945 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
3946   int64_t op2 = instr->GetImmAddSub()
3947                 << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
3948   AddSubHelper(instr, op2);
3949 }
3950 
3951 
3952 void Simulator::VisitAddSubExtended(const Instruction* instr) {
3953   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3954   int64_t op2 = ExtendValue(reg_size,
3955                             ReadRegister(reg_size, instr->GetRm()),
3956                             static_cast<Extend>(instr->GetExtendMode()),
3957                             instr->GetImmExtendShift());
3958   AddSubHelper(instr, op2);
3959 }
3960 
3961 
3962 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
3963   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3964   int64_t op2 = ReadRegister(reg_size, instr->GetRm());
3965   int64_t new_val;
3966 
3967   if ((instr->Mask(AddSubOpMask) == SUB) ||
3968       (instr->Mask(AddSubOpMask) == SUBS)) {
3969     op2 = ~op2;
3970   }
3971 
3972   new_val = AddWithCarry(reg_size,
3973                          instr->GetFlagsUpdate(),
3974                          ReadRegister(reg_size, instr->GetRn()),
3975                          op2,
3976                          ReadC());
3977 
3978   WriteRegister(reg_size, instr->GetRd(), new_val);
3979 }
3980 
3981 
3982 void Simulator::VisitRotateRightIntoFlags(const Instruction* instr) {
3983   switch (instr->Mask(RotateRightIntoFlagsMask)) {
3984     case RMIF: {
3985       uint64_t value = ReadRegister<uint64_t>(instr->GetRn());
3986       unsigned shift = instr->GetImmRMIFRotation();
3987       unsigned mask = instr->GetNzcv();
3988       uint64_t rotated = RotateRight(value, shift, kXRegSize);
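      // Copy the low four bits of the rotated value into NZCV, but only for
      // the flag positions selected by the mask; the other flags are
      // preserved.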
3989 
3990       ReadNzcv().SetFlags((rotated & mask) | (ReadNzcv().GetFlags() & ~mask));
3991       break;
3992     }
3993   }
3994 }
3995 
3996 
3997 void Simulator::VisitEvaluateIntoFlags(const Instruction* instr) {
3998   uint32_t value = ReadRegister<uint32_t>(instr->GetRn());
3999   unsigned msb = (instr->Mask(EvaluateIntoFlagsMask) == SETF16) ? 15 : 7;
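  // SETF8/SETF16 set N to the sign bit of the low byte or halfword, Z to
  // whether that byte or halfword is zero, and V to the exclusive OR of bits
  // msb and msb + 1. C is left unchanged.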
4000 
4001   unsigned sign_bit = (value >> msb) & 1;
4002   unsigned overflow_bit = (value >> (msb + 1)) & 1;
4003   ReadNzcv().SetN(sign_bit);
4004   ReadNzcv().SetZ((value << (31 - msb)) == 0);
4005   ReadNzcv().SetV(sign_bit ^ overflow_bit);
4006 }
4007 
4008 
4009 void Simulator::VisitLogicalShifted(const Instruction* instr) {
4010   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4011   Shift shift_type = static_cast<Shift>(instr->GetShiftDP());
4012   unsigned shift_amount = instr->GetImmDPShift();
4013   int64_t op2 = ShiftOperand(reg_size,
4014                              ReadRegister(reg_size, instr->GetRm()),
4015                              shift_type,
4016                              shift_amount);
4017   if (instr->Mask(NOT) == NOT) {
4018     op2 = ~op2;
4019   }
4020   LogicalHelper(instr, op2);
4021 }
4022 
4023 
4024 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
4025   if (instr->GetImmLogical() == 0) {
4026     VIXL_UNIMPLEMENTED();
4027   } else {
4028     LogicalHelper(instr, instr->GetImmLogical());
4029   }
4030 }
4031 
4032 
4033 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
4034   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4035   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
4036   int64_t result = 0;
4037   bool update_flags = false;
4038 
4039   // Switch on the logical operation, stripping out the NOT bit, as it has a
4040   // different meaning for logical immediate instructions.
4041   switch (instr->Mask(LogicalOpMask & ~NOT)) {
4042     case ANDS:
4043       update_flags = true;
4044       VIXL_FALLTHROUGH();
4045     case AND:
4046       result = op1 & op2;
4047       break;
4048     case ORR:
4049       result = op1 | op2;
4050       break;
4051     case EOR:
4052       result = op1 ^ op2;
4053       break;
4054     default:
4055       VIXL_UNIMPLEMENTED();
4056   }
4057 
4058   if (update_flags) {
4059     ReadNzcv().SetN(CalcNFlag(result, reg_size));
4060     ReadNzcv().SetZ(CalcZFlag(result));
4061     ReadNzcv().SetC(0);
4062     ReadNzcv().SetV(0);
4063     LogSystemRegister(NZCV);
4064   }
4065 
4066   WriteRegister(reg_size,
4067                 instr->GetRd(),
4068                 result,
4069                 LogRegWrites,
4070                 instr->GetRdMode());
4071 }
4072 
4073 
4074 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
4075   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4076   ConditionalCompareHelper(instr, ReadRegister(reg_size, instr->GetRm()));
4077 }
4078 
4079 
4080 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
4081   ConditionalCompareHelper(instr, instr->GetImmCondCmp());
4082 }
4083 
4084 
4085 void Simulator::ConditionalCompareHelper(const Instruction* instr,
4086                                          int64_t op2) {
4087   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
4088   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
4089 
4090   if (ConditionPassed(instr->GetCondition())) {
4091     // If the condition passes, set the status flags to the result of comparing
4092     // the operands.
4093     if (instr->Mask(ConditionalCompareMask) == CCMP) {
4094       AddWithCarry(reg_size, true, op1, ~op2, 1);
4095     } else {
4096       VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
4097       AddWithCarry(reg_size, true, op1, op2, 0);
4098     }
4099   } else {
4100     // If the condition fails, set the status flags to the nzcv immediate.
4101     ReadNzcv().SetFlags(instr->GetNzcv());
4102     LogSystemRegister(NZCV);
4103   }
4104 }
4105 
4106 
4107 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
4108   int offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
4109   LoadStoreHelper(instr, offset, Offset);
4110 }
4111 
4112 
4113 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
4114   LoadStoreHelper(instr, instr->GetImmLS(), Offset);
4115 }
4116 
4117 
4118 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
4119   LoadStoreHelper(instr, instr->GetImmLS(), PreIndex);
4120 }
4121 
4122 
4123 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
4124   LoadStoreHelper(instr, instr->GetImmLS(), PostIndex);
4125 }
4126 
4127 
4128 template <typename T1, typename T2>
4129 void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
4130   unsigned rt = instr->GetRt();
4131   unsigned rn = instr->GetRn();
4132 
4133   unsigned element_size = sizeof(T2);
4134   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4135   int offset = instr->GetImmLS();
4136   address += offset;
4137 
4138   // Verify that the address is available to the host.
4139   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4140 
4141   // Check the alignment: the access must not cross a 16-byte boundary.
4142   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
4143     VIXL_ALIGNMENT_EXCEPTION();
4144   }
4145 
4146   WriteRegister<T1>(rt, static_cast<T1>(MemRead<T2>(address)));
4147 
4148   // Approximate load-acquire by issuing a full barrier after the load.
4149   __sync_synchronize();
4150 
4151   LogRead(rt, GetPrintRegisterFormat(element_size), address);
4152 }
4153 
4154 
4155 template <typename T>
4156 void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
4157   unsigned rt = instr->GetRt();
4158   unsigned rn = instr->GetRn();
4159 
4160   unsigned element_size = sizeof(T);
4161   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4162   int offset = instr->GetImmLS();
4163   address += offset;
4164 
4165   // Verify that the address is available to the host.
4166   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4167 
4168   // Check the alignment: the access must not cross a 16-byte boundary.
4169   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
4170     VIXL_ALIGNMENT_EXCEPTION();
4171   }
4172 
4173   // Approximate store-release by issuing a full barrier before the store.
4174   __sync_synchronize();
4175 
4176   MemWrite<T>(address, ReadRegister<T>(rt));
4177 
4178   LogWrite(rt, GetPrintRegisterFormat(element_size), address);
4179 }
4180 
4181 
4182 void Simulator::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
4183   switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
4184     case LDAPURB:
4185       LoadAcquireRCpcUnscaledOffsetHelper<uint8_t, uint8_t>(instr);
4186       break;
4187     case LDAPURH:
4188       LoadAcquireRCpcUnscaledOffsetHelper<uint16_t, uint16_t>(instr);
4189       break;
4190     case LDAPUR_w:
4191       LoadAcquireRCpcUnscaledOffsetHelper<uint32_t, uint32_t>(instr);
4192       break;
4193     case LDAPUR_x:
4194       LoadAcquireRCpcUnscaledOffsetHelper<uint64_t, uint64_t>(instr);
4195       break;
4196     case LDAPURSB_w:
4197       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int8_t>(instr);
4198       break;
4199     case LDAPURSB_x:
4200       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int8_t>(instr);
4201       break;
4202     case LDAPURSH_w:
4203       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int16_t>(instr);
4204       break;
4205     case LDAPURSH_x:
4206       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int16_t>(instr);
4207       break;
4208     case LDAPURSW:
4209       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int32_t>(instr);
4210       break;
4211     case STLURB:
4212       StoreReleaseUnscaledOffsetHelper<uint8_t>(instr);
4213       break;
4214     case STLURH:
4215       StoreReleaseUnscaledOffsetHelper<uint16_t>(instr);
4216       break;
4217     case STLUR_w:
4218       StoreReleaseUnscaledOffsetHelper<uint32_t>(instr);
4219       break;
4220     case STLUR_x:
4221       StoreReleaseUnscaledOffsetHelper<uint64_t>(instr);
4222       break;
4223   }
4224 }
4225 
4226 
4227 void Simulator::VisitLoadStorePAC(const Instruction* instr) {
4228   unsigned dst = instr->GetRt();
4229   unsigned addr_reg = instr->GetRn();
4230 
4231   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
4232 
4233   PACKey key = (instr->ExtractBit(23) == 0) ? kPACKeyDA : kPACKeyDB;
4234   address = AuthPAC(address, 0, key, kDataPointer);
4235 
4236   int error_lsb = GetTopPACBit(address, kInstructionPointer) - 2;
4237   if (((address >> error_lsb) & 0x3) != 0x0) {
4238     VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
4239   }
4240 
4241 
4242   if ((addr_reg == 31) && ((address % 16) != 0)) {
4243     // When the base register is SP the stack pointer is required to be
4244     // quadword aligned prior to the address calculation and write-backs.
4245     // Misalignment will cause a stack alignment fault.
4246     VIXL_ALIGNMENT_EXCEPTION();
4247   }
4248 
4249   int64_t offset = instr->GetImmLSPAC();
4250   address += offset;
4251 
4252   if (instr->Mask(LoadStorePACPreBit) == LoadStorePACPreBit) {
4253     // Pre-index mode.
4254     VIXL_ASSERT(offset != 0);
4255     WriteXRegister(addr_reg, address, LogRegWrites, Reg31IsStackPointer);
4256   }
4257 
4258   uintptr_t addr_ptr = static_cast<uintptr_t>(address);
4259 
4260   // Verify that the calculated address is available to the host.
4261   VIXL_ASSERT(address == addr_ptr);
4262 
4263   WriteXRegister(dst, MemRead<uint64_t>(addr_ptr), NoRegLog);
4264   unsigned access_size = 1 << 3;
4265   LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
4266 }
4267 
4268 
4269 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
4270   Extend ext = static_cast<Extend>(instr->GetExtendMode());
4271   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
4272   unsigned shift_amount = instr->GetImmShiftLS() * instr->GetSizeLS();
4273 
4274   int64_t offset =
4275       ExtendValue(kXRegSize, ReadXRegister(instr->GetRm()), ext, shift_amount);
4276   LoadStoreHelper(instr, offset, Offset);
4277 }
4278 
4279 
4280 void Simulator::LoadStoreHelper(const Instruction* instr,
4281                                 int64_t offset,
4282                                 AddrMode addrmode) {
4283   unsigned srcdst = instr->GetRt();
4284   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4285 
4286   bool rt_is_vreg = false;
4287   int extend_to_size = 0;
4288   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
4289   switch (op) {
4290     case LDRB_w:
4291       WriteWRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4292       extend_to_size = kWRegSizeInBytes;
4293       break;
4294     case LDRH_w:
4295       WriteWRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4296       extend_to_size = kWRegSizeInBytes;
4297       break;
4298     case LDR_w:
4299       WriteWRegister(srcdst, MemRead<uint32_t>(address), NoRegLog);
4300       extend_to_size = kWRegSizeInBytes;
4301       break;
4302     case LDR_x:
4303       WriteXRegister(srcdst, MemRead<uint64_t>(address), NoRegLog);
4304       extend_to_size = kXRegSizeInBytes;
4305       break;
4306     case LDRSB_w:
4307       WriteWRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4308       extend_to_size = kWRegSizeInBytes;
4309       break;
4310     case LDRSH_w:
4311       WriteWRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4312       extend_to_size = kWRegSizeInBytes;
4313       break;
4314     case LDRSB_x:
4315       WriteXRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4316       extend_to_size = kXRegSizeInBytes;
4317       break;
4318     case LDRSH_x:
4319       WriteXRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4320       extend_to_size = kXRegSizeInBytes;
4321       break;
4322     case LDRSW_x:
4323       WriteXRegister(srcdst, MemRead<int32_t>(address), NoRegLog);
4324       extend_to_size = kXRegSizeInBytes;
4325       break;
4326     case LDR_b:
4327       WriteBRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4328       rt_is_vreg = true;
4329       break;
4330     case LDR_h:
4331       WriteHRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4332       rt_is_vreg = true;
4333       break;
4334     case LDR_s:
4335       WriteSRegister(srcdst, MemRead<float>(address), NoRegLog);
4336       rt_is_vreg = true;
4337       break;
4338     case LDR_d:
4339       WriteDRegister(srcdst, MemRead<double>(address), NoRegLog);
4340       rt_is_vreg = true;
4341       break;
4342     case LDR_q:
4343       WriteQRegister(srcdst, MemRead<qreg_t>(address), NoRegLog);
4344       rt_is_vreg = true;
4345       break;
4346 
4347     case STRB_w:
4348       MemWrite<uint8_t>(address, ReadWRegister(srcdst));
4349       break;
4350     case STRH_w:
4351       MemWrite<uint16_t>(address, ReadWRegister(srcdst));
4352       break;
4353     case STR_w:
4354       MemWrite<uint32_t>(address, ReadWRegister(srcdst));
4355       break;
4356     case STR_x:
4357       MemWrite<uint64_t>(address, ReadXRegister(srcdst));
4358       break;
4359     case STR_b:
4360       MemWrite<uint8_t>(address, ReadBRegister(srcdst));
4361       rt_is_vreg = true;
4362       break;
4363     case STR_h:
4364       MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst));
4365       rt_is_vreg = true;
4366       break;
4367     case STR_s:
4368       MemWrite<float>(address, ReadSRegister(srcdst));
4369       rt_is_vreg = true;
4370       break;
4371     case STR_d:
4372       MemWrite<double>(address, ReadDRegister(srcdst));
4373       rt_is_vreg = true;
4374       break;
4375     case STR_q:
4376       MemWrite<qreg_t>(address, ReadQRegister(srcdst));
4377       rt_is_vreg = true;
4378       break;
4379 
4380     // Ignore prfm hint instructions.
4381     case PRFM:
4382       break;
4383 
4384     default:
4385       VIXL_UNIMPLEMENTED();
4386   }
4387 
4388   // Print a detailed trace (including the memory address).
4389   bool extend = (extend_to_size != 0);
4390   unsigned access_size = 1 << instr->GetSizeLS();
4391   unsigned result_size = extend ? extend_to_size : access_size;
4392   PrintRegisterFormat print_format =
4393       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4394                  : GetPrintRegisterFormatForSize(result_size);
4395 
4396   if (instr->IsLoad()) {
4397     if (rt_is_vreg) {
4398       LogVRead(srcdst, print_format, address);
4399     } else {
4400       LogExtendingRead(srcdst, print_format, access_size, address);
4401     }
4402   } else if (instr->IsStore()) {
4403     if (rt_is_vreg) {
4404       LogVWrite(srcdst, print_format, address);
4405     } else {
4406       LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address);
4407     }
4408   } else {
4409     VIXL_ASSERT(op == PRFM);
4410   }
4411 
4412   local_monitor_.MaybeClear();
4413 }
4414 
4415 
4416 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
4417   LoadStorePairHelper(instr, Offset);
4418 }
4419 
4420 
4421 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
4422   LoadStorePairHelper(instr, PreIndex);
4423 }
4424 
4425 
4426 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
4427   LoadStorePairHelper(instr, PostIndex);
4428 }
4429 
4430 
4431 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
4432   LoadStorePairHelper(instr, Offset);
4433 }
4434 
4435 
4436 void Simulator::LoadStorePairHelper(const Instruction* instr,
4437                                     AddrMode addrmode) {
4438   unsigned rt = instr->GetRt();
4439   unsigned rt2 = instr->GetRt2();
4440   int element_size = 1 << instr->GetSizeLSPair();
4441   int64_t offset = instr->GetImmLSPair() * element_size;
4442   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4443   uintptr_t address2 = address + element_size;
4444 
4445   LoadStorePairOp op =
4446       static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
4447 
4448   // 'rt' and 'rt2' can only be aliased for stores.
4449   VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
4450 
4451   bool rt_is_vreg = false;
4452   bool sign_extend = false;
4453   switch (op) {
4454     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
4455     // will print a more detailed log.
4456     case LDP_w: {
4457       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4458       WriteWRegister(rt2, MemRead<uint32_t>(address2), NoRegLog);
4459       break;
4460     }
4461     case LDP_s: {
4462       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
4463       WriteSRegister(rt2, MemRead<float>(address2), NoRegLog);
4464       rt_is_vreg = true;
4465       break;
4466     }
4467     case LDP_x: {
4468       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4469       WriteXRegister(rt2, MemRead<uint64_t>(address2), NoRegLog);
4470       break;
4471     }
4472     case LDP_d: {
4473       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
4474       WriteDRegister(rt2, MemRead<double>(address2), NoRegLog);
4475       rt_is_vreg = true;
4476       break;
4477     }
4478     case LDP_q: {
4479       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
4480       WriteQRegister(rt2, MemRead<qreg_t>(address2), NoRegLog);
4481       rt_is_vreg = true;
4482       break;
4483     }
4484     case LDPSW_x: {
4485       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
4486       WriteXRegister(rt2, MemRead<int32_t>(address2), NoRegLog);
4487       sign_extend = true;
4488       break;
4489     }
4490     case STP_w: {
4491       MemWrite<uint32_t>(address, ReadWRegister(rt));
4492       MemWrite<uint32_t>(address2, ReadWRegister(rt2));
4493       break;
4494     }
4495     case STP_s: {
4496       MemWrite<float>(address, ReadSRegister(rt));
4497       MemWrite<float>(address2, ReadSRegister(rt2));
4498       rt_is_vreg = true;
4499       break;
4500     }
4501     case STP_x: {
4502       MemWrite<uint64_t>(address, ReadXRegister(rt));
4503       MemWrite<uint64_t>(address2, ReadXRegister(rt2));
4504       break;
4505     }
4506     case STP_d: {
4507       MemWrite<double>(address, ReadDRegister(rt));
4508       MemWrite<double>(address2, ReadDRegister(rt2));
4509       rt_is_vreg = true;
4510       break;
4511     }
4512     case STP_q: {
4513       MemWrite<qreg_t>(address, ReadQRegister(rt));
4514       MemWrite<qreg_t>(address2, ReadQRegister(rt2));
4515       rt_is_vreg = true;
4516       break;
4517     }
4518     default:
4519       VIXL_UNREACHABLE();
4520   }
4521 
4522   // Print a detailed trace (including the memory address).
4523   unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size;
4524   PrintRegisterFormat print_format =
4525       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4526                  : GetPrintRegisterFormatForSize(result_size);
4527 
4528   if (instr->IsLoad()) {
4529     if (rt_is_vreg) {
4530       LogVRead(rt, print_format, address);
4531       LogVRead(rt2, print_format, address2);
4532     } else if (sign_extend) {
4533       LogExtendingRead(rt, print_format, element_size, address);
4534       LogExtendingRead(rt2, print_format, element_size, address2);
4535     } else {
4536       LogRead(rt, print_format, address);
4537       LogRead(rt2, print_format, address2);
4538     }
4539   } else {
4540     if (rt_is_vreg) {
4541       LogVWrite(rt, print_format, address);
4542       LogVWrite(rt2, print_format, address2);
4543     } else {
4544       LogWrite(rt, print_format, address);
4545       LogWrite(rt2, print_format, address2);
4546     }
4547   }
4548 
4549   local_monitor_.MaybeClear();
4550 }
4551 
4552 
4553 template <typename T>
4554 void Simulator::CompareAndSwapHelper(const Instruction* instr) {
4555   unsigned rs = instr->GetRs();
4556   unsigned rt = instr->GetRt();
4557   unsigned rn = instr->GetRn();
4558 
4559   unsigned element_size = sizeof(T);
4560   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4561 
4562   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4563 
4564   bool is_acquire = instr->ExtractBit(22) == 1;
4565   bool is_release = instr->ExtractBit(15) == 1;
4566 
4567   T comparevalue = ReadRegister<T>(rs);
4568   T newvalue = ReadRegister<T>(rt);
4569 
4570   // The architecture permits that the data read clears any exclusive monitors
4571   // associated with that location, even if the compare subsequently fails.
4572   local_monitor_.Clear();
4573 
4574   T data = MemRead<T>(address);
4575   if (is_acquire) {
4576     // Approximate load-acquire by issuing a full barrier after the load.
4577     __sync_synchronize();
4578   }
4579 
4580   if (data == comparevalue) {
4581     if (is_release) {
4582       // Approximate store-release by issuing a full barrier before the store.
4583       __sync_synchronize();
4584     }
4585     MemWrite<T>(address, newvalue);
4586     LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
4587   }
4588   WriteRegister<T>(rs, data, NoRegLog);
4589   LogRead(rs, GetPrintRegisterFormatForSize(element_size), address);
4590 }
4591 
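// In outline, the helper above follows the usual compare-and-swap contract
// (a sketch of the flow already implemented, ignoring barriers and trace
// output):
//
//   T old = MemRead<T>(address);          // Rs always receives the old value.
//   if (old == ReadRegister<T>(rs)) {
//     MemWrite<T>(address, ReadRegister<T>(rt));
//   }
//   WriteRegister<T>(rs, old);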
4592 
4593 template <typename T>
4594 void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
4595   VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
4596   unsigned rs = instr->GetRs();
4597   unsigned rt = instr->GetRt();
4598   unsigned rn = instr->GetRn();
4599 
4600   VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
4601 
4602   unsigned element_size = sizeof(T);
4603   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4604 
4605   CheckIsValidUnalignedAtomicAccess(rn, address, element_size * 2);
4606 
4607   uint64_t address2 = address + element_size;
4608 
4609   bool is_acquire = instr->ExtractBit(22) == 1;
4610   bool is_release = instr->ExtractBit(15) == 1;
4611 
4612   T comparevalue_high = ReadRegister<T>(rs + 1);
4613   T comparevalue_low = ReadRegister<T>(rs);
4614   T newvalue_high = ReadRegister<T>(rt + 1);
4615   T newvalue_low = ReadRegister<T>(rt);
4616 
4617   // The architecture permits that the data read clears any exclusive monitors
4618   // associated with that location, even if the compare subsequently fails.
4619   local_monitor_.Clear();
4620 
4621   T data_low = MemRead<T>(address);
4622   T data_high = MemRead<T>(address2);
4623 
4624   if (is_acquire) {
4625     // Approximate load-acquire by issuing a full barrier after the load.
4626     __sync_synchronize();
4627   }
4628 
4629   bool same =
4630       (data_high == comparevalue_high) && (data_low == comparevalue_low);
4631   if (same) {
4632     if (is_release) {
4633       // Approximate store-release by issuing a full barrier before the store.
4634       __sync_synchronize();
4635     }
4636 
4637     MemWrite<T>(address, newvalue_low);
4638     MemWrite<T>(address2, newvalue_high);
4639   }
4640 
4641   WriteRegister<T>(rs + 1, data_high, NoRegLog);
4642   WriteRegister<T>(rs, data_low, NoRegLog);
4643 
4644   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4645   LogRead(rs, format, address);
4646   LogRead(rs + 1, format, address2);
4647 
4648   if (same) {
4649     LogWrite(rt, format, address);
4650     LogWrite(rt + 1, format, address2);
4651   }
4652 }
4653 
4654 bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
4655   // To simulate fault-tolerant loads, we need to know what host addresses we
4656   // can access without generating a real fault. One way to do that is to
4657   // attempt to `write()` the memory to a placeholder pipe[1]. This is more
4658   // portable and less intrusive than using (global) signal handlers.
4659   //
4660   // [1]: https://stackoverflow.com/questions/7134590
4661 
4662   size_t written = 0;
4663   bool can_read = true;
4664   // `write` will normally return after one invocation, but it is allowed to
4665   // handle only part of the operation, so wrap it in a loop.
4666   while (can_read && (written < size)) {
4667     ssize_t result = write(placeholder_pipe_fd_[1],
4668                            reinterpret_cast<void*>(address + written),
4669                            size - written);
4670     if (result > 0) {
4671       written += result;
4672     } else {
4673       switch (result) {
4674         case -EPERM:
4675         case -EFAULT:
4676           // The address range is not accessible.
4677           // `write` is supposed to return -EFAULT in this case, but in practice
4678           // it seems to return -EPERM, so we accept that too.
4679           can_read = false;
4680           break;
4681         case -EINTR:
4682           // The call was interrupted by a signal. Just try again.
4683           break;
4684         default:
4685           // Any other error is fatal.
4686           VIXL_ABORT();
4687       }
4688     }
4689   }
4690   // Drain the read side of the pipe. If we don't do this, we'll leak memory as
4691   // the placeholder data is buffered. As before, we expect to drain the whole
4692   // write in one invocation, but cannot guarantee that, so we wrap it in a
4693   // loop. This function is primarily intended to implement SVE fault-tolerant
4694   // loads, so the maximum Z register size is a good default buffer size.
4695   char buffer[kZRegMaxSizeInBytes];
4696   while (written > 0) {
4697     ssize_t result = read(placeholder_pipe_fd_[0],
4698                           reinterpret_cast<void*>(buffer),
4699                           sizeof(buffer));
4700     // `read` blocks, and returns 0 only at EOF. We should not hit EOF until
4701     // we've read everything that was written, so treat 0 as an error.
4702     if (result > 0) {
4703       VIXL_ASSERT(static_cast<size_t>(result) <= written);
4704       written -= result;
4705     } else {
4706       // For -EINTR, just try again. We can't handle any other error.
4707       VIXL_CHECK(result == -EINTR);
4708     }
4709   }
4710 
4711   return can_read;
4712 }
4713 
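// A minimal standalone sketch of the same probing idea, for readers
// unfamiliar with the pipe trick (an illustration only, not a drop-in
// replacement; note that a plain libc `write()` reports failure by returning
// -1 and setting `errno`, whereas the loop above inspects the raw return
// value):
//
//   int fds[2];
//   VIXL_CHECK(pipe(fds) == 0);
//   ssize_t rc = write(fds[1], reinterpret_cast<void*>(address), 1);
//   bool readable = (rc == 1);  // EFAULT/EPERM indicate an unreadable page.
//   ...                         // Drain fds[0] and close both ends.
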
4714 void Simulator::PrintExclusiveAccessWarning() {
4715   if (print_exclusive_access_warning_) {
4716     fprintf(stderr,
4717             "%sWARNING:%s VIXL simulator support for "
4718             "load-/store-/clear-exclusive "
4719             "instructions is limited. Refer to the README for details.%s\n",
4720             clr_warning,
4721             clr_warning_message,
4722             clr_normal);
4723     print_exclusive_access_warning_ = false;
4724   }
4725 }
4726 
4727 void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
4728   LoadStoreExclusive op =
4729       static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
4730 
4731   switch (op) {
4732     case CAS_w:
4733     case CASA_w:
4734     case CASL_w:
4735     case CASAL_w:
4736       CompareAndSwapHelper<uint32_t>(instr);
4737       break;
4738     case CAS_x:
4739     case CASA_x:
4740     case CASL_x:
4741     case CASAL_x:
4742       CompareAndSwapHelper<uint64_t>(instr);
4743       break;
4744     case CASB:
4745     case CASAB:
4746     case CASLB:
4747     case CASALB:
4748       CompareAndSwapHelper<uint8_t>(instr);
4749       break;
4750     case CASH:
4751     case CASAH:
4752     case CASLH:
4753     case CASALH:
4754       CompareAndSwapHelper<uint16_t>(instr);
4755       break;
4756     case CASP_w:
4757     case CASPA_w:
4758     case CASPL_w:
4759     case CASPAL_w:
4760       CompareAndSwapPairHelper<uint32_t>(instr);
4761       break;
4762     case CASP_x:
4763     case CASPA_x:
4764     case CASPL_x:
4765     case CASPAL_x:
4766       CompareAndSwapPairHelper<uint64_t>(instr);
4767       break;
4768     default:
4769       PrintExclusiveAccessWarning();
4770 
4771       unsigned rs = instr->GetRs();
4772       unsigned rt = instr->GetRt();
4773       unsigned rt2 = instr->GetRt2();
4774       unsigned rn = instr->GetRn();
4775 
4776       bool is_exclusive = !instr->GetLdStXNotExclusive();
4777       bool is_acquire_release =
4778           !is_exclusive || instr->GetLdStXAcquireRelease();
4779       bool is_load = instr->GetLdStXLoad();
4780       bool is_pair = instr->GetLdStXPair();
4781 
4782       unsigned element_size = 1 << instr->GetLdStXSizeLog2();
4783       unsigned access_size = is_pair ? element_size * 2 : element_size;
4784       uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4785 
4786       CheckIsValidUnalignedAtomicAccess(rn, address, access_size);
4787 
4788       if (is_load) {
4789         if (is_exclusive) {
4790           local_monitor_.MarkExclusive(address, access_size);
4791         } else {
4792           // Any non-exclusive load can clear the local monitor as a side
4793           // effect. We don't need to do this, but it is useful to stress the
4794           // simulated code.
4795           local_monitor_.Clear();
4796         }
4797 
4798         // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
4799         // We will print a more detailed log.
4800         unsigned reg_size = 0;
4801         switch (op) {
4802           case LDXRB_w:
4803           case LDAXRB_w:
4804           case LDARB_w:
4805           case LDLARB:
4806             WriteWRegister(rt, MemRead<uint8_t>(address), NoRegLog);
4807             reg_size = kWRegSizeInBytes;
4808             break;
4809           case LDXRH_w:
4810           case LDAXRH_w:
4811           case LDARH_w:
4812           case LDLARH:
4813             WriteWRegister(rt, MemRead<uint16_t>(address), NoRegLog);
4814             reg_size = kWRegSizeInBytes;
4815             break;
4816           case LDXR_w:
4817           case LDAXR_w:
4818           case LDAR_w:
4819           case LDLAR_w:
4820             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4821             reg_size = kWRegSizeInBytes;
4822             break;
4823           case LDXR_x:
4824           case LDAXR_x:
4825           case LDAR_x:
4826           case LDLAR_x:
4827             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4828             reg_size = kXRegSizeInBytes;
4829             break;
4830           case LDXP_w:
4831           case LDAXP_w:
4832             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4833             WriteWRegister(rt2,
4834                            MemRead<uint32_t>(address + element_size),
4835                            NoRegLog);
4836             reg_size = kWRegSizeInBytes;
4837             break;
4838           case LDXP_x:
4839           case LDAXP_x:
4840             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4841             WriteXRegister(rt2,
4842                            MemRead<uint64_t>(address + element_size),
4843                            NoRegLog);
4844             reg_size = kXRegSizeInBytes;
4845             break;
4846           default:
4847             VIXL_UNREACHABLE();
4848         }
4849 
4850         if (is_acquire_release) {
4851           // Approximate load-acquire by issuing a full barrier after the load.
4852           __sync_synchronize();
4853         }
4854 
4855         PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
4856         LogExtendingRead(rt, format, element_size, address);
4857         if (is_pair) {
4858           LogExtendingRead(rt2, format, element_size, address + element_size);
4859         }
4860       } else {
4861         if (is_acquire_release) {
4862           // Approximate store-release by issuing a full barrier before the
4863           // store.
4864           __sync_synchronize();
4865         }
4866 
4867         bool do_store = true;
4868         if (is_exclusive) {
4869           do_store = local_monitor_.IsExclusive(address, access_size) &&
4870                      global_monitor_.IsExclusive(address, access_size);
4871           WriteWRegister(rs, do_store ? 0 : 1);
4872 
4873           //  - All exclusive stores explicitly clear the local monitor.
4874           local_monitor_.Clear();
4875         } else {
4876           //  - Any other store can clear the local monitor as a side effect.
4877           local_monitor_.MaybeClear();
4878         }
4879 
4880         if (do_store) {
4881           switch (op) {
4882             case STXRB_w:
4883             case STLXRB_w:
4884             case STLRB_w:
4885             case STLLRB:
4886               MemWrite<uint8_t>(address, ReadWRegister(rt));
4887               break;
4888             case STXRH_w:
4889             case STLXRH_w:
4890             case STLRH_w:
4891             case STLLRH:
4892               MemWrite<uint16_t>(address, ReadWRegister(rt));
4893               break;
4894             case STXR_w:
4895             case STLXR_w:
4896             case STLR_w:
4897             case STLLR_w:
4898               MemWrite<uint32_t>(address, ReadWRegister(rt));
4899               break;
4900             case STXR_x:
4901             case STLXR_x:
4902             case STLR_x:
4903             case STLLR_x:
4904               MemWrite<uint64_t>(address, ReadXRegister(rt));
4905               break;
4906             case STXP_w:
4907             case STLXP_w:
4908               MemWrite<uint32_t>(address, ReadWRegister(rt));
4909               MemWrite<uint32_t>(address + element_size, ReadWRegister(rt2));
4910               break;
4911             case STXP_x:
4912             case STLXP_x:
4913               MemWrite<uint64_t>(address, ReadXRegister(rt));
4914               MemWrite<uint64_t>(address + element_size, ReadXRegister(rt2));
4915               break;
4916             default:
4917               VIXL_UNREACHABLE();
4918           }
4919 
4920           PrintRegisterFormat format =
4921               GetPrintRegisterFormatForSize(element_size);
4922           LogWrite(rt, format, address);
4923           if (is_pair) {
4924             LogWrite(rt2, format, address + element_size);
4925           }
4926         }
4927       }
4928   }
4929 }
4930 
4931 template <typename T>
4932 void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
4933   unsigned rs = instr->GetRs();
4934   unsigned rt = instr->GetRt();
4935   unsigned rn = instr->GetRn();
4936 
4937   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
4938   bool is_release = instr->ExtractBit(22) == 1;
4939 
4940   unsigned element_size = sizeof(T);
4941   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4942 
4943   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4944 
4945   T value = ReadRegister<T>(rs);
4946 
4947   T data = MemRead<T>(address);
4948 
4949   if (is_acquire) {
4950     // Approximate load-acquire by issuing a full barrier after the load.
4951     __sync_synchronize();
4952   }
4953 
4954   T result = 0;
4955   switch (instr->Mask(AtomicMemorySimpleOpMask)) {
4956     case LDADDOp:
4957       result = data + value;
4958       break;
4959     case LDCLROp:
4960       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4961       result = data & ~value;
4962       break;
4963     case LDEOROp:
4964       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4965       result = data ^ value;
4966       break;
4967     case LDSETOp:
4968       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4969       result = data | value;
4970       break;
4971 
4972     // Signed/Unsigned difference is done via the templated type T.
4973     case LDSMAXOp:
4974     case LDUMAXOp:
4975       result = (data > value) ? data : value;
4976       break;
4977     case LDSMINOp:
4978     case LDUMINOp:
4979       result = (data > value) ? value : data;
4980       break;
4981   }
4982 
4983   if (is_release) {
4984     // Approximate store-release by issuing a full barrier before the store.
4985     __sync_synchronize();
4986   }
4987 
4988   WriteRegister<T>(rt, data, NoRegLog);
4989 
4990   unsigned register_size = element_size;
4991   if (element_size < kXRegSizeInBytes) {
4992     register_size = kWRegSizeInBytes;
4993   }
4994   PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
4995   LogExtendingRead(rt, format, element_size, address);
4996 
4997   MemWrite<T>(address, result);
4998   format = GetPrintRegisterFormatForSize(element_size);
4999   LogWrite(rs, format, address);
5000 }
5001 
5002 template <typename T>
5003 void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
5004   unsigned rs = instr->GetRs();
5005   unsigned rt = instr->GetRt();
5006   unsigned rn = instr->GetRn();
5007 
5008   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
5009   bool is_release = instr->ExtractBit(22) == 1;
5010 
5011   unsigned element_size = sizeof(T);
5012   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
5013 
5014   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
5015 
5016   T data = MemRead<T>(address);
5017   if (is_acquire) {
5018     // Approximate load-acquire by issuing a full barrier after the load.
5019     __sync_synchronize();
5020   }
5021 
5022   if (is_release) {
5023     // Approximate store-release by issuing a full barrier before the store.
5024     __sync_synchronize();
5025   }
5026   MemWrite<T>(address, ReadRegister<T>(rs));
5027 
5028   WriteRegister<T>(rt, data);
5029 
5030   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
5031   LogRead(rt, format, address);
5032   LogWrite(rs, format, address);
5033 }
5034 
5035 template <typename T>
5036 void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
5037   unsigned rt = instr->GetRt();
5038   unsigned rn = instr->GetRn();
5039 
5040   unsigned element_size = sizeof(T);
5041   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
5042 
5043   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
5044 
5045   WriteRegister<T>(rt, MemRead<T>(address));
5046 
5047   // Approximate load-acquire by issuing a full barrier after the load.
5048   __sync_synchronize();
5049 
5050   LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
5051 }
5052 
5053 #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
5054   V(LDADD)                                \
5055   V(LDCLR)                                \
5056   V(LDEOR)                                \
5057   V(LDSET)                                \
5058   V(LDUMAX)                               \
5059   V(LDUMIN)
5060 
5061 #define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
5062   V(LDSMAX)                              \
5063   V(LDSMIN)
5064 
5065 void Simulator::VisitAtomicMemory(const Instruction* instr) {
5066   switch (instr->Mask(AtomicMemoryMask)) {
5067 // clang-format off
5068 #define SIM_FUNC_B(A) \
5069     case A##B:        \
5070     case A##AB:       \
5071     case A##LB:       \
5072     case A##ALB:
5073 #define SIM_FUNC_H(A) \
5074     case A##H:        \
5075     case A##AH:       \
5076     case A##LH:       \
5077     case A##ALH:
5078 #define SIM_FUNC_w(A) \
5079     case A##_w:       \
5080     case A##A_w:      \
5081     case A##L_w:      \
5082     case A##AL_w:
5083 #define SIM_FUNC_x(A) \
5084     case A##_x:       \
5085     case A##A_x:      \
5086     case A##L_x:      \
5087     case A##AL_x:
5088 
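    // For example, ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B) expands to the
    // case labels LDADDB, LDADDAB, LDADDLB, LDADDALB, LDCLRB, ... and so on,
    // so each list below routes every acquire/release variant of the listed
    // operations to a single helper instantiation.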
5089     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
5090       AtomicMemorySimpleHelper<uint8_t>(instr);
5091       break;
5092     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
5093       AtomicMemorySimpleHelper<int8_t>(instr);
5094       break;
5095     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
5096       AtomicMemorySimpleHelper<uint16_t>(instr);
5097       break;
5098     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
5099       AtomicMemorySimpleHelper<int16_t>(instr);
5100       break;
5101     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
5102       AtomicMemorySimpleHelper<uint32_t>(instr);
5103       break;
5104     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
5105       AtomicMemorySimpleHelper<int32_t>(instr);
5106       break;
5107     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
5108       AtomicMemorySimpleHelper<uint64_t>(instr);
5109       break;
5110     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
5111       AtomicMemorySimpleHelper<int64_t>(instr);
5112       break;
5113       // clang-format on
5114 
5115     case SWPB:
5116     case SWPAB:
5117     case SWPLB:
5118     case SWPALB:
5119       AtomicMemorySwapHelper<uint8_t>(instr);
5120       break;
5121     case SWPH:
5122     case SWPAH:
5123     case SWPLH:
5124     case SWPALH:
5125       AtomicMemorySwapHelper<uint16_t>(instr);
5126       break;
5127     case SWP_w:
5128     case SWPA_w:
5129     case SWPL_w:
5130     case SWPAL_w:
5131       AtomicMemorySwapHelper<uint32_t>(instr);
5132       break;
5133     case SWP_x:
5134     case SWPA_x:
5135     case SWPL_x:
5136     case SWPAL_x:
5137       AtomicMemorySwapHelper<uint64_t>(instr);
5138       break;
5139     case LDAPRB:
5140       LoadAcquireRCpcHelper<uint8_t>(instr);
5141       break;
5142     case LDAPRH:
5143       LoadAcquireRCpcHelper<uint16_t>(instr);
5144       break;
5145     case LDAPR_w:
5146       LoadAcquireRCpcHelper<uint32_t>(instr);
5147       break;
5148     case LDAPR_x:
5149       LoadAcquireRCpcHelper<uint64_t>(instr);
5150       break;
5151   }
5152 }
5153 
5154 
5155 void Simulator::VisitLoadLiteral(const Instruction* instr) {
5156   unsigned rt = instr->GetRt();
5157   uint64_t address = instr->GetLiteralAddress<uint64_t>();
5158 
5159   // Verify that the calculated address is available to the host.
5160   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5161 
5162   switch (instr->Mask(LoadLiteralMask)) {
5163     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
5164     // print a more detailed log.
5165     case LDR_w_lit:
5166       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
5167       LogRead(rt, kPrintWReg, address);
5168       break;
5169     case LDR_x_lit:
5170       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
5171       LogRead(rt, kPrintXReg, address);
5172       break;
5173     case LDR_s_lit:
5174       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
5175       LogVRead(rt, kPrintSRegFP, address);
5176       break;
5177     case LDR_d_lit:
5178       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
5179       LogVRead(rt, kPrintDRegFP, address);
5180       break;
5181     case LDR_q_lit:
5182       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
5183       LogVRead(rt, kPrintReg1Q, address);
5184       break;
5185     case LDRSW_x_lit:
5186       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
5187       LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
5188       break;
5189 
5190     // Ignore prfm hint instructions.
5191     case PRFM_lit:
5192       break;
5193 
5194     default:
5195       VIXL_UNREACHABLE();
5196   }
5197 
5198   local_monitor_.MaybeClear();
5199 }
5200 
5201 
5202 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
5203                                        int64_t offset,
5204                                        AddrMode addrmode) {
5205   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
5206 
5207   if ((addr_reg == 31) && ((address % 16) != 0)) {
5208     // When the base register is SP the stack pointer is required to be
5209     // quadword aligned prior to the address calculation and write-backs.
5210     // Misalignment will cause a stack alignment fault.
5211     VIXL_ALIGNMENT_EXCEPTION();
5212   }
5213 
5214   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
5215     VIXL_ASSERT(offset != 0);
5216     // Only preindex should log the register update here. For Postindex, the
5217     // update will be printed automatically by LogWrittenRegisters _after_ the
5218     // memory access itself is logged.
5219     RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
5220     WriteXRegister(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
5221   }
5222 
5223   if ((addrmode == Offset) || (addrmode == PreIndex)) {
5224     address += offset;
5225   }
5226 
5227   // Verify that the calculated address is available to the host.
5228   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5229 
5230   return static_cast<uintptr_t>(address);
5231 }
5232 
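// For reference, the three addressing modes handled above behave as follows
// (a summary of the code, not additional behaviour):
//
//   Offset:    access base + offset;  base register is not updated.
//   PreIndex:  access base + offset;  base register <- base + offset.
//   PostIndex: access base;           base register <- base + offset.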
5233 
5234 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
5235   MoveWideImmediateOp mov_op =
5236       static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
5237   int64_t new_xn_val = 0;
5238 
5239   bool is_64_bits = instr->GetSixtyFourBits() == 1;
5240   // Shift is limited for W operations.
5241   VIXL_ASSERT(is_64_bits || (instr->GetShiftMoveWide() < 2));
5242 
5243   // Get the shifted immediate.
5244   int64_t shift = instr->GetShiftMoveWide() * 16;
5245   int64_t shifted_imm16 = static_cast<int64_t>(instr->GetImmMoveWide())
5246                           << shift;
5247 
5248   // Compute the new value.
5249   switch (mov_op) {
5250     case MOVN_w:
5251     case MOVN_x: {
5252       new_xn_val = ~shifted_imm16;
5253       if (!is_64_bits) new_xn_val &= kWRegMask;
5254       break;
5255     }
5256     case MOVK_w:
5257     case MOVK_x: {
5258       unsigned reg_code = instr->GetRd();
5259       int64_t prev_xn_val =
5260           is_64_bits ? ReadXRegister(reg_code) : ReadWRegister(reg_code);
5261       new_xn_val = (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
5262       break;
5263     }
5264     case MOVZ_w:
5265     case MOVZ_x: {
5266       new_xn_val = shifted_imm16;
5267       break;
5268     }
5269     default:
5270       VIXL_UNREACHABLE();
5271   }
5272 
5273   // Update the destination register.
5274   WriteXRegister(instr->GetRd(), new_xn_val);
5275 }
5276 
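// A short worked example of how these operations compose when materialising
// a 64-bit constant (the values follow directly from the cases above):
//
//   movz x0, #0xdead, lsl #16   // x0 = 0x00000000dead0000
//   movk x0, #0xbeef            // x0 = 0x00000000deadbeef
//   movn x1, #0x0               // x1 = 0xffffffffffffffff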
5277 
5278 void Simulator::VisitConditionalSelect(const Instruction* instr) {
5279   uint64_t new_val = ReadXRegister(instr->GetRn());
5280 
5281   if (ConditionFailed(static_cast<Condition>(instr->GetCondition()))) {
5282     new_val = ReadXRegister(instr->GetRm());
5283     switch (instr->Mask(ConditionalSelectMask)) {
5284       case CSEL_w:
5285       case CSEL_x:
5286         break;
5287       case CSINC_w:
5288       case CSINC_x:
5289         new_val++;
5290         break;
5291       case CSINV_w:
5292       case CSINV_x:
5293         new_val = ~new_val;
5294         break;
5295       case CSNEG_w:
5296       case CSNEG_x:
5297         new_val = -new_val;
5298         break;
5299       default:
5300         VIXL_UNIMPLEMENTED();
5301     }
5302   }
5303   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5304   WriteRegister(reg_size, instr->GetRd(), new_val);
5305 }
5306 
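// For example, `csinc x0, x1, x2, eq` writes x1 to x0 when the condition
// passes and x2 + 1 when it fails; the common `cinc`/`cset` aliases are just
// CSINC with particular operand choices.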
5307 
5308 #define PAUTH_MODES_REGISTER_CONTEXT(V)   \
5309   V(i, a, kPACKeyIA, kInstructionPointer) \
5310   V(i, b, kPACKeyIB, kInstructionPointer) \
5311   V(d, a, kPACKeyDA, kDataPointer)        \
5312   V(d, b, kPACKeyDB, kDataPointer)
5313 
5314 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
5315   unsigned dst = instr->GetRd();
5316   unsigned src = instr->GetRn();
5317   Reg31Mode r31_pac = Reg31IsStackPointer;
5318 
5319   switch (form_hash_) {
5320 #define DEFINE_PAUTH_FUNCS(SUF0, SUF1, KEY, D)      \
5321   case "pac" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
5322     VIXL_ASSERT(src == kZeroRegCode);               \
5323     r31_pac = Reg31IsZeroRegister;                  \
5324     VIXL_FALLTHROUGH();                             \
5325   case "pac" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
5326     uint64_t mod = ReadXRegister(src, r31_pac);     \
5327     uint64_t ptr = ReadXRegister(dst);              \
5328     WriteXRegister(dst, AddPAC(ptr, mod, KEY, D));  \
5329     break;                                          \
5330   }                                                 \
5331   case "aut" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
5332     VIXL_ASSERT(src == kZeroRegCode);               \
5333     r31_pac = Reg31IsZeroRegister;                  \
5334     VIXL_FALLTHROUGH();                             \
5335   case "aut" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
5336     uint64_t mod = ReadXRegister(src, r31_pac);     \
5337     uint64_t ptr = ReadXRegister(dst);              \
5338     WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
5339     break;                                          \
5340   }
5341     PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
5342 #undef DEFINE_PAUTH_FUNCS
5343 
5344     case "xpaci_64z_dp_1src"_h:
5345       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
5346       break;
5347     case "xpacd_64z_dp_1src"_h:
5348       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
5349       break;
5350     case "rbit_32_dp_1src"_h:
5351       WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
5352       break;
5353     case "rbit_64_dp_1src"_h:
5354       WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
5355       break;
5356     case "rev16_32_dp_1src"_h:
5357       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
5358       break;
5359     case "rev16_64_dp_1src"_h:
5360       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
5361       break;
5362     case "rev_32_dp_1src"_h:
5363       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
5364       break;
5365     case "rev32_64_dp_1src"_h:
5366       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
5367       break;
5368     case "rev_64_dp_1src"_h:
5369       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
5370       break;
5371     case "clz_32_dp_1src"_h:
5372       WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
5373       break;
5374     case "clz_64_dp_1src"_h:
5375       WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
5376       break;
5377     case "cls_32_dp_1src"_h:
5378       WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
5379       break;
5380     case "cls_64_dp_1src"_h:
5381       WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
5382       break;
5383     case "abs_32_dp_1src"_h:
5384       WriteWRegister(dst, Abs(ReadWRegister(src)));
5385       break;
5386     case "abs_64_dp_1src"_h:
5387       WriteXRegister(dst, Abs(ReadXRegister(src)));
5388       break;
5389     case "cnt_32_dp_1src"_h:
5390       WriteWRegister(dst, CountSetBits(ReadWRegister(src)));
5391       break;
5392     case "cnt_64_dp_1src"_h:
5393       WriteXRegister(dst, CountSetBits(ReadXRegister(src)));
5394       break;
5395     case "ctz_32_dp_1src"_h:
5396       WriteWRegister(dst, CountTrailingZeros(ReadWRegister(src)));
5397       break;
5398     case "ctz_64_dp_1src"_h:
5399       WriteXRegister(dst, CountTrailingZeros(ReadXRegister(src)));
5400       break;
5401   }
5402 }
5403 
5404 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
5405   VIXL_ASSERT((n > 32) && (n <= 64));
5406   for (unsigned i = (n - 1); i >= 32; i--) {
5407     if (((data >> i) & 1) != 0) {
5408       uint64_t polysh32 = (uint64_t)poly << (i - 32);
5409       uint64_t mask = (UINT64_C(1) << i) - 1;
5410       data = ((data & mask) ^ polysh32);
5411     }
5412   }
5413   return data & 0xffffffff;
5414 }
5415 
5416 
5417 template <typename T>
5418 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
5419   unsigned size = sizeof(val) * 8;  // Number of bits in type T.
5420   VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
5421   uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
5422   uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
5423   return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
5424 }
5425 
5426 
5427 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
5428   // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
5429   // the CRC of each 32-bit word sequentially.
5430   acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
5431   return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
5432 }
5433 
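// A minimal usage sketch (with `buffer`/`length` standing in for any byte
// array): chaining the byte-sized checksum mirrors how a sequence of CRC32B
// instructions accumulates a checksum. This only illustrates how the helpers
// compose; it does not add the initial or final inversions that some CRC-32
// conventions apply:
//
//   uint32_t acc = 0;
//   for (size_t i = 0; i < length; i++) {
//     acc = Crc32Checksum(acc, buffer[i], CRC32_POLY);
//   }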
5434 
5435 void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
5436   Shift shift_op = NO_SHIFT;
5437   int64_t result = 0;
5438   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5439 
5440   switch (instr->Mask(DataProcessing2SourceMask)) {
5441     case SDIV_w: {
5442       int32_t rn = ReadWRegister(instr->GetRn());
5443       int32_t rm = ReadWRegister(instr->GetRm());
5444       if ((rn == kWMinInt) && (rm == -1)) {
5445         result = kWMinInt;
5446       } else if (rm == 0) {
5447         // Division by zero can be trapped, but not on A-class processors.
5448         result = 0;
5449       } else {
5450         result = rn / rm;
5451       }
5452       break;
5453     }
5454     case SDIV_x: {
5455       int64_t rn = ReadXRegister(instr->GetRn());
5456       int64_t rm = ReadXRegister(instr->GetRm());
5457       if ((rn == kXMinInt) && (rm == -1)) {
5458         result = kXMinInt;
5459       } else if (rm == 0) {
5460         // Division by zero can be trapped, but not on A-class processors.
5461         result = 0;
5462       } else {
5463         result = rn / rm;
5464       }
5465       break;
5466     }
5467     case UDIV_w: {
5468       uint32_t rn = static_cast<uint32_t>(ReadWRegister(instr->GetRn()));
5469       uint32_t rm = static_cast<uint32_t>(ReadWRegister(instr->GetRm()));
5470       if (rm == 0) {
5471         // Division by zero can be trapped, but not on A-class processors.
5472         result = 0;
5473       } else {
5474         result = rn / rm;
5475       }
5476       break;
5477     }
5478     case UDIV_x: {
5479       uint64_t rn = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5480       uint64_t rm = static_cast<uint64_t>(ReadXRegister(instr->GetRm()));
5481       if (rm == 0) {
5482         // Division by zero can be trapped, but not on A-class processors.
5483         result = 0;
5484       } else {
5485         result = rn / rm;
5486       }
5487       break;
5488     }
5489     case LSLV_w:
5490     case LSLV_x:
5491       shift_op = LSL;
5492       break;
5493     case LSRV_w:
5494     case LSRV_x:
5495       shift_op = LSR;
5496       break;
5497     case ASRV_w:
5498     case ASRV_x:
5499       shift_op = ASR;
5500       break;
5501     case RORV_w:
5502     case RORV_x:
5503       shift_op = ROR;
5504       break;
5505     case PACGA: {
5506       uint64_t dst = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5507       uint64_t src = static_cast<uint64_t>(
5508           ReadXRegister(instr->GetRm(), Reg31IsStackPointer));
5509       uint64_t code = ComputePAC(dst, src, kPACKeyGA);
5510       result = code & 0xffffffff00000000;
5511       break;
5512     }
5513     case CRC32B: {
5514       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5515       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5516       result = Crc32Checksum(acc, val, CRC32_POLY);
5517       break;
5518     }
5519     case CRC32H: {
5520       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5521       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5522       result = Crc32Checksum(acc, val, CRC32_POLY);
5523       break;
5524     }
5525     case CRC32W: {
5526       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5527       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5528       result = Crc32Checksum(acc, val, CRC32_POLY);
5529       break;
5530     }
5531     case CRC32X: {
5532       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5533       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5534       result = Crc32Checksum(acc, val, CRC32_POLY);
5535       reg_size = kWRegSize;
5536       break;
5537     }
5538     case CRC32CB: {
5539       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5540       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5541       result = Crc32Checksum(acc, val, CRC32C_POLY);
5542       break;
5543     }
5544     case CRC32CH: {
5545       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5546       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5547       result = Crc32Checksum(acc, val, CRC32C_POLY);
5548       break;
5549     }
5550     case CRC32CW: {
5551       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5552       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5553       result = Crc32Checksum(acc, val, CRC32C_POLY);
5554       break;
5555     }
5556     case CRC32CX: {
5557       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5558       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5559       result = Crc32Checksum(acc, val, CRC32C_POLY);
5560       reg_size = kWRegSize;
5561       break;
5562     }
5563     default:
5564       VIXL_UNIMPLEMENTED();
5565   }
5566 
5567   if (shift_op != NO_SHIFT) {
5568     // Shift distance encoded in the least-significant five/six bits of the
5569     // register.
5570     int mask = (instr->GetSixtyFourBits() == 1) ? 0x3f : 0x1f;
5571     unsigned shift = ReadWRegister(instr->GetRm()) & mask;
5572     result = ShiftOperand(reg_size,
5573                           ReadRegister(reg_size, instr->GetRn()),
5574                           shift_op,
5575                           shift);
5576   }
5577   WriteRegister(reg_size, instr->GetRd(), result);
5578 }
5579 
5580 void Simulator::SimulateSignedMinMax(const Instruction* instr) {
5581   int32_t wn = ReadWRegister(instr->GetRn());
5582   int32_t wm = ReadWRegister(instr->GetRm());
5583   int64_t xn = ReadXRegister(instr->GetRn());
5584   int64_t xm = ReadXRegister(instr->GetRm());
5585   int32_t imm = instr->ExtractSignedBits(17, 10);
5586   int dst = instr->GetRd();
5587 
5588   switch (form_hash_) {
5589     case "smax_64_minmax_imm"_h:
5590     case "smin_64_minmax_imm"_h:
5591       xm = imm;
5592       break;
5593     case "smax_32_minmax_imm"_h:
5594     case "smin_32_minmax_imm"_h:
5595       wm = imm;
5596       break;
5597   }
5598 
5599   switch (form_hash_) {
5600     case "smax_32_minmax_imm"_h:
5601     case "smax_32_dp_2src"_h:
5602       WriteWRegister(dst, std::max(wn, wm));
5603       break;
5604     case "smax_64_minmax_imm"_h:
5605     case "smax_64_dp_2src"_h:
5606       WriteXRegister(dst, std::max(xn, xm));
5607       break;
5608     case "smin_32_minmax_imm"_h:
5609     case "smin_32_dp_2src"_h:
5610       WriteWRegister(dst, std::min(wn, wm));
5611       break;
5612     case "smin_64_minmax_imm"_h:
5613     case "smin_64_dp_2src"_h:
5614       WriteXRegister(dst, std::min(xn, xm));
5615       break;
5616   }
5617 }
5618 
5619 void Simulator::SimulateUnsignedMinMax(const Instruction* instr) {
5620   uint64_t xn = ReadXRegister(instr->GetRn());
5621   uint64_t xm = ReadXRegister(instr->GetRm());
5622   uint32_t imm = instr->ExtractBits(17, 10);
5623   int dst = instr->GetRd();
5624 
5625   switch (form_hash_) {
5626     case "umax_64u_minmax_imm"_h:
5627     case "umax_32u_minmax_imm"_h:
5628     case "umin_64u_minmax_imm"_h:
5629     case "umin_32u_minmax_imm"_h:
5630       xm = imm;
5631       break;
5632   }
5633 
5634   switch (form_hash_) {
5635     case "umax_32u_minmax_imm"_h:
5636     case "umax_32_dp_2src"_h:
5637       xn &= 0xffff'ffff;
5638       xm &= 0xffff'ffff;
5639       VIXL_FALLTHROUGH();
5640     case "umax_64u_minmax_imm"_h:
5641     case "umax_64_dp_2src"_h:
5642       WriteXRegister(dst, std::max(xn, xm));
5643       break;
5644     case "umin_32u_minmax_imm"_h:
5645     case "umin_32_dp_2src"_h:
5646       xn &= 0xffff'ffff;
5647       xm &= 0xffff'ffff;
5648       VIXL_FALLTHROUGH();
5649     case "umin_64u_minmax_imm"_h:
5650     case "umin_64_dp_2src"_h:
5651       WriteXRegister(dst, std::min(xn, xm));
5652       break;
5653   }
5654 }
5655 
5656 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
5657   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5658 
5659   uint64_t result = 0;
5660   // Extract and sign- or zero-extend 32-bit arguments for widening operations.
5661   uint64_t rn_u32 = ReadRegister<uint32_t>(instr->GetRn());
5662   uint64_t rm_u32 = ReadRegister<uint32_t>(instr->GetRm());
5663   int64_t rn_s32 = ReadRegister<int32_t>(instr->GetRn());
5664   int64_t rm_s32 = ReadRegister<int32_t>(instr->GetRm());
5665   uint64_t rn_u64 = ReadXRegister(instr->GetRn());
5666   uint64_t rm_u64 = ReadXRegister(instr->GetRm());
5667   switch (instr->Mask(DataProcessing3SourceMask)) {
5668     case MADD_w:
5669     case MADD_x:
5670       result = ReadXRegister(instr->GetRa()) + (rn_u64 * rm_u64);
5671       break;
5672     case MSUB_w:
5673     case MSUB_x:
5674       result = ReadXRegister(instr->GetRa()) - (rn_u64 * rm_u64);
5675       break;
5676     case SMADDL_x:
5677       result = ReadXRegister(instr->GetRa()) +
5678                static_cast<uint64_t>(rn_s32 * rm_s32);
5679       break;
5680     case SMSUBL_x:
5681       result = ReadXRegister(instr->GetRa()) -
5682                static_cast<uint64_t>(rn_s32 * rm_s32);
5683       break;
5684     case UMADDL_x:
5685       result = ReadXRegister(instr->GetRa()) + (rn_u32 * rm_u32);
5686       break;
5687     case UMSUBL_x:
5688       result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32);
5689       break;
5690     case UMULH_x:
5691       result =
5692           internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()),
5693                                      ReadRegister<uint64_t>(instr->GetRm()));
5694       break;
5695     case SMULH_x:
5696       result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()),
5697                                           ReadXRegister(instr->GetRm()));
5698       break;
5699     default:
5700       VIXL_UNIMPLEMENTED();
5701   }
5702   WriteRegister(reg_size, instr->GetRd(), result);
5703 }
5704 
5705 
5706 void Simulator::VisitBitfield(const Instruction* instr) {
5707   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5708   int64_t reg_mask = instr->GetSixtyFourBits() ? kXRegMask : kWRegMask;
5709   int R = instr->GetImmR();
5710   int S = instr->GetImmS();
5711 
5712   if (instr->GetSixtyFourBits() != instr->GetBitN()) {
5713     VisitUnallocated(instr);
5714   }
5715 
5716   if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) {
5717     VisitUnallocated(instr);
5718   }
5719 
5720   int diff = S - R;
5721   uint64_t mask;
5722   if (diff >= 0) {
5723     mask = ~UINT64_C(0) >> (64 - (diff + 1));
5724     mask = (static_cast<unsigned>(diff) < (reg_size - 1)) ? mask : reg_mask;
5725   } else {
5726     mask = ~UINT64_C(0) >> (64 - (S + 1));
5727     mask = RotateRight(mask, R, reg_size);
5728     diff += reg_size;
5729   }
5730 
5731   // inzero indicates if the extracted bitfield is inserted into the
5732   // destination register value or in zero.
5733   // If extend is true, extend the sign of the extracted bitfield.
5734   bool inzero = false;
5735   bool extend = false;
5736   switch (instr->Mask(BitfieldMask)) {
5737     case BFM_x:
5738     case BFM_w:
5739       break;
5740     case SBFM_x:
5741     case SBFM_w:
5742       inzero = true;
5743       extend = true;
5744       break;
5745     case UBFM_x:
5746     case UBFM_w:
5747       inzero = true;
5748       break;
5749     default:
5750       VIXL_UNIMPLEMENTED();
5751   }
5752 
5753   uint64_t dst = inzero ? 0 : ReadRegister(reg_size, instr->GetRd());
5754   uint64_t src = ReadRegister(reg_size, instr->GetRn());
5755   // Rotate source bitfield into place.
5756   uint64_t result = RotateRight(src, R, reg_size);
5757   // Determine the sign extension.
5758   uint64_t topbits = (diff == 63) ? 0 : (~UINT64_C(0) << (diff + 1));
5759   uint64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
5760 
5761   // Merge sign extension, dest/zero and bitfield.
5762   result = signbits | (result & mask) | (dst & ~mask);
5763 
5764   WriteRegister(reg_size, instr->GetRd(), result);
5765 }
5766 
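// A worked example: `ubfx x0, x1, #8, #4` is UBFM with R = 8 and S = 11, so
// diff = 3, mask = 0xf, inzero = true and extend = false; the result is
// (x1 >> 8) & 0xf. SBFM follows the same path but additionally copies bit S
// of the source into the upper bits of the result.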
5767 
5768 void Simulator::VisitExtract(const Instruction* instr) {
5769   unsigned lsb = instr->GetImmS();
5770   unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize;
5771   uint64_t low_res =
5772       static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb;
5773   uint64_t high_res = (lsb == 0)
5774                           ? 0
5775                           : ReadRegister<uint64_t>(reg_size, instr->GetRn())
5776                                 << (reg_size - lsb);
5777   WriteRegister(reg_size, instr->GetRd(), low_res | high_res);
5778 }
5779 
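// For example, `extr x0, x1, x2, #16` extracts bits [79:16] of the
// concatenation x1:x2, i.e. x0 = (x2 >> 16) | (x1 << 48), matching
// low_res | high_res above.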
5780 
5781 void Simulator::VisitFPImmediate(const Instruction* instr) {
5782   AssertSupportedFPCR();
5783   unsigned dest = instr->GetRd();
5784   switch (instr->Mask(FPImmediateMask)) {
5785     case FMOV_h_imm:
5786       WriteHRegister(dest, Float16ToRawbits(instr->GetImmFP16()));
5787       break;
5788     case FMOV_s_imm:
5789       WriteSRegister(dest, instr->GetImmFP32());
5790       break;
5791     case FMOV_d_imm:
5792       WriteDRegister(dest, instr->GetImmFP64());
5793       break;
5794     default:
5795       VIXL_UNREACHABLE();
5796   }
5797 }
5798 
5799 
5800 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
5801   AssertSupportedFPCR();
5802 
5803   unsigned dst = instr->GetRd();
5804   unsigned src = instr->GetRn();
5805 
5806   FPRounding round = ReadRMode();
5807 
5808   switch (instr->Mask(FPIntegerConvertMask)) {
5809     case FCVTAS_wh:
5810       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieAway));
5811       break;
5812     case FCVTAS_xh:
5813       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieAway));
5814       break;
5815     case FCVTAS_ws:
5816       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieAway));
5817       break;
5818     case FCVTAS_xs:
5819       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieAway));
5820       break;
5821     case FCVTAS_wd:
5822       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieAway));
5823       break;
5824     case FCVTAS_xd:
5825       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieAway));
5826       break;
5827     case FCVTAU_wh:
5828       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieAway));
5829       break;
5830     case FCVTAU_xh:
5831       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieAway));
5832       break;
5833     case FCVTAU_ws:
5834       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieAway));
5835       break;
5836     case FCVTAU_xs:
5837       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieAway));
5838       break;
5839     case FCVTAU_wd:
5840       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieAway));
5841       break;
5842     case FCVTAU_xd:
5843       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieAway));
5844       break;
5845     case FCVTMS_wh:
5846       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPNegativeInfinity));
5847       break;
5848     case FCVTMS_xh:
5849       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPNegativeInfinity));
5850       break;
5851     case FCVTMS_ws:
5852       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPNegativeInfinity));
5853       break;
5854     case FCVTMS_xs:
5855       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPNegativeInfinity));
5856       break;
5857     case FCVTMS_wd:
5858       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPNegativeInfinity));
5859       break;
5860     case FCVTMS_xd:
5861       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPNegativeInfinity));
5862       break;
5863     case FCVTMU_wh:
5864       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPNegativeInfinity));
5865       break;
5866     case FCVTMU_xh:
5867       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPNegativeInfinity));
5868       break;
5869     case FCVTMU_ws:
5870       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPNegativeInfinity));
5871       break;
5872     case FCVTMU_xs:
5873       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPNegativeInfinity));
5874       break;
5875     case FCVTMU_wd:
5876       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPNegativeInfinity));
5877       break;
5878     case FCVTMU_xd:
5879       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPNegativeInfinity));
5880       break;
5881     case FCVTPS_wh:
5882       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPPositiveInfinity));
5883       break;
5884     case FCVTPS_xh:
5885       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPPositiveInfinity));
5886       break;
5887     case FCVTPS_ws:
5888       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPPositiveInfinity));
5889       break;
5890     case FCVTPS_xs:
5891       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPPositiveInfinity));
5892       break;
5893     case FCVTPS_wd:
5894       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPPositiveInfinity));
5895       break;
5896     case FCVTPS_xd:
5897       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPPositiveInfinity));
5898       break;
5899     case FCVTPU_wh:
5900       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPPositiveInfinity));
5901       break;
5902     case FCVTPU_xh:
5903       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPPositiveInfinity));
5904       break;
5905     case FCVTPU_ws:
5906       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPPositiveInfinity));
5907       break;
5908     case FCVTPU_xs:
5909       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPPositiveInfinity));
5910       break;
5911     case FCVTPU_wd:
5912       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPPositiveInfinity));
5913       break;
5914     case FCVTPU_xd:
5915       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPPositiveInfinity));
5916       break;
5917     case FCVTNS_wh:
5918       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieEven));
5919       break;
5920     case FCVTNS_xh:
5921       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieEven));
5922       break;
5923     case FCVTNS_ws:
5924       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieEven));
5925       break;
5926     case FCVTNS_xs:
5927       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieEven));
5928       break;
5929     case FCVTNS_wd:
5930       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieEven));
5931       break;
5932     case FCVTNS_xd:
5933       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieEven));
5934       break;
5935     case FCVTNU_wh:
5936       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieEven));
5937       break;
5938     case FCVTNU_xh:
5939       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieEven));
5940       break;
5941     case FCVTNU_ws:
5942       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieEven));
5943       break;
5944     case FCVTNU_xs:
5945       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieEven));
5946       break;
5947     case FCVTNU_wd:
5948       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieEven));
5949       break;
5950     case FCVTNU_xd:
5951       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieEven));
5952       break;
5953     case FCVTZS_wh:
5954       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPZero));
5955       break;
5956     case FCVTZS_xh:
5957       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPZero));
5958       break;
5959     case FCVTZS_ws:
5960       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPZero));
5961       break;
5962     case FCVTZS_xs:
5963       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPZero));
5964       break;
5965     case FCVTZS_wd:
5966       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPZero));
5967       break;
5968     case FCVTZS_xd:
5969       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPZero));
5970       break;
5971     case FCVTZU_wh:
5972       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPZero));
5973       break;
5974     case FCVTZU_xh:
5975       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPZero));
5976       break;
5977     case FCVTZU_ws:
5978       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPZero));
5979       break;
5980     case FCVTZU_xs:
5981       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPZero));
5982       break;
5983     case FCVTZU_wd:
5984       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPZero));
5985       break;
5986     case FCVTZU_xd:
5987       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPZero));
5988       break;
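    // FJCVTZS performs the JavaScript-style conversion: round towards zero and
    // reduce the result modulo 2^32, with Z set when the conversion was exact
    // (handled by the FPToFixedJS helper).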
5989     case FJCVTZS:
5990       WriteWRegister(dst, FPToFixedJS(ReadDRegister(src)));
5991       break;
5992     case FMOV_hw:
5993       WriteHRegister(dst, ReadWRegister(src) & kHRegMask);
5994       break;
5995     case FMOV_wh:
5996       WriteWRegister(dst, ReadHRegisterBits(src));
5997       break;
5998     case FMOV_xh:
5999       WriteXRegister(dst, ReadHRegisterBits(src));
6000       break;
6001     case FMOV_hx:
6002       WriteHRegister(dst, ReadXRegister(src) & kHRegMask);
6003       break;
6004     case FMOV_ws:
6005       WriteWRegister(dst, ReadSRegisterBits(src));
6006       break;
6007     case FMOV_xd:
6008       WriteXRegister(dst, ReadDRegisterBits(src));
6009       break;
6010     case FMOV_sw:
6011       WriteSRegisterBits(dst, ReadWRegister(src));
6012       break;
6013     case FMOV_dx:
6014       WriteDRegisterBits(dst, ReadXRegister(src));
6015       break;
6016     case FMOV_d1_x:
6017       LogicVRegister(ReadVRegister(dst))
6018           .SetUint(kFormatD, 1, ReadXRegister(src));
6019       break;
6020     case FMOV_x_d1:
6021       WriteXRegister(dst, LogicVRegister(ReadVRegister(src)).Uint(kFormatD, 1));
6022       break;
6023 
6024     // A 32-bit input can be handled in the same way as a 64-bit input, since
6025     // the sign- or zero-extension will not affect the conversion.
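    // The signed W-register cases rely on ReadWRegister returning a signed
    // 32-bit value that is sign-extended to the wider conversion argument; the
    // unsigned cases read uint32_t explicitly so the value is zero-extended.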
6026     case SCVTF_dx:
6027       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), 0, round));
6028       break;
6029     case SCVTF_dw:
6030       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), 0, round));
6031       break;
6032     case UCVTF_dx:
6033       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), 0, round));
6034       break;
6035     case UCVTF_dw: {
6036       WriteDRegister(dst,
6037                      UFixedToDouble(ReadRegister<uint32_t>(src), 0, round));
6038       break;
6039     }
6040     case SCVTF_sx:
6041       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), 0, round));
6042       break;
6043     case SCVTF_sw:
6044       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), 0, round));
6045       break;
6046     case UCVTF_sx:
6047       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), 0, round));
6048       break;
6049     case UCVTF_sw: {
6050       WriteSRegister(dst, UFixedToFloat(ReadRegister<uint32_t>(src), 0, round));
6051       break;
6052     }
6053     case SCVTF_hx:
6054       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), 0, round));
6055       break;
6056     case SCVTF_hw:
6057       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), 0, round));
6058       break;
6059     case UCVTF_hx:
6060       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), 0, round));
6061       break;
6062     case UCVTF_hw: {
6063       WriteHRegister(dst,
6064                      UFixedToFloat16(ReadRegister<uint32_t>(src), 0, round));
6065       break;
6066     }
6067 
6068     default:
6069       VIXL_UNREACHABLE();
6070   }
6071 }
6072 
6073 
6074 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
6075   AssertSupportedFPCR();
6076 
6077   unsigned dst = instr->GetRd();
6078   unsigned src = instr->GetRn();
6079   int fbits = 64 - instr->GetFPScale();
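  // fbits is the number of fractional bits in the fixed-point operand:
  // converting FP to fixed point scales the value up by 2^fbits before
  // rounding, and the [U]FixedTo* helpers scale down by 2^fbits when
  // converting fixed point to FP.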
6080 
6081   FPRounding round = ReadRMode();
6082 
6083   switch (instr->Mask(FPFixedPointConvertMask)) {
6084     // A 32-bit input can be handled in the same way as a 64-bit input, since
6085     // the sign- or zero-extension will not affect the conversion.
6086     case SCVTF_dx_fixed:
6087       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), fbits, round));
6088       break;
6089     case SCVTF_dw_fixed:
6090       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), fbits, round));
6091       break;
6092     case UCVTF_dx_fixed:
6093       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), fbits, round));
6094       break;
6095     case UCVTF_dw_fixed: {
6096       WriteDRegister(dst,
6097                      UFixedToDouble(ReadRegister<uint32_t>(src), fbits, round));
6098       break;
6099     }
6100     case SCVTF_sx_fixed:
6101       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), fbits, round));
6102       break;
6103     case SCVTF_sw_fixed:
6104       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), fbits, round));
6105       break;
6106     case UCVTF_sx_fixed:
6107       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), fbits, round));
6108       break;
6109     case UCVTF_sw_fixed: {
6110       WriteSRegister(dst,
6111                      UFixedToFloat(ReadRegister<uint32_t>(src), fbits, round));
6112       break;
6113     }
6114     case SCVTF_hx_fixed:
6115       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), fbits, round));
6116       break;
6117     case SCVTF_hw_fixed:
6118       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), fbits, round));
6119       break;
6120     case UCVTF_hx_fixed:
6121       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), fbits, round));
6122       break;
6123     case UCVTF_hw_fixed: {
6124       WriteHRegister(dst,
6125                      UFixedToFloat16(ReadRegister<uint32_t>(src),
6126                                      fbits,
6127                                      round));
6128       break;
6129     }
6130     case FCVTZS_xd_fixed:
6131       WriteXRegister(dst,
6132                      FPToInt64(ReadDRegister(src) * std::pow(2.0, fbits),
6133                                FPZero));
6134       break;
6135     case FCVTZS_wd_fixed:
6136       WriteWRegister(dst,
6137                      FPToInt32(ReadDRegister(src) * std::pow(2.0, fbits),
6138                                FPZero));
6139       break;
6140     case FCVTZU_xd_fixed:
6141       WriteXRegister(dst,
6142                      FPToUInt64(ReadDRegister(src) * std::pow(2.0, fbits),
6143                                 FPZero));
6144       break;
6145     case FCVTZU_wd_fixed:
6146       WriteWRegister(dst,
6147                      FPToUInt32(ReadDRegister(src) * std::pow(2.0, fbits),
6148                                 FPZero));
6149       break;
6150     case FCVTZS_xs_fixed:
6151       WriteXRegister(dst,
6152                      FPToInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
6153                                FPZero));
6154       break;
6155     case FCVTZS_ws_fixed:
6156       WriteWRegister(dst,
6157                      FPToInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
6158                                FPZero));
6159       break;
6160     case FCVTZU_xs_fixed:
6161       WriteXRegister(dst,
6162                      FPToUInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
6163                                 FPZero));
6164       break;
6165     case FCVTZU_ws_fixed:
6166       WriteWRegister(dst,
6167                      FPToUInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
6168                                 FPZero));
6169       break;
6170     case FCVTZS_xh_fixed: {
6171       double output =
6172           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6173       WriteXRegister(dst, FPToInt64(output, FPZero));
6174       break;
6175     }
6176     case FCVTZS_wh_fixed: {
6177       double output =
6178           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6179       WriteWRegister(dst, FPToInt32(output, FPZero));
6180       break;
6181     }
6182     case FCVTZU_xh_fixed: {
6183       double output =
6184           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6185       WriteXRegister(dst, FPToUInt64(output, FPZero));
6186       break;
6187     }
6188     case FCVTZU_wh_fixed: {
6189       double output =
6190           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
6191       WriteWRegister(dst, FPToUInt32(output, FPZero));
6192       break;
6193     }
6194     default:
6195       VIXL_UNREACHABLE();
6196   }
6197 }
6198 
6199 
6200 void Simulator::VisitFPCompare(const Instruction* instr) {
6201   AssertSupportedFPCR();
6202 
6203   FPTrapFlags trap = DisableTrap;
6204   switch (instr->Mask(FPCompareMask)) {
6205     case FCMPE_h:
6206       trap = EnableTrap;
6207       VIXL_FALLTHROUGH();
6208     case FCMP_h:
6209       FPCompare(ReadHRegister(instr->GetRn()),
6210                 ReadHRegister(instr->GetRm()),
6211                 trap);
6212       break;
6213     case FCMPE_s:
6214       trap = EnableTrap;
6215       VIXL_FALLTHROUGH();
6216     case FCMP_s:
6217       FPCompare(ReadSRegister(instr->GetRn()),
6218                 ReadSRegister(instr->GetRm()),
6219                 trap);
6220       break;
6221     case FCMPE_d:
6222       trap = EnableTrap;
6223       VIXL_FALLTHROUGH();
6224     case FCMP_d:
6225       FPCompare(ReadDRegister(instr->GetRn()),
6226                 ReadDRegister(instr->GetRm()),
6227                 trap);
6228       break;
6229     case FCMPE_h_zero:
6230       trap = EnableTrap;
6231       VIXL_FALLTHROUGH();
6232     case FCMP_h_zero:
6233       FPCompare(ReadHRegister(instr->GetRn()), SimFloat16(0.0), trap);
6234       break;
6235     case FCMPE_s_zero:
6236       trap = EnableTrap;
6237       VIXL_FALLTHROUGH();
6238     case FCMP_s_zero:
6239       FPCompare(ReadSRegister(instr->GetRn()), 0.0f, trap);
6240       break;
6241     case FCMPE_d_zero:
6242       trap = EnableTrap;
6243       VIXL_FALLTHROUGH();
6244     case FCMP_d_zero:
6245       FPCompare(ReadDRegister(instr->GetRn()), 0.0, trap);
6246       break;
6247     default:
6248       VIXL_UNIMPLEMENTED();
6249   }
6250 }
6251 
6252 
6253 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
6254   AssertSupportedFPCR();
6255 
6256   FPTrapFlags trap = DisableTrap;
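  // FCCMP{E}: if the condition passes, NZCV is set by comparing Rn with Rm;
  // otherwise NZCV is set directly from the instruction's nzcv immediate.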
6257   switch (instr->Mask(FPConditionalCompareMask)) {
6258     case FCCMPE_h:
6259       trap = EnableTrap;
6260       VIXL_FALLTHROUGH();
6261     case FCCMP_h:
6262       if (ConditionPassed(instr->GetCondition())) {
6263         FPCompare(ReadHRegister(instr->GetRn()),
6264                   ReadHRegister(instr->GetRm()),
6265                   trap);
6266       } else {
6267         ReadNzcv().SetFlags(instr->GetNzcv());
6268         LogSystemRegister(NZCV);
6269       }
6270       break;
6271     case FCCMPE_s:
6272       trap = EnableTrap;
6273       VIXL_FALLTHROUGH();
6274     case FCCMP_s:
6275       if (ConditionPassed(instr->GetCondition())) {
6276         FPCompare(ReadSRegister(instr->GetRn()),
6277                   ReadSRegister(instr->GetRm()),
6278                   trap);
6279       } else {
6280         ReadNzcv().SetFlags(instr->GetNzcv());
6281         LogSystemRegister(NZCV);
6282       }
6283       break;
6284     case FCCMPE_d:
6285       trap = EnableTrap;
6286       VIXL_FALLTHROUGH();
6287     case FCCMP_d:
6288       if (ConditionPassed(instr->GetCondition())) {
6289         FPCompare(ReadDRegister(instr->GetRn()),
6290                   ReadDRegister(instr->GetRm()),
6291                   trap);
6292       } else {
6293         ReadNzcv().SetFlags(instr->GetNzcv());
6294         LogSystemRegister(NZCV);
6295       }
6296       break;
6297     default:
6298       VIXL_UNIMPLEMENTED();
6299   }
6300 }
6301 
6302 
6303 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
6304   AssertSupportedFPCR();
6305 
6306   Instr selected;
6307   if (ConditionPassed(instr->GetCondition())) {
6308     selected = instr->GetRn();
6309   } else {
6310     selected = instr->GetRm();
6311   }
6312 
6313   switch (instr->Mask(FPConditionalSelectMask)) {
6314     case FCSEL_h:
6315       WriteHRegister(instr->GetRd(), ReadHRegister(selected));
6316       break;
6317     case FCSEL_s:
6318       WriteSRegister(instr->GetRd(), ReadSRegister(selected));
6319       break;
6320     case FCSEL_d:
6321       WriteDRegister(instr->GetRd(), ReadDRegister(selected));
6322       break;
6323     default:
6324       VIXL_UNIMPLEMENTED();
6325   }
6326 }
6327 
6328 
6329 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
6330   AssertSupportedFPCR();
6331 
6332   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6333   VectorFormat vform;
6334   switch (instr->Mask(FPTypeMask)) {
6335     default:
6336       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6337     case FP64:
6338       vform = kFormatD;
6339       break;
6340     case FP32:
6341       vform = kFormatS;
6342       break;
6343     case FP16:
6344       vform = kFormatH;
6345       break;
6346   }
6347 
6348   SimVRegister& rd = ReadVRegister(instr->GetRd());
6349   SimVRegister& rn = ReadVRegister(instr->GetRn());
6350   bool inexact_exception = false;
6351   FrintMode frint_mode = kFrintToInteger;
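  // For the FRINT32* and FRINT64* forms (FEAT_FRINTTS), frint_mode also
  // restricts the rounded result to the 32-bit or 64-bit signed integer range.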
6352 
6353   unsigned fd = instr->GetRd();
6354   unsigned fn = instr->GetRn();
6355 
6356   switch (instr->Mask(FPDataProcessing1SourceMask)) {
6357     case FMOV_h:
6358       WriteHRegister(fd, ReadHRegister(fn));
6359       return;
6360     case FMOV_s:
6361       WriteSRegister(fd, ReadSRegister(fn));
6362       return;
6363     case FMOV_d:
6364       WriteDRegister(fd, ReadDRegister(fn));
6365       return;
6366     case FABS_h:
6367     case FABS_s:
6368     case FABS_d:
6369       fabs_(vform, ReadVRegister(fd), ReadVRegister(fn));
6370       // Explicitly log the register update whilst we have type information.
6371       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6372       return;
6373     case FNEG_h:
6374     case FNEG_s:
6375     case FNEG_d:
6376       fneg(vform, ReadVRegister(fd), ReadVRegister(fn));
6377       // Explicitly log the register update whilst we have type information.
6378       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6379       return;
6380     case FCVT_ds:
6381       WriteDRegister(fd, FPToDouble(ReadSRegister(fn), ReadDN()));
6382       return;
6383     case FCVT_sd:
6384       WriteSRegister(fd, FPToFloat(ReadDRegister(fn), FPTieEven, ReadDN()));
6385       return;
6386     case FCVT_hs:
6387       WriteHRegister(fd,
6388                      Float16ToRawbits(
6389                          FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN())));
6390       return;
6391     case FCVT_sh:
6392       WriteSRegister(fd, FPToFloat(ReadHRegister(fn), ReadDN()));
6393       return;
6394     case FCVT_dh:
6395       WriteDRegister(fd, FPToDouble(ReadHRegister(fn), ReadDN()));
6396       return;
6397     case FCVT_hd:
6398       WriteHRegister(fd,
6399                      Float16ToRawbits(
6400                          FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN())));
6401       return;
6402     case FSQRT_h:
6403     case FSQRT_s:
6404     case FSQRT_d:
6405       fsqrt(vform, rd, rn);
6406       // Explicitly log the register update whilst we have type information.
6407       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6408       return;
6409     case FRINT32X_s:
6410     case FRINT32X_d:
6411       inexact_exception = true;
6412       frint_mode = kFrintToInt32;
6413       break;  // Use FPCR rounding mode.
6414     case FRINT64X_s:
6415     case FRINT64X_d:
6416       inexact_exception = true;
6417       frint_mode = kFrintToInt64;
6418       break;  // Use FPCR rounding mode.
6419     case FRINT32Z_s:
6420     case FRINT32Z_d:
6421       inexact_exception = true;
6422       frint_mode = kFrintToInt32;
6423       fpcr_rounding = FPZero;
6424       break;
6425     case FRINT64Z_s:
6426     case FRINT64Z_d:
6427       inexact_exception = true;
6428       frint_mode = kFrintToInt64;
6429       fpcr_rounding = FPZero;
6430       break;
6431     case FRINTI_h:
6432     case FRINTI_s:
6433     case FRINTI_d:
6434       break;  // Use FPCR rounding mode.
6435     case FRINTX_h:
6436     case FRINTX_s:
6437     case FRINTX_d:
6438       inexact_exception = true;
6439       break;
6440     case FRINTA_h:
6441     case FRINTA_s:
6442     case FRINTA_d:
6443       fpcr_rounding = FPTieAway;
6444       break;
6445     case FRINTM_h:
6446     case FRINTM_s:
6447     case FRINTM_d:
6448       fpcr_rounding = FPNegativeInfinity;
6449       break;
6450     case FRINTN_h:
6451     case FRINTN_s:
6452     case FRINTN_d:
6453       fpcr_rounding = FPTieEven;
6454       break;
6455     case FRINTP_h:
6456     case FRINTP_s:
6457     case FRINTP_d:
6458       fpcr_rounding = FPPositiveInfinity;
6459       break;
6460     case FRINTZ_h:
6461     case FRINTZ_s:
6462     case FRINTZ_d:
6463       fpcr_rounding = FPZero;
6464       break;
6465     default:
6466       VIXL_UNIMPLEMENTED();
6467   }
6468 
6469   // Only FRINT* instructions fall through the switch above.
6470   frint(vform, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
6471   // Explicitly log the register update whilst we have type information.
6472   LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6473 }
6474 
6475 
6476 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
6477   AssertSupportedFPCR();
6478 
6479   VectorFormat vform;
6480   switch (instr->Mask(FPTypeMask)) {
6481     default:
6482       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6483     case FP64:
6484       vform = kFormatD;
6485       break;
6486     case FP32:
6487       vform = kFormatS;
6488       break;
6489     case FP16:
6490       vform = kFormatH;
6491       break;
6492   }
6493   SimVRegister& rd = ReadVRegister(instr->GetRd());
6494   SimVRegister& rn = ReadVRegister(instr->GetRn());
6495   SimVRegister& rm = ReadVRegister(instr->GetRm());
6496 
6497   switch (instr->Mask(FPDataProcessing2SourceMask)) {
6498     case FADD_h:
6499     case FADD_s:
6500     case FADD_d:
6501       fadd(vform, rd, rn, rm);
6502       break;
6503     case FSUB_h:
6504     case FSUB_s:
6505     case FSUB_d:
6506       fsub(vform, rd, rn, rm);
6507       break;
6508     case FMUL_h:
6509     case FMUL_s:
6510     case FMUL_d:
6511       fmul(vform, rd, rn, rm);
6512       break;
6513     case FNMUL_h:
6514     case FNMUL_s:
6515     case FNMUL_d:
6516       fnmul(vform, rd, rn, rm);
6517       break;
6518     case FDIV_h:
6519     case FDIV_s:
6520     case FDIV_d:
6521       fdiv(vform, rd, rn, rm);
6522       break;
6523     case FMAX_h:
6524     case FMAX_s:
6525     case FMAX_d:
6526       fmax(vform, rd, rn, rm);
6527       break;
6528     case FMIN_h:
6529     case FMIN_s:
6530     case FMIN_d:
6531       fmin(vform, rd, rn, rm);
6532       break;
6533     case FMAXNM_h:
6534     case FMAXNM_s:
6535     case FMAXNM_d:
6536       fmaxnm(vform, rd, rn, rm);
6537       break;
6538     case FMINNM_h:
6539     case FMINNM_s:
6540     case FMINNM_d:
6541       fminnm(vform, rd, rn, rm);
6542       break;
6543     default:
6544       VIXL_UNREACHABLE();
6545   }
6546   // Explicitly log the register update whilst we have type information.
6547   LogVRegister(instr->GetRd(), GetPrintRegisterFormatFP(vform));
6548 }
6549 
6550 
6551 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
6552   AssertSupportedFPCR();
6553 
6554   unsigned fd = instr->GetRd();
6555   unsigned fn = instr->GetRn();
6556   unsigned fm = instr->GetRm();
6557   unsigned fa = instr->GetRa();
6558 
6559   switch (instr->Mask(FPDataProcessing3SourceMask)) {
6560     // fd = fa +/- (fn * fm)
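    // FMSUB negates the product operand (fn), while the FNM* variants below
    // negate the accumulator (fa) as well or instead. FPMulAdd evaluates the
    // expression as a fused multiply-add with a single rounding, as the
    // architecture requires.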
6561     case FMADD_h:
6562       WriteHRegister(fd,
6563                      FPMulAdd(ReadHRegister(fa),
6564                               ReadHRegister(fn),
6565                               ReadHRegister(fm)));
6566       break;
6567     case FMSUB_h:
6568       WriteHRegister(fd,
6569                      FPMulAdd(ReadHRegister(fa),
6570                               -ReadHRegister(fn),
6571                               ReadHRegister(fm)));
6572       break;
6573     case FMADD_s:
6574       WriteSRegister(fd,
6575                      FPMulAdd(ReadSRegister(fa),
6576                               ReadSRegister(fn),
6577                               ReadSRegister(fm)));
6578       break;
6579     case FMSUB_s:
6580       WriteSRegister(fd,
6581                      FPMulAdd(ReadSRegister(fa),
6582                               -ReadSRegister(fn),
6583                               ReadSRegister(fm)));
6584       break;
6585     case FMADD_d:
6586       WriteDRegister(fd,
6587                      FPMulAdd(ReadDRegister(fa),
6588                               ReadDRegister(fn),
6589                               ReadDRegister(fm)));
6590       break;
6591     case FMSUB_d:
6592       WriteDRegister(fd,
6593                      FPMulAdd(ReadDRegister(fa),
6594                               -ReadDRegister(fn),
6595                               ReadDRegister(fm)));
6596       break;
6597     // Negated variants of the above.
6598     case FNMADD_h:
6599       WriteHRegister(fd,
6600                      FPMulAdd(-ReadHRegister(fa),
6601                               -ReadHRegister(fn),
6602                               ReadHRegister(fm)));
6603       break;
6604     case FNMSUB_h:
6605       WriteHRegister(fd,
6606                      FPMulAdd(-ReadHRegister(fa),
6607                               ReadHRegister(fn),
6608                               ReadHRegister(fm)));
6609       break;
6610     case FNMADD_s:
6611       WriteSRegister(fd,
6612                      FPMulAdd(-ReadSRegister(fa),
6613                               -ReadSRegister(fn),
6614                               ReadSRegister(fm)));
6615       break;
6616     case FNMSUB_s:
6617       WriteSRegister(fd,
6618                      FPMulAdd(-ReadSRegister(fa),
6619                               ReadSRegister(fn),
6620                               ReadSRegister(fm)));
6621       break;
6622     case FNMADD_d:
6623       WriteDRegister(fd,
6624                      FPMulAdd(-ReadDRegister(fa),
6625                               -ReadDRegister(fn),
6626                               ReadDRegister(fm)));
6627       break;
6628     case FNMSUB_d:
6629       WriteDRegister(fd,
6630                      FPMulAdd(-ReadDRegister(fa),
6631                               ReadDRegister(fn),
6632                               ReadDRegister(fm)));
6633       break;
6634     default:
6635       VIXL_UNIMPLEMENTED();
6636   }
6637 }
6638 
6639 
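// Handle NaN operands for two-operand FP instructions: if either input is a
// NaN, the NaN propagated by FPProcessNaNs is written to fd and true is
// returned so the caller can skip the arithmetic; otherwise false is returned
// and fd is left unmodified.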
6640 bool Simulator::FPProcessNaNs(const Instruction* instr) {
6641   unsigned fd = instr->GetRd();
6642   unsigned fn = instr->GetRn();
6643   unsigned fm = instr->GetRm();
6644   bool done = false;
6645 
6646   if (instr->Mask(FP64) == FP64) {
6647     double result = FPProcessNaNs(ReadDRegister(fn), ReadDRegister(fm));
6648     if (IsNaN(result)) {
6649       WriteDRegister(fd, result);
6650       done = true;
6651     }
6652   } else if (instr->Mask(FP32) == FP32) {
6653     float result = FPProcessNaNs(ReadSRegister(fn), ReadSRegister(fm));
6654     if (IsNaN(result)) {
6655       WriteSRegister(fd, result);
6656       done = true;
6657     }
6658   } else {
6659     VIXL_ASSERT(instr->Mask(FP16) == FP16);
6660     VIXL_UNIMPLEMENTED();
6661   }
6662 
6663   return done;
6664 }
6665 
6666 
6667 void Simulator::SysOp_W(int op, int64_t val) {
6668   switch (op) {
6669     case IVAU:
6670     case CVAC:
6671     case CVAU:
6672     case CVAP:
6673     case CVADP:
6674     case CIVAC:
6675     case CGVAC:
6676     case CGDVAC:
6677     case CGVAP:
6678     case CGDVAP:
6679     case CIGVAC:
6680     case CIGDVAC: {
6681       // Perform a placeholder memory access to ensure that we have read access
6682       // to the specified address. The read access does not require a tag match,
6683       // so temporarily disable MTE.
6684       bool mte_enabled = MetaDataDepot::MetaDataMTE::IsActive();
6685       MetaDataDepot::MetaDataMTE::SetActive(false);
6686       volatile uint8_t y = MemRead<uint8_t>(val);
6687       MetaDataDepot::MetaDataMTE::SetActive(mte_enabled);
6688       USE(y);
6689       // TODO: Implement ZVA, GVA, GZVA.
6690       break;
6691     }
6692     default:
6693       VIXL_UNIMPLEMENTED();
6694   }
6695 }
6696 
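// Helper for the hint-space PAC* and AUT* instructions: dst is x17 or x30, and
// src selects the modifier (x16, SP (31), or -1 for a zero modifier),
// corresponding to the *1716, *SP and *Z variants respectively.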
6697 void Simulator::PACHelper(int dst,
6698                           int src,
6699                           PACKey key,
6700                           decltype(&Simulator::AddPAC) pac_fn) {
6701   VIXL_ASSERT((dst == 17) || (dst == 30));
6702   VIXL_ASSERT((src == -1) || (src == 16) || (src == 31));
6703 
6704   uint64_t modifier = (src == -1) ? 0 : ReadXRegister(src, Reg31IsStackPointer);
6705   uint64_t result =
6706       (this->*pac_fn)(ReadXRegister(dst), modifier, key, kInstructionPointer);
6707   WriteXRegister(dst, result);
6708 }
6709 
6710 void Simulator::VisitSystem(const Instruction* instr) {
6711   PACKey pac_key = kPACKeyIA;  // Default key for PAC/AUTH handling.
6712 
6713   switch (form_hash_) {
6714     case "cfinv_m_pstate"_h:
6715       ReadNzcv().SetC(!ReadC());
6716       break;
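    // AXFLAG and XAFLAG (FEAT_FlagM2) convert the NZCV flags between the
    // AArch64 FP comparison encoding and the alternative "external" encoding;
    // the assignments below mirror the architectural pseudocode.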
6717     case "axflag_m_pstate"_h:
6718       ReadNzcv().SetN(0);
6719       ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
6720       ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
6721       ReadNzcv().SetV(0);
6722       break;
6723     case "xaflag_m_pstate"_h: {
6724       // Can't set the flags in place due to the logical dependencies.
6725       uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
6726       uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
6727       uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
6728       uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
6729       ReadNzcv().SetN(n);
6730       ReadNzcv().SetZ(z);
6731       ReadNzcv().SetC(c);
6732       ReadNzcv().SetV(v);
6733       break;
6734     }
6735     case "xpaclri_hi_hints"_h:
6736       WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
6737       break;
6738     case "clrex_bn_barriers"_h:
6739       PrintExclusiveAccessWarning();
6740       ClearLocalMonitor();
6741       break;
6742     case "msr_sr_systemmove"_h:
6743       switch (instr->GetImmSystemRegister()) {
6744         case NZCV:
6745           ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
6746           LogSystemRegister(NZCV);
6747           break;
6748         case FPCR:
6749           ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
6750           LogSystemRegister(FPCR);
6751           break;
6752         default:
6753           VIXL_UNIMPLEMENTED();
6754       }
6755       break;
6756     case "mrs_rs_systemmove"_h:
6757       switch (instr->GetImmSystemRegister()) {
6758         case NZCV:
6759           WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
6760           break;
6761         case FPCR:
6762           WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
6763           break;
6764         case RNDR:
6765         case RNDRRS: {
6766           uint64_t high = jrand48(rand_state_);
6767           uint64_t low = jrand48(rand_state_);
6768           uint64_t rand_num = (high << 32) | (low & 0xffffffff);
6769           WriteXRegister(instr->GetRt(), rand_num);
6770           // Simulate successful random number generation.
6771           // TODO: Occasionally report failure, since real hardware may be
6772           // unable to return a random number within a reasonable time.
6773           ReadNzcv().SetRawValue(NoFlag);
6774           LogSystemRegister(NZCV);
6775           break;
6776         }
6777         default:
6778           VIXL_UNIMPLEMENTED();
6779       }
6780       break;
6781     case "nop_hi_hints"_h:
6782     case "esb_hi_hints"_h:
6783     case "csdb_hi_hints"_h:
6784       break;
6785     case "bti_hb_hints"_h:
6786       switch (instr->GetImmHint()) {
6787         case BTI_jc:
6788           break;
6789         case BTI:
6790           if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
6791             VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
6792           }
6793           break;
6794         case BTI_c:
6795           if (PcIsInGuardedPage() &&
6796               (ReadBType() == BranchFromGuardedNotToIP)) {
6797             VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
6798           }
6799           break;
6800         case BTI_j:
6801           if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
6802             VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
6803           }
6804           break;
6805         default:
6806           VIXL_UNREACHABLE();
6807       }
6808       return;
6809     case "pacib1716_hi_hints"_h:
6810       pac_key = kPACKeyIB;
6811       VIXL_FALLTHROUGH();
6812     case "pacia1716_hi_hints"_h:
6813       PACHelper(17, 16, pac_key, &Simulator::AddPAC);
6814       break;
6815     case "pacibsp_hi_hints"_h:
6816       pac_key = kPACKeyIB;
6817       VIXL_FALLTHROUGH();
6818     case "paciasp_hi_hints"_h:
6819       PACHelper(30, 31, pac_key, &Simulator::AddPAC);
6820 
6821       // Check BType allows PACI[AB]SP instructions.
6822       if (PcIsInGuardedPage()) {
6823         switch (ReadBType()) {
6824           case BranchFromGuardedNotToIP:
6825           // TODO: This case depends on the value of SCTLR_EL1.BT0, which we
6826           // assume here to be zero. This allows execution of PACI[AB]SP when
6827           // BTYPE is BranchFromGuardedNotToIP (0b11).
6828           case DefaultBType:
6829           case BranchFromUnguardedOrToIP:
6830           case BranchAndLink:
6831             break;
6832         }
6833       }
6834       break;
6835     case "pacibz_hi_hints"_h:
6836       pac_key = kPACKeyIB;
6837       VIXL_FALLTHROUGH();
6838     case "paciaz_hi_hints"_h:
6839       PACHelper(30, -1, pac_key, &Simulator::AddPAC);
6840       break;
6841     case "autib1716_hi_hints"_h:
6842       pac_key = kPACKeyIB;
6843       VIXL_FALLTHROUGH();
6844     case "autia1716_hi_hints"_h:
6845       PACHelper(17, 16, pac_key, &Simulator::AuthPAC);
6846       break;
6847     case "autibsp_hi_hints"_h:
6848       pac_key = kPACKeyIB;
6849       VIXL_FALLTHROUGH();
6850     case "autiasp_hi_hints"_h:
6851       PACHelper(30, 31, pac_key, &Simulator::AuthPAC);
6852       break;
6853     case "autibz_hi_hints"_h:
6854       pac_key = kPACKeyIB;
6855       VIXL_FALLTHROUGH();
6856     case "autiaz_hi_hints"_h:
6857       PACHelper(30, -1, pac_key, &Simulator::AuthPAC);
6858       break;
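    // Approximate DMB, DSB and ISB with a full host memory barrier.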
6859     case "dsb_bo_barriers"_h:
6860     case "dmb_bo_barriers"_h:
6861     case "isb_bi_barriers"_h:
6862       __sync_synchronize();
6863       break;
6864     case "sys_cr_systeminstrs"_h:
6865       SysOp_W(instr->GetSysOp(), ReadXRegister(instr->GetRt()));
6866       break;
6867     default:
6868       VIXL_UNIMPLEMENTED();
6869   }
6870 }
6871 
6872 
6873 void Simulator::VisitException(const Instruction* instr) {
6874   switch (instr->Mask(ExceptionMask)) {
6875     case HLT:
6876       switch (instr->GetImmException()) {
6877         case kUnreachableOpcode:
6878           DoUnreachable(instr);
6879           return;
6880         case kTraceOpcode:
6881           DoTrace(instr);
6882           return;
6883         case kLogOpcode:
6884           DoLog(instr);
6885           return;
6886         case kPrintfOpcode:
6887           DoPrintf(instr);
6888           return;
6889         case kRuntimeCallOpcode:
6890           DoRuntimeCall(instr);
6891           return;
6892         case kSetCPUFeaturesOpcode:
6893         case kEnableCPUFeaturesOpcode:
6894         case kDisableCPUFeaturesOpcode:
6895           DoConfigureCPUFeatures(instr);
6896           return;
6897         case kSaveCPUFeaturesOpcode:
6898           DoSaveCPUFeatures(instr);
6899           return;
6900         case kRestoreCPUFeaturesOpcode:
6901           DoRestoreCPUFeatures(instr);
6902           return;
6903         case kMTEActive:
6904           MetaDataDepot::MetaDataMTE::SetActive(true);
6905           return;
6906         case kMTEInactive:
6907           MetaDataDepot::MetaDataMTE::SetActive(false);
6908           return;
6909         default:
6910           HostBreakpoint();
6911           return;
6912       }
6913     case BRK:
6914       if (debugger_enabled_) {
6915         uint64_t next_instr =
6916             reinterpret_cast<uint64_t>(pc_->GetNextInstruction());
6917         if (!debugger_->IsBreakpoint(next_instr)) {
6918           debugger_->RegisterBreakpoint(next_instr);
6919         }
6920       } else {
6921         HostBreakpoint();
6922       }
6923       return;
6924     default:
6925       VIXL_UNIMPLEMENTED();
6926   }
6927 }
6928 
6929 
6930 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
6931   VisitUnimplemented(instr);
6932 }
6933 
6934 
6935 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
6936   VisitUnimplemented(instr);
6937 }
6938 
6939 
6940 void Simulator::VisitCryptoAES(const Instruction* instr) {
6941   VisitUnimplemented(instr);
6942 }
6943 
6944 
6945 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
6946   NEONFormatDecoder nfd(instr);
6947   VectorFormat vf = nfd.GetVectorFormat();
6948 
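  // The pairwise widening forms (SADDLP, UADDLP, SADALP, UADALP) need a
  // destination format with elements twice the source width, selected by this
  // dedicated map.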
6949   static const NEONFormatMap map_lp =
6950       {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}};
6951   VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
6952 
6953   static const NEONFormatMap map_fcvtl = {{22}, {NF_4S, NF_2D}};
6954   VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
6955 
6956   static const NEONFormatMap map_fcvtn = {{22, 30},
6957                                           {NF_4H, NF_8H, NF_2S, NF_4S}};
6958   VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
6959 
6960   SimVRegister& rd = ReadVRegister(instr->GetRd());
6961   SimVRegister& rn = ReadVRegister(instr->GetRn());
6962 
6963   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
6964     // These instructions all use a two-bit size field, except NOT and RBIT,
6965     // which use the field to encode the operation.
6966     switch (instr->Mask(NEON2RegMiscMask)) {
6967       case NEON_REV64:
6968         rev64(vf, rd, rn);
6969         break;
6970       case NEON_REV32:
6971         rev32(vf, rd, rn);
6972         break;
6973       case NEON_REV16:
6974         rev16(vf, rd, rn);
6975         break;
6976       case NEON_SUQADD:
6977         suqadd(vf, rd, rd, rn);
6978         break;
6979       case NEON_USQADD:
6980         usqadd(vf, rd, rd, rn);
6981         break;
6982       case NEON_CLS:
6983         cls(vf, rd, rn);
6984         break;
6985       case NEON_CLZ:
6986         clz(vf, rd, rn);
6987         break;
6988       case NEON_CNT:
6989         cnt(vf, rd, rn);
6990         break;
6991       case NEON_SQABS:
6992         abs(vf, rd, rn).SignedSaturate(vf);
6993         break;
6994       case NEON_SQNEG:
6995         neg(vf, rd, rn).SignedSaturate(vf);
6996         break;
6997       case NEON_CMGT_zero:
6998         cmp(vf, rd, rn, 0, gt);
6999         break;
7000       case NEON_CMGE_zero:
7001         cmp(vf, rd, rn, 0, ge);
7002         break;
7003       case NEON_CMEQ_zero:
7004         cmp(vf, rd, rn, 0, eq);
7005         break;
7006       case NEON_CMLE_zero:
7007         cmp(vf, rd, rn, 0, le);
7008         break;
7009       case NEON_CMLT_zero:
7010         cmp(vf, rd, rn, 0, lt);
7011         break;
7012       case NEON_ABS:
7013         abs(vf, rd, rn);
7014         break;
7015       case NEON_NEG:
7016         neg(vf, rd, rn);
7017         break;
7018       case NEON_SADDLP:
7019         saddlp(vf_lp, rd, rn);
7020         break;
7021       case NEON_UADDLP:
7022         uaddlp(vf_lp, rd, rn);
7023         break;
7024       case NEON_SADALP:
7025         sadalp(vf_lp, rd, rn);
7026         break;
7027       case NEON_UADALP:
7028         uadalp(vf_lp, rd, rn);
7029         break;
7030       case NEON_RBIT_NOT:
7031         vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
7032         switch (instr->GetFPType()) {
7033           case 0:
7034             not_(vf, rd, rn);
7035             break;
7036           case 1:
7037             rbit(vf, rd, rn);
7038             break;
7039           default:
7040             VIXL_UNIMPLEMENTED();
7041         }
7042         break;
7043     }
7044   } else {
7045     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
7046     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
7047     bool inexact_exception = false;
7048     FrintMode frint_mode = kFrintToInteger;
7049 
7050     // These instructions all use a one-bit size field, except XTN, SQXTUN,
7051     // SHLL, SQXTN and UQXTN, which use a two bit size field.
7052     switch (instr->Mask(NEON2RegMiscFPMask)) {
7053       case NEON_FABS:
7054         fabs_(fpf, rd, rn);
7055         return;
7056       case NEON_FNEG:
7057         fneg(fpf, rd, rn);
7058         return;
7059       case NEON_FSQRT:
7060         fsqrt(fpf, rd, rn);
7061         return;
7062       case NEON_FCVTL:
7063         if (instr->Mask(NEON_Q)) {
7064           fcvtl2(vf_fcvtl, rd, rn);
7065         } else {
7066           fcvtl(vf_fcvtl, rd, rn);
7067         }
7068         return;
7069       case NEON_FCVTN:
7070         if (instr->Mask(NEON_Q)) {
7071           fcvtn2(vf_fcvtn, rd, rn);
7072         } else {
7073           fcvtn(vf_fcvtn, rd, rn);
7074         }
7075         return;
7076       case NEON_FCVTXN:
7077         if (instr->Mask(NEON_Q)) {
7078           fcvtxn2(vf_fcvtn, rd, rn);
7079         } else {
7080           fcvtxn(vf_fcvtn, rd, rn);
7081         }
7082         return;
7083 
7084       // The following instructions break from the switch statement, rather
7085       // than return.
7086       case NEON_FRINT32X:
7087         inexact_exception = true;
7088         frint_mode = kFrintToInt32;
7089         break;  // Use FPCR rounding mode.
7090       case NEON_FRINT32Z:
7091         inexact_exception = true;
7092         frint_mode = kFrintToInt32;
7093         fpcr_rounding = FPZero;
7094         break;
7095       case NEON_FRINT64X:
7096         inexact_exception = true;
7097         frint_mode = kFrintToInt64;
7098         break;  // Use FPCR rounding mode.
7099       case NEON_FRINT64Z:
7100         inexact_exception = true;
7101         frint_mode = kFrintToInt64;
7102         fpcr_rounding = FPZero;
7103         break;
7104       case NEON_FRINTI:
7105         break;  // Use FPCR rounding mode.
7106       case NEON_FRINTX:
7107         inexact_exception = true;
7108         break;
7109       case NEON_FRINTA:
7110         fpcr_rounding = FPTieAway;
7111         break;
7112       case NEON_FRINTM:
7113         fpcr_rounding = FPNegativeInfinity;
7114         break;
7115       case NEON_FRINTN:
7116         fpcr_rounding = FPTieEven;
7117         break;
7118       case NEON_FRINTP:
7119         fpcr_rounding = FPPositiveInfinity;
7120         break;
7121       case NEON_FRINTZ:
7122         fpcr_rounding = FPZero;
7123         break;
7124 
7125       case NEON_FCVTNS:
7126         fcvts(fpf, rd, rn, FPTieEven);
7127         return;
7128       case NEON_FCVTNU:
7129         fcvtu(fpf, rd, rn, FPTieEven);
7130         return;
7131       case NEON_FCVTPS:
7132         fcvts(fpf, rd, rn, FPPositiveInfinity);
7133         return;
7134       case NEON_FCVTPU:
7135         fcvtu(fpf, rd, rn, FPPositiveInfinity);
7136         return;
7137       case NEON_FCVTMS:
7138         fcvts(fpf, rd, rn, FPNegativeInfinity);
7139         return;
7140       case NEON_FCVTMU:
7141         fcvtu(fpf, rd, rn, FPNegativeInfinity);
7142         return;
7143       case NEON_FCVTZS:
7144         fcvts(fpf, rd, rn, FPZero);
7145         return;
7146       case NEON_FCVTZU:
7147         fcvtu(fpf, rd, rn, FPZero);
7148         return;
7149       case NEON_FCVTAS:
7150         fcvts(fpf, rd, rn, FPTieAway);
7151         return;
7152       case NEON_FCVTAU:
7153         fcvtu(fpf, rd, rn, FPTieAway);
7154         return;
7155       case NEON_SCVTF:
7156         scvtf(fpf, rd, rn, 0, fpcr_rounding);
7157         return;
7158       case NEON_UCVTF:
7159         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
7160         return;
7161       case NEON_URSQRTE:
7162         ursqrte(fpf, rd, rn);
7163         return;
7164       case NEON_URECPE:
7165         urecpe(fpf, rd, rn);
7166         return;
7167       case NEON_FRSQRTE:
7168         frsqrte(fpf, rd, rn);
7169         return;
7170       case NEON_FRECPE:
7171         frecpe(fpf, rd, rn, fpcr_rounding);
7172         return;
7173       case NEON_FCMGT_zero:
7174         fcmp_zero(fpf, rd, rn, gt);
7175         return;
7176       case NEON_FCMGE_zero:
7177         fcmp_zero(fpf, rd, rn, ge);
7178         return;
7179       case NEON_FCMEQ_zero:
7180         fcmp_zero(fpf, rd, rn, eq);
7181         return;
7182       case NEON_FCMLE_zero:
7183         fcmp_zero(fpf, rd, rn, le);
7184         return;
7185       case NEON_FCMLT_zero:
7186         fcmp_zero(fpf, rd, rn, lt);
7187         return;
7188       default:
7189         if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
7190             (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
7191           switch (instr->Mask(NEON2RegMiscMask)) {
7192             case NEON_XTN:
7193               xtn(vf, rd, rn);
7194               return;
7195             case NEON_SQXTN:
7196               sqxtn(vf, rd, rn);
7197               return;
7198             case NEON_UQXTN:
7199               uqxtn(vf, rd, rn);
7200               return;
7201             case NEON_SQXTUN:
7202               sqxtun(vf, rd, rn);
7203               return;
7204             case NEON_SHLL:
7205               vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7206               if (instr->Mask(NEON_Q)) {
7207                 shll2(vf, rd, rn);
7208               } else {
7209                 shll(vf, rd, rn);
7210               }
7211               return;
7212             default:
7213               VIXL_UNIMPLEMENTED();
7214           }
7215         } else {
7216           VIXL_UNIMPLEMENTED();
7217         }
7218     }
7219 
7220     // Only FRINT* instructions fall through the switch above.
7221     frint(fpf, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
7222   }
7223 }
7224 
7225 
7226 void Simulator::VisitNEON2RegMiscFP16(const Instruction* instr) {
7227   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
7228   NEONFormatDecoder nfd(instr);
7229   VectorFormat fpf = nfd.GetVectorFormat(&map_half);
7230 
7231   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
7232 
7233   SimVRegister& rd = ReadVRegister(instr->GetRd());
7234   SimVRegister& rn = ReadVRegister(instr->GetRn());
7235 
7236   switch (instr->Mask(NEON2RegMiscFP16Mask)) {
7237     case NEON_SCVTF_H:
7238       scvtf(fpf, rd, rn, 0, fpcr_rounding);
7239       return;
7240     case NEON_UCVTF_H:
7241       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
7242       return;
7243     case NEON_FCVTNS_H:
7244       fcvts(fpf, rd, rn, FPTieEven);
7245       return;
7246     case NEON_FCVTNU_H:
7247       fcvtu(fpf, rd, rn, FPTieEven);
7248       return;
7249     case NEON_FCVTPS_H:
7250       fcvts(fpf, rd, rn, FPPositiveInfinity);
7251       return;
7252     case NEON_FCVTPU_H:
7253       fcvtu(fpf, rd, rn, FPPositiveInfinity);
7254       return;
7255     case NEON_FCVTMS_H:
7256       fcvts(fpf, rd, rn, FPNegativeInfinity);
7257       return;
7258     case NEON_FCVTMU_H:
7259       fcvtu(fpf, rd, rn, FPNegativeInfinity);
7260       return;
7261     case NEON_FCVTZS_H:
7262       fcvts(fpf, rd, rn, FPZero);
7263       return;
7264     case NEON_FCVTZU_H:
7265       fcvtu(fpf, rd, rn, FPZero);
7266       return;
7267     case NEON_FCVTAS_H:
7268       fcvts(fpf, rd, rn, FPTieAway);
7269       return;
7270     case NEON_FCVTAU_H:
7271       fcvtu(fpf, rd, rn, FPTieAway);
7272       return;
7273     case NEON_FRINTI_H:
7274       frint(fpf, rd, rn, fpcr_rounding, false);
7275       return;
7276     case NEON_FRINTX_H:
7277       frint(fpf, rd, rn, fpcr_rounding, true);
7278       return;
7279     case NEON_FRINTA_H:
7280       frint(fpf, rd, rn, FPTieAway, false);
7281       return;
7282     case NEON_FRINTM_H:
7283       frint(fpf, rd, rn, FPNegativeInfinity, false);
7284       return;
7285     case NEON_FRINTN_H:
7286       frint(fpf, rd, rn, FPTieEven, false);
7287       return;
7288     case NEON_FRINTP_H:
7289       frint(fpf, rd, rn, FPPositiveInfinity, false);
7290       return;
7291     case NEON_FRINTZ_H:
7292       frint(fpf, rd, rn, FPZero, false);
7293       return;
7294     case NEON_FABS_H:
7295       fabs_(fpf, rd, rn);
7296       return;
7297     case NEON_FNEG_H:
7298       fneg(fpf, rd, rn);
7299       return;
7300     case NEON_FSQRT_H:
7301       fsqrt(fpf, rd, rn);
7302       return;
7303     case NEON_FRSQRTE_H:
7304       frsqrte(fpf, rd, rn);
7305       return;
7306     case NEON_FRECPE_H:
7307       frecpe(fpf, rd, rn, fpcr_rounding);
7308       return;
7309     case NEON_FCMGT_H_zero:
7310       fcmp_zero(fpf, rd, rn, gt);
7311       return;
7312     case NEON_FCMGE_H_zero:
7313       fcmp_zero(fpf, rd, rn, ge);
7314       return;
7315     case NEON_FCMEQ_H_zero:
7316       fcmp_zero(fpf, rd, rn, eq);
7317       return;
7318     case NEON_FCMLE_H_zero:
7319       fcmp_zero(fpf, rd, rn, le);
7320       return;
7321     case NEON_FCMLT_H_zero:
7322       fcmp_zero(fpf, rd, rn, lt);
7323       return;
7324     default:
7325       VIXL_UNIMPLEMENTED();
7326       return;
7327   }
7328 }
7329 
7330 
7331 void Simulator::VisitNEON3Same(const Instruction* instr) {
7332   NEONFormatDecoder nfd(instr);
7333   SimVRegister& rd = ReadVRegister(instr->GetRd());
7334   SimVRegister& rn = ReadVRegister(instr->GetRn());
7335   SimVRegister& rm = ReadVRegister(instr->GetRm());
7336 
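  // The three-same encodings fall into three groups, handled below in turn:
  // bitwise/logical operations (where the size field selects the operation),
  // floating-point operations, and the remaining integer operations.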
7337   if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
7338     VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
7339     switch (instr->Mask(NEON3SameLogicalMask)) {
7340       case NEON_AND:
7341         and_(vf, rd, rn, rm);
7342         break;
7343       case NEON_ORR:
7344         orr(vf, rd, rn, rm);
7345         break;
7346       case NEON_ORN:
7347         orn(vf, rd, rn, rm);
7348         break;
7349       case NEON_EOR:
7350         eor(vf, rd, rn, rm);
7351         break;
7352       case NEON_BIC:
7353         bic(vf, rd, rn, rm);
7354         break;
7355       case NEON_BIF:
7356         bif(vf, rd, rn, rm);
7357         break;
7358       case NEON_BIT:
7359         bit(vf, rd, rn, rm);
7360         break;
7361       case NEON_BSL:
7362         bsl(vf, rd, rd, rn, rm);
7363         break;
7364       default:
7365         VIXL_UNIMPLEMENTED();
7366     }
7367   } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
7368     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7369     switch (instr->Mask(NEON3SameFPMask)) {
7370       case NEON_FADD:
7371         fadd(vf, rd, rn, rm);
7372         break;
7373       case NEON_FSUB:
7374         fsub(vf, rd, rn, rm);
7375         break;
7376       case NEON_FMUL:
7377         fmul(vf, rd, rn, rm);
7378         break;
7379       case NEON_FDIV:
7380         fdiv(vf, rd, rn, rm);
7381         break;
7382       case NEON_FMAX:
7383         fmax(vf, rd, rn, rm);
7384         break;
7385       case NEON_FMIN:
7386         fmin(vf, rd, rn, rm);
7387         break;
7388       case NEON_FMAXNM:
7389         fmaxnm(vf, rd, rn, rm);
7390         break;
7391       case NEON_FMINNM:
7392         fminnm(vf, rd, rn, rm);
7393         break;
7394       case NEON_FMLA:
7395         fmla(vf, rd, rd, rn, rm);
7396         break;
7397       case NEON_FMLS:
7398         fmls(vf, rd, rd, rn, rm);
7399         break;
7400       case NEON_FMULX:
7401         fmulx(vf, rd, rn, rm);
7402         break;
7403       case NEON_FACGE:
7404         fabscmp(vf, rd, rn, rm, ge);
7405         break;
7406       case NEON_FACGT:
7407         fabscmp(vf, rd, rn, rm, gt);
7408         break;
7409       case NEON_FCMEQ:
7410         fcmp(vf, rd, rn, rm, eq);
7411         break;
7412       case NEON_FCMGE:
7413         fcmp(vf, rd, rn, rm, ge);
7414         break;
7415       case NEON_FCMGT:
7416         fcmp(vf, rd, rn, rm, gt);
7417         break;
7418       case NEON_FRECPS:
7419         frecps(vf, rd, rn, rm);
7420         break;
7421       case NEON_FRSQRTS:
7422         frsqrts(vf, rd, rn, rm);
7423         break;
7424       case NEON_FABD:
7425         fabd(vf, rd, rn, rm);
7426         break;
7427       case NEON_FADDP:
7428         faddp(vf, rd, rn, rm);
7429         break;
7430       case NEON_FMAXP:
7431         fmaxp(vf, rd, rn, rm);
7432         break;
7433       case NEON_FMAXNMP:
7434         fmaxnmp(vf, rd, rn, rm);
7435         break;
7436       case NEON_FMINP:
7437         fminp(vf, rd, rn, rm);
7438         break;
7439       case NEON_FMINNMP:
7440         fminnmp(vf, rd, rn, rm);
7441         break;
7442       default:
7443         // FMLAL{2} and FMLSL{2} have special-case encodings.
7444         switch (instr->Mask(NEON3SameFHMMask)) {
7445           case NEON_FMLAL:
7446             fmlal(vf, rd, rn, rm);
7447             break;
7448           case NEON_FMLAL2:
7449             fmlal2(vf, rd, rn, rm);
7450             break;
7451           case NEON_FMLSL:
7452             fmlsl(vf, rd, rn, rm);
7453             break;
7454           case NEON_FMLSL2:
7455             fmlsl2(vf, rd, rn, rm);
7456             break;
7457           default:
7458             VIXL_UNIMPLEMENTED();
7459         }
7460     }
7461   } else {
7462     VectorFormat vf = nfd.GetVectorFormat();
7463     switch (instr->Mask(NEON3SameMask)) {
7464       case NEON_ADD:
7465         add(vf, rd, rn, rm);
7466         break;
7467       case NEON_ADDP:
7468         addp(vf, rd, rn, rm);
7469         break;
7470       case NEON_CMEQ:
7471         cmp(vf, rd, rn, rm, eq);
7472         break;
7473       case NEON_CMGE:
7474         cmp(vf, rd, rn, rm, ge);
7475         break;
7476       case NEON_CMGT:
7477         cmp(vf, rd, rn, rm, gt);
7478         break;
7479       case NEON_CMHI:
7480         cmp(vf, rd, rn, rm, hi);
7481         break;
7482       case NEON_CMHS:
7483         cmp(vf, rd, rn, rm, hs);
7484         break;
7485       case NEON_CMTST:
7486         cmptst(vf, rd, rn, rm);
7487         break;
7488       case NEON_MLS:
7489         mls(vf, rd, rd, rn, rm);
7490         break;
7491       case NEON_MLA:
7492         mla(vf, rd, rd, rn, rm);
7493         break;
7494       case NEON_MUL:
7495         mul(vf, rd, rn, rm);
7496         break;
7497       case NEON_PMUL:
7498         pmul(vf, rd, rn, rm);
7499         break;
7500       case NEON_SMAX:
7501         smax(vf, rd, rn, rm);
7502         break;
7503       case NEON_SMAXP:
7504         smaxp(vf, rd, rn, rm);
7505         break;
7506       case NEON_SMIN:
7507         smin(vf, rd, rn, rm);
7508         break;
7509       case NEON_SMINP:
7510         sminp(vf, rd, rn, rm);
7511         break;
7512       case NEON_SUB:
7513         sub(vf, rd, rn, rm);
7514         break;
7515       case NEON_UMAX:
7516         umax(vf, rd, rn, rm);
7517         break;
7518       case NEON_UMAXP:
7519         umaxp(vf, rd, rn, rm);
7520         break;
7521       case NEON_UMIN:
7522         umin(vf, rd, rn, rm);
7523         break;
7524       case NEON_UMINP:
7525         uminp(vf, rd, rn, rm);
7526         break;
7527       case NEON_SSHL:
7528         sshl(vf, rd, rn, rm);
7529         break;
7530       case NEON_USHL:
7531         ushl(vf, rd, rn, rm);
7532         break;
7533       case NEON_SABD:
7534         absdiff(vf, rd, rn, rm, true);
7535         break;
7536       case NEON_UABD:
7537         absdiff(vf, rd, rn, rm, false);
7538         break;
7539       case NEON_SABA:
7540         saba(vf, rd, rn, rm);
7541         break;
7542       case NEON_UABA:
7543         uaba(vf, rd, rn, rm);
7544         break;
7545       case NEON_UQADD:
7546         add(vf, rd, rn, rm).UnsignedSaturate(vf);
7547         break;
7548       case NEON_SQADD:
7549         add(vf, rd, rn, rm).SignedSaturate(vf);
7550         break;
7551       case NEON_UQSUB:
7552         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
7553         break;
7554       case NEON_SQSUB:
7555         sub(vf, rd, rn, rm).SignedSaturate(vf);
7556         break;
7557       case NEON_SQDMULH:
7558         sqdmulh(vf, rd, rn, rm);
7559         break;
7560       case NEON_SQRDMULH:
7561         sqrdmulh(vf, rd, rn, rm);
7562         break;
7563       case NEON_UQSHL:
7564         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
7565         break;
7566       case NEON_SQSHL:
7567         sshl(vf, rd, rn, rm).SignedSaturate(vf);
7568         break;
7569       case NEON_URSHL:
7570         ushl(vf, rd, rn, rm).Round(vf);
7571         break;
7572       case NEON_SRSHL:
7573         sshl(vf, rd, rn, rm).Round(vf);
7574         break;
7575       case NEON_UQRSHL:
7576         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
7577         break;
7578       case NEON_SQRSHL:
7579         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
7580         break;
7581       case NEON_UHADD:
7582         add(vf, rd, rn, rm).Uhalve(vf);
7583         break;
7584       case NEON_URHADD:
7585         add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
7586         break;
7587       case NEON_SHADD:
7588         add(vf, rd, rn, rm).Halve(vf);
7589         break;
7590       case NEON_SRHADD:
7591         add(vf, rd, rn, rm).Halve(vf).Round(vf);
7592         break;
7593       case NEON_UHSUB:
7594         sub(vf, rd, rn, rm).Uhalve(vf);
7595         break;
7596       case NEON_SHSUB:
7597         sub(vf, rd, rn, rm).Halve(vf);
7598         break;
7599       default:
7600         VIXL_UNIMPLEMENTED();
7601     }
7602   }
7603 }
7604 
7605 
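// Visit the half-precision "three same" group. Most operations map directly
// onto a logic helper via the SIM_FUNC macro below; the rest need an
// accumulator operand (FMLA/FMLS) or a comparison condition.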
7606 void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
7607   NEONFormatDecoder nfd(instr);
7608   SimVRegister& rd = ReadVRegister(instr->GetRd());
7609   SimVRegister& rn = ReadVRegister(instr->GetRn());
7610   SimVRegister& rm = ReadVRegister(instr->GetRm());
7611 
7612   VectorFormat vf = nfd.GetVectorFormat(nfd.FP16FormatMap());
7613   switch (instr->Mask(NEON3SameFP16Mask)) {
7614 #define SIM_FUNC(A, B) \
7615   case NEON_##A##_H:   \
7616     B(vf, rd, rn, rm); \
7617     break;
7618     SIM_FUNC(FMAXNM, fmaxnm);
7619     SIM_FUNC(FADD, fadd);
7620     SIM_FUNC(FMULX, fmulx);
7621     SIM_FUNC(FMAX, fmax);
7622     SIM_FUNC(FRECPS, frecps);
7623     SIM_FUNC(FMINNM, fminnm);
7624     SIM_FUNC(FSUB, fsub);
7625     SIM_FUNC(FMIN, fmin);
7626     SIM_FUNC(FRSQRTS, frsqrts);
7627     SIM_FUNC(FMAXNMP, fmaxnmp);
7628     SIM_FUNC(FADDP, faddp);
7629     SIM_FUNC(FMUL, fmul);
7630     SIM_FUNC(FMAXP, fmaxp);
7631     SIM_FUNC(FDIV, fdiv);
7632     SIM_FUNC(FMINNMP, fminnmp);
7633     SIM_FUNC(FABD, fabd);
7634     SIM_FUNC(FMINP, fminp);
7635 #undef SIM_FUNC
7636     case NEON_FMLA_H:
7637       fmla(vf, rd, rd, rn, rm);
7638       break;
7639     case NEON_FMLS_H:
7640       fmls(vf, rd, rd, rn, rm);
7641       break;
7642     case NEON_FCMEQ_H:
7643       fcmp(vf, rd, rn, rm, eq);
7644       break;
7645     case NEON_FCMGE_H:
7646       fcmp(vf, rd, rn, rm, ge);
7647       break;
7648     case NEON_FACGE_H:
7649       fabscmp(vf, rd, rn, rm, ge);
7650       break;
7651     case NEON_FCMGT_H:
7652       fcmp(vf, rd, rn, rm, gt);
7653       break;
7654     case NEON_FACGT_H:
7655       fabscmp(vf, rd, rn, rm, gt);
7656       break;
7657     default:
7658       VIXL_UNIMPLEMENTED();
7659       break;
7660   }
7661 }
7662 
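// Visit the "three same extra" group: complex arithmetic (FCMLA, FCADD), the
// dot products, and SQRDMLAH/SQRDMLSH.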
7663 void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
7664   NEONFormatDecoder nfd(instr);
7665   SimVRegister& rd = ReadVRegister(instr->GetRd());
7666   SimVRegister& rn = ReadVRegister(instr->GetRn());
7667   SimVRegister& rm = ReadVRegister(instr->GetRm());
7668   int rot = 0;
7669   VectorFormat vf = nfd.GetVectorFormat();
7670 
7671   switch (form_hash_) {
7672     case "fcmla_asimdsame2_c"_h:
7673       rot = instr->GetImmRotFcmlaVec();
7674       fcmla(vf, rd, rn, rm, rd, rot);
7675       break;
7676     case "fcadd_asimdsame2_c"_h:
7677       rot = instr->GetImmRotFcadd();
7678       fcadd(vf, rd, rn, rm, rot);
7679       break;
7680     case "sdot_asimdsame2_d"_h:
7681       sdot(vf, rd, rn, rm);
7682       break;
7683     case "udot_asimdsame2_d"_h:
7684       udot(vf, rd, rn, rm);
7685       break;
7686     case "usdot_asimdsame2_d"_h:
7687       usdot(vf, rd, rn, rm);
7688       break;
7689     case "sqrdmlah_asimdsame2_only"_h:
7690       sqrdmlah(vf, rd, rn, rm);
7691       break;
7692     case "sqrdmlsh_asimdsame2_only"_h:
7693       sqrdmlsh(vf, rd, rn, rm);
7694       break;
7695   }
7696 }
7697 
7698 
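// Visit the "three different" group: long (SADDL, UMULL, ...), wide (SADDW,
// USUBW, ...) and narrowing high-half (ADDHN, RSUBHN, ...) operations.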
7699 void Simulator::VisitNEON3Different(const Instruction* instr) {
7700   NEONFormatDecoder nfd(instr);
7701   VectorFormat vf = nfd.GetVectorFormat();
7702   VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7703 
7704   SimVRegister& rd = ReadVRegister(instr->GetRd());
7705   SimVRegister& rn = ReadVRegister(instr->GetRn());
7706   SimVRegister& rm = ReadVRegister(instr->GetRm());
7707 
7708   switch (instr->Mask(NEON3DifferentMask)) {
7709     case NEON_PMULL:
7710       pmull(vf_l, rd, rn, rm);
7711       break;
7712     case NEON_PMULL2:
7713       pmull2(vf_l, rd, rn, rm);
7714       break;
7715     case NEON_UADDL:
7716       uaddl(vf_l, rd, rn, rm);
7717       break;
7718     case NEON_UADDL2:
7719       uaddl2(vf_l, rd, rn, rm);
7720       break;
7721     case NEON_SADDL:
7722       saddl(vf_l, rd, rn, rm);
7723       break;
7724     case NEON_SADDL2:
7725       saddl2(vf_l, rd, rn, rm);
7726       break;
7727     case NEON_USUBL:
7728       usubl(vf_l, rd, rn, rm);
7729       break;
7730     case NEON_USUBL2:
7731       usubl2(vf_l, rd, rn, rm);
7732       break;
7733     case NEON_SSUBL:
7734       ssubl(vf_l, rd, rn, rm);
7735       break;
7736     case NEON_SSUBL2:
7737       ssubl2(vf_l, rd, rn, rm);
7738       break;
7739     case NEON_SABAL:
7740       sabal(vf_l, rd, rn, rm);
7741       break;
7742     case NEON_SABAL2:
7743       sabal2(vf_l, rd, rn, rm);
7744       break;
7745     case NEON_UABAL:
7746       uabal(vf_l, rd, rn, rm);
7747       break;
7748     case NEON_UABAL2:
7749       uabal2(vf_l, rd, rn, rm);
7750       break;
7751     case NEON_SABDL:
7752       sabdl(vf_l, rd, rn, rm);
7753       break;
7754     case NEON_SABDL2:
7755       sabdl2(vf_l, rd, rn, rm);
7756       break;
7757     case NEON_UABDL:
7758       uabdl(vf_l, rd, rn, rm);
7759       break;
7760     case NEON_UABDL2:
7761       uabdl2(vf_l, rd, rn, rm);
7762       break;
7763     case NEON_SMLAL:
7764       smlal(vf_l, rd, rn, rm);
7765       break;
7766     case NEON_SMLAL2:
7767       smlal2(vf_l, rd, rn, rm);
7768       break;
7769     case NEON_UMLAL:
7770       umlal(vf_l, rd, rn, rm);
7771       break;
7772     case NEON_UMLAL2:
7773       umlal2(vf_l, rd, rn, rm);
7774       break;
7775     case NEON_SMLSL:
7776       smlsl(vf_l, rd, rn, rm);
7777       break;
7778     case NEON_SMLSL2:
7779       smlsl2(vf_l, rd, rn, rm);
7780       break;
7781     case NEON_UMLSL:
7782       umlsl(vf_l, rd, rn, rm);
7783       break;
7784     case NEON_UMLSL2:
7785       umlsl2(vf_l, rd, rn, rm);
7786       break;
7787     case NEON_SMULL:
7788       smull(vf_l, rd, rn, rm);
7789       break;
7790     case NEON_SMULL2:
7791       smull2(vf_l, rd, rn, rm);
7792       break;
7793     case NEON_UMULL:
7794       umull(vf_l, rd, rn, rm);
7795       break;
7796     case NEON_UMULL2:
7797       umull2(vf_l, rd, rn, rm);
7798       break;
7799     case NEON_SQDMLAL:
7800       sqdmlal(vf_l, rd, rn, rm);
7801       break;
7802     case NEON_SQDMLAL2:
7803       sqdmlal2(vf_l, rd, rn, rm);
7804       break;
7805     case NEON_SQDMLSL:
7806       sqdmlsl(vf_l, rd, rn, rm);
7807       break;
7808     case NEON_SQDMLSL2:
7809       sqdmlsl2(vf_l, rd, rn, rm);
7810       break;
7811     case NEON_SQDMULL:
7812       sqdmull(vf_l, rd, rn, rm);
7813       break;
7814     case NEON_SQDMULL2:
7815       sqdmull2(vf_l, rd, rn, rm);
7816       break;
7817     case NEON_UADDW:
7818       uaddw(vf_l, rd, rn, rm);
7819       break;
7820     case NEON_UADDW2:
7821       uaddw2(vf_l, rd, rn, rm);
7822       break;
7823     case NEON_SADDW:
7824       saddw(vf_l, rd, rn, rm);
7825       break;
7826     case NEON_SADDW2:
7827       saddw2(vf_l, rd, rn, rm);
7828       break;
7829     case NEON_USUBW:
7830       usubw(vf_l, rd, rn, rm);
7831       break;
7832     case NEON_USUBW2:
7833       usubw2(vf_l, rd, rn, rm);
7834       break;
7835     case NEON_SSUBW:
7836       ssubw(vf_l, rd, rn, rm);
7837       break;
7838     case NEON_SSUBW2:
7839       ssubw2(vf_l, rd, rn, rm);
7840       break;
7841     case NEON_ADDHN:
7842       addhn(vf, rd, rn, rm);
7843       break;
7844     case NEON_ADDHN2:
7845       addhn2(vf, rd, rn, rm);
7846       break;
7847     case NEON_RADDHN:
7848       raddhn(vf, rd, rn, rm);
7849       break;
7850     case NEON_RADDHN2:
7851       raddhn2(vf, rd, rn, rm);
7852       break;
7853     case NEON_SUBHN:
7854       subhn(vf, rd, rn, rm);
7855       break;
7856     case NEON_SUBHN2:
7857       subhn2(vf, rd, rn, rm);
7858       break;
7859     case NEON_RSUBHN:
7860       rsubhn(vf, rd, rn, rm);
7861       break;
7862     case NEON_RSUBHN2:
7863       rsubhn2(vf, rd, rn, rm);
7864       break;
7865     default:
7866       VIXL_UNIMPLEMENTED();
7867   }
7868 }
7869 
7870 
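// Visit the across-lanes group: reductions of a whole vector to a single
// scalar result (ADDV, SADDLV/UADDLV, and the integer and FP min/max
// reductions).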
7871 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
7872   NEONFormatDecoder nfd(instr);
7873 
7874   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
7875 
7876   SimVRegister& rd = ReadVRegister(instr->GetRd());
7877   SimVRegister& rn = ReadVRegister(instr->GetRn());
7878 
7879   if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
7880     VectorFormat vf = nfd.GetVectorFormat(&map_half);
7881     switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
7882       case NEON_FMAXV_H:
7883         fmaxv(vf, rd, rn);
7884         break;
7885       case NEON_FMINV_H:
7886         fminv(vf, rd, rn);
7887         break;
7888       case NEON_FMAXNMV_H:
7889         fmaxnmv(vf, rd, rn);
7890         break;
7891       case NEON_FMINNMV_H:
7892         fminnmv(vf, rd, rn);
7893         break;
7894       default:
7895         VIXL_UNIMPLEMENTED();
7896     }
7897   } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
7898     // The input operand's VectorFormat is passed for these instructions.
7899     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7900 
7901     switch (instr->Mask(NEONAcrossLanesFPMask)) {
7902       case NEON_FMAXV:
7903         fmaxv(vf, rd, rn);
7904         break;
7905       case NEON_FMINV:
7906         fminv(vf, rd, rn);
7907         break;
7908       case NEON_FMAXNMV:
7909         fmaxnmv(vf, rd, rn);
7910         break;
7911       case NEON_FMINNMV:
7912         fminnmv(vf, rd, rn);
7913         break;
7914       default:
7915         VIXL_UNIMPLEMENTED();
7916     }
7917   } else {
7918     VectorFormat vf = nfd.GetVectorFormat();
7919 
7920     switch (instr->Mask(NEONAcrossLanesMask)) {
7921       case NEON_ADDV:
7922         addv(vf, rd, rn);
7923         break;
7924       case NEON_SMAXV:
7925         smaxv(vf, rd, rn);
7926         break;
7927       case NEON_SMINV:
7928         sminv(vf, rd, rn);
7929         break;
7930       case NEON_UMAXV:
7931         umaxv(vf, rd, rn);
7932         break;
7933       case NEON_UMINV:
7934         uminv(vf, rd, rn);
7935         break;
7936       case NEON_SADDLV:
7937         saddlv(vf, rd, rn);
7938         break;
7939       case NEON_UADDLV:
7940         uaddlv(vf, rd, rn);
7941         break;
7942       default:
7943         VIXL_UNIMPLEMENTED();
7944     }
7945   }
7946 }
7947 
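// Simulate the long integer multiply(-accumulate) by-element forms (SMULL,
// UMLAL, SQDMLSL, ...). The indexed element is duplicated into a temporary
// register so that the vector-by-vector helpers can be reused.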
7948 void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
7949   NEONFormatDecoder nfd(instr);
7950   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7951 
7952   SimVRegister& rd = ReadVRegister(instr->GetRd());
7953   SimVRegister& rn = ReadVRegister(instr->GetRn());
7954 
7955   int rm_reg = instr->GetRm();
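  // The index is encoded in the H:L bits; for 16-bit elements, Rm is
  // restricted to V0-V15 and bit M extends the index.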
7956   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7957   if (instr->GetNEONSize() == 1) {
7958     rm_reg = instr->GetRmLow16();
7959     index = (index << 1) | instr->GetNEONM();
7960   }
7961   SimVRegister& rm = ReadVRegister(rm_reg);
7962 
7963   SimVRegister temp;
7964   VectorFormat indexform =
7965       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
7966   dup_element(indexform, temp, rm, index);
7967 
7968   bool is_2 = (instr->Mask(NEON_Q) != 0);
7969 
7970   switch (form_hash_) {
7971     case "smull_asimdelem_l"_h:
7972       smull(vf, rd, rn, temp, is_2);
7973       break;
7974     case "umull_asimdelem_l"_h:
7975       umull(vf, rd, rn, temp, is_2);
7976       break;
7977     case "smlal_asimdelem_l"_h:
7978       smlal(vf, rd, rn, temp, is_2);
7979       break;
7980     case "umlal_asimdelem_l"_h:
7981       umlal(vf, rd, rn, temp, is_2);
7982       break;
7983     case "smlsl_asimdelem_l"_h:
7984       smlsl(vf, rd, rn, temp, is_2);
7985       break;
7986     case "umlsl_asimdelem_l"_h:
7987       umlsl(vf, rd, rn, temp, is_2);
7988       break;
7989     case "sqdmull_asimdelem_l"_h:
7990       sqdmull(vf, rd, rn, temp, is_2);
7991       break;
7992     case "sqdmlal_asimdelem_l"_h:
7993       sqdmlal(vf, rd, rn, temp, is_2);
7994       break;
7995     case "sqdmlsl_asimdelem_l"_h:
7996       sqdmlsl(vf, rd, rn, temp, is_2);
7997       break;
7998     default:
7999       VIXL_UNREACHABLE();
8000   }
8001 }
8002 
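// Simulate the FMLAL{2}/FMLSL{2} by-element forms: half-precision products
// are accumulated into single-precision lanes.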
8003 void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) {
8004   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
8005   SimVRegister& rd = ReadVRegister(instr->GetRd());
8006   SimVRegister& rn = ReadVRegister(instr->GetRn());
8007   SimVRegister& rm = ReadVRegister(instr->GetRmLow16());
8008 
8009   int index =
8010       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
8011 
8012   switch (form_hash_) {
8013     case "fmlal_asimdelem_lh"_h:
8014       fmlal(vform, rd, rn, rm, index);
8015       break;
8016     case "fmlal2_asimdelem_lh"_h:
8017       fmlal2(vform, rd, rn, rm, index);
8018       break;
8019     case "fmlsl_asimdelem_lh"_h:
8020       fmlsl(vform, rd, rn, rm, index);
8021       break;
8022     case "fmlsl2_asimdelem_lh"_h:
8023       fmlsl2(vform, rd, rn, rm, index);
8024       break;
8025     default:
8026       VIXL_UNREACHABLE();
8027   }
8028 }
8029 
8030 void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) {
8031   NEONFormatDecoder nfd(instr);
8032   static const NEONFormatMap map =
8033       {{23, 22, 30},
8034        {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
8035   VectorFormat vform = nfd.GetVectorFormat(&map);
8036 
8037   SimVRegister& rd = ReadVRegister(instr->GetRd());
8038   SimVRegister& rn = ReadVRegister(instr->GetRn());
8039 
8040   int rm_reg = instr->GetRm();
8041   int index =
8042       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
8043 
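  // Adjust Rm and the index for the element size: H-sized elements use a
  // four-bit Rm with an H:L:M index, S-sized elements use H:L, and D-sized
  // elements use H only.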
8044   if ((vform == kFormat4H) || (vform == kFormat8H)) {
8045     rm_reg &= 0xf;
8046   } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
8047     index >>= 1;
8048   } else {
8049     VIXL_ASSERT(vform == kFormat2D);
8050     VIXL_ASSERT(instr->GetNEONL() == 0);
8051     index >>= 2;
8052   }
8053 
8054   SimVRegister& rm = ReadVRegister(rm_reg);
8055 
8056   switch (form_hash_) {
8057     case "fmul_asimdelem_rh_h"_h:
8058     case "fmul_asimdelem_r_sd"_h:
8059       fmul(vform, rd, rn, rm, index);
8060       break;
8061     case "fmla_asimdelem_rh_h"_h:
8062     case "fmla_asimdelem_r_sd"_h:
8063       fmla(vform, rd, rn, rm, index);
8064       break;
8065     case "fmls_asimdelem_rh_h"_h:
8066     case "fmls_asimdelem_r_sd"_h:
8067       fmls(vform, rd, rn, rm, index);
8068       break;
8069     case "fmulx_asimdelem_rh_h"_h:
8070     case "fmulx_asimdelem_r_sd"_h:
8071       fmulx(vform, rd, rn, rm, index);
8072       break;
8073     default:
8074       VIXL_UNREACHABLE();
8075   }
8076 }
8077 
8078 void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) {
8079   VectorFormat vform = instr->GetNEONQ() ? kFormat8H : kFormat4H;
8080   SimVRegister& rd = ReadVRegister(instr->GetRd());
8081   SimVRegister& rn = ReadVRegister(instr->GetRn());
8082   SimVRegister& rm = ReadVRegister(instr->GetRm());
8083   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8084 
8085   switch (form_hash_) {
8086     case "fcmla_asimdelem_c_s"_h:
8087       vform = kFormat4S;
8088       index >>= 1;
8089       VIXL_FALLTHROUGH();
8090     case "fcmla_asimdelem_c_h"_h:
8091       fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca());
8092       break;
8093     default:
8094       VIXL_UNREACHABLE();
8095   }
8096 }
8097 
8098 void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) {
8099   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
8100 
8101   SimVRegister& rd = ReadVRegister(instr->GetRd());
8102   SimVRegister& rn = ReadVRegister(instr->GetRn());
8103   SimVRegister& rm = ReadVRegister(instr->GetRm());
8104   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8105 
8106   SimVRegister temp;
8107   // NEON indexed `dot` allows the index value to exceed the register size.
8108   // Promote to the Q-sized vector format before the duplication.
8109   dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index);
8110 
8111   switch (form_hash_) {
8112     case "sdot_asimdelem_d"_h:
8113       sdot(vform, rd, rn, temp);
8114       break;
8115     case "udot_asimdelem_d"_h:
8116       udot(vform, rd, rn, temp);
8117       break;
8118     case "sudot_asimdelem_d"_h:
8119       usdot(vform, rd, temp, rn);
8120       break;
8121     case "usdot_asimdelem_d"_h:
8122       usdot(vform, rd, rn, temp);
8123       break;
8124   }
8125 }
8126 
8127 void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
8128   NEONFormatDecoder nfd(instr);
8129   VectorFormat vform = nfd.GetVectorFormat();
8130 
8131   SimVRegister& rd = ReadVRegister(instr->GetRd());
8132   SimVRegister& rn = ReadVRegister(instr->GetRn());
8133 
8134   int rm_reg = instr->GetRm();
8135   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
8136 
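  // For H-sized elements, only V0-V15 can be used as Rm, and bit M extends
  // the index to three bits.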
8137   if ((vform == kFormat4H) || (vform == kFormat8H)) {
8138     rm_reg &= 0xf;
8139     index = (index << 1) | instr->GetNEONM();
8140   }
8141 
8142   SimVRegister& rm = ReadVRegister(rm_reg);
8143 
8144   switch (form_hash_) {
8145     case "mul_asimdelem_r"_h:
8146       mul(vform, rd, rn, rm, index);
8147       break;
8148     case "mla_asimdelem_r"_h:
8149       mla(vform, rd, rn, rm, index);
8150       break;
8151     case "mls_asimdelem_r"_h:
8152       mls(vform, rd, rn, rm, index);
8153       break;
8154     case "sqdmulh_asimdelem_r"_h:
8155       sqdmulh(vform, rd, rn, rm, index);
8156       break;
8157     case "sqrdmulh_asimdelem_r"_h:
8158       sqrdmulh(vform, rd, rn, rm, index);
8159       break;
8160     case "sqrdmlah_asimdelem_r"_h:
8161       sqrdmlah(vform, rd, rn, rm, index);
8162       break;
8163     case "sqrdmlsh_asimdelem_r"_h:
8164       sqrdmlsh(vform, rd, rn, rm, index);
8165       break;
8166   }
8167 }
8168 
8169 
8170 void Simulator::VisitNEONCopy(const Instruction* instr) {
8171   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
8172   VectorFormat vf = nfd.GetVectorFormat();
8173 
8174   SimVRegister& rd = ReadVRegister(instr->GetRd());
8175   SimVRegister& rn = ReadVRegister(instr->GetRn());
8176   int imm5 = instr->GetImmNEON5();
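  // The position of the lowest set bit of imm5 encodes the element size; the
  // bits above it hold the element index.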
8177   int tz = CountTrailingZeros(imm5, 32);
8178   int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5);
8179 
8180   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
8181     int imm4 = instr->GetImmNEON4();
8182     int rn_index = ExtractSignedBitfield32(31, tz, imm4);
8183     ins_element(vf, rd, reg_index, rn, rn_index);
8184   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
8185     ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
8186   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
8187     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
8188     value &= MaxUintFromFormat(vf);
8189     WriteXRegister(instr->GetRd(), value);
8190   } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
8191     int64_t value = LogicVRegister(rn).Int(vf, reg_index);
8192     if (instr->GetNEONQ()) {
8193       WriteXRegister(instr->GetRd(), value);
8194     } else {
8195       WriteWRegister(instr->GetRd(), (int32_t)value);
8196     }
8197   } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
8198     dup_element(vf, rd, rn, reg_index);
8199   } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
8200     dup_immediate(vf, rd, ReadXRegister(instr->GetRn()));
8201   } else {
8202     VIXL_UNIMPLEMENTED();
8203   }
8204 }
8205 
8206 
8207 void Simulator::VisitNEONExtract(const Instruction* instr) {
8208   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
8209   VectorFormat vf = nfd.GetVectorFormat();
8210   SimVRegister& rd = ReadVRegister(instr->GetRd());
8211   SimVRegister& rn = ReadVRegister(instr->GetRn());
8212   SimVRegister& rm = ReadVRegister(instr->GetRm());
8213   if (instr->Mask(NEONExtractMask) == NEON_EXT) {
8214     int index = instr->GetImmNEONExt();
8215     ext(vf, rd, rn, rm, index);
8216   } else {
8217     VIXL_UNIMPLEMENTED();
8218   }
8219 }
8220 
8221 
8222 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
8223                                                AddrMode addr_mode) {
8224   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
8225   VectorFormat vf = nfd.GetVectorFormat();
8226 
8227   uint64_t addr_base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
8228   int reg_size = RegisterSizeInBytesFromFormat(vf);
8229 
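  // Precompute the (wrapping) register numbers and the base address of each
  // register-sized chunk; forms that transfer fewer registers simply ignore
  // the unused entries.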
8230   int reg[4];
8231   uint64_t addr[4];
8232   for (int i = 0; i < 4; i++) {
8233     reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters;
8234     addr[i] = addr_base + (i * reg_size);
8235   }
8236   int struct_parts = 1;
8237   int reg_count = 1;
8238   bool log_read = true;
8239 
8240   // Bit 23 determines whether this is an offset or post-index addressing mode.
8241   // In offset mode, bits 20 to 16 should be zero; these bits encode the
8242   // register or immediate in post-index mode.
8243   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
8244     VIXL_UNREACHABLE();
8245   }
8246 
8247   // We use the PostIndex mask here, as it works in this case for both Offset
8248   // and PostIndex addressing.
8249   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
8250     case NEON_LD1_4v:
8251     case NEON_LD1_4v_post:
8252       ld1(vf, ReadVRegister(reg[3]), addr[3]);
8253       reg_count++;
8254       VIXL_FALLTHROUGH();
8255     case NEON_LD1_3v:
8256     case NEON_LD1_3v_post:
8257       ld1(vf, ReadVRegister(reg[2]), addr[2]);
8258       reg_count++;
8259       VIXL_FALLTHROUGH();
8260     case NEON_LD1_2v:
8261     case NEON_LD1_2v_post:
8262       ld1(vf, ReadVRegister(reg[1]), addr[1]);
8263       reg_count++;
8264       VIXL_FALLTHROUGH();
8265     case NEON_LD1_1v:
8266     case NEON_LD1_1v_post:
8267       ld1(vf, ReadVRegister(reg[0]), addr[0]);
8268       break;
8269     case NEON_ST1_4v:
8270     case NEON_ST1_4v_post:
8271       st1(vf, ReadVRegister(reg[3]), addr[3]);
8272       reg_count++;
8273       VIXL_FALLTHROUGH();
8274     case NEON_ST1_3v:
8275     case NEON_ST1_3v_post:
8276       st1(vf, ReadVRegister(reg[2]), addr[2]);
8277       reg_count++;
8278       VIXL_FALLTHROUGH();
8279     case NEON_ST1_2v:
8280     case NEON_ST1_2v_post:
8281       st1(vf, ReadVRegister(reg[1]), addr[1]);
8282       reg_count++;
8283       VIXL_FALLTHROUGH();
8284     case NEON_ST1_1v:
8285     case NEON_ST1_1v_post:
8286       st1(vf, ReadVRegister(reg[0]), addr[0]);
8287       log_read = false;
8288       break;
8289     case NEON_LD2_post:
8290     case NEON_LD2:
8291       ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8292       struct_parts = 2;
8293       reg_count = 2;
8294       break;
8295     case NEON_ST2:
8296     case NEON_ST2_post:
8297       st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8298       struct_parts = 2;
8299       reg_count = 2;
8300       log_read = false;
8301       break;
8302     case NEON_LD3_post:
8303     case NEON_LD3:
8304       ld3(vf,
8305           ReadVRegister(reg[0]),
8306           ReadVRegister(reg[1]),
8307           ReadVRegister(reg[2]),
8308           addr[0]);
8309       struct_parts = 3;
8310       reg_count = 3;
8311       break;
8312     case NEON_ST3:
8313     case NEON_ST3_post:
8314       st3(vf,
8315           ReadVRegister(reg[0]),
8316           ReadVRegister(reg[1]),
8317           ReadVRegister(reg[2]),
8318           addr[0]);
8319       struct_parts = 3;
8320       reg_count = 3;
8321       log_read = false;
8322       break;
8323     case NEON_ST4:
8324     case NEON_ST4_post:
8325       st4(vf,
8326           ReadVRegister(reg[0]),
8327           ReadVRegister(reg[1]),
8328           ReadVRegister(reg[2]),
8329           ReadVRegister(reg[3]),
8330           addr[0]);
8331       struct_parts = 4;
8332       reg_count = 4;
8333       log_read = false;
8334       break;
8335     case NEON_LD4_post:
8336     case NEON_LD4:
8337       ld4(vf,
8338           ReadVRegister(reg[0]),
8339           ReadVRegister(reg[1]),
8340           ReadVRegister(reg[2]),
8341           ReadVRegister(reg[3]),
8342           addr[0]);
8343       struct_parts = 4;
8344       reg_count = 4;
8345       break;
8346     default:
8347       VIXL_UNIMPLEMENTED();
8348   }
8349 
8350   bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites();
8351   if (do_trace) {
8352     PrintRegisterFormat print_format =
8353         GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8354     const char* op;
8355     if (log_read) {
8356       op = "<-";
8357     } else {
8358       op = "->";
8359       // Stores don't represent a change to the source register's value, so only
8360       // print the relevant part of the value.
8361       print_format = GetPrintRegPartial(print_format);
8362     }
8363 
8364     VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1));
8365     for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) {
8366       uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf));
8367       PrintVStructAccess(reg[s], struct_parts, print_format, op, address);
8368     }
8369   }
8370 
8371   if (addr_mode == PostIndex) {
8372     int rm = instr->GetRm();
8373     // The immediate post-index addressing mode is indicated by rm = 31.
8374     // The immediate is implied by the number of vector registers used.
8375     addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
8376                             : ReadXRegister(rm);
8377     WriteXRegister(instr->GetRn(),
8378                    addr_base,
8379                    LogRegWrites,
8380                    Reg31IsStackPointer);
8381   } else {
8382     VIXL_ASSERT(addr_mode == Offset);
8383   }
8384 }
8385 
8386 
8387 void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
8388   NEONLoadStoreMultiStructHelper(instr, Offset);
8389 }
8390 
8391 
8392 void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
8393     const Instruction* instr) {
8394   NEONLoadStoreMultiStructHelper(instr, PostIndex);
8395 }
8396 
8397 
8398 void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
8399                                                 AddrMode addr_mode) {
8400   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
8401   int rt = instr->GetRt();
8402 
8403   // Bit 23 determines whether this is an offset or post-index addressing mode.
8404   // In offset mode, bits 20 to 16 should be zero; these bits encode the
8405   // register or immediate in post-index mode.
8406   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
8407     VIXL_UNREACHABLE();
8408   }
8409 
8410   // We use the PostIndex mask here, as it works in this case for both Offset
8411   // and PostIndex addressing.
8412   bool do_load = false;
8413 
8414   bool replicating = false;
8415 
8416   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
8417   VectorFormat vf_t = nfd.GetVectorFormat();
8418 
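  // Default to byte-sized lanes; the switch below widens vf according to the
  // encoded element size and sets do_load and replicating as required.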
8419   VectorFormat vf = kFormat16B;
8420   switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
8421     case NEON_LD1_b:
8422     case NEON_LD1_b_post:
8423     case NEON_LD2_b:
8424     case NEON_LD2_b_post:
8425     case NEON_LD3_b:
8426     case NEON_LD3_b_post:
8427     case NEON_LD4_b:
8428     case NEON_LD4_b_post:
8429       do_load = true;
8430       VIXL_FALLTHROUGH();
8431     case NEON_ST1_b:
8432     case NEON_ST1_b_post:
8433     case NEON_ST2_b:
8434     case NEON_ST2_b_post:
8435     case NEON_ST3_b:
8436     case NEON_ST3_b_post:
8437     case NEON_ST4_b:
8438     case NEON_ST4_b_post:
8439       break;
8440 
8441     case NEON_LD1_h:
8442     case NEON_LD1_h_post:
8443     case NEON_LD2_h:
8444     case NEON_LD2_h_post:
8445     case NEON_LD3_h:
8446     case NEON_LD3_h_post:
8447     case NEON_LD4_h:
8448     case NEON_LD4_h_post:
8449       do_load = true;
8450       VIXL_FALLTHROUGH();
8451     case NEON_ST1_h:
8452     case NEON_ST1_h_post:
8453     case NEON_ST2_h:
8454     case NEON_ST2_h_post:
8455     case NEON_ST3_h:
8456     case NEON_ST3_h_post:
8457     case NEON_ST4_h:
8458     case NEON_ST4_h_post:
8459       vf = kFormat8H;
8460       break;
8461     case NEON_LD1_s:
8462     case NEON_LD1_s_post:
8463     case NEON_LD2_s:
8464     case NEON_LD2_s_post:
8465     case NEON_LD3_s:
8466     case NEON_LD3_s_post:
8467     case NEON_LD4_s:
8468     case NEON_LD4_s_post:
8469       do_load = true;
8470       VIXL_FALLTHROUGH();
8471     case NEON_ST1_s:
8472     case NEON_ST1_s_post:
8473     case NEON_ST2_s:
8474     case NEON_ST2_s_post:
8475     case NEON_ST3_s:
8476     case NEON_ST3_s_post:
8477     case NEON_ST4_s:
8478     case NEON_ST4_s_post: {
8479       VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
8480       VIXL_STATIC_ASSERT((NEON_LD1_s_post | (1 << NEONLSSize_offset)) ==
8481                          NEON_LD1_d_post);
8482       VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
8483       VIXL_STATIC_ASSERT((NEON_ST1_s_post | (1 << NEONLSSize_offset)) ==
8484                          NEON_ST1_d_post);
8485       vf = ((instr->GetNEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
8486       break;
8487     }
8488 
8489     case NEON_LD1R:
8490     case NEON_LD1R_post:
8491     case NEON_LD2R:
8492     case NEON_LD2R_post:
8493     case NEON_LD3R:
8494     case NEON_LD3R_post:
8495     case NEON_LD4R:
8496     case NEON_LD4R_post:
8497       vf = vf_t;
8498       do_load = true;
8499       replicating = true;
8500       break;
8501 
8502     default:
8503       VIXL_UNIMPLEMENTED();
8504   }
8505 
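  // The lane index is packed into the Q:S:size fields; GetNEONLSIndex()
  // extracts it, scaled according to the lane size.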
8506   int index_shift = LaneSizeInBytesLog2FromFormat(vf);
8507   int lane = instr->GetNEONLSIndex(index_shift);
8508   int reg_count = 0;
8509   int rt2 = (rt + 1) % kNumberOfVRegisters;
8510   int rt3 = (rt2 + 1) % kNumberOfVRegisters;
8511   int rt4 = (rt3 + 1) % kNumberOfVRegisters;
8512   switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
8513     case NEONLoadStoreSingle1:
8514       reg_count = 1;
8515       if (replicating) {
8516         VIXL_ASSERT(do_load);
8517         ld1r(vf, ReadVRegister(rt), addr);
8518       } else if (do_load) {
8519         ld1(vf, ReadVRegister(rt), lane, addr);
8520       } else {
8521         st1(vf, ReadVRegister(rt), lane, addr);
8522       }
8523       break;
8524     case NEONLoadStoreSingle2:
8525       reg_count = 2;
8526       if (replicating) {
8527         VIXL_ASSERT(do_load);
8528         ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
8529       } else if (do_load) {
8530         ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8531       } else {
8532         st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8533       }
8534       break;
8535     case NEONLoadStoreSingle3:
8536       reg_count = 3;
8537       if (replicating) {
8538         VIXL_ASSERT(do_load);
8539         ld3r(vf,
8540              ReadVRegister(rt),
8541              ReadVRegister(rt2),
8542              ReadVRegister(rt3),
8543              addr);
8544       } else if (do_load) {
8545         ld3(vf,
8546             ReadVRegister(rt),
8547             ReadVRegister(rt2),
8548             ReadVRegister(rt3),
8549             lane,
8550             addr);
8551       } else {
8552         st3(vf,
8553             ReadVRegister(rt),
8554             ReadVRegister(rt2),
8555             ReadVRegister(rt3),
8556             lane,
8557             addr);
8558       }
8559       break;
8560     case NEONLoadStoreSingle4:
8561       reg_count = 4;
8562       if (replicating) {
8563         VIXL_ASSERT(do_load);
8564         ld4r(vf,
8565              ReadVRegister(rt),
8566              ReadVRegister(rt2),
8567              ReadVRegister(rt3),
8568              ReadVRegister(rt4),
8569              addr);
8570       } else if (do_load) {
8571         ld4(vf,
8572             ReadVRegister(rt),
8573             ReadVRegister(rt2),
8574             ReadVRegister(rt3),
8575             ReadVRegister(rt4),
8576             lane,
8577             addr);
8578       } else {
8579         st4(vf,
8580             ReadVRegister(rt),
8581             ReadVRegister(rt2),
8582             ReadVRegister(rt3),
8583             ReadVRegister(rt4),
8584             lane,
8585             addr);
8586       }
8587       break;
8588     default:
8589       VIXL_UNIMPLEMENTED();
8590   }
8591 
8592   // Trace registers and/or memory writes.
8593   PrintRegisterFormat print_format =
8594       GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8595   if (do_load) {
8596     if (ShouldTraceVRegs()) {
8597       if (replicating) {
8598         PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr);
8599       } else {
8600         PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr);
8601       }
8602     }
8603   } else {
8604     if (ShouldTraceWrites()) {
8605       // Stores don't represent a change to the source register's value, so only
8606       // print the relevant part of the value.
8607       print_format = GetPrintRegPartial(print_format);
8608       PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr);
8609     }
8610   }
8611 
8612   if (addr_mode == PostIndex) {
8613     int rm = instr->GetRm();
8614     int lane_size = LaneSizeInBytesFromFormat(vf);
8615     WriteXRegister(instr->GetRn(),
8616                    addr + ((rm == 31) ? (reg_count * lane_size)
8617                                       : ReadXRegister(rm)),
8618                    LogRegWrites,
8619                    Reg31IsStackPointer);
8620   }
8621 }
8622 
8623 
8624 void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
8625   NEONLoadStoreSingleStructHelper(instr, Offset);
8626 }
8627 
8628 
8629 void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
8630     const Instruction* instr) {
8631   NEONLoadStoreSingleStructHelper(instr, PostIndex);
8632 }
8633 
8634 
8635 void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
8636   SimVRegister& rd = ReadVRegister(instr->GetRd());
8637   int cmode = instr->GetNEONCmode();
8638   int cmode_3_1 = (cmode >> 1) & 7;
8639   int cmode_3 = (cmode >> 3) & 1;
8640   int cmode_2 = (cmode >> 2) & 1;
8641   int cmode_1 = (cmode >> 1) & 1;
8642   int cmode_0 = cmode & 1;
8643   int half_enc = instr->ExtractBit(11);
8644   int q = instr->GetNEONQ();
8645   int op_bit = instr->GetNEONModImmOp();
8646   uint64_t imm8 = instr->GetImmNEONabcdefgh();
8647   // Find the format and immediate value
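  // cmode (together with op and bit 11, the FP16 indicator) selects the
  // element size and how imm8 is expanded: shifted, ones-filled, replicated
  // per byte, or interpreted as a floating-point immediate.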
8648   uint64_t imm = 0;
8649   VectorFormat vform = kFormatUndefined;
8650   switch (cmode_3_1) {
8651     case 0x0:
8652     case 0x1:
8653     case 0x2:
8654     case 0x3:
8655       vform = (q == 1) ? kFormat4S : kFormat2S;
8656       imm = imm8 << (8 * cmode_3_1);
8657       break;
8658     case 0x4:
8659     case 0x5:
8660       vform = (q == 1) ? kFormat8H : kFormat4H;
8661       imm = imm8 << (8 * cmode_1);
8662       break;
8663     case 0x6:
8664       vform = (q == 1) ? kFormat4S : kFormat2S;
8665       if (cmode_0 == 0) {
8666         imm = imm8 << 8 | 0x000000ff;
8667       } else {
8668         imm = imm8 << 16 | 0x0000ffff;
8669       }
8670       break;
8671     case 0x7:
8672       if (cmode_0 == 0 && op_bit == 0) {
8673         vform = q ? kFormat16B : kFormat8B;
8674         imm = imm8;
8675       } else if (cmode_0 == 0 && op_bit == 1) {
8676         vform = q ? kFormat2D : kFormat1D;
8677         imm = 0;
8678         for (int i = 0; i < 8; ++i) {
8679           if (imm8 & (1 << i)) {
8680             imm |= (UINT64_C(0xff) << (8 * i));
8681           }
8682         }
8683       } else {  // cmode_0 == 1, cmode == 0xf.
8684         if (half_enc == 1) {
8685           vform = q ? kFormat8H : kFormat4H;
8686           imm = Float16ToRawbits(instr->GetImmNEONFP16());
8687         } else if (op_bit == 0) {
8688           vform = q ? kFormat4S : kFormat2S;
8689           imm = FloatToRawbits(instr->GetImmNEONFP32());
8690         } else if (q == 1) {
8691           vform = kFormat2D;
8692           imm = DoubleToRawbits(instr->GetImmNEONFP64());
8693         } else {
8694           VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
8695           VisitUnallocated(instr);
8696         }
8697       }
8698       break;
8699     default:
8700       VIXL_UNREACHABLE();
8701       break;
8702   }
8703 
8704   // Find the operation
8705   NEONModifiedImmediateOp op;
8706   if (cmode_3 == 0) {
8707     if (cmode_0 == 0) {
8708       op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8709     } else {  // cmode<0> == '1'
8710       op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8711     }
8712   } else {  // cmode<3> == '1'
8713     if (cmode_2 == 0) {
8714       if (cmode_0 == 0) {
8715         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8716       } else {  // cmode<0> == '1'
8717         op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8718       }
8719     } else {  // cmode<2> == '1'
8720       if (cmode_1 == 0) {
8721         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8722       } else {  // cmode<1> == '1'
8723         if (cmode_0 == 0) {
8724           op = NEONModifiedImmediate_MOVI;
8725         } else {  // cmode<0> == '1'
8726           op = NEONModifiedImmediate_MOVI;
8727         }
8728       }
8729     }
8730   }
8731 
8732   // Call the logic function
8733   if (op == NEONModifiedImmediate_ORR) {
8734     orr(vform, rd, rd, imm);
8735   } else if (op == NEONModifiedImmediate_BIC) {
8736     bic(vform, rd, rd, imm);
8737   } else if (op == NEONModifiedImmediate_MOVI) {
8738     movi(vform, rd, imm);
8739   } else if (op == NEONModifiedImmediate_MVNI) {
8740     mvni(vform, rd, imm);
8741   } else {
8742     VisitUnimplemented(instr);
8743   }
8744 }
8745 
8746 
8747 void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
8748   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8749   VectorFormat vf = nfd.GetVectorFormat();
8750 
8751   SimVRegister& rd = ReadVRegister(instr->GetRd());
8752   SimVRegister& rn = ReadVRegister(instr->GetRn());
8753 
8754   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
8755     // These instructions all use a two-bit size field, except NOT and RBIT,
8756     // which use the field to encode the operation.
8757     switch (instr->Mask(NEONScalar2RegMiscMask)) {
8758       case NEON_CMEQ_zero_scalar:
8759         cmp(vf, rd, rn, 0, eq);
8760         break;
8761       case NEON_CMGE_zero_scalar:
8762         cmp(vf, rd, rn, 0, ge);
8763         break;
8764       case NEON_CMGT_zero_scalar:
8765         cmp(vf, rd, rn, 0, gt);
8766         break;
8767       case NEON_CMLT_zero_scalar:
8768         cmp(vf, rd, rn, 0, lt);
8769         break;
8770       case NEON_CMLE_zero_scalar:
8771         cmp(vf, rd, rn, 0, le);
8772         break;
8773       case NEON_ABS_scalar:
8774         abs(vf, rd, rn);
8775         break;
8776       case NEON_SQABS_scalar:
8777         abs(vf, rd, rn).SignedSaturate(vf);
8778         break;
8779       case NEON_NEG_scalar:
8780         neg(vf, rd, rn);
8781         break;
8782       case NEON_SQNEG_scalar:
8783         neg(vf, rd, rn).SignedSaturate(vf);
8784         break;
8785       case NEON_SUQADD_scalar:
8786         suqadd(vf, rd, rd, rn);
8787         break;
8788       case NEON_USQADD_scalar:
8789         usqadd(vf, rd, rd, rn);
8790         break;
8791       default:
8792         VIXL_UNIMPLEMENTED();
8793         break;
8794     }
8795   } else {
8796     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8797     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8798 
8799     // These instructions all use a one-bit size field, except SQXTUN, SQXTN
8800     // and UQXTN, which use a two-bit size field.
8801     switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
8802       case NEON_FRECPE_scalar:
8803         frecpe(fpf, rd, rn, fpcr_rounding);
8804         break;
8805       case NEON_FRECPX_scalar:
8806         frecpx(fpf, rd, rn);
8807         break;
8808       case NEON_FRSQRTE_scalar:
8809         frsqrte(fpf, rd, rn);
8810         break;
8811       case NEON_FCMGT_zero_scalar:
8812         fcmp_zero(fpf, rd, rn, gt);
8813         break;
8814       case NEON_FCMGE_zero_scalar:
8815         fcmp_zero(fpf, rd, rn, ge);
8816         break;
8817       case NEON_FCMEQ_zero_scalar:
8818         fcmp_zero(fpf, rd, rn, eq);
8819         break;
8820       case NEON_FCMLE_zero_scalar:
8821         fcmp_zero(fpf, rd, rn, le);
8822         break;
8823       case NEON_FCMLT_zero_scalar:
8824         fcmp_zero(fpf, rd, rn, lt);
8825         break;
8826       case NEON_SCVTF_scalar:
8827         scvtf(fpf, rd, rn, 0, fpcr_rounding);
8828         break;
8829       case NEON_UCVTF_scalar:
8830         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8831         break;
8832       case NEON_FCVTNS_scalar:
8833         fcvts(fpf, rd, rn, FPTieEven);
8834         break;
8835       case NEON_FCVTNU_scalar:
8836         fcvtu(fpf, rd, rn, FPTieEven);
8837         break;
8838       case NEON_FCVTPS_scalar:
8839         fcvts(fpf, rd, rn, FPPositiveInfinity);
8840         break;
8841       case NEON_FCVTPU_scalar:
8842         fcvtu(fpf, rd, rn, FPPositiveInfinity);
8843         break;
8844       case NEON_FCVTMS_scalar:
8845         fcvts(fpf, rd, rn, FPNegativeInfinity);
8846         break;
8847       case NEON_FCVTMU_scalar:
8848         fcvtu(fpf, rd, rn, FPNegativeInfinity);
8849         break;
8850       case NEON_FCVTZS_scalar:
8851         fcvts(fpf, rd, rn, FPZero);
8852         break;
8853       case NEON_FCVTZU_scalar:
8854         fcvtu(fpf, rd, rn, FPZero);
8855         break;
8856       case NEON_FCVTAS_scalar:
8857         fcvts(fpf, rd, rn, FPTieAway);
8858         break;
8859       case NEON_FCVTAU_scalar:
8860         fcvtu(fpf, rd, rn, FPTieAway);
8861         break;
8862       case NEON_FCVTXN_scalar:
8863         // Unlike all of the other FP instructions above, fcvtxn encodes dest
8864         // size S as size<0>=1. There's only one case, so we ignore the form.
8865         VIXL_ASSERT(instr->ExtractBit(22) == 1);
8866         fcvtxn(kFormatS, rd, rn);
8867         break;
8868       default:
8869         switch (instr->Mask(NEONScalar2RegMiscMask)) {
8870           case NEON_SQXTN_scalar:
8871             sqxtn(vf, rd, rn);
8872             break;
8873           case NEON_UQXTN_scalar:
8874             uqxtn(vf, rd, rn);
8875             break;
8876           case NEON_SQXTUN_scalar:
8877             sqxtun(vf, rd, rn);
8878             break;
8879           default:
8880             VIXL_UNIMPLEMENTED();
8881         }
8882     }
8883   }
8884 }
8885 
8886 
8887 void Simulator::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
8888   VectorFormat fpf = kFormatH;
8889   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8890 
8891   SimVRegister& rd = ReadVRegister(instr->GetRd());
8892   SimVRegister& rn = ReadVRegister(instr->GetRn());
8893 
8894   switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
8895     case NEON_FRECPE_H_scalar:
8896       frecpe(fpf, rd, rn, fpcr_rounding);
8897       break;
8898     case NEON_FRECPX_H_scalar:
8899       frecpx(fpf, rd, rn);
8900       break;
8901     case NEON_FRSQRTE_H_scalar:
8902       frsqrte(fpf, rd, rn);
8903       break;
8904     case NEON_FCMGT_H_zero_scalar:
8905       fcmp_zero(fpf, rd, rn, gt);
8906       break;
8907     case NEON_FCMGE_H_zero_scalar:
8908       fcmp_zero(fpf, rd, rn, ge);
8909       break;
8910     case NEON_FCMEQ_H_zero_scalar:
8911       fcmp_zero(fpf, rd, rn, eq);
8912       break;
8913     case NEON_FCMLE_H_zero_scalar:
8914       fcmp_zero(fpf, rd, rn, le);
8915       break;
8916     case NEON_FCMLT_H_zero_scalar:
8917       fcmp_zero(fpf, rd, rn, lt);
8918       break;
8919     case NEON_SCVTF_H_scalar:
8920       scvtf(fpf, rd, rn, 0, fpcr_rounding);
8921       break;
8922     case NEON_UCVTF_H_scalar:
8923       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8924       break;
8925     case NEON_FCVTNS_H_scalar:
8926       fcvts(fpf, rd, rn, FPTieEven);
8927       break;
8928     case NEON_FCVTNU_H_scalar:
8929       fcvtu(fpf, rd, rn, FPTieEven);
8930       break;
8931     case NEON_FCVTPS_H_scalar:
8932       fcvts(fpf, rd, rn, FPPositiveInfinity);
8933       break;
8934     case NEON_FCVTPU_H_scalar:
8935       fcvtu(fpf, rd, rn, FPPositiveInfinity);
8936       break;
8937     case NEON_FCVTMS_H_scalar:
8938       fcvts(fpf, rd, rn, FPNegativeInfinity);
8939       break;
8940     case NEON_FCVTMU_H_scalar:
8941       fcvtu(fpf, rd, rn, FPNegativeInfinity);
8942       break;
8943     case NEON_FCVTZS_H_scalar:
8944       fcvts(fpf, rd, rn, FPZero);
8945       break;
8946     case NEON_FCVTZU_H_scalar:
8947       fcvtu(fpf, rd, rn, FPZero);
8948       break;
8949     case NEON_FCVTAS_H_scalar:
8950       fcvts(fpf, rd, rn, FPTieAway);
8951       break;
8952     case NEON_FCVTAU_H_scalar:
8953       fcvtu(fpf, rd, rn, FPTieAway);
8954       break;
8955   }
8956 }
8957 
8958 
8959 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
8960   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
8961   VectorFormat vf = nfd.GetVectorFormat();
8962 
8963   SimVRegister& rd = ReadVRegister(instr->GetRd());
8964   SimVRegister& rn = ReadVRegister(instr->GetRn());
8965   SimVRegister& rm = ReadVRegister(instr->GetRm());
8966   switch (instr->Mask(NEONScalar3DiffMask)) {
8967     case NEON_SQDMLAL_scalar:
8968       sqdmlal(vf, rd, rn, rm);
8969       break;
8970     case NEON_SQDMLSL_scalar:
8971       sqdmlsl(vf, rd, rn, rm);
8972       break;
8973     case NEON_SQDMULL_scalar:
8974       sqdmull(vf, rd, rn, rm);
8975       break;
8976     default:
8977       VIXL_UNIMPLEMENTED();
8978   }
8979 }
8980 
8981 
8982 void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
8983   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8984   VectorFormat vf = nfd.GetVectorFormat();
8985 
8986   SimVRegister& rd = ReadVRegister(instr->GetRd());
8987   SimVRegister& rn = ReadVRegister(instr->GetRn());
8988   SimVRegister& rm = ReadVRegister(instr->GetRm());
8989 
8990   if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
8991     vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8992     switch (instr->Mask(NEONScalar3SameFPMask)) {
8993       case NEON_FMULX_scalar:
8994         fmulx(vf, rd, rn, rm);
8995         break;
8996       case NEON_FACGE_scalar:
8997         fabscmp(vf, rd, rn, rm, ge);
8998         break;
8999       case NEON_FACGT_scalar:
9000         fabscmp(vf, rd, rn, rm, gt);
9001         break;
9002       case NEON_FCMEQ_scalar:
9003         fcmp(vf, rd, rn, rm, eq);
9004         break;
9005       case NEON_FCMGE_scalar:
9006         fcmp(vf, rd, rn, rm, ge);
9007         break;
9008       case NEON_FCMGT_scalar:
9009         fcmp(vf, rd, rn, rm, gt);
9010         break;
9011       case NEON_FRECPS_scalar:
9012         frecps(vf, rd, rn, rm);
9013         break;
9014       case NEON_FRSQRTS_scalar:
9015         frsqrts(vf, rd, rn, rm);
9016         break;
9017       case NEON_FABD_scalar:
9018         fabd(vf, rd, rn, rm);
9019         break;
9020       default:
9021         VIXL_UNIMPLEMENTED();
9022     }
9023   } else {
9024     switch (instr->Mask(NEONScalar3SameMask)) {
9025       case NEON_ADD_scalar:
9026         add(vf, rd, rn, rm);
9027         break;
9028       case NEON_SUB_scalar:
9029         sub(vf, rd, rn, rm);
9030         break;
9031       case NEON_CMEQ_scalar:
9032         cmp(vf, rd, rn, rm, eq);
9033         break;
9034       case NEON_CMGE_scalar:
9035         cmp(vf, rd, rn, rm, ge);
9036         break;
9037       case NEON_CMGT_scalar:
9038         cmp(vf, rd, rn, rm, gt);
9039         break;
9040       case NEON_CMHI_scalar:
9041         cmp(vf, rd, rn, rm, hi);
9042         break;
9043       case NEON_CMHS_scalar:
9044         cmp(vf, rd, rn, rm, hs);
9045         break;
9046       case NEON_CMTST_scalar:
9047         cmptst(vf, rd, rn, rm);
9048         break;
9049       case NEON_USHL_scalar:
9050         ushl(vf, rd, rn, rm);
9051         break;
9052       case NEON_SSHL_scalar:
9053         sshl(vf, rd, rn, rm);
9054         break;
9055       case NEON_SQDMULH_scalar:
9056         sqdmulh(vf, rd, rn, rm);
9057         break;
9058       case NEON_SQRDMULH_scalar:
9059         sqrdmulh(vf, rd, rn, rm);
9060         break;
9061       case NEON_UQADD_scalar:
9062         add(vf, rd, rn, rm).UnsignedSaturate(vf);
9063         break;
9064       case NEON_SQADD_scalar:
9065         add(vf, rd, rn, rm).SignedSaturate(vf);
9066         break;
9067       case NEON_UQSUB_scalar:
9068         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
9069         break;
9070       case NEON_SQSUB_scalar:
9071         sub(vf, rd, rn, rm).SignedSaturate(vf);
9072         break;
9073       case NEON_UQSHL_scalar:
9074         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
9075         break;
9076       case NEON_SQSHL_scalar:
9077         sshl(vf, rd, rn, rm).SignedSaturate(vf);
9078         break;
9079       case NEON_URSHL_scalar:
9080         ushl(vf, rd, rn, rm).Round(vf);
9081         break;
9082       case NEON_SRSHL_scalar:
9083         sshl(vf, rd, rn, rm).Round(vf);
9084         break;
9085       case NEON_UQRSHL_scalar:
9086         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
9087         break;
9088       case NEON_SQRSHL_scalar:
9089         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
9090         break;
9091       default:
9092         VIXL_UNIMPLEMENTED();
9093     }
9094   }
9095 }
9096 
9097 void Simulator::VisitNEONScalar3SameFP16(const Instruction* instr) {
9098   SimVRegister& rd = ReadVRegister(instr->GetRd());
9099   SimVRegister& rn = ReadVRegister(instr->GetRn());
9100   SimVRegister& rm = ReadVRegister(instr->GetRm());
9101 
9102   switch (instr->Mask(NEONScalar3SameFP16Mask)) {
9103     case NEON_FABD_H_scalar:
9104       fabd(kFormatH, rd, rn, rm);
9105       break;
9106     case NEON_FMULX_H_scalar:
9107       fmulx(kFormatH, rd, rn, rm);
9108       break;
9109     case NEON_FCMEQ_H_scalar:
9110       fcmp(kFormatH, rd, rn, rm, eq);
9111       break;
9112     case NEON_FCMGE_H_scalar:
9113       fcmp(kFormatH, rd, rn, rm, ge);
9114       break;
9115     case NEON_FCMGT_H_scalar:
9116       fcmp(kFormatH, rd, rn, rm, gt);
9117       break;
9118     case NEON_FACGE_H_scalar:
9119       fabscmp(kFormatH, rd, rn, rm, ge);
9120       break;
9121     case NEON_FACGT_H_scalar:
9122       fabscmp(kFormatH, rd, rn, rm, gt);
9123       break;
9124     case NEON_FRECPS_H_scalar:
9125       frecps(kFormatH, rd, rn, rm);
9126       break;
9127     case NEON_FRSQRTS_H_scalar:
9128       frsqrts(kFormatH, rd, rn, rm);
9129       break;
9130     default:
9131       VIXL_UNREACHABLE();
9132   }
9133 }
9134 
9135 
9136 void Simulator::VisitNEONScalar3SameExtra(const Instruction* instr) {
9137   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
9138   VectorFormat vf = nfd.GetVectorFormat();
9139 
9140   SimVRegister& rd = ReadVRegister(instr->GetRd());
9141   SimVRegister& rn = ReadVRegister(instr->GetRn());
9142   SimVRegister& rm = ReadVRegister(instr->GetRm());
9143 
9144   switch (instr->Mask(NEONScalar3SameExtraMask)) {
9145     case NEON_SQRDMLAH_scalar:
9146       sqrdmlah(vf, rd, rn, rm);
9147       break;
9148     case NEON_SQRDMLSH_scalar:
9149       sqrdmlsh(vf, rd, rn, rm);
9150       break;
9151     default:
9152       VIXL_UNIMPLEMENTED();
9153   }
9154 }
9155 
9156 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
9157   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
9158   VectorFormat vf = nfd.GetVectorFormat();
9159   VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
9160 
9161   SimVRegister& rd = ReadVRegister(instr->GetRd());
9162   SimVRegister& rn = ReadVRegister(instr->GetRn());
9163   ByElementOp Op = NULL;
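  // Select the operation first; it is invoked once at the end via a
  // pointer-to-member, so the index and Rm decoding is shared by all forms.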
9164 
9165   int rm_reg = instr->GetRm();
9166   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
9167   if (instr->GetNEONSize() == 1) {
9168     rm_reg &= 0xf;
9169     index = (index << 1) | instr->GetNEONM();
9170   }
9171 
9172   switch (instr->Mask(NEONScalarByIndexedElementMask)) {
9173     case NEON_SQDMULL_byelement_scalar:
9174       Op = &Simulator::sqdmull;
9175       break;
9176     case NEON_SQDMLAL_byelement_scalar:
9177       Op = &Simulator::sqdmlal;
9178       break;
9179     case NEON_SQDMLSL_byelement_scalar:
9180       Op = &Simulator::sqdmlsl;
9181       break;
9182     case NEON_SQDMULH_byelement_scalar:
9183       Op = &Simulator::sqdmulh;
9184       vf = vf_r;
9185       break;
9186     case NEON_SQRDMULH_byelement_scalar:
9187       Op = &Simulator::sqrdmulh;
9188       vf = vf_r;
9189       break;
9190     case NEON_SQRDMLAH_byelement_scalar:
9191       Op = &Simulator::sqrdmlah;
9192       vf = vf_r;
9193       break;
9194     case NEON_SQRDMLSH_byelement_scalar:
9195       Op = &Simulator::sqrdmlsh;
9196       vf = vf_r;
9197       break;
9198     default:
9199       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
9200       index = instr->GetNEONH();
9201       if (instr->GetFPType() == 0) {
9202         index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
9203         rm_reg &= 0xf;
9204         vf = kFormatH;
9205       } else if ((instr->GetFPType() & 1) == 0) {
9206         index = (index << 1) | instr->GetNEONL();
9207       }
9208       switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
9209         case NEON_FMUL_H_byelement_scalar:
9210         case NEON_FMUL_byelement_scalar:
9211           Op = &Simulator::fmul;
9212           break;
9213         case NEON_FMLA_H_byelement_scalar:
9214         case NEON_FMLA_byelement_scalar:
9215           Op = &Simulator::fmla;
9216           break;
9217         case NEON_FMLS_H_byelement_scalar:
9218         case NEON_FMLS_byelement_scalar:
9219           Op = &Simulator::fmls;
9220           break;
9221         case NEON_FMULX_H_byelement_scalar:
9222         case NEON_FMULX_byelement_scalar:
9223           Op = &Simulator::fmulx;
9224           break;
9225         default:
9226           VIXL_UNIMPLEMENTED();
9227       }
9228   }
9229 
9230   (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
9231 }
9232 
9233 
9234 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
9235   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
9236   VectorFormat vf = nfd.GetVectorFormat();
9237 
9238   SimVRegister& rd = ReadVRegister(instr->GetRd());
9239   SimVRegister& rn = ReadVRegister(instr->GetRn());
9240 
9241   if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
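    // imm5 encodes both the element size (the position of its lowest set bit)
    // and the source lane index (the bits above that).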
9242     int imm5 = instr->GetImmNEON5();
9243     int tz = CountTrailingZeros(imm5, 32);
9244     int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5);
9245     dup_element(vf, rd, rn, rn_index);
9246   } else {
9247     VIXL_UNIMPLEMENTED();
9248   }
9249 }
9250 
9251 
9252 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
9253   NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarPairwiseFormatMap());
9254   VectorFormat vf = nfd.GetVectorFormat();
9255 
9256   SimVRegister& rd = ReadVRegister(instr->GetRd());
9257   SimVRegister& rn = ReadVRegister(instr->GetRn());
9258   switch (instr->Mask(NEONScalarPairwiseMask)) {
9259     case NEON_ADDP_scalar: {
9260       // All pairwise operations except ADDP use bit U to differentiate FP16
9261       // from FP32/FP64 variations.
9262       NEONFormatDecoder nfd_addp(instr, NEONFormatDecoder::FPScalarFormatMap());
9263       addp(nfd_addp.GetVectorFormat(), rd, rn);
9264       break;
9265     }
9266     case NEON_FADDP_h_scalar:
9267     case NEON_FADDP_scalar:
9268       faddp(vf, rd, rn);
9269       break;
9270     case NEON_FMAXP_h_scalar:
9271     case NEON_FMAXP_scalar:
9272       fmaxp(vf, rd, rn);
9273       break;
9274     case NEON_FMAXNMP_h_scalar:
9275     case NEON_FMAXNMP_scalar:
9276       fmaxnmp(vf, rd, rn);
9277       break;
9278     case NEON_FMINP_h_scalar:
9279     case NEON_FMINP_scalar:
9280       fminp(vf, rd, rn);
9281       break;
9282     case NEON_FMINNMP_h_scalar:
9283     case NEON_FMINNMP_scalar:
9284       fminnmp(vf, rd, rn);
9285       break;
9286     default:
9287       VIXL_UNIMPLEMENTED();
9288   }
9289 }
9290 
9291 
9292 void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
9293   SimVRegister& rd = ReadVRegister(instr->GetRd());
9294   SimVRegister& rn = ReadVRegister(instr->GetRn());
9295   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9296 
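  // immh (bits <22:19>) selects the scalar lane size:
  // 0001->B, 001x->H, 01xx->S, 1xxx->D; immh = 0000 is undefined.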
9297   static const NEONFormatMap map = {{22, 21, 20, 19},
9298                                     {NF_UNDEF,
9299                                      NF_B,
9300                                      NF_H,
9301                                      NF_H,
9302                                      NF_S,
9303                                      NF_S,
9304                                      NF_S,
9305                                      NF_S,
9306                                      NF_D,
9307                                      NF_D,
9308                                      NF_D,
9309                                      NF_D,
9310                                      NF_D,
9311                                      NF_D,
9312                                      NF_D,
9313                                      NF_D}};
9314   NEONFormatDecoder nfd(instr, &map);
9315   VectorFormat vf = nfd.GetVectorFormat();
9316 
9317   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9318   int immh_immb = instr->GetImmNEONImmhImmb();
9319   int right_shift = (16 << highest_set_bit) - immh_immb;
9320   int left_shift = immh_immb - (8 << highest_set_bit);
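  // immh:immb is a single 7-bit field; e.g. for an H-sized scalar
  // (immh = 001x), a right shift by n is encoded as 32 - n and a left shift
  // by n as 16 + n.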
9321   switch (instr->Mask(NEONScalarShiftImmediateMask)) {
9322     case NEON_SHL_scalar:
9323       shl(vf, rd, rn, left_shift);
9324       break;
9325     case NEON_SLI_scalar:
9326       sli(vf, rd, rn, left_shift);
9327       break;
9328     case NEON_SQSHL_imm_scalar:
9329       sqshl(vf, rd, rn, left_shift);
9330       break;
9331     case NEON_UQSHL_imm_scalar:
9332       uqshl(vf, rd, rn, left_shift);
9333       break;
9334     case NEON_SQSHLU_scalar:
9335       sqshlu(vf, rd, rn, left_shift);
9336       break;
9337     case NEON_SRI_scalar:
9338       sri(vf, rd, rn, right_shift);
9339       break;
9340     case NEON_SSHR_scalar:
9341       sshr(vf, rd, rn, right_shift);
9342       break;
9343     case NEON_USHR_scalar:
9344       ushr(vf, rd, rn, right_shift);
9345       break;
9346     case NEON_SRSHR_scalar:
9347       sshr(vf, rd, rn, right_shift).Round(vf);
9348       break;
9349     case NEON_URSHR_scalar:
9350       ushr(vf, rd, rn, right_shift).Round(vf);
9351       break;
9352     case NEON_SSRA_scalar:
9353       ssra(vf, rd, rn, right_shift);
9354       break;
9355     case NEON_USRA_scalar:
9356       usra(vf, rd, rn, right_shift);
9357       break;
9358     case NEON_SRSRA_scalar:
9359       srsra(vf, rd, rn, right_shift);
9360       break;
9361     case NEON_URSRA_scalar:
9362       ursra(vf, rd, rn, right_shift);
9363       break;
9364     case NEON_UQSHRN_scalar:
9365       uqshrn(vf, rd, rn, right_shift);
9366       break;
9367     case NEON_UQRSHRN_scalar:
9368       uqrshrn(vf, rd, rn, right_shift);
9369       break;
9370     case NEON_SQSHRN_scalar:
9371       sqshrn(vf, rd, rn, right_shift);
9372       break;
9373     case NEON_SQRSHRN_scalar:
9374       sqrshrn(vf, rd, rn, right_shift);
9375       break;
9376     case NEON_SQSHRUN_scalar:
9377       sqshrun(vf, rd, rn, right_shift);
9378       break;
9379     case NEON_SQRSHRUN_scalar:
9380       sqrshrun(vf, rd, rn, right_shift);
9381       break;
9382     case NEON_FCVTZS_imm_scalar:
9383       fcvts(vf, rd, rn, FPZero, right_shift);
9384       break;
9385     case NEON_FCVTZU_imm_scalar:
9386       fcvtu(vf, rd, rn, FPZero, right_shift);
9387       break;
9388     case NEON_SCVTF_imm_scalar:
9389       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9390       break;
9391     case NEON_UCVTF_imm_scalar:
9392       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9393       break;
9394     default:
9395       VIXL_UNIMPLEMENTED();
9396   }
9397 }
9398 
9399 
9400 void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
9401   SimVRegister& rd = ReadVRegister(instr->GetRd());
9402   SimVRegister& rn = ReadVRegister(instr->GetRn());
9403   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9404 
9405   // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
9406   // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
9407   static const NEONFormatMap map = {{22, 21, 20, 19, 30},
9408                                     {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B,
9409                                      NF_4H,    NF_8H,    NF_4H,    NF_8H,
9410                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9411                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9412                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9413                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9414                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9415                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D}};
9416   NEONFormatDecoder nfd(instr, &map);
9417   VectorFormat vf = nfd.GetVectorFormat();
9418 
9419   // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
9420   static const NEONFormatMap map_l =
9421       {{22, 21, 20, 19},
9422        {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
9423   VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
9424 
9425   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9426   int immh_immb = instr->GetImmNEONImmhImmb();
9427   int right_shift = (16 << highest_set_bit) - immh_immb;
9428   int left_shift = immh_immb - (8 << highest_set_bit);
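  // As for the scalar forms, immh:immb encodes a right shift by n as
  // (16 << highest_set_bit) - n and a left shift by n as
  // (8 << highest_set_bit) + n.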
9429 
9430   switch (instr->Mask(NEONShiftImmediateMask)) {
9431     case NEON_SHL:
9432       shl(vf, rd, rn, left_shift);
9433       break;
9434     case NEON_SLI:
9435       sli(vf, rd, rn, left_shift);
9436       break;
9437     case NEON_SQSHLU:
9438       sqshlu(vf, rd, rn, left_shift);
9439       break;
9440     case NEON_SRI:
9441       sri(vf, rd, rn, right_shift);
9442       break;
9443     case NEON_SSHR:
9444       sshr(vf, rd, rn, right_shift);
9445       break;
9446     case NEON_USHR:
9447       ushr(vf, rd, rn, right_shift);
9448       break;
9449     case NEON_SRSHR:
9450       sshr(vf, rd, rn, right_shift).Round(vf);
9451       break;
9452     case NEON_URSHR:
9453       ushr(vf, rd, rn, right_shift).Round(vf);
9454       break;
9455     case NEON_SSRA:
9456       ssra(vf, rd, rn, right_shift);
9457       break;
9458     case NEON_USRA:
9459       usra(vf, rd, rn, right_shift);
9460       break;
9461     case NEON_SRSRA:
9462       srsra(vf, rd, rn, right_shift);
9463       break;
9464     case NEON_URSRA:
9465       ursra(vf, rd, rn, right_shift);
9466       break;
9467     case NEON_SQSHL_imm:
9468       sqshl(vf, rd, rn, left_shift);
9469       break;
9470     case NEON_UQSHL_imm:
9471       uqshl(vf, rd, rn, left_shift);
9472       break;
9473     case NEON_SCVTF_imm:
9474       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9475       break;
9476     case NEON_UCVTF_imm:
9477       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9478       break;
9479     case NEON_FCVTZS_imm:
9480       fcvts(vf, rd, rn, FPZero, right_shift);
9481       break;
9482     case NEON_FCVTZU_imm:
9483       fcvtu(vf, rd, rn, FPZero, right_shift);
9484       break;
9485     case NEON_SSHLL:
9486       vf = vf_l;
9487       if (instr->Mask(NEON_Q)) {
9488         sshll2(vf, rd, rn, left_shift);
9489       } else {
9490         sshll(vf, rd, rn, left_shift);
9491       }
9492       break;
9493     case NEON_USHLL:
9494       vf = vf_l;
9495       if (instr->Mask(NEON_Q)) {
9496         ushll2(vf, rd, rn, left_shift);
9497       } else {
9498         ushll(vf, rd, rn, left_shift);
9499       }
9500       break;
9501     case NEON_SHRN:
9502       if (instr->Mask(NEON_Q)) {
9503         shrn2(vf, rd, rn, right_shift);
9504       } else {
9505         shrn(vf, rd, rn, right_shift);
9506       }
9507       break;
9508     case NEON_RSHRN:
9509       if (instr->Mask(NEON_Q)) {
9510         rshrn2(vf, rd, rn, right_shift);
9511       } else {
9512         rshrn(vf, rd, rn, right_shift);
9513       }
9514       break;
9515     case NEON_UQSHRN:
9516       if (instr->Mask(NEON_Q)) {
9517         uqshrn2(vf, rd, rn, right_shift);
9518       } else {
9519         uqshrn(vf, rd, rn, right_shift);
9520       }
9521       break;
9522     case NEON_UQRSHRN:
9523       if (instr->Mask(NEON_Q)) {
9524         uqrshrn2(vf, rd, rn, right_shift);
9525       } else {
9526         uqrshrn(vf, rd, rn, right_shift);
9527       }
9528       break;
9529     case NEON_SQSHRN:
9530       if (instr->Mask(NEON_Q)) {
9531         sqshrn2(vf, rd, rn, right_shift);
9532       } else {
9533         sqshrn(vf, rd, rn, right_shift);
9534       }
9535       break;
9536     case NEON_SQRSHRN:
9537       if (instr->Mask(NEON_Q)) {
9538         sqrshrn2(vf, rd, rn, right_shift);
9539       } else {
9540         sqrshrn(vf, rd, rn, right_shift);
9541       }
9542       break;
9543     case NEON_SQSHRUN:
9544       if (instr->Mask(NEON_Q)) {
9545         sqshrun2(vf, rd, rn, right_shift);
9546       } else {
9547         sqshrun(vf, rd, rn, right_shift);
9548       }
9549       break;
9550     case NEON_SQRSHRUN:
9551       if (instr->Mask(NEON_Q)) {
9552         sqrshrun2(vf, rd, rn, right_shift);
9553       } else {
9554         sqrshrun(vf, rd, rn, right_shift);
9555       }
9556       break;
9557     default:
9558       VIXL_UNIMPLEMENTED();
9559   }
9560 }
9561 
9562 
9563 void Simulator::VisitNEONTable(const Instruction* instr) {
9564   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
9565   VectorFormat vf = nfd.GetVectorFormat();
9566 
9567   SimVRegister& rd = ReadVRegister(instr->GetRd());
9568   SimVRegister& rn = ReadVRegister(instr->GetRn());
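  // TBL and TBX read from a list of up to four consecutive table registers,
  // wrapping from v31 back to v0.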
9569   SimVRegister& rn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfVRegisters);
9570   SimVRegister& rn3 = ReadVRegister((instr->GetRn() + 2) % kNumberOfVRegisters);
9571   SimVRegister& rn4 = ReadVRegister((instr->GetRn() + 3) % kNumberOfVRegisters);
9572   SimVRegister& rm = ReadVRegister(instr->GetRm());
9573 
9574   switch (instr->Mask(NEONTableMask)) {
9575     case NEON_TBL_1v:
9576       tbl(vf, rd, rn, rm);
9577       break;
9578     case NEON_TBL_2v:
9579       tbl(vf, rd, rn, rn2, rm);
9580       break;
9581     case NEON_TBL_3v:
9582       tbl(vf, rd, rn, rn2, rn3, rm);
9583       break;
9584     case NEON_TBL_4v:
9585       tbl(vf, rd, rn, rn2, rn3, rn4, rm);
9586       break;
9587     case NEON_TBX_1v:
9588       tbx(vf, rd, rn, rm);
9589       break;
9590     case NEON_TBX_2v:
9591       tbx(vf, rd, rn, rn2, rm);
9592       break;
9593     case NEON_TBX_3v:
9594       tbx(vf, rd, rn, rn2, rn3, rm);
9595       break;
9596     case NEON_TBX_4v:
9597       tbx(vf, rd, rn, rn2, rn3, rn4, rm);
9598       break;
9599     default:
9600       VIXL_UNIMPLEMENTED();
9601   }
9602 }
9603 
9604 
9605 void Simulator::VisitNEONPerm(const Instruction* instr) {
9606   NEONFormatDecoder nfd(instr);
9607   VectorFormat vf = nfd.GetVectorFormat();
9608 
9609   SimVRegister& rd = ReadVRegister(instr->GetRd());
9610   SimVRegister& rn = ReadVRegister(instr->GetRn());
9611   SimVRegister& rm = ReadVRegister(instr->GetRm());
9612 
9613   switch (instr->Mask(NEONPermMask)) {
9614     case NEON_TRN1:
9615       trn1(vf, rd, rn, rm);
9616       break;
9617     case NEON_TRN2:
9618       trn2(vf, rd, rn, rm);
9619       break;
9620     case NEON_UZP1:
9621       uzp1(vf, rd, rn, rm);
9622       break;
9623     case NEON_UZP2:
9624       uzp2(vf, rd, rn, rm);
9625       break;
9626     case NEON_ZIP1:
9627       zip1(vf, rd, rn, rm);
9628       break;
9629     case NEON_ZIP2:
9630       zip2(vf, rd, rn, rm);
9631       break;
9632     default:
9633       VIXL_UNIMPLEMENTED();
9634   }
9635 }
9636 
9637 void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
9638   SimVRegister& zd = ReadVRegister(instr->GetRd());
9639   SimVRegister& zn = ReadVRegister(instr->GetRn());
9640   SimVRegister& zm = ReadVRegister(instr->GetRm());
9641   SimVRegister temp;
9642 
9643   VectorFormat vform = kFormatVnD;
9644   mov(vform, temp, zm);
9645 
9646   switch (instr->Mask(SVEAddressGenerationMask)) {
9647     case ADR_z_az_d_s32_scaled:
9648       sxt(vform, temp, temp, kSRegSize);
9649       break;
9650     case ADR_z_az_d_u32_scaled:
9651       uxt(vform, temp, temp, kSRegSize);
9652       break;
9653     case ADR_z_az_s_same_scaled:
9654       vform = kFormatVnS;
9655       break;
9656     case ADR_z_az_d_same_scaled:
9657       // Nothing to do.
9658       break;
9659     default:
9660       VIXL_UNIMPLEMENTED();
9661       break;
9662   }
9663 
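  // ADR computes zd = zn + (offset << shift_amount), where the per-lane
  // offsets come from zm (optionally sign- or zero-extended from 32 bits).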
9664   int shift_amount = instr->ExtractBits(11, 10);
9665   shl(vform, temp, temp, shift_amount);
9666   add(vform, zd, zn, temp);
9667 }
9668 
9669 void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
9670     const Instruction* instr) {
9671   Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
9672   switch (op) {
9673     case AND_z_zi:
9674     case EOR_z_zi:
9675     case ORR_z_zi: {
9676       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9677       uint64_t imm = instr->GetSVEImmLogical();
9678       // A valid logical immediate is always non-zero.
9679       VIXL_ASSERT(imm != 0);
9680       SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
9681                               op),
9682                           SVEFormatFromLaneSizeInBytesLog2(lane_size),
9683                           ReadVRegister(instr->GetRd()),
9684                           imm);
9685       break;
9686     }
9687     default:
9688       VIXL_UNIMPLEMENTED();
9689       break;
9690   }
9691 }
9692 
9693 void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
9694   switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
9695     case DUPM_z_i: {
9696       /* DUPM uses the same lane size and immediate encoding as bitwise logical
9697        * immediate instructions. */
9698       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9699       uint64_t imm = instr->GetSVEImmLogical();
9700       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9701       dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
9702       break;
9703     }
9704     default:
9705       VIXL_UNIMPLEMENTED();
9706       break;
9707   }
9708 }
9709 
9710 void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
9711   SimVRegister& zd = ReadVRegister(instr->GetRd());
9712   SimVRegister& zn = ReadVRegister(instr->GetRn());
9713   SimVRegister& zm = ReadVRegister(instr->GetRm());
9714   Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
9715 
9716   LogicalOp logical_op = LogicalOpMask;
9717   switch (op) {
9718     case AND_z_zz:
9719       logical_op = AND;
9720       break;
9721     case BIC_z_zz:
9722       logical_op = BIC;
9723       break;
9724     case EOR_z_zz:
9725       logical_op = EOR;
9726       break;
9727     case ORR_z_zz:
9728       logical_op = ORR;
9729       break;
9730     default:
9731       VIXL_UNIMPLEMENTED();
9732       break;
9733   }
9734   // The lane size is irrelevant to bitwise operations, so perform the
9735   // operation on D-sized lanes.
9736   SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
9737 }
9738 
9739 void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
9740   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9741   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9742 
9743   SimVRegister scratch;
9744   SimVRegister result;
9745 
9746   bool for_division = false;
9747   Shift shift_op = NO_SHIFT;
9748   switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
9749     case ASRD_z_p_zi:
9750       shift_op = ASR;
9751       for_division = true;
9752       break;
9753     case ASR_z_p_zi:
9754       shift_op = ASR;
9755       break;
9756     case LSL_z_p_zi:
9757       shift_op = LSL;
9758       break;
9759     case LSR_z_p_zi:
9760       shift_op = LSR;
9761       break;
9762     default:
9763       VIXL_UNIMPLEMENTED();
9764       break;
9765   }
9766 
9767   std::pair<int, int> shift_and_lane_size =
9768       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
9769   unsigned lane_size = shift_and_lane_size.second;
9770   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9771   int shift_dist = shift_and_lane_size.first;
9772 
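  // ASRD rounds towards zero, implementing a signed division by 2^shift_dist.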
9773   if ((shift_op == ASR) && for_division) {
9774     asrd(vform, result, zdn, shift_dist);
9775   } else {
9776     if (shift_op == LSL) {
9777       // The shift distance is encoded differently for LSL; convert it here.
9778       shift_dist = (8 << lane_size) - shift_dist;
9779     }
9780     dup_immediate(vform, scratch, shift_dist);
9781     SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false);
9782   }
9783   mov_merging(vform, zdn, pg, result);
9784 }
9785 
9786 void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
9787     const Instruction* instr) {
9788   VectorFormat vform = instr->GetSVEVectorFormat();
9789   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9790   SimVRegister& zm = ReadVRegister(instr->GetRn());
9791   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9792   SimVRegister result;
9793 
9794   // Unlike NEON, SVE uses the whole (saturated) lane for the shift amount.
9795   bool shift_in_ls_byte = false;
9796 
9797   switch (form_hash_) {
9798     case "asrr_z_p_zz"_h:
9799       sshr(vform, result, zm, zdn);
9800       break;
9801     case "asr_z_p_zz"_h:
9802       sshr(vform, result, zdn, zm);
9803       break;
9804     case "lslr_z_p_zz"_h:
9805       sshl(vform, result, zm, zdn, shift_in_ls_byte);
9806       break;
9807     case "lsl_z_p_zz"_h:
9808       sshl(vform, result, zdn, zm, shift_in_ls_byte);
9809       break;
9810     case "lsrr_z_p_zz"_h:
9811       ushr(vform, result, zm, zdn);
9812       break;
9813     case "lsr_z_p_zz"_h:
9814       ushr(vform, result, zdn, zm);
9815       break;
9816     case "sqrshl_z_p_zz"_h:
9817       sshl(vform, result, zdn, zm, shift_in_ls_byte)
9818           .Round(vform)
9819           .SignedSaturate(vform);
9820       break;
9821     case "sqrshlr_z_p_zz"_h:
9822       sshl(vform, result, zm, zdn, shift_in_ls_byte)
9823           .Round(vform)
9824           .SignedSaturate(vform);
9825       break;
9826     case "sqshl_z_p_zz"_h:
9827       sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform);
9828       break;
9829     case "sqshlr_z_p_zz"_h:
9830       sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform);
9831       break;
9832     case "srshl_z_p_zz"_h:
9833       sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9834       break;
9835     case "srshlr_z_p_zz"_h:
9836       sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9837       break;
9838     case "uqrshl_z_p_zz"_h:
9839       ushl(vform, result, zdn, zm, shift_in_ls_byte)
9840           .Round(vform)
9841           .UnsignedSaturate(vform);
9842       break;
9843     case "uqrshlr_z_p_zz"_h:
9844       ushl(vform, result, zm, zdn, shift_in_ls_byte)
9845           .Round(vform)
9846           .UnsignedSaturate(vform);
9847       break;
9848     case "uqshl_z_p_zz"_h:
9849       ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform);
9850       break;
9851     case "uqshlr_z_p_zz"_h:
9852       ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform);
9853       break;
9854     case "urshl_z_p_zz"_h:
9855       ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9856       break;
9857     case "urshlr_z_p_zz"_h:
9858       ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9859       break;
9860     default:
9861       VIXL_UNIMPLEMENTED();
9862       break;
9863   }
9864   mov_merging(vform, zdn, pg, result);
9865 }
9866 
9867 void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated(
9868     const Instruction* instr) {
9869   VectorFormat vform = instr->GetSVEVectorFormat();
9870   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9871   SimVRegister& zm = ReadVRegister(instr->GetRn());
9872   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9873 
9874   SimVRegister result;
9875   Shift shift_op = ASR;
9876 
9877   switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
9878     case ASR_z_p_zw:
9879       break;
9880     case LSL_z_p_zw:
9881       shift_op = LSL;
9882       break;
9883     case LSR_z_p_zw:
9884       shift_op = LSR;
9885       break;
9886     default:
9887       VIXL_UNIMPLEMENTED();
9888       break;
9889   }
9890   SVEBitwiseShiftHelper(shift_op,
9891                         vform,
9892                         result,
9893                         zdn,
9894                         zm,
9895                         /* is_wide_elements = */ true);
9896   mov_merging(vform, zdn, pg, result);
9897 }
9898 
9899 void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
9900   SimVRegister& zd = ReadVRegister(instr->GetRd());
9901   SimVRegister& zn = ReadVRegister(instr->GetRn());
9902 
9903   Shift shift_op = NO_SHIFT;
9904   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9905     case ASR_z_zi:
9906     case ASR_z_zw:
9907       shift_op = ASR;
9908       break;
9909     case LSL_z_zi:
9910     case LSL_z_zw:
9911       shift_op = LSL;
9912       break;
9913     case LSR_z_zi:
9914     case LSR_z_zw:
9915       shift_op = LSR;
9916       break;
9917     default:
9918       VIXL_UNIMPLEMENTED();
9919       break;
9920   }
9921 
9922   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9923     case ASR_z_zi:
9924     case LSL_z_zi:
9925     case LSR_z_zi: {
9926       SimVRegister scratch;
9927       std::pair<int, int> shift_and_lane_size =
9928           instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
9929       unsigned lane_size = shift_and_lane_size.second;
9930       VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
9931       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9932       int shift_dist = shift_and_lane_size.first;
9933       if (shift_op == LSL) {
9934         // The shift distance is encoded differently for LSL; convert it here.
9935         shift_dist = (8 << lane_size) - shift_dist;
9936       }
9937       dup_immediate(vform, scratch, shift_dist);
9938       SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false);
9939       break;
9940     }
9941     case ASR_z_zw:
9942     case LSL_z_zw:
9943     case LSR_z_zw:
9944       SVEBitwiseShiftHelper(shift_op,
9945                             instr->GetSVEVectorFormat(),
9946                             zd,
9947                             zn,
9948                             ReadVRegister(instr->GetRm()),
9949                             true);
9950       break;
9951     default:
9952       VIXL_UNIMPLEMENTED();
9953       break;
9954   }
9955 }
9956 
9957 void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) {
9958   // Although the instructions have a separate encoding class, the lane size is
9959   // encoded in the same way as most other SVE instructions.
9960   VectorFormat vform = instr->GetSVEVectorFormat();
9961 
9962   int pattern = instr->GetImmSVEPredicateConstraint();
9963   int count = GetPredicateConstraintLaneCount(vform, pattern);
9964   int multiplier = instr->ExtractBits(19, 16) + 1;
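  // For example, "incd x0, all, mul #4" adds four times the number of D-sized
  // lanes in a vector to x0.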
9965 
9966   switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
9967     case DECB_r_rs:
9968     case DECD_r_rs:
9969     case DECH_r_rs:
9970     case DECW_r_rs:
9971       count = -count;
9972       break;
9973     case INCB_r_rs:
9974     case INCD_r_rs:
9975     case INCH_r_rs:
9976     case INCW_r_rs:
9977       // Nothing to do.
9978       break;
9979     default:
9980       VIXL_UNIMPLEMENTED();
9981       return;
9982   }
9983 
9984   WriteXRegister(instr->GetRd(),
9985                  IncDecN(ReadXRegister(instr->GetRd()),
9986                          count * multiplier,
9987                          kXRegSize));
9988 }
9989 
9990 void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) {
9991   VectorFormat vform = instr->GetSVEVectorFormat();
9992   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9993     VIXL_UNIMPLEMENTED();
9994   }
9995 
9996   int pattern = instr->GetImmSVEPredicateConstraint();
9997   int count = GetPredicateConstraintLaneCount(vform, pattern);
9998   int multiplier = instr->ExtractBits(19, 16) + 1;
9999 
10000   switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
10001     case DECD_z_zs:
10002     case DECH_z_zs:
10003     case DECW_z_zs:
10004       count = -count;
10005       break;
10006     case INCD_z_zs:
10007     case INCH_z_zs:
10008     case INCW_z_zs:
10009       // Nothing to do.
10010       break;
10011     default:
10012       VIXL_UNIMPLEMENTED();
10013       break;
10014   }
10015 
10016   SimVRegister& zd = ReadVRegister(instr->GetRd());
10017   SimVRegister scratch;
10018   dup_immediate(vform,
10019                 scratch,
10020                 IncDecN(0,
10021                         count * multiplier,
10022                         LaneSizeInBitsFromFormat(vform)));
10023   add(vform, zd, zd, scratch);
10024 }
10025 
10026 void Simulator::VisitSVESaturatingIncDecRegisterByElementCount(
10027     const Instruction* instr) {
10028   // Although the instructions have a separate encoding class, the lane size is
10029   // encoded in the same way as most other SVE instructions.
10030   VectorFormat vform = instr->GetSVEVectorFormat();
10031 
10032   int pattern = instr->GetImmSVEPredicateConstraint();
10033   int count = GetPredicateConstraintLaneCount(vform, pattern);
10034   int multiplier = instr->ExtractBits(19, 16) + 1;
10035 
10036   unsigned width = kXRegSize;
10037   bool is_signed = false;
10038 
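  // The "sx" and "uw" forms saturate the result to the signed or unsigned
  // 32-bit range respectively; the "x" forms saturate to the 64-bit range.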
10039   switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
10040     case SQDECB_r_rs_sx:
10041     case SQDECD_r_rs_sx:
10042     case SQDECH_r_rs_sx:
10043     case SQDECW_r_rs_sx:
10044       width = kWRegSize;
10045       VIXL_FALLTHROUGH();
10046     case SQDECB_r_rs_x:
10047     case SQDECD_r_rs_x:
10048     case SQDECH_r_rs_x:
10049     case SQDECW_r_rs_x:
10050       is_signed = true;
10051       count = -count;
10052       break;
10053     case SQINCB_r_rs_sx:
10054     case SQINCD_r_rs_sx:
10055     case SQINCH_r_rs_sx:
10056     case SQINCW_r_rs_sx:
10057       width = kWRegSize;
10058       VIXL_FALLTHROUGH();
10059     case SQINCB_r_rs_x:
10060     case SQINCD_r_rs_x:
10061     case SQINCH_r_rs_x:
10062     case SQINCW_r_rs_x:
10063       is_signed = true;
10064       break;
10065     case UQDECB_r_rs_uw:
10066     case UQDECD_r_rs_uw:
10067     case UQDECH_r_rs_uw:
10068     case UQDECW_r_rs_uw:
10069       width = kWRegSize;
10070       VIXL_FALLTHROUGH();
10071     case UQDECB_r_rs_x:
10072     case UQDECD_r_rs_x:
10073     case UQDECH_r_rs_x:
10074     case UQDECW_r_rs_x:
10075       count = -count;
10076       break;
10077     case UQINCB_r_rs_uw:
10078     case UQINCD_r_rs_uw:
10079     case UQINCH_r_rs_uw:
10080     case UQINCW_r_rs_uw:
10081       width = kWRegSize;
10082       VIXL_FALLTHROUGH();
10083     case UQINCB_r_rs_x:
10084     case UQINCD_r_rs_x:
10085     case UQINCH_r_rs_x:
10086     case UQINCW_r_rs_x:
10087       // Nothing to do.
10088       break;
10089     default:
10090       VIXL_UNIMPLEMENTED();
10091       break;
10092   }
10093 
10094   WriteXRegister(instr->GetRd(),
10095                  IncDecN(ReadXRegister(instr->GetRd()),
10096                          count * multiplier,
10097                          width,
10098                          true,
10099                          is_signed));
10100 }
10101 
10102 void Simulator::VisitSVESaturatingIncDecVectorByElementCount(
10103     const Instruction* instr) {
10104   VectorFormat vform = instr->GetSVEVectorFormat();
10105   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10106     VIXL_UNIMPLEMENTED();
10107   }
10108 
10109   int pattern = instr->GetImmSVEPredicateConstraint();
10110   int count = GetPredicateConstraintLaneCount(vform, pattern);
10111   int multiplier = instr->ExtractBits(19, 16) + 1;
10112 
10113   SimVRegister& zd = ReadVRegister(instr->GetRd());
10114   SimVRegister scratch;
10115   dup_immediate(vform,
10116                 scratch,
10117                 IncDecN(0,
10118                         count * multiplier,
10119                         LaneSizeInBitsFromFormat(vform)));
10120 
10121   switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
10122     case SQDECD_z_zs:
10123     case SQDECH_z_zs:
10124     case SQDECW_z_zs:
10125       sub(vform, zd, zd, scratch).SignedSaturate(vform);
10126       break;
10127     case SQINCD_z_zs:
10128     case SQINCH_z_zs:
10129     case SQINCW_z_zs:
10130       add(vform, zd, zd, scratch).SignedSaturate(vform);
10131       break;
10132     case UQDECD_z_zs:
10133     case UQDECH_z_zs:
10134     case UQDECW_z_zs:
10135       sub(vform, zd, zd, scratch).UnsignedSaturate(vform);
10136       break;
10137     case UQINCD_z_zs:
10138     case UQINCH_z_zs:
10139     case UQINCW_z_zs:
10140       add(vform, zd, zd, scratch).UnsignedSaturate(vform);
10141       break;
10142     default:
10143       VIXL_UNIMPLEMENTED();
10144       break;
10145   }
10146 }
10147 
10148 void Simulator::VisitSVEElementCount(const Instruction* instr) {
10149   switch (instr->Mask(SVEElementCountMask)) {
10150     case CNTB_r_s:
10151     case CNTD_r_s:
10152     case CNTH_r_s:
10153     case CNTW_r_s:
10154       // All handled below.
10155       break;
10156     default:
10157       VIXL_UNIMPLEMENTED();
10158       break;
10159   }
10160 
10161   // Although these instructions have their own encoding class, the lane size
10162   // is encoded in the same way as for most other SVE instructions.
10163   VectorFormat vform = instr->GetSVEVectorFormat();
10164 
10165   int pattern = instr->GetImmSVEPredicateConstraint();
10166   int count = GetPredicateConstraintLaneCount(vform, pattern);
10167   int multiplier = instr->ExtractBits(19, 16) + 1;
10168   WriteXRegister(instr->GetRd(), count * multiplier);
10169 }
10170 
10171 void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
10172   VectorFormat vform = instr->GetSVEVectorFormat();
10173   SimVRegister& vdn = ReadVRegister(instr->GetRd());
10174   SimVRegister& zm = ReadVRegister(instr->GetRn());
10175   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10176 
10177   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10178 
10179   switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
10180     case FADDA_v_p_z:
10181       fadda(vform, vdn, pg, zm);
10182       break;
10183     default:
10184       VIXL_UNIMPLEMENTED();
10185       break;
10186   }
10187 }
10188 
10189 void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
10190   VectorFormat vform = instr->GetSVEVectorFormat();
10191   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10192   SimVRegister& zm = ReadVRegister(instr->GetRn());
10193   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10194 
10195   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10196 
10197   SimVRegister result;
10198   switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
10199     case FABD_z_p_zz:
10200       fabd(vform, result, zdn, zm);
10201       break;
10202     case FADD_z_p_zz:
10203       fadd(vform, result, zdn, zm);
10204       break;
10205     case FDIVR_z_p_zz:
10206       fdiv(vform, result, zm, zdn);
10207       break;
10208     case FDIV_z_p_zz:
10209       fdiv(vform, result, zdn, zm);
10210       break;
10211     case FMAXNM_z_p_zz:
10212       fmaxnm(vform, result, zdn, zm);
10213       break;
10214     case FMAX_z_p_zz:
10215       fmax(vform, result, zdn, zm);
10216       break;
10217     case FMINNM_z_p_zz:
10218       fminnm(vform, result, zdn, zm);
10219       break;
10220     case FMIN_z_p_zz:
10221       fmin(vform, result, zdn, zm);
10222       break;
10223     case FMULX_z_p_zz:
10224       fmulx(vform, result, zdn, zm);
10225       break;
10226     case FMUL_z_p_zz:
10227       fmul(vform, result, zdn, zm);
10228       break;
10229     case FSCALE_z_p_zz:
10230       fscale(vform, result, zdn, zm);
10231       break;
10232     case FSUBR_z_p_zz:
10233       fsub(vform, result, zm, zdn);
10234       break;
10235     case FSUB_z_p_zz:
10236       fsub(vform, result, zdn, zm);
10237       break;
10238     default:
10239       VIXL_UNIMPLEMENTED();
10240       break;
10241   }
10242   mov_merging(vform, zdn, pg, result);
10243 }
10244 
10245 void Simulator::VisitSVEFPArithmeticWithImm_Predicated(
10246     const Instruction* instr) {
10247   VectorFormat vform = instr->GetSVEVectorFormat();
10248   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10249     VIXL_UNIMPLEMENTED();
10250   }
10251 
10252   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10253   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10254   SimVRegister result;
10255 
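  // Bit 5 (i1) selects between two fixed immediates for each instruction:
  // 0.5 or 1.0 for FADD/FSUB(R), 0.0 or 1.0 for FMAX(NM)/FMIN(NM), and
  // 0.5 or 2.0 for FMUL.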
10256   int i1 = instr->ExtractBit(5);
10257   SimVRegister add_sub_imm, min_max_imm, mul_imm;
10258   uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5);
10259   uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0);
10260   uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0);
10261   dup_immediate(vform, add_sub_imm, i1 ? one : half);
10262   dup_immediate(vform, min_max_imm, i1 ? one : 0);
10263   dup_immediate(vform, mul_imm, i1 ? two : half);
10264 
10265   switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
10266     case FADD_z_p_zs:
10267       fadd(vform, result, zdn, add_sub_imm);
10268       break;
10269     case FMAXNM_z_p_zs:
10270       fmaxnm(vform, result, zdn, min_max_imm);
10271       break;
10272     case FMAX_z_p_zs:
10273       fmax(vform, result, zdn, min_max_imm);
10274       break;
10275     case FMINNM_z_p_zs:
10276       fminnm(vform, result, zdn, min_max_imm);
10277       break;
10278     case FMIN_z_p_zs:
10279       fmin(vform, result, zdn, min_max_imm);
10280       break;
10281     case FMUL_z_p_zs:
10282       fmul(vform, result, zdn, mul_imm);
10283       break;
10284     case FSUBR_z_p_zs:
10285       fsub(vform, result, add_sub_imm, zdn);
10286       break;
10287     case FSUB_z_p_zs:
10288       fsub(vform, result, zdn, add_sub_imm);
10289       break;
10290     default:
10291       VIXL_UNIMPLEMENTED();
10292       break;
10293   }
10294   mov_merging(vform, zdn, pg, result);
10295 }
10296 
10297 void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
10298   VectorFormat vform = instr->GetSVEVectorFormat();
10299   SimVRegister& zd = ReadVRegister(instr->GetRd());
10300   SimVRegister& zm = ReadVRegister(instr->GetRn());
10301 
10302   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10303 
10304   switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
10305     case FTMAD_z_zzi:
10306       ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
10307       break;
10308     default:
10309       VIXL_UNIMPLEMENTED();
10310       break;
10311   }
10312 }
10313 
10314 void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
10315   VectorFormat vform = instr->GetSVEVectorFormat();
10316   SimVRegister& zd = ReadVRegister(instr->GetRd());
10317   SimVRegister& zn = ReadVRegister(instr->GetRn());
10318   SimVRegister& zm = ReadVRegister(instr->GetRm());
10319 
10320   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10321 
10322   switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
10323     case FADD_z_zz:
10324       fadd(vform, zd, zn, zm);
10325       break;
10326     case FMUL_z_zz:
10327       fmul(vform, zd, zn, zm);
10328       break;
10329     case FRECPS_z_zz:
10330       frecps(vform, zd, zn, zm);
10331       break;
10332     case FRSQRTS_z_zz:
10333       frsqrts(vform, zd, zn, zm);
10334       break;
10335     case FSUB_z_zz:
10336       fsub(vform, zd, zn, zm);
10337       break;
10338     case FTSMUL_z_zz:
10339       ftsmul(vform, zd, zn, zm);
10340       break;
10341     default:
10342       VIXL_UNIMPLEMENTED();
10343       break;
10344   }
10345 }
10346 
10347 void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
10348   SimPRegister& pd = ReadPRegister(instr->GetPd());
10349   SimVRegister& zn = ReadVRegister(instr->GetRn());
10350   SimVRegister& zm = ReadVRegister(instr->GetRm());
10351   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10352   VectorFormat vform = instr->GetSVEVectorFormat();
10353   SimVRegister result;
10354 
10355   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10356 
10357   switch (instr->Mask(SVEFPCompareVectorsMask)) {
10358     case FACGE_p_p_zz:
10359       fabscmp(vform, result, zn, zm, ge);
10360       break;
10361     case FACGT_p_p_zz:
10362       fabscmp(vform, result, zn, zm, gt);
10363       break;
10364     case FCMEQ_p_p_zz:
10365       fcmp(vform, result, zn, zm, eq);
10366       break;
10367     case FCMGE_p_p_zz:
10368       fcmp(vform, result, zn, zm, ge);
10369       break;
10370     case FCMGT_p_p_zz:
10371       fcmp(vform, result, zn, zm, gt);
10372       break;
10373     case FCMNE_p_p_zz:
10374       fcmp(vform, result, zn, zm, ne);
10375       break;
10376     case FCMUO_p_p_zz:
10377       fcmp(vform, result, zn, zm, uo);
10378       break;
10379     default:
10380       VIXL_UNIMPLEMENTED();
10381       break;
10382   }
10383 
10384   ExtractFromSimVRegister(vform, pd, result);
10385   mov_zeroing(pd, pg, pd);
10386 }
10387 
10388 void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
10389   SimPRegister& pd = ReadPRegister(instr->GetPd());
10390   SimVRegister& zn = ReadVRegister(instr->GetRn());
10391   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10392   VectorFormat vform = instr->GetSVEVectorFormat();
10393 
10394   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10395 
10396   SimVRegister result;
10397   SimVRegister zeros;
10398   dup_immediate(kFormatVnD, zeros, 0);
10399 
10400   switch (instr->Mask(SVEFPCompareWithZeroMask)) {
10401     case FCMEQ_p_p_z0:
10402       fcmp(vform, result, zn, zeros, eq);
10403       break;
10404     case FCMGE_p_p_z0:
10405       fcmp(vform, result, zn, zeros, ge);
10406       break;
10407     case FCMGT_p_p_z0:
10408       fcmp(vform, result, zn, zeros, gt);
10409       break;
10410     case FCMLE_p_p_z0:
10411       fcmp(vform, result, zn, zeros, le);
10412       break;
10413     case FCMLT_p_p_z0:
10414       fcmp(vform, result, zn, zeros, lt);
10415       break;
10416     case FCMNE_p_p_z0:
10417       fcmp(vform, result, zn, zeros, ne);
10418       break;
10419     default:
10420       VIXL_UNIMPLEMENTED();
10421       break;
10422   }
10423 
10424   ExtractFromSimVRegister(vform, pd, result);
10425   mov_zeroing(pd, pg, pd);
10426 }
10427 
10428 void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) {
10429   VectorFormat vform = instr->GetSVEVectorFormat();
10430 
10431   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10432     VIXL_UNIMPLEMENTED();
10433   }
10434 
10435   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10436   SimVRegister& zm = ReadVRegister(instr->GetRn());
10437   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10438   int rot = instr->ExtractBit(16);
10439 
10440   SimVRegister result;
10441 
10442   switch (instr->Mask(SVEFPComplexAdditionMask)) {
10443     case FCADD_z_p_zz:
10444       fcadd(vform, result, zdn, zm, rot);
10445       break;
10446     default:
10447       VIXL_UNIMPLEMENTED();
10448       break;
10449   }
10450   mov_merging(vform, zdn, pg, result);
10451 }
10452 
10453 void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) {
10454   VectorFormat vform = instr->GetSVEVectorFormat();
10455 
10456   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10457     VIXL_UNIMPLEMENTED();
10458   }
10459 
10460   SimVRegister& zda = ReadVRegister(instr->GetRd());
10461   SimVRegister& zn = ReadVRegister(instr->GetRn());
10462   SimVRegister& zm = ReadVRegister(instr->GetRm());
10463   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10464   int rot = instr->ExtractBits(14, 13);
10465 
10466   SimVRegister result;
10467 
10468   switch (instr->Mask(SVEFPComplexMulAddMask)) {
10469     case FCMLA_z_p_zzz:
10470       fcmla(vform, result, zn, zm, zda, rot);
10471       break;
10472     default:
10473       VIXL_UNIMPLEMENTED();
10474       break;
10475   }
10476   mov_merging(vform, zda, pg, result);
10477 }
10478 
10479 void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) {
10480   SimVRegister& zda = ReadVRegister(instr->GetRd());
10481   SimVRegister& zn = ReadVRegister(instr->GetRn());
10482   int rot = instr->ExtractBits(11, 10);
10483   unsigned zm_code = instr->GetRm();
10484   int index = -1;
10485   VectorFormat vform, vform_dup;
10486 
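  // For the H form, Zm is a 3-bit register number with a 2-bit index; for the
  // S form, Zm is 4 bits with a 1-bit index. The selected complex (real,
  // imaginary) pair is duplicated to each 128-bit segment.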
10487   switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
10488     case FCMLA_z_zzzi_h:
10489       vform = kFormatVnH;
10490       vform_dup = kFormatVnS;
10491       index = zm_code >> 3;
10492       zm_code &= 0x7;
10493       break;
10494     case FCMLA_z_zzzi_s:
10495       vform = kFormatVnS;
10496       vform_dup = kFormatVnD;
10497       index = zm_code >> 4;
10498       zm_code &= 0xf;
10499       break;
10500     default:
10501       VIXL_UNIMPLEMENTED();
10502       break;
10503   }
10504 
10505   if (index >= 0) {
10506     SimVRegister temp;
10507     dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index);
10508     fcmla(vform, zda, zn, temp, zda, rot);
10509   }
10510 }
10511 
10512 typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform,
10513                                                   LogicVRegister dst,
10514                                                   const LogicVRegister& src);
10515 
10516 void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
10517   VectorFormat vform = instr->GetSVEVectorFormat();
10518   SimVRegister& vd = ReadVRegister(instr->GetRd());
10519   SimVRegister& zn = ReadVRegister(instr->GetRn());
10520   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10521   int lane_size = LaneSizeInBitsFromFormat(vform);
10522 
10523   uint64_t inactive_value = 0;
10524   FastReduceFn fn = nullptr;
10525 
10526   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10527 
10528   switch (instr->Mask(SVEFPFastReductionMask)) {
10529     case FADDV_v_p_z:
10530       fn = &Simulator::faddv;
10531       break;
10532     case FMAXNMV_v_p_z:
10533       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10534       fn = &Simulator::fmaxnmv;
10535       break;
10536     case FMAXV_v_p_z:
10537       inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
10538       fn = &Simulator::fmaxv;
10539       break;
10540     case FMINNMV_v_p_z:
10541       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10542       fn = &Simulator::fminnmv;
10543       break;
10544     case FMINV_v_p_z:
10545       inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
10546       fn = &Simulator::fminv;
10547       break;
10548     default:
10549       VIXL_UNIMPLEMENTED();
10550       break;
10551   }
10552 
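  // Inactive lanes are filled with a value that does not affect the
  // reduction: zero for FADDV, the default NaN for FMAXNMV/FMINNMV,
  // -infinity for FMAXV and +infinity for FMINV.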
10553   SimVRegister scratch;
10554   dup_immediate(vform, scratch, inactive_value);
10555   mov_merging(vform, scratch, pg, zn);
10556   if (fn != nullptr) (this->*fn)(vform, vd, scratch);
10557 }
10558 
10559 void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
10560   VectorFormat vform = kFormatUndefined;
10561 
10562   switch (instr->Mask(SVEFPMulIndexMask)) {
10563     case FMUL_z_zzi_d:
10564       vform = kFormatVnD;
10565       break;
10566     case FMUL_z_zzi_h_i3h:
10567     case FMUL_z_zzi_h:
10568       vform = kFormatVnH;
10569       break;
10570     case FMUL_z_zzi_s:
10571       vform = kFormatVnS;
10572       break;
10573     default:
10574       VIXL_UNIMPLEMENTED();
10575       break;
10576   }
10577 
10578   SimVRegister& zd = ReadVRegister(instr->GetRd());
10579   SimVRegister& zn = ReadVRegister(instr->GetRn());
10580   SimVRegister temp;
10581 
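  // The indexed element of zm is duplicated to each 128-bit segment of the
  // vector before the multiply.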
10582   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10583   fmul(vform, zd, zn, temp);
10584 }
10585 
10586 void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
10587   VectorFormat vform = instr->GetSVEVectorFormat();
10588   SimVRegister& zd = ReadVRegister(instr->GetRd());
10589   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10590   SimVRegister result;
10591 
10592   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10593 
10594   if (instr->ExtractBit(15) == 0) {
10595     // Floating-point multiply-accumulate writing addend.
10596     SimVRegister& zm = ReadVRegister(instr->GetRm());
10597     SimVRegister& zn = ReadVRegister(instr->GetRn());
10598 
10599     switch (instr->Mask(SVEFPMulAddMask)) {
10600       // zda = zda + zn * zm
10601       case FMLA_z_p_zzz:
10602         fmla(vform, result, zd, zn, zm);
10603         break;
10604       // zda = -zda + -zn * zm
10605       case FNMLA_z_p_zzz:
10606         fneg(vform, result, zd);
10607         fmls(vform, result, result, zn, zm);
10608         break;
10609       // zda = zda + -zn * zm
10610       case FMLS_z_p_zzz:
10611         fmls(vform, result, zd, zn, zm);
10612         break;
10613       // zda = -zda + zn * zm
10614       case FNMLS_z_p_zzz:
10615         fneg(vform, result, zd);
10616         fmla(vform, result, result, zn, zm);
10617         break;
10618       default:
10619         VIXL_UNIMPLEMENTED();
10620         break;
10621     }
10622   } else {
10623     // Floating-point multiply-accumulate writing multiplicand.
10624     SimVRegister& za = ReadVRegister(instr->GetRm());
10625     SimVRegister& zm = ReadVRegister(instr->GetRn());
10626 
10627     switch (instr->Mask(SVEFPMulAddMask)) {
10628       // zdn = za + zdn * zm
10629       case FMAD_z_p_zzz:
10630         fmla(vform, result, za, zd, zm);
10631         break;
10632       // zdn = -za + -zdn * zm
10633       case FNMAD_z_p_zzz:
10634         fneg(vform, result, za);
10635         fmls(vform, result, result, zd, zm);
10636         break;
10637       // zdn = za + -zdn * zm
10638       case FMSB_z_p_zzz:
10639         fmls(vform, result, za, zd, zm);
10640         break;
10641       // zdn = -za + zdn * zm
10642       case FNMSB_z_p_zzz:
10643         fneg(vform, result, za);
10644         fmla(vform, result, result, zd, zm);
10645         break;
10646       default:
10647         VIXL_UNIMPLEMENTED();
10648         break;
10649     }
10650   }
10651 
10652   mov_merging(vform, zd, pg, result);
10653 }
10654 
10655 void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
10656   VectorFormat vform = kFormatUndefined;
10657 
10658   switch (instr->Mask(SVEFPMulAddIndexMask)) {
10659     case FMLA_z_zzzi_d:
10660     case FMLS_z_zzzi_d:
10661       vform = kFormatVnD;
10662       break;
10663     case FMLA_z_zzzi_s:
10664     case FMLS_z_zzzi_s:
10665       vform = kFormatVnS;
10666       break;
10667     case FMLA_z_zzzi_h:
10668     case FMLS_z_zzzi_h:
10669     case FMLA_z_zzzi_h_i3h:
10670     case FMLS_z_zzzi_h_i3h:
10671       vform = kFormatVnH;
10672       break;
10673     default:
10674       VIXL_UNIMPLEMENTED();
10675       break;
10676   }
10677 
10678   SimVRegister& zd = ReadVRegister(instr->GetRd());
10679   SimVRegister& zn = ReadVRegister(instr->GetRn());
10680   SimVRegister temp;
10681 
10682   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10683   if (instr->ExtractBit(10) == 1) {
10684     fmls(vform, zd, zd, zn, temp);
10685   } else {
10686     fmla(vform, zd, zd, zn, temp);
10687   }
10688 }
10689 
10690 void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) {
10691   SimVRegister& zd = ReadVRegister(instr->GetRd());
10692   SimVRegister& zn = ReadVRegister(instr->GetRn());
10693   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10694   int dst_data_size;
10695   int src_data_size;
10696 
10697   switch (instr->Mask(SVEFPConvertToIntMask)) {
10698     case FCVTZS_z_p_z_d2w:
10699     case FCVTZU_z_p_z_d2w:
10700       dst_data_size = kSRegSize;
10701       src_data_size = kDRegSize;
10702       break;
10703     case FCVTZS_z_p_z_d2x:
10704     case FCVTZU_z_p_z_d2x:
10705       dst_data_size = kDRegSize;
10706       src_data_size = kDRegSize;
10707       break;
10708     case FCVTZS_z_p_z_fp162h:
10709     case FCVTZU_z_p_z_fp162h:
10710       dst_data_size = kHRegSize;
10711       src_data_size = kHRegSize;
10712       break;
10713     case FCVTZS_z_p_z_fp162w:
10714     case FCVTZU_z_p_z_fp162w:
10715       dst_data_size = kSRegSize;
10716       src_data_size = kHRegSize;
10717       break;
10718     case FCVTZS_z_p_z_fp162x:
10719     case FCVTZU_z_p_z_fp162x:
10720       dst_data_size = kDRegSize;
10721       src_data_size = kHRegSize;
10722       break;
10723     case FCVTZS_z_p_z_s2w:
10724     case FCVTZU_z_p_z_s2w:
10725       dst_data_size = kSRegSize;
10726       src_data_size = kSRegSize;
10727       break;
10728     case FCVTZS_z_p_z_s2x:
10729     case FCVTZU_z_p_z_s2x:
10730       dst_data_size = kDRegSize;
10731       src_data_size = kSRegSize;
10732       break;
10733     default:
10734       VIXL_UNIMPLEMENTED();
10735       dst_data_size = 0;
10736       src_data_size = 0;
10737       break;
10738   }
10739 
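  // Operate on lanes wide enough to hold the larger of the source and
  // destination element sizes.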
10740   VectorFormat vform =
10741       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10742 
10743   if (instr->ExtractBit(16) == 0) {
10744     fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10745   } else {
10746     fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10747   }
10748 }
10749 
10750 void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
10751   SimVRegister& zd = ReadVRegister(instr->GetRd());
10752   SimVRegister& zn = ReadVRegister(instr->GetRn());
10753   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10754   VectorFormat dst_data_size = kFormatUndefined;
10755   VectorFormat src_data_size = kFormatUndefined;
10756 
10757   switch (instr->Mask(SVEFPConvertPrecisionMask)) {
10758     case FCVT_z_p_z_d2h:
10759       dst_data_size = kFormatVnH;
10760       src_data_size = kFormatVnD;
10761       break;
10762     case FCVT_z_p_z_d2s:
10763       dst_data_size = kFormatVnS;
10764       src_data_size = kFormatVnD;
10765       break;
10766     case FCVT_z_p_z_h2d:
10767       dst_data_size = kFormatVnD;
10768       src_data_size = kFormatVnH;
10769       break;
10770     case FCVT_z_p_z_h2s:
10771       dst_data_size = kFormatVnS;
10772       src_data_size = kFormatVnH;
10773       break;
10774     case FCVT_z_p_z_s2d:
10775       dst_data_size = kFormatVnD;
10776       src_data_size = kFormatVnS;
10777       break;
10778     case FCVT_z_p_z_s2h:
10779       dst_data_size = kFormatVnH;
10780       src_data_size = kFormatVnS;
10781       break;
10782     default:
10783       VIXL_UNIMPLEMENTED();
10784       break;
10785   }
10786 
10787   fcvt(dst_data_size, src_data_size, zd, pg, zn);
10788 }
10789 
10790 void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
10791   SimVRegister& zd = ReadVRegister(instr->GetRd());
10792   SimVRegister& zn = ReadVRegister(instr->GetRn());
10793   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10794   VectorFormat vform = instr->GetSVEVectorFormat();
10795   SimVRegister result;
10796 
10797   switch (instr->Mask(SVEFPUnaryOpMask)) {
10798     case FRECPX_z_p_z:
10799       frecpx(vform, result, zn);
10800       break;
10801     case FSQRT_z_p_z:
10802       fsqrt(vform, result, zn);
10803       break;
10804     default:
10805       VIXL_UNIMPLEMENTED();
10806       break;
10807   }
10808   mov_merging(vform, zd, pg, result);
10809 }
10810 
10811 void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
10812   SimVRegister& zd = ReadVRegister(instr->GetRd());
10813   SimVRegister& zn = ReadVRegister(instr->GetRn());
10814   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10815   VectorFormat vform = instr->GetSVEVectorFormat();
10816   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10817   bool exact_exception = false;
10818 
10819   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10820 
10821   switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
10822     case FRINTA_z_p_z:
10823       fpcr_rounding = FPTieAway;
10824       break;
10825     case FRINTI_z_p_z:
10826       break;  // Use FPCR rounding mode.
10827     case FRINTM_z_p_z:
10828       fpcr_rounding = FPNegativeInfinity;
10829       break;
10830     case FRINTN_z_p_z:
10831       fpcr_rounding = FPTieEven;
10832       break;
10833     case FRINTP_z_p_z:
10834       fpcr_rounding = FPPositiveInfinity;
10835       break;
10836     case FRINTX_z_p_z:
10837       exact_exception = true;
10838       break;
10839     case FRINTZ_z_p_z:
10840       fpcr_rounding = FPZero;
10841       break;
10842     default:
10843       VIXL_UNIMPLEMENTED();
10844       break;
10845   }
10846 
10847   SimVRegister result;
10848   frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger);
10849   mov_merging(vform, zd, pg, result);
10850 }
10851 
10852 void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) {
10853   SimVRegister& zd = ReadVRegister(instr->GetRd());
10854   SimVRegister& zn = ReadVRegister(instr->GetRn());
10855   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10856   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10857   int dst_data_size;
10858   int src_data_size;
10859 
10860   switch (instr->Mask(SVEIntConvertToFPMask)) {
10861     case SCVTF_z_p_z_h2fp16:
10862     case UCVTF_z_p_z_h2fp16:
10863       dst_data_size = kHRegSize;
10864       src_data_size = kHRegSize;
10865       break;
10866     case SCVTF_z_p_z_w2d:
10867     case UCVTF_z_p_z_w2d:
10868       dst_data_size = kDRegSize;
10869       src_data_size = kSRegSize;
10870       break;
10871     case SCVTF_z_p_z_w2fp16:
10872     case UCVTF_z_p_z_w2fp16:
10873       dst_data_size = kHRegSize;
10874       src_data_size = kSRegSize;
10875       break;
10876     case SCVTF_z_p_z_w2s:
10877     case UCVTF_z_p_z_w2s:
10878       dst_data_size = kSRegSize;
10879       src_data_size = kSRegSize;
10880       break;
10881     case SCVTF_z_p_z_x2d:
10882     case UCVTF_z_p_z_x2d:
10883       dst_data_size = kDRegSize;
10884       src_data_size = kDRegSize;
10885       break;
10886     case SCVTF_z_p_z_x2fp16:
10887     case UCVTF_z_p_z_x2fp16:
10888       dst_data_size = kHRegSize;
10889       src_data_size = kDRegSize;
10890       break;
10891     case SCVTF_z_p_z_x2s:
10892     case UCVTF_z_p_z_x2s:
10893       dst_data_size = kSRegSize;
10894       src_data_size = kDRegSize;
10895       break;
10896     default:
10897       VIXL_UNIMPLEMENTED();
10898       dst_data_size = 0;
10899       src_data_size = 0;
10900       break;
10901   }
10902 
10903   VectorFormat vform =
10904       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10905 
10906   if (instr->ExtractBit(16) == 0) {
10907     scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10908   } else {
10909     ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10910   }
10911 }
10912 
10913 void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
10914   VectorFormat vform = instr->GetSVEVectorFormat();
10915   SimVRegister& zd = ReadVRegister(instr->GetRd());
10916   SimVRegister& zn = ReadVRegister(instr->GetRn());
10917   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10918 
10919   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10920 
10921   switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
10922     case FRECPE_z_z:
10923       frecpe(vform, zd, zn, fpcr_rounding);
10924       break;
10925     case FRSQRTE_z_z:
10926       frsqrte(vform, zd, zn);
10927       break;
10928     default:
10929       VIXL_UNIMPLEMENTED();
10930       break;
10931   }
10932 }
10933 
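// INCP/DECP and the saturating SQINCP/UQINCP/SQDECP/UQDECP: adjust a vector or
// scalar register by the number of active lanes in Pg. Bit 11 selects between
// the vector form (applied to every lane of Zdn) and the scalar form (applied
// to Xdn/Wdn via IncDecN below). Illustrative example: if p0 has five active
// .S lanes, then
//   incp x0, p0.s  // x0 += 5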
10934 void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) {
10935   VectorFormat vform = instr->GetSVEVectorFormat();
10936   SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5));
10937 
10938   int count = CountActiveLanes(vform, pg);
10939 
10940   if (instr->ExtractBit(11) == 0) {
10941     SimVRegister& zdn = ReadVRegister(instr->GetRd());
10942     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10943       case DECP_z_p_z:
10944         sub_uint(vform, zdn, zdn, count);
10945         break;
10946       case INCP_z_p_z:
10947         add_uint(vform, zdn, zdn, count);
10948         break;
10949       case SQDECP_z_p_z:
10950         sub_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10951         break;
10952       case SQINCP_z_p_z:
10953         add_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10954         break;
10955       case UQDECP_z_p_z:
10956         sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10957         break;
10958       case UQINCP_z_p_z:
10959         add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10960         break;
10961       default:
10962         VIXL_UNIMPLEMENTED();
10963         break;
10964     }
10965   } else {
10966     bool is_saturating = (instr->ExtractBit(18) == 0);
10967     bool decrement =
10968         is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16);
10969     bool is_signed = (instr->ExtractBit(16) == 0);
10970     bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true;
10971     unsigned width = sf ? kXRegSize : kWRegSize;
10972 
10973     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10974       case DECP_r_p_r:
10975       case INCP_r_p_r:
10976       case SQDECP_r_p_r_sx:
10977       case SQDECP_r_p_r_x:
10978       case SQINCP_r_p_r_sx:
10979       case SQINCP_r_p_r_x:
10980       case UQDECP_r_p_r_uw:
10981       case UQDECP_r_p_r_x:
10982       case UQINCP_r_p_r_uw:
10983       case UQINCP_r_p_r_x:
10984         WriteXRegister(instr->GetRd(),
10985                        IncDecN(ReadXRegister(instr->GetRd()),
10986                                decrement ? -count : count,
10987                                width,
10988                                is_saturating,
10989                                is_signed));
10990         break;
10991       default:
10992         VIXL_UNIMPLEMENTED();
10993         break;
10994     }
10995   }
10996 }
10997 
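// Add `delta` to the n-bit accumulator `acc`, optionally saturating to the
// signed or unsigned n-bit range; negative signed results are sign-extended to
// 64 bits. Worked examples of the saturation logic (a sketch, for n == 8):
//   IncDecN(0x7f, +1, 8, true, true)    -> 0x7f  (clamped at INT8_MAX)
//   IncDecN(0x00, -1, 8, true, false)   -> 0x00  (clamped at 0)
//   IncDecN(0xff, +1, 8, false, false)  -> 0x00  (wraps when not saturating)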
10998 uint64_t Simulator::IncDecN(uint64_t acc,
10999                             int64_t delta,
11000                             unsigned n,
11001                             bool is_saturating,
11002                             bool is_signed) {
11003   VIXL_ASSERT(n <= 64);
11004   VIXL_ASSERT(IsIntN(n, delta));
11005 
11006   uint64_t sign_mask = UINT64_C(1) << (n - 1);
11007   uint64_t mask = GetUintMask(n);
11008 
11009   acc &= mask;  // Ignore initial accumulator high bits.
11010   uint64_t result = (acc + delta) & mask;
11011 
11012   bool result_negative = ((result & sign_mask) != 0);
11013 
11014   if (is_saturating) {
11015     if (is_signed) {
11016       bool acc_negative = ((acc & sign_mask) != 0);
11017       bool delta_negative = delta < 0;
11018 
11019       // If the signs of the operands are the same, but different from the
11020       // result, there was an overflow.
11021       if ((acc_negative == delta_negative) &&
11022           (acc_negative != result_negative)) {
11023         if (result_negative) {
11024           // Saturate to [..., INT<n>_MAX].
11025           result_negative = false;
11026           result = mask & ~sign_mask;  // E.g. 0x000000007fffffff
11027         } else {
11028           // Saturate to [INT<n>_MIN, ...].
11029           result_negative = true;
11030           result = ~mask | sign_mask;  // E.g. 0xffffffff80000000
11031         }
11032       }
11033     } else {
11034       if ((delta < 0) && (result > acc)) {
11035         // Saturate to [0, ...].
11036         result = 0;
11037       } else if ((delta > 0) && (result < acc)) {
11038         // Saturate to [..., UINT<n>_MAX].
11039         result = mask;
11040       }
11041     }
11042   }
11043 
11044   // Sign-extend if necessary.
11045   if (result_negative && is_signed) result |= ~mask;
11046 
11047   return result;
11048 }
11049 
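// INDEX: fill lane i of Zd with (start + i * step). Bits 10 and 11 select
// whether the start and step come from immediates or scalar registers.
// Illustrative example:
//   index z0.s, #1, #2  // z0.s = {1, 3, 5, 7, ...}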
11050 void Simulator::VisitSVEIndexGeneration(const Instruction* instr) {
11051   VectorFormat vform = instr->GetSVEVectorFormat();
11052   SimVRegister& zd = ReadVRegister(instr->GetRd());
11053   switch (instr->Mask(SVEIndexGenerationMask)) {
11054     case INDEX_z_ii:
11055     case INDEX_z_ir:
11056     case INDEX_z_ri:
11057     case INDEX_z_rr: {
11058       uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn())
11059                                              : instr->ExtractSignedBits(9, 5);
11060       uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm())
11061                                             : instr->ExtractSignedBits(20, 16);
11062       index(vform, zd, start, step);
11063       break;
11064     }
11065     default:
11066       VIXL_UNIMPLEMENTED();
11067       break;
11068   }
11069 }
11070 
11071 void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) {
11072   VectorFormat vform = instr->GetSVEVectorFormat();
11073   SimVRegister& zd = ReadVRegister(instr->GetRd());
11074   SimVRegister& zn = ReadVRegister(instr->GetRn());
11075   SimVRegister& zm = ReadVRegister(instr->GetRm());
11076   switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
11077     case ADD_z_zz:
11078       add(vform, zd, zn, zm);
11079       break;
11080     case SQADD_z_zz:
11081       add(vform, zd, zn, zm).SignedSaturate(vform);
11082       break;
11083     case SQSUB_z_zz:
11084       sub(vform, zd, zn, zm).SignedSaturate(vform);
11085       break;
11086     case SUB_z_zz:
11087       sub(vform, zd, zn, zm);
11088       break;
11089     case UQADD_z_zz:
11090       add(vform, zd, zn, zm).UnsignedSaturate(vform);
11091       break;
11092     case UQSUB_z_zz:
11093       sub(vform, zd, zn, zm).UnsignedSaturate(vform);
11094       break;
11095     default:
11096       VIXL_UNIMPLEMENTED();
11097       break;
11098   }
11099 }
11100 
11101 void Simulator::VisitSVEIntAddSubtractVectors_Predicated(
11102     const Instruction* instr) {
11103   VectorFormat vform = instr->GetSVEVectorFormat();
11104   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11105   SimVRegister& zm = ReadVRegister(instr->GetRn());
11106   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11107   SimVRegister result;
11108 
11109   switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
11110     case ADD_z_p_zz:
11111       add(vform, result, zdn, zm);
11112       break;
11113     case SUBR_z_p_zz:
11114       sub(vform, result, zm, zdn);
11115       break;
11116     case SUB_z_p_zz:
11117       sub(vform, result, zdn, zm);
11118       break;
11119     default:
11120       VIXL_UNIMPLEMENTED();
11121       break;
11122   }
11123   mov_merging(vform, zdn, pg, result);
11124 }
11125 
11126 void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) {
11127   VectorFormat vform = instr->GetSVEVectorFormat();
11128   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11129   SimVRegister& zm = ReadVRegister(instr->GetRn());
11130   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11131   SimVRegister result;
11132 
11133   switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
11134     case AND_z_p_zz:
11135       SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm);
11136       break;
11137     case BIC_z_p_zz:
11138       SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm);
11139       break;
11140     case EOR_z_p_zz:
11141       SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm);
11142       break;
11143     case ORR_z_p_zz:
11144       SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm);
11145       break;
11146     default:
11147       VIXL_UNIMPLEMENTED();
11148       break;
11149   }
11150   mov_merging(vform, zdn, pg, result);
11151 }
11152 
11153 void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) {
11154   VectorFormat vform = instr->GetSVEVectorFormat();
11155   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11156   SimVRegister& zm = ReadVRegister(instr->GetRn());
11157   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11158   SimVRegister result;
11159 
11160   switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
11161     case MUL_z_p_zz:
11162       mul(vform, result, zdn, zm);
11163       break;
11164     case SMULH_z_p_zz:
11165       smulh(vform, result, zdn, zm);
11166       break;
11167     case UMULH_z_p_zz:
11168       umulh(vform, result, zdn, zm);
11169       break;
11170     default:
11171       VIXL_UNIMPLEMENTED();
11172       break;
11173   }
11174   mov_merging(vform, zdn, pg, result);
11175 }
11176 
11177 void Simulator::VisitSVEIntMinMaxDifference_Predicated(
11178     const Instruction* instr) {
11179   VectorFormat vform = instr->GetSVEVectorFormat();
11180   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11181   SimVRegister& zm = ReadVRegister(instr->GetRn());
11182   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11183   SimVRegister result;
11184 
11185   switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
11186     case SABD_z_p_zz:
11187       absdiff(vform, result, zdn, zm, true);
11188       break;
11189     case SMAX_z_p_zz:
11190       smax(vform, result, zdn, zm);
11191       break;
11192     case SMIN_z_p_zz:
11193       smin(vform, result, zdn, zm);
11194       break;
11195     case UABD_z_p_zz:
11196       absdiff(vform, result, zdn, zm, false);
11197       break;
11198     case UMAX_z_p_zz:
11199       umax(vform, result, zdn, zm);
11200       break;
11201     case UMIN_z_p_zz:
11202       umin(vform, result, zdn, zm);
11203       break;
11204     default:
11205       VIXL_UNIMPLEMENTED();
11206       break;
11207   }
11208   mov_merging(vform, zdn, pg, result);
11209 }
11210 
11211 void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) {
11212   VectorFormat vform = instr->GetSVEVectorFormat();
11213   SimVRegister& zd = ReadVRegister(instr->GetRd());
11214   SimVRegister scratch;
11215 
11216   switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
11217     case MUL_z_zi:
11218       dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned());
11219       mul(vform, zd, zd, scratch);
11220       break;
11221     default:
11222       VIXL_UNIMPLEMENTED();
11223       break;
11224   }
11225 }
11226 
11227 void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) {
11228   VectorFormat vform = instr->GetSVEVectorFormat();
11229   SimVRegister& zdn = ReadVRegister(instr->GetRd());
11230   SimVRegister& zm = ReadVRegister(instr->GetRn());
11231   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11232   SimVRegister result;
11233 
11234   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
11235 
11236   switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
11237     case SDIVR_z_p_zz:
11238       sdiv(vform, result, zm, zdn);
11239       break;
11240     case SDIV_z_p_zz:
11241       sdiv(vform, result, zdn, zm);
11242       break;
11243     case UDIVR_z_p_zz:
11244       udiv(vform, result, zm, zdn);
11245       break;
11246     case UDIV_z_p_zz:
11247       udiv(vform, result, zdn, zm);
11248       break;
11249     default:
11250       VIXL_UNIMPLEMENTED();
11251       break;
11252   }
11253   mov_merging(vform, zdn, pg, result);
11254 }
11255 
11256 void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) {
11257   VectorFormat vform = instr->GetSVEVectorFormat();
11258   SimVRegister& zd = ReadVRegister(instr->GetRd());
11259   SimVRegister scratch;
11260 
11261   uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned();
11262   int64_t signed_imm = instr->GetImmSVEIntWideSigned();
11263 
11264   switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
11265     case SMAX_z_zi:
11266       dup_immediate(vform, scratch, signed_imm);
11267       smax(vform, zd, zd, scratch);
11268       break;
11269     case SMIN_z_zi:
11270       dup_immediate(vform, scratch, signed_imm);
11271       smin(vform, zd, zd, scratch);
11272       break;
11273     case UMAX_z_zi:
11274       dup_immediate(vform, scratch, unsigned_imm);
11275       umax(vform, zd, zd, scratch);
11276       break;
11277     case UMIN_z_zi:
11278       dup_immediate(vform, scratch, unsigned_imm);
11279       umin(vform, zd, zd, scratch);
11280       break;
11281     default:
11282       VIXL_UNIMPLEMENTED();
11283       break;
11284   }
11285 }
11286 
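// WHILE{LE,LO,LS,LT} and the reversed WHILE{GE,GT,HI,HS}: build a predicate
// whose lanes stay active while the comparison between a counter (starting at
// Xn and stepping by +/-1 per lane) and the limit in Xm holds; once it fails,
// all remaining lanes are inactive. The reversed forms fill lanes from the top
// downwards. Illustrative example:
//   whilelt p0.s, w0, w1  // With w0 = 0, w1 = 3: lanes 0..2 active, rest off.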
11287 void Simulator::VisitSVEIntCompareScalarCountAndLimit(
11288     const Instruction* instr) {
11289   unsigned rn_code = instr->GetRn();
11290   unsigned rm_code = instr->GetRm();
11291   SimPRegister& pd = ReadPRegister(instr->GetPd());
11292   VectorFormat vform = instr->GetSVEVectorFormat();
11293 
11294   bool is_64_bit = instr->ExtractBit(12) == 1;
11295   int rsize = is_64_bit ? kXRegSize : kWRegSize;
11296   uint64_t mask = is_64_bit ? kXRegMask : kWRegMask;
11297 
11298   uint64_t usrc1 = ReadXRegister(rn_code);
11299   int64_t ssrc2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11300   uint64_t usrc2 = ssrc2 & mask;
11301 
11302   bool reverse = (form_hash_ == "whilege_p_p_rr"_h) ||
11303                  (form_hash_ == "whilegt_p_p_rr"_h) ||
11304                  (form_hash_ == "whilehi_p_p_rr"_h) ||
11305                  (form_hash_ == "whilehs_p_p_rr"_h);
11306 
11307   int lane_count = LaneCountFromFormat(vform);
11308   bool last = true;
11309   for (int i = 0; i < lane_count; i++) {
11310     usrc1 &= mask;
11311     int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1);
11312 
11313     bool cond = false;
11314     switch (form_hash_) {
11315       case "whilele_p_p_rr"_h:
11316         cond = ssrc1 <= ssrc2;
11317         break;
11318       case "whilelo_p_p_rr"_h:
11319         cond = usrc1 < usrc2;
11320         break;
11321       case "whilels_p_p_rr"_h:
11322         cond = usrc1 <= usrc2;
11323         break;
11324       case "whilelt_p_p_rr"_h:
11325         cond = ssrc1 < ssrc2;
11326         break;
11327       case "whilege_p_p_rr"_h:
11328         cond = ssrc1 >= ssrc2;
11329         break;
11330       case "whilegt_p_p_rr"_h:
11331         cond = ssrc1 > ssrc2;
11332         break;
11333       case "whilehi_p_p_rr"_h:
11334         cond = usrc1 > usrc2;
11335         break;
11336       case "whilehs_p_p_rr"_h:
11337         cond = usrc1 >= usrc2;
11338         break;
11339       default:
11340         VIXL_UNIMPLEMENTED();
11341         break;
11342     }
11343     last = last && cond;
11344     LogicPRegister dst(pd);
11345     int lane = reverse ? ((lane_count - 1) - i) : i;
11346     dst.SetActive(vform, lane, last);
11347     usrc1 += reverse ? -1 : 1;
11348   }
11349 
11350   PredTest(vform, GetPTrue(), pd);
11351   LogSystemRegister(NZCV);
11352 }
11353 
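// CTERMEQ/CTERMNE: compare two scalars for a conditional-termination test.
// When the termination condition holds, N is set and V is cleared; otherwise N
// is cleared and V becomes !C. The Z and C flags are left unchanged here.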
11354 void Simulator::VisitSVEConditionallyTerminateScalars(
11355     const Instruction* instr) {
11356   unsigned rn_code = instr->GetRn();
11357   unsigned rm_code = instr->GetRm();
11358   bool is_64_bit = instr->ExtractBit(22) == 1;
11359   uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
11360   uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11361   bool term = false;
11362   switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
11363     case CTERMEQ_rr:
11364       term = src1 == src2;
11365       break;
11366     case CTERMNE_rr:
11367       term = src1 != src2;
11368       break;
11369     default:
11370       VIXL_UNIMPLEMENTED();
11371       break;
11372   }
11373   ReadNzcv().SetN(term ? 1 : 0);
11374   ReadNzcv().SetV(term ? 0 : !ReadC());
11375   LogSystemRegister(NZCV);
11376 }
11377 
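// CMP<cc> (vector vs. signed immediate): compare each lane of Zn with a
// sign-extended 5-bit immediate, writing per-lane results to Pd. CMPLE and
// CMPLT are implemented by broadcasting the immediate and commuting the
// operands of a GE/GT comparison.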
11378 void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
11379   bool commute_inputs = false;
11380   Condition cond = al;
11381   switch (instr->Mask(SVEIntCompareSignedImmMask)) {
11382     case CMPEQ_p_p_zi:
11383       cond = eq;
11384       break;
11385     case CMPGE_p_p_zi:
11386       cond = ge;
11387       break;
11388     case CMPGT_p_p_zi:
11389       cond = gt;
11390       break;
11391     case CMPLE_p_p_zi:
11392       cond = ge;
11393       commute_inputs = true;
11394       break;
11395     case CMPLT_p_p_zi:
11396       cond = gt;
11397       commute_inputs = true;
11398       break;
11399     case CMPNE_p_p_zi:
11400       cond = ne;
11401       break;
11402     default:
11403       VIXL_UNIMPLEMENTED();
11404       break;
11405   }
11406 
11407   VectorFormat vform = instr->GetSVEVectorFormat();
11408   SimVRegister src2;
11409   dup_immediate(vform,
11410                 src2,
11411                 ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16)));
11412   SVEIntCompareVectorsHelper(cond,
11413                              vform,
11414                              ReadPRegister(instr->GetPd()),
11415                              ReadPRegister(instr->GetPgLow8()),
11416                              commute_inputs ? src2
11417                                             : ReadVRegister(instr->GetRn()),
11418                              commute_inputs ? ReadVRegister(instr->GetRn())
11419                                             : src2);
11420 }
11421 
11422 void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
11423   bool commute_inputs = false;
11424   Condition cond = al;
11425   switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
11426     case CMPHI_p_p_zi:
11427       cond = hi;
11428       break;
11429     case CMPHS_p_p_zi:
11430       cond = hs;
11431       break;
11432     case CMPLO_p_p_zi:
11433       cond = hi;
11434       commute_inputs = true;
11435       break;
11436     case CMPLS_p_p_zi:
11437       cond = hs;
11438       commute_inputs = true;
11439       break;
11440     default:
11441       VIXL_UNIMPLEMENTED();
11442       break;
11443   }
11444 
11445   VectorFormat vform = instr->GetSVEVectorFormat();
11446   SimVRegister src2;
11447   dup_immediate(vform, src2, instr->ExtractBits(20, 14));
11448   SVEIntCompareVectorsHelper(cond,
11449                              vform,
11450                              ReadPRegister(instr->GetPd()),
11451                              ReadPRegister(instr->GetPgLow8()),
11452                              commute_inputs ? src2
11453                                             : ReadVRegister(instr->GetRn()),
11454                              commute_inputs ? ReadVRegister(instr->GetRn())
11455                                             : src2);
11456 }
11457 
11458 void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) {
11459   Instr op = instr->Mask(SVEIntCompareVectorsMask);
11460   bool is_wide_elements = false;
11461   switch (op) {
11462     case CMPEQ_p_p_zw:
11463     case CMPGE_p_p_zw:
11464     case CMPGT_p_p_zw:
11465     case CMPHI_p_p_zw:
11466     case CMPHS_p_p_zw:
11467     case CMPLE_p_p_zw:
11468     case CMPLO_p_p_zw:
11469     case CMPLS_p_p_zw:
11470     case CMPLT_p_p_zw:
11471     case CMPNE_p_p_zw:
11472       is_wide_elements = true;
11473       break;
11474   }
11475 
11476   Condition cond;
11477   switch (op) {
11478     case CMPEQ_p_p_zw:
11479     case CMPEQ_p_p_zz:
11480       cond = eq;
11481       break;
11482     case CMPGE_p_p_zw:
11483     case CMPGE_p_p_zz:
11484       cond = ge;
11485       break;
11486     case CMPGT_p_p_zw:
11487     case CMPGT_p_p_zz:
11488       cond = gt;
11489       break;
11490     case CMPHI_p_p_zw:
11491     case CMPHI_p_p_zz:
11492       cond = hi;
11493       break;
11494     case CMPHS_p_p_zw:
11495     case CMPHS_p_p_zz:
11496       cond = hs;
11497       break;
11498     case CMPNE_p_p_zw:
11499     case CMPNE_p_p_zz:
11500       cond = ne;
11501       break;
11502     case CMPLE_p_p_zw:
11503       cond = le;
11504       break;
11505     case CMPLO_p_p_zw:
11506       cond = lo;
11507       break;
11508     case CMPLS_p_p_zw:
11509       cond = ls;
11510       break;
11511     case CMPLT_p_p_zw:
11512       cond = lt;
11513       break;
11514     default:
11515       VIXL_UNIMPLEMENTED();
11516       cond = al;
11517       break;
11518   }
11519 
11520   SVEIntCompareVectorsHelper(cond,
11521                              instr->GetSVEVectorFormat(),
11522                              ReadPRegister(instr->GetPd()),
11523                              ReadPRegister(instr->GetPgLow8()),
11524                              ReadVRegister(instr->GetRn()),
11525                              ReadVRegister(instr->GetRm()),
11526                              is_wide_elements);
11527 }
11528 
11529 void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) {
11530   VectorFormat vform = instr->GetSVEVectorFormat();
11531   SimVRegister& zd = ReadVRegister(instr->GetRd());
11532   SimVRegister& zn = ReadVRegister(instr->GetRn());
11533 
11534   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11535               (vform == kFormatVnD));
11536 
11537   switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
11538     case FEXPA_z_z:
11539       fexpa(vform, zd, zn);
11540       break;
11541     default:
11542       VIXL_UNIMPLEMENTED();
11543       break;
11544   }
11545 }
11546 
11547 void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) {
11548   VectorFormat vform = instr->GetSVEVectorFormat();
11549   SimVRegister& zd = ReadVRegister(instr->GetRd());
11550   SimVRegister& zn = ReadVRegister(instr->GetRn());
11551   SimVRegister& zm = ReadVRegister(instr->GetRm());
11552 
11553   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11554               (vform == kFormatVnD));
11555 
11556   switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
11557     case FTSSEL_z_zz:
11558       ftssel(vform, zd, zn, zm);
11559       break;
11560     default:
11561       VIXL_UNIMPLEMENTED();
11562       break;
11563   }
11564 }
11565 
11566 void Simulator::VisitSVEConstructivePrefix_Unpredicated(
11567     const Instruction* instr) {
11568   SimVRegister& zd = ReadVRegister(instr->GetRd());
11569   SimVRegister& zn = ReadVRegister(instr->GetRn());
11570 
11571   switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
11572     case MOVPRFX_z_z:
11573       mov(kFormatVnD, zd, zn);  // The lane size is arbitrary.
11574       break;
11575     default:
11576       VIXL_UNIMPLEMENTED();
11577       break;
11578   }
11579 }
11580 
11581 void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) {
11582   VectorFormat vform = instr->GetSVEVectorFormat();
11583 
11584   SimVRegister& zd = ReadVRegister(instr->GetRd());
11585   SimVRegister& zm = ReadVRegister(instr->GetRm());
11586 
11587   SimVRegister result;
11588   switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
11589     case MLA_z_p_zzz:
11590       mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11591       break;
11592     case MLS_z_p_zzz:
11593       mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11594       break;
11595     case MAD_z_p_zzz:
11596       // 'za' is encoded in 'Rn'.
11597       mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11598       break;
11599     case MSB_z_p_zzz: {
11600       // 'za' is encoded in 'Rn'.
11601       mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11602       break;
11603     }
11604     default:
11605       VIXL_UNIMPLEMENTED();
11606       break;
11607   }
11608   mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result);
11609 }
11610 
11611 void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
11612   VectorFormat vform = instr->GetSVEVectorFormat();
11613   SimVRegister& zda = ReadVRegister(instr->GetRd());
11614   SimVRegister& zn = ReadVRegister(instr->GetRn());
11615   SimVRegister& zm = ReadVRegister(instr->GetRm());
11616 
11617   switch (form_hash_) {
11618     case "sdot_z_zzz"_h:
11619       sdot(vform, zda, zn, zm);
11620       break;
11621     case "udot_z_zzz"_h:
11622       udot(vform, zda, zn, zm);
11623       break;
11624     case "usdot_z_zzz_s"_h:
11625       usdot(vform, zda, zn, zm);
11626       break;
11627     default:
11628       VIXL_UNIMPLEMENTED();
11629       break;
11630   }
11631 }
11632 
11633 void Simulator::VisitSVEMovprfx(const Instruction* instr) {
11634   VectorFormat vform = instr->GetSVEVectorFormat();
11635   SimVRegister& zn = ReadVRegister(instr->GetRn());
11636   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11637   SimVRegister& zd = ReadVRegister(instr->GetRd());
11638 
11639   switch (instr->Mask(SVEMovprfxMask)) {
11640     case MOVPRFX_z_p_z:
11641       if (instr->ExtractBit(16)) {
11642         mov_merging(vform, zd, pg, zn);
11643       } else {
11644         mov_zeroing(vform, zd, pg, zn);
11645       }
11646       break;
11647     default:
11648       VIXL_UNIMPLEMENTED();
11649       break;
11650   }
11651 }
11652 
11653 void Simulator::VisitSVEIntReduction(const Instruction* instr) {
11654   VectorFormat vform = instr->GetSVEVectorFormat();
11655   SimVRegister& vd = ReadVRegister(instr->GetRd());
11656   SimVRegister& zn = ReadVRegister(instr->GetRn());
11657   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11658 
11659   if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
11660     switch (instr->Mask(SVEIntReductionLogicalMask)) {
11661       case ANDV_r_p_z:
11662         andv(vform, vd, pg, zn);
11663         break;
11664       case EORV_r_p_z:
11665         eorv(vform, vd, pg, zn);
11666         break;
11667       case ORV_r_p_z:
11668         orv(vform, vd, pg, zn);
11669         break;
11670       default:
11671         VIXL_UNIMPLEMENTED();
11672         break;
11673     }
11674   } else {
11675     switch (instr->Mask(SVEIntReductionMask)) {
11676       case SADDV_r_p_z:
11677         saddv(vform, vd, pg, zn);
11678         break;
11679       case SMAXV_r_p_z:
11680         smaxv(vform, vd, pg, zn);
11681         break;
11682       case SMINV_r_p_z:
11683         sminv(vform, vd, pg, zn);
11684         break;
11685       case UADDV_r_p_z:
11686         uaddv(vform, vd, pg, zn);
11687         break;
11688       case UMAXV_r_p_z:
11689         umaxv(vform, vd, pg, zn);
11690         break;
11691       case UMINV_r_p_z:
11692         uminv(vform, vd, pg, zn);
11693         break;
11694       default:
11695         VIXL_UNIMPLEMENTED();
11696         break;
11697     }
11698   }
11699 }
11700 
11701 void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) {
11702   VectorFormat vform = instr->GetSVEVectorFormat();
11703   SimVRegister& zn = ReadVRegister(instr->GetRn());
11704 
11705   SimVRegister result;
11706   switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
11707     case ABS_z_p_z:
11708       abs(vform, result, zn);
11709       break;
11710     case CLS_z_p_z:
11711       cls(vform, result, zn);
11712       break;
11713     case CLZ_z_p_z:
11714       clz(vform, result, zn);
11715       break;
11716     case CNOT_z_p_z:
11717       cnot(vform, result, zn);
11718       break;
11719     case CNT_z_p_z:
11720       cnt(vform, result, zn);
11721       break;
11722     case FABS_z_p_z:
11723       fabs_(vform, result, zn);
11724       break;
11725     case FNEG_z_p_z:
11726       fneg(vform, result, zn);
11727       break;
11728     case NEG_z_p_z:
11729       neg(vform, result, zn);
11730       break;
11731     case NOT_z_p_z:
11732       not_(vform, result, zn);
11733       break;
11734     case SXTB_z_p_z:
11735     case SXTH_z_p_z:
11736     case SXTW_z_p_z:
11737       sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11738       break;
11739     case UXTB_z_p_z:
11740     case UXTH_z_p_z:
11741     case UXTW_z_p_z:
11742       uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11743       break;
11744     default:
11745       VIXL_UNIMPLEMENTED();
11746       break;
11747   }
11748 
11749   SimVRegister& zd = ReadVRegister(instr->GetRd());
11750   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11751   mov_merging(vform, zd, pg, result);
11752 }
11753 
11754 void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
11755   // There is only one instruction in this group.
11756   VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i);
11757 
11758   VectorFormat vform = instr->GetSVEVectorFormat();
11759   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
11760   SimVRegister& zd = ReadVRegister(instr->GetRd());
11761 
11762   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
11763 
11764   SimVRegister result;
11765   switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
11766     case FCPY_z_p_i: {
11767       int imm8 = instr->ExtractBits(12, 5);
11768       uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform),
11769                                            Instruction::Imm8ToFP64(imm8));
11770       dup_immediate(vform, result, value);
11771       break;
11772     }
11773     default:
11774       VIXL_UNIMPLEMENTED();
11775       break;
11776   }
11777   mov_merging(vform, zd, pg, result);
11778 }
11779 
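// ADD/SUB/SUBR and their saturating variants with an immediate operand: the
// 8-bit unsigned immediate is shifted left by 8 when bit 13 is set.
// Illustrative example:
//   add z0.h, z0.h, #1, lsl #8  // Add 256 to every .H lane.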
11780 void Simulator::VisitSVEIntAddSubtractImm_Unpredicated(
11781     const Instruction* instr) {
11782   VectorFormat vform = instr->GetSVEVectorFormat();
11783   SimVRegister& zd = ReadVRegister(instr->GetRd());
11784   SimVRegister scratch;
11785 
11786   uint64_t imm = instr->GetImmSVEIntWideUnsigned();
11787   imm <<= instr->ExtractBit(13) * 8;
11788 
11789   switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
11790     case ADD_z_zi:
11791       add_uint(vform, zd, zd, imm);
11792       break;
11793     case SQADD_z_zi:
11794       add_uint(vform, zd, zd, imm).SignedSaturate(vform);
11795       break;
11796     case SQSUB_z_zi:
11797       sub_uint(vform, zd, zd, imm).SignedSaturate(vform);
11798       break;
11799     case SUBR_z_zi:
11800       dup_immediate(vform, scratch, imm);
11801       sub(vform, zd, scratch, zd);
11802       break;
11803     case SUB_z_zi:
11804       sub_uint(vform, zd, zd, imm);
11805       break;
11806     case UQADD_z_zi:
11807       add_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11808       break;
11809     case UQSUB_z_zi:
11810       sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11811       break;
11812     default:
11813       VIXL_UNIMPLEMENTED();
11814   }
11815 }
11816 
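// DUP (immediate): broadcast a sign-extended 8-bit immediate, optionally
// shifted left by 8, to every lane of Zd. Illustrative example:
//   dup z0.s, #-1  // Every .S lane becomes 0xffffffff.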
11817 void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) {
11818   SimVRegister& zd = ReadVRegister(instr->GetRd());
11819 
11820   VectorFormat format = instr->GetSVEVectorFormat();
11821   int64_t imm = instr->GetImmSVEIntWideSigned();
11822   int shift = instr->ExtractBit(13) * 8;
11823   imm *= 1 << shift;
11824 
11825   switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
11826     case DUP_z_i:
11827       // The encoding of byte-sized lanes with lsl #8 is undefined.
11828       if ((format == kFormatVnB) && (shift == 8)) {
11829         VIXL_UNIMPLEMENTED();
11830       } else {
11831         dup_immediate(format, zd, imm);
11832       }
11833       break;
11834     default:
11835       VIXL_UNIMPLEMENTED();
11836       break;
11837   }
11838 }
11839 
11840 void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) {
11841   VectorFormat vform = instr->GetSVEVectorFormat();
11842   SimVRegister& zd = ReadVRegister(instr->GetRd());
11843 
11844   switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
11845     case FDUP_z_i:
11846       switch (vform) {
11847         case kFormatVnH:
11848           dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16()));
11849           break;
11850         case kFormatVnS:
11851           dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32()));
11852           break;
11853         case kFormatVnD:
11854           dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64()));
11855           break;
11856         default:
11857           VIXL_UNIMPLEMENTED();
11858       }
11859       break;
11860     default:
11861       VIXL_UNIMPLEMENTED();
11862       break;
11863   }
11864 }
11865 
11866 void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
11867     const Instruction* instr) {
11868   switch (instr->Mask(
11869       SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
11870     case LD1H_z_p_bz_s_x32_scaled:
11871     case LD1SH_z_p_bz_s_x32_scaled:
11872     case LDFF1H_z_p_bz_s_x32_scaled:
11873     case LDFF1SH_z_p_bz_s_x32_scaled:
11874       break;
11875     default:
11876       VIXL_UNIMPLEMENTED();
11877       break;
11878   }
11879 
11880   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11881   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11882 }
11883 
11884 void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
11885     const Instruction* instr) {
11886   switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
11887     case LD1B_z_p_bz_s_x32_unscaled:
11888     case LD1H_z_p_bz_s_x32_unscaled:
11889     case LD1SB_z_p_bz_s_x32_unscaled:
11890     case LD1SH_z_p_bz_s_x32_unscaled:
11891     case LD1W_z_p_bz_s_x32_unscaled:
11892     case LDFF1B_z_p_bz_s_x32_unscaled:
11893     case LDFF1H_z_p_bz_s_x32_unscaled:
11894     case LDFF1SB_z_p_bz_s_x32_unscaled:
11895     case LDFF1SH_z_p_bz_s_x32_unscaled:
11896     case LDFF1W_z_p_bz_s_x32_unscaled:
11897       break;
11898     default:
11899       VIXL_UNIMPLEMENTED();
11900       break;
11901   }
11902 
11903   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11904   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11905 }
11906 
11907 void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm(
11908     const Instruction* instr) {
11909   switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
11910     case LD1B_z_p_ai_s:
11911       VIXL_UNIMPLEMENTED();
11912       break;
11913     case LD1H_z_p_ai_s:
11914       VIXL_UNIMPLEMENTED();
11915       break;
11916     case LD1SB_z_p_ai_s:
11917       VIXL_UNIMPLEMENTED();
11918       break;
11919     case LD1SH_z_p_ai_s:
11920       VIXL_UNIMPLEMENTED();
11921       break;
11922     case LD1W_z_p_ai_s:
11923       VIXL_UNIMPLEMENTED();
11924       break;
11925     case LDFF1B_z_p_ai_s:
11926       VIXL_UNIMPLEMENTED();
11927       break;
11928     case LDFF1H_z_p_ai_s:
11929       VIXL_UNIMPLEMENTED();
11930       break;
11931     case LDFF1SB_z_p_ai_s:
11932       VIXL_UNIMPLEMENTED();
11933       break;
11934     case LDFF1SH_z_p_ai_s:
11935       VIXL_UNIMPLEMENTED();
11936       break;
11937     case LDFF1W_z_p_ai_s:
11938       VIXL_UNIMPLEMENTED();
11939       break;
11940     default:
11941       VIXL_UNIMPLEMENTED();
11942       break;
11943   }
11944 }
11945 
11946 void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
11947     const Instruction* instr) {
11948   switch (
11949       instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
11950     case LD1W_z_p_bz_s_x32_scaled:
11951     case LDFF1W_z_p_bz_s_x32_scaled:
11952       break;
11953     default:
11954       VIXL_UNIMPLEMENTED();
11955       break;
11956   }
11957 
11958   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11959   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11960 }
11961 
11962 void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
11963     const Instruction* instr) {
11964   switch (
11965       instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
11966     // Ignore prefetch hint instructions.
11967     case PRFB_i_p_bz_s_x32_scaled:
11968     case PRFD_i_p_bz_s_x32_scaled:
11969     case PRFH_i_p_bz_s_x32_scaled:
11970     case PRFW_i_p_bz_s_x32_scaled:
11971       break;
11972     default:
11973       VIXL_UNIMPLEMENTED();
11974       break;
11975   }
11976 }
11977 
11978 void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm(
11979     const Instruction* instr) {
11980   switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
11981     // Ignore prefetch hint instructions.
11982     case PRFB_i_p_ai_s:
11983     case PRFD_i_p_ai_s:
11984     case PRFH_i_p_ai_s:
11985     case PRFW_i_p_ai_s:
11986       break;
11987     default:
11988       VIXL_UNIMPLEMENTED();
11989       break;
11990   }
11991 }
11992 
11993 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm(
11994     const Instruction* instr) {
11995   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
11996     // Ignore prefetch hint instructions.
11997     case PRFB_i_p_bi_s:
11998     case PRFD_i_p_bi_s:
11999     case PRFH_i_p_bi_s:
12000     case PRFW_i_p_bi_s:
12001       break;
12002     default:
12003       VIXL_UNIMPLEMENTED();
12004       break;
12005   }
12006 }
12007 
12008 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar(
12009     const Instruction* instr) {
12010   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
12011     // Ignore prefetch hint instructions.
12012     case PRFB_i_p_br_s:
12013     case PRFD_i_p_br_s:
12014     case PRFH_i_p_br_s:
12015     case PRFW_i_p_br_s:
12016       if (instr->GetRm() == kZeroRegCode) {
12017         VIXL_UNIMPLEMENTED();
12018       }
12019       break;
12020     default:
12021       VIXL_UNIMPLEMENTED();
12022       break;
12023   }
12024 }
12025 
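// LD1R{B,H,W,D,SB,SH,SW}: load one element from [Xn + imm6 * msize], optionally
// sign-extend it to the element size, and broadcast it to all active lanes of
// Zt; inactive lanes are zeroed. Illustrative example:
//   ld1rw {z0.s}, p0/z, [x0, #4]  // Broadcast the word at x0 + 4.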
12026 void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) {
12027   bool is_signed;
12028   switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
12029     case LD1RB_z_p_bi_u8:
12030     case LD1RB_z_p_bi_u16:
12031     case LD1RB_z_p_bi_u32:
12032     case LD1RB_z_p_bi_u64:
12033     case LD1RH_z_p_bi_u16:
12034     case LD1RH_z_p_bi_u32:
12035     case LD1RH_z_p_bi_u64:
12036     case LD1RW_z_p_bi_u32:
12037     case LD1RW_z_p_bi_u64:
12038     case LD1RD_z_p_bi_u64:
12039       is_signed = false;
12040       break;
12041     case LD1RSB_z_p_bi_s16:
12042     case LD1RSB_z_p_bi_s32:
12043     case LD1RSB_z_p_bi_s64:
12044     case LD1RSH_z_p_bi_s32:
12045     case LD1RSH_z_p_bi_s64:
12046     case LD1RSW_z_p_bi_s64:
12047       is_signed = true;
12048       break;
12049     default:
12050       // This encoding group is complete, so no other values should be possible.
12051       VIXL_UNREACHABLE();
12052       is_signed = false;
12053       break;
12054   }
12055 
12056   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12057   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13);
12058   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12059   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12060   uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
12061   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset;
12062   VectorFormat unpack_vform =
12063       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
12064   SimVRegister temp;
12065   ld1r(vform, unpack_vform, temp, base, is_signed);
12066   mov_zeroing(vform,
12067               ReadVRegister(instr->GetRt()),
12068               ReadPRegister(instr->GetPgLow8()),
12069               temp);
12070 }
12071 
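// LDR (predicate): load Pt from [Xn + imm9 * PL], where PL is the predicate
// length in bytes (one eighth of the vector length). Illustrative example,
// assuming a 256-bit vector length (PL = 4 bytes):
//   ldr p0, [x0, #2, mul vl]  // Read 4 bytes from x0 + 8.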
12072 void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) {
12073   switch (instr->Mask(SVELoadPredicateRegisterMask)) {
12074     case LDR_p_bi: {
12075       SimPRegister& pt = ReadPRegister(instr->GetPt());
12076       int pl = GetPredicateLengthInBytes();
12077       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12078       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12079       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12080       uint64_t address = base + multiplier * pl;
12081       for (int i = 0; i < pl; i++) {
12082         pt.Insert(i, MemRead<uint8_t>(address + i));
12083       }
12084       LogPRead(instr->GetPt(), address);
12085       break;
12086     }
12087     default:
12088       VIXL_UNIMPLEMENTED();
12089       break;
12090   }
12091 }
12092 
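// LDR (vector): load Zt from [Xn + imm9 * VL], where VL is the vector length
// in bytes; the register is filled with a byte-by-byte copy. Illustrative
// example, assuming VL = 32 bytes:
//   ldr z0, [x0, #1, mul vl]  // Read 32 bytes from x0 + 32.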
12093 void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
12094   switch (instr->Mask(SVELoadVectorRegisterMask)) {
12095     case LDR_z_bi: {
12096       SimVRegister& zt = ReadVRegister(instr->GetRt());
12097       int vl = GetVectorLengthInBytes();
12098       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12099       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12100       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12101       uint64_t address = base + multiplier * vl;
12102       for (int i = 0; i < vl; i++) {
12103         zt.Insert(i, MemRead<uint8_t>(address + i));
12104       }
12105       LogZRead(instr->GetRt(), address);
12106       break;
12107     }
12108     default:
12109       VIXL_UNIMPLEMENTED();
12110       break;
12111   }
12112 }
12113 
12114 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
12115     const Instruction* instr) {
12116   switch (instr->Mask(
12117       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
12118     case LD1D_z_p_bz_d_x32_scaled:
12119     case LD1H_z_p_bz_d_x32_scaled:
12120     case LD1SH_z_p_bz_d_x32_scaled:
12121     case LD1SW_z_p_bz_d_x32_scaled:
12122     case LD1W_z_p_bz_d_x32_scaled:
12123     case LDFF1H_z_p_bz_d_x32_scaled:
12124     case LDFF1W_z_p_bz_d_x32_scaled:
12125     case LDFF1D_z_p_bz_d_x32_scaled:
12126     case LDFF1SH_z_p_bz_d_x32_scaled:
12127     case LDFF1SW_z_p_bz_d_x32_scaled:
12128       break;
12129     default:
12130       VIXL_UNIMPLEMENTED();
12131       break;
12132   }
12133 
12134   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
12135   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
12136 }
12137 
12138 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
12139     const Instruction* instr) {
12140   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
12141     case LD1D_z_p_bz_d_64_scaled:
12142     case LD1H_z_p_bz_d_64_scaled:
12143     case LD1SH_z_p_bz_d_64_scaled:
12144     case LD1SW_z_p_bz_d_64_scaled:
12145     case LD1W_z_p_bz_d_64_scaled:
12146     case LDFF1H_z_p_bz_d_64_scaled:
12147     case LDFF1W_z_p_bz_d_64_scaled:
12148     case LDFF1D_z_p_bz_d_64_scaled:
12149     case LDFF1SH_z_p_bz_d_64_scaled:
12150     case LDFF1SW_z_p_bz_d_64_scaled:
12151       break;
12152     default:
12153       VIXL_UNIMPLEMENTED();
12154       break;
12155   }
12156 
12157   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL);
12158 }
12159 
12160 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
12161     const Instruction* instr) {
12162   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
12163     case LD1B_z_p_bz_d_64_unscaled:
12164     case LD1D_z_p_bz_d_64_unscaled:
12165     case LD1H_z_p_bz_d_64_unscaled:
12166     case LD1SB_z_p_bz_d_64_unscaled:
12167     case LD1SH_z_p_bz_d_64_unscaled:
12168     case LD1SW_z_p_bz_d_64_unscaled:
12169     case LD1W_z_p_bz_d_64_unscaled:
12170     case LDFF1B_z_p_bz_d_64_unscaled:
12171     case LDFF1D_z_p_bz_d_64_unscaled:
12172     case LDFF1H_z_p_bz_d_64_unscaled:
12173     case LDFF1SB_z_p_bz_d_64_unscaled:
12174     case LDFF1SH_z_p_bz_d_64_unscaled:
12175     case LDFF1SW_z_p_bz_d_64_unscaled:
12176     case LDFF1W_z_p_bz_d_64_unscaled:
12177       break;
12178     default:
12179       VIXL_UNIMPLEMENTED();
12180       break;
12181   }
12182 
12183   SVEGatherLoadScalarPlusVectorHelper(instr,
12184                                       kFormatVnD,
12185                                       NO_SVE_OFFSET_MODIFIER);
12186 }
12187 
12188 void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
12189     const Instruction* instr) {
12190   switch (instr->Mask(
12191       SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
12192     case LD1B_z_p_bz_d_x32_unscaled:
12193     case LD1D_z_p_bz_d_x32_unscaled:
12194     case LD1H_z_p_bz_d_x32_unscaled:
12195     case LD1SB_z_p_bz_d_x32_unscaled:
12196     case LD1SH_z_p_bz_d_x32_unscaled:
12197     case LD1SW_z_p_bz_d_x32_unscaled:
12198     case LD1W_z_p_bz_d_x32_unscaled:
12199     case LDFF1B_z_p_bz_d_x32_unscaled:
12200     case LDFF1H_z_p_bz_d_x32_unscaled:
12201     case LDFF1W_z_p_bz_d_x32_unscaled:
12202     case LDFF1D_z_p_bz_d_x32_unscaled:
12203     case LDFF1SB_z_p_bz_d_x32_unscaled:
12204     case LDFF1SH_z_p_bz_d_x32_unscaled:
12205     case LDFF1SW_z_p_bz_d_x32_unscaled:
12206       break;
12207     default:
12208       VIXL_UNIMPLEMENTED();
12209       break;
12210   }
12211 
12212   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
12213   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
12214 }
12215 
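// 64-bit gather load, vector-plus-immediate form: each active .D lane of Zn
// provides a base address, offset by an unsigned 5-bit immediate scaled by the
// memory element size. The first-fault (LDFF1*) variants are not implemented.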
12216 void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm(
12217     const Instruction* instr) {
12218   switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
12219     case LD1B_z_p_ai_d:
12220     case LD1D_z_p_ai_d:
12221     case LD1H_z_p_ai_d:
12222     case LD1SB_z_p_ai_d:
12223     case LD1SH_z_p_ai_d:
12224     case LD1SW_z_p_ai_d:
12225     case LD1W_z_p_ai_d:
12226     case LDFF1B_z_p_ai_d:
12227     case LDFF1D_z_p_ai_d:
12228     case LDFF1H_z_p_ai_d:
12229     case LDFF1SB_z_p_ai_d:
12230     case LDFF1SH_z_p_ai_d:
12231     case LDFF1SW_z_p_ai_d:
12232     case LDFF1W_z_p_ai_d:
12233       break;
12234     default:
12235       VIXL_UNIMPLEMENTED();
12236       break;
12237   }
12238   bool is_signed = instr->ExtractBit(14) == 0;
12239   bool is_ff = instr->ExtractBit(13) == 1;
12240   // Note that these instructions don't use the Dtype encoding.
12241   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
12242   uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2;
12243   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
12244   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12245   if (is_ff) {
12246     VIXL_UNIMPLEMENTED();
12247   } else {
12248     SVEStructuredLoadHelper(kFormatVnD,
12249                             ReadPRegister(instr->GetPgLow8()),
12250                             instr->GetRt(),
12251                             addr,
12252                             is_signed);
12253   }
12254 }
12255 
12256 void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
12257     const Instruction* instr) {
12258   switch (
12259       instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
12260     // Ignore prefetch hint instructions.
12261     case PRFB_i_p_bz_d_64_scaled:
12262     case PRFD_i_p_bz_d_64_scaled:
12263     case PRFH_i_p_bz_d_64_scaled:
12264     case PRFW_i_p_bz_d_64_scaled:
12265       break;
12266     default:
12267       VIXL_UNIMPLEMENTED();
12268       break;
12269   }
12270 }
12271 
12272 void Simulator::
12273     VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
12274         const Instruction* instr) {
12275   switch (instr->Mask(
12276       SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12277     // Ignore prefetch hint instructions.
12278     case PRFB_i_p_bz_d_x32_scaled:
12279     case PRFD_i_p_bz_d_x32_scaled:
12280     case PRFH_i_p_bz_d_x32_scaled:
12281     case PRFW_i_p_bz_d_x32_scaled:
12282       break;
12283     default:
12284       VIXL_UNIMPLEMENTED();
12285       break;
12286   }
12287 }
12288 
12289 void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm(
12290     const Instruction* instr) {
12291   switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
12292     // Ignore prefetch hint instructions.
12293     case PRFB_i_p_ai_d:
12294     case PRFD_i_p_ai_d:
12295     case PRFH_i_p_ai_d:
12296     case PRFW_i_p_ai_d:
12297       break;
12298     default:
12299       VIXL_UNIMPLEMENTED();
12300       break;
12301   }
12302 }
12303 
12304 void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
12305     const Instruction* instr) {
12306   bool is_signed;
12307   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
12308     case LDFF1B_z_p_br_u8:
12309     case LDFF1B_z_p_br_u16:
12310     case LDFF1B_z_p_br_u32:
12311     case LDFF1B_z_p_br_u64:
12312     case LDFF1H_z_p_br_u16:
12313     case LDFF1H_z_p_br_u32:
12314     case LDFF1H_z_p_br_u64:
12315     case LDFF1W_z_p_br_u32:
12316     case LDFF1W_z_p_br_u64:
12317     case LDFF1D_z_p_br_u64:
12318       is_signed = false;
12319       break;
12320     case LDFF1SB_z_p_br_s16:
12321     case LDFF1SB_z_p_br_s32:
12322     case LDFF1SB_z_p_br_s64:
12323     case LDFF1SH_z_p_br_s32:
12324     case LDFF1SH_z_p_br_s64:
12325     case LDFF1SW_z_p_br_s64:
12326       is_signed = true;
12327       break;
12328     default:
12329       // This encoding group is complete, so no other values should be possible.
12330       VIXL_UNREACHABLE();
12331       is_signed = false;
12332       break;
12333   }
12334 
12335   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12336   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12337   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12338   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12339   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12340   uint64_t offset = ReadXRegister(instr->GetRm());
12341   offset <<= msize_in_bytes_log2;
12342   LogicSVEAddressVector addr(base + offset);
12343   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12344   SVEFaultTolerantLoadHelper(vform,
12345                              ReadPRegister(instr->GetPgLow8()),
12346                              instr->GetRt(),
12347                              addr,
12348                              kSVEFirstFaultLoad,
12349                              is_signed);
12350 }
12351 
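// LDNF1*: non-fault contiguous loads, modelled via the fault-tolerant load
// helper with kSVENonFaultLoad. The immediate offset is imm4 * VL, divided by
// the ratio of the vector element size to the memory element size.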
12352 void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
12353     const Instruction* instr) {
12354   bool is_signed = false;
12355   switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
12356     case LDNF1B_z_p_bi_u16:
12357     case LDNF1B_z_p_bi_u32:
12358     case LDNF1B_z_p_bi_u64:
12359     case LDNF1B_z_p_bi_u8:
12360     case LDNF1D_z_p_bi_u64:
12361     case LDNF1H_z_p_bi_u16:
12362     case LDNF1H_z_p_bi_u32:
12363     case LDNF1H_z_p_bi_u64:
12364     case LDNF1W_z_p_bi_u32:
12365     case LDNF1W_z_p_bi_u64:
12366       break;
12367     case LDNF1SB_z_p_bi_s16:
12368     case LDNF1SB_z_p_bi_s32:
12369     case LDNF1SB_z_p_bi_s64:
12370     case LDNF1SH_z_p_bi_s32:
12371     case LDNF1SH_z_p_bi_s64:
12372     case LDNF1SW_z_p_bi_s64:
12373       is_signed = true;
12374       break;
12375     default:
12376       VIXL_UNIMPLEMENTED();
12377       break;
12378   }
12379   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12380   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12381   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12382   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12383   int vl = GetVectorLengthInBytes();
12384   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12385   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12386   uint64_t offset =
12387       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12388   LogicSVEAddressVector addr(base + offset);
12389   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12390   SVEFaultTolerantLoadHelper(vform,
12391                              ReadPRegister(instr->GetPgLow8()),
12392                              instr->GetRt(),
12393                              addr,
12394                              kSVENonFaultLoad,
12395                              is_signed);
12396 }
12397 
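// LDNT1* (scalar-plus-immediate): non-temporal contiguous loads. The
// non-temporal hint has no effect in the simulator, so these behave as
// ordinary contiguous loads from [Xn + imm4 * VL].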
12398 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
12399     const Instruction* instr) {
12400   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12401   VectorFormat vform = kFormatUndefined;
12402 
12403   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
12404     case LDNT1B_z_p_bi_contiguous:
12405       vform = kFormatVnB;
12406       break;
12407     case LDNT1D_z_p_bi_contiguous:
12408       vform = kFormatVnD;
12409       break;
12410     case LDNT1H_z_p_bi_contiguous:
12411       vform = kFormatVnH;
12412       break;
12413     case LDNT1W_z_p_bi_contiguous:
12414       vform = kFormatVnS;
12415       break;
12416     default:
12417       VIXL_UNIMPLEMENTED();
12418       break;
12419   }
12420   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12421   int vl = GetVectorLengthInBytes();
12422   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12423   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12424   LogicSVEAddressVector addr(base + offset);
12425   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12426   SVEStructuredLoadHelper(vform,
12427                           pg,
12428                           instr->GetRt(),
12429                           addr,
12430                           /* is_signed = */ false);
12431 }
12432 
12433 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
12434     const Instruction* instr) {
12435   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12436   VectorFormat vform = kFormatUndefined;
12437 
12438   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
12439     case LDNT1B_z_p_br_contiguous:
12440       vform = kFormatVnB;
12441       break;
12442     case LDNT1D_z_p_br_contiguous:
12443       vform = kFormatVnD;
12444       break;
12445     case LDNT1H_z_p_br_contiguous:
12446       vform = kFormatVnH;
12447       break;
12448     case LDNT1W_z_p_br_contiguous:
12449       vform = kFormatVnS;
12450       break;
12451     default:
12452       VIXL_UNIMPLEMENTED();
12453       break;
12454   }
12455   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12456   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12457   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12458   LogicSVEAddressVector addr(base + offset);
12459   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12460   SVEStructuredLoadHelper(vform,
12461                           pg,
12462                           instr->GetRt(),
12463                           addr,
12464                           /* is_signed = */ false);
12465 }
12466 
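// LD1RQ* / LD1RO*: load a 128-bit (quadword) or 256-bit (octword) block from
// [Xn + imm4 * 16] or [Xn + imm4 * 32] respectively, zero the block's inactive
// elements, then replicate the block across the whole vector.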
12467 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
12468     const Instruction* instr) {
12469   SimVRegister& zt = ReadVRegister(instr->GetRt());
12470   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12471 
12472   uint64_t dwords = 2;
12473   VectorFormat vform_dst = kFormatVnQ;
12474   if ((form_hash_ == "ld1rob_z_p_bi_u8"_h) ||
12475       (form_hash_ == "ld1roh_z_p_bi_u16"_h) ||
12476       (form_hash_ == "ld1row_z_p_bi_u32"_h) ||
12477       (form_hash_ == "ld1rod_z_p_bi_u64"_h)) {
12478     dwords = 4;
12479     vform_dst = kFormatVnO;
12480   }
12481 
12482   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12483   uint64_t offset =
12484       instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes;
12485   int msz = instr->ExtractBits(24, 23);
12486   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12487 
12488   for (unsigned i = 0; i < dwords; i++) {
12489     ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes));
12490   }
12491   mov_zeroing(vform, zt, pg, zt);
12492   dup_element(vform_dst, zt, zt, 0);
12493 }
12494 
12495 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
12496     const Instruction* instr) {
12497   SimVRegister& zt = ReadVRegister(instr->GetRt());
12498   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12499 
12500   uint64_t bytes = 16;
12501   VectorFormat vform_dst = kFormatVnQ;
12502   if ((form_hash_ == "ld1rob_z_p_br_contiguous"_h) ||
12503       (form_hash_ == "ld1roh_z_p_br_contiguous"_h) ||
12504       (form_hash_ == "ld1row_z_p_br_contiguous"_h) ||
12505       (form_hash_ == "ld1rod_z_p_br_contiguous"_h)) {
12506     bytes = 32;
12507     vform_dst = kFormatVnO;
12508   }
12509 
12510   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12511   uint64_t offset = ReadXRegister(instr->GetRm());
12512   int msz = instr->ExtractBits(24, 23);
12513   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12514   offset <<= msz;
12515   for (unsigned i = 0; i < bytes; i++) {
12516     ld1(kFormatVnB, zt, i, addr + offset + i);
12517   }
12518   mov_zeroing(vform, zt, pg, zt);
12519   dup_element(vform_dst, zt, zt, 0);
12520 }
12521 
12522 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
12523     const Instruction* instr) {
12524   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
12525     case LD2B_z_p_bi_contiguous:
12526     case LD2D_z_p_bi_contiguous:
12527     case LD2H_z_p_bi_contiguous:
12528     case LD2W_z_p_bi_contiguous:
12529     case LD3B_z_p_bi_contiguous:
12530     case LD3D_z_p_bi_contiguous:
12531     case LD3H_z_p_bi_contiguous:
12532     case LD3W_z_p_bi_contiguous:
12533     case LD4B_z_p_bi_contiguous:
12534     case LD4D_z_p_bi_contiguous:
12535     case LD4H_z_p_bi_contiguous:
12536     case LD4W_z_p_bi_contiguous: {
12537       int vl = GetVectorLengthInBytes();
12538       int msz = instr->ExtractBits(24, 23);
12539       int reg_count = instr->ExtractBits(22, 21) + 1;
12540       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
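      // The 4-bit immediate field is encoded in multiples of the full
      // transfer size (reg_count consecutive vectors), e.g. for LD3H a field
      // value of 1 advances the base by 3 * VL bytes.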
12541       LogicSVEAddressVector addr(
12542           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12543       addr.SetMsizeInBytesLog2(msz);
12544       addr.SetRegCount(reg_count);
12545       SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12546                               ReadPRegister(instr->GetPgLow8()),
12547                               instr->GetRt(),
12548                               addr);
12549       break;
12550     }
12551     default:
12552       VIXL_UNIMPLEMENTED();
12553       break;
12554   }
12555 }
12556 
12557 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar(
12558     const Instruction* instr) {
12559   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
12560     case LD2B_z_p_br_contiguous:
12561     case LD2D_z_p_br_contiguous:
12562     case LD2H_z_p_br_contiguous:
12563     case LD2W_z_p_br_contiguous:
12564     case LD3B_z_p_br_contiguous:
12565     case LD3D_z_p_br_contiguous:
12566     case LD3H_z_p_br_contiguous:
12567     case LD3W_z_p_br_contiguous:
12568     case LD4B_z_p_br_contiguous:
12569     case LD4D_z_p_br_contiguous:
12570     case LD4H_z_p_br_contiguous:
12571     case LD4W_z_p_br_contiguous: {
12572       int msz = instr->ExtractBits(24, 23);
12573       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
12574       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12575       LogicSVEAddressVector addr(
12576           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12577       addr.SetMsizeInBytesLog2(msz);
12578       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
12579       SVEStructuredLoadHelper(vform,
12580                               ReadPRegister(instr->GetPgLow8()),
12581                               instr->GetRt(),
12582                               addr,
12583                               /* is_signed = */ false);
12584       break;
12585     }
12586     default:
12587       VIXL_UNIMPLEMENTED();
12588       break;
12589   }
12590 }
12591 
12592 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
12593     const Instruction* instr) {
12594   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
12595     case ST1H_z_p_bz_s_x32_scaled:
12596     case ST1W_z_p_bz_s_x32_scaled: {
12597       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12598       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12599       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12600       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12601       SVEOffsetModifier mod =
12602           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
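      // Bit 14 selects sign- (SXTW) or zero-extension (UXTW) of the 32-bit
      // offsets held in Zm, and the scaled forms shift each offset left by
      // the access size, e.g. ST1W { z0.s }, p0, [x0, z1.s, SXTW #2].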
12603       LogicSVEAddressVector addr(base,
12604                                  &ReadVRegister(instr->GetRm()),
12605                                  kFormatVnS,
12606                                  mod,
12607                                  scale);
12608       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12609       SVEStructuredStoreHelper(kFormatVnS,
12610                                ReadPRegister(instr->GetPgLow8()),
12611                                instr->GetRt(),
12612                                addr);
12613       break;
12614     }
12615     default:
12616       VIXL_UNIMPLEMENTED();
12617       break;
12618   }
12619 }
12620 
12621 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
12622     const Instruction* instr) {
12623   switch (
12624       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
12625     case ST1B_z_p_bz_s_x32_unscaled:
12626     case ST1H_z_p_bz_s_x32_unscaled:
12627     case ST1W_z_p_bz_s_x32_unscaled: {
12628       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12629       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12630       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12631       SVEOffsetModifier mod =
12632           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12633       LogicSVEAddressVector addr(base,
12634                                  &ReadVRegister(instr->GetRm()),
12635                                  kFormatVnS,
12636                                  mod);
12637       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12638       SVEStructuredStoreHelper(kFormatVnS,
12639                                ReadPRegister(instr->GetPgLow8()),
12640                                instr->GetRt(),
12641                                addr);
12642       break;
12643     }
12644     default:
12645       VIXL_UNIMPLEMENTED();
12646       break;
12647   }
12648 }
12649 
12650 void Simulator::VisitSVE32BitScatterStore_VectorPlusImm(
12651     const Instruction* instr) {
12652   int msz = 0;
12653   switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
12654     case ST1B_z_p_ai_s:
12655       msz = 0;
12656       break;
12657     case ST1H_z_p_ai_s:
12658       msz = 1;
12659       break;
12660     case ST1W_z_p_ai_s:
12661       msz = 2;
12662       break;
12663     default:
12664       VIXL_UNIMPLEMENTED();
12665       break;
12666   }
12667   uint64_t imm = instr->ExtractBits(20, 16) << msz;
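  // The 5-bit immediate is in units of the access size: byte offsets of
  // 0-31 for ST1B, 0-62 for ST1H and 0-124 for ST1W.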
12668   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS);
12669   addr.SetMsizeInBytesLog2(msz);
12670   SVEStructuredStoreHelper(kFormatVnS,
12671                            ReadPRegister(instr->GetPgLow8()),
12672                            instr->GetRt(),
12673                            addr);
12674 }
12675 
12676 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
12677     const Instruction* instr) {
12678   switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
12679     case ST1D_z_p_bz_d_64_scaled:
12680     case ST1H_z_p_bz_d_64_scaled:
12681     case ST1W_z_p_bz_d_64_scaled: {
12682       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12683       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12684       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12685       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12686       LogicSVEAddressVector addr(base,
12687                                  &ReadVRegister(instr->GetRm()),
12688                                  kFormatVnD,
12689                                  SVE_LSL,
12690                                  scale);
12691       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12692       SVEStructuredStoreHelper(kFormatVnD,
12693                                ReadPRegister(instr->GetPgLow8()),
12694                                instr->GetRt(),
12695                                addr);
12696       break;
12697     }
12698     default:
12699       VIXL_UNIMPLEMENTED();
12700       break;
12701   }
12702 }
12703 
12704 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
12705     const Instruction* instr) {
12706   switch (
12707       instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
12708     case ST1B_z_p_bz_d_64_unscaled:
12709     case ST1D_z_p_bz_d_64_unscaled:
12710     case ST1H_z_p_bz_d_64_unscaled:
12711     case ST1W_z_p_bz_d_64_unscaled: {
12712       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12713       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12714       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12715       LogicSVEAddressVector addr(base,
12716                                  &ReadVRegister(instr->GetRm()),
12717                                  kFormatVnD,
12718                                  NO_SVE_OFFSET_MODIFIER);
12719       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12720       SVEStructuredStoreHelper(kFormatVnD,
12721                                ReadPRegister(instr->GetPgLow8()),
12722                                instr->GetRt(),
12723                                addr);
12724       break;
12725     }
12726     default:
12727       VIXL_UNIMPLEMENTED();
12728       break;
12729   }
12730 }
12731 
12732 void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
12733     const Instruction* instr) {
12734   switch (instr->Mask(
12735       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12736     case ST1D_z_p_bz_d_x32_scaled:
12737     case ST1H_z_p_bz_d_x32_scaled:
12738     case ST1W_z_p_bz_d_x32_scaled: {
12739       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12740       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12741       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12742       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12743       SVEOffsetModifier mod =
12744           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12745       LogicSVEAddressVector addr(base,
12746                                  &ReadVRegister(instr->GetRm()),
12747                                  kFormatVnD,
12748                                  mod,
12749                                  scale);
12750       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12751       SVEStructuredStoreHelper(kFormatVnD,
12752                                ReadPRegister(instr->GetPgLow8()),
12753                                instr->GetRt(),
12754                                addr);
12755       break;
12756     }
12757     default:
12758       VIXL_UNIMPLEMENTED();
12759       break;
12760   }
12761 }
12762 
12763 void Simulator::
12764     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
12765         const Instruction* instr) {
12766   switch (instr->Mask(
12767       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
12768     case ST1B_z_p_bz_d_x32_unscaled:
12769     case ST1D_z_p_bz_d_x32_unscaled:
12770     case ST1H_z_p_bz_d_x32_unscaled:
12771     case ST1W_z_p_bz_d_x32_unscaled: {
12772       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12773       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12774       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12775       SVEOffsetModifier mod =
12776           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12777       LogicSVEAddressVector addr(base,
12778                                  &ReadVRegister(instr->GetRm()),
12779                                  kFormatVnD,
12780                                  mod);
12781       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12782       SVEStructuredStoreHelper(kFormatVnD,
12783                                ReadPRegister(instr->GetPgLow8()),
12784                                instr->GetRt(),
12785                                addr);
12786       break;
12787     }
12788     default:
12789       VIXL_UNIMPLEMENTED();
12790       break;
12791   }
12792 }
12793 
12794 void Simulator::VisitSVE64BitScatterStore_VectorPlusImm(
12795     const Instruction* instr) {
12796   int msz = 0;
12797   switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
12798     case ST1B_z_p_ai_d:
12799       msz = 0;
12800       break;
12801     case ST1D_z_p_ai_d:
12802       msz = 3;
12803       break;
12804     case ST1H_z_p_ai_d:
12805       msz = 1;
12806       break;
12807     case ST1W_z_p_ai_d:
12808       msz = 2;
12809       break;
12810     default:
12811       VIXL_UNIMPLEMENTED();
12812       break;
12813   }
12814   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12815   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
12816   addr.SetMsizeInBytesLog2(msz);
12817   SVEStructuredStoreHelper(kFormatVnD,
12818                            ReadPRegister(instr->GetPgLow8()),
12819                            instr->GetRt(),
12820                            addr);
12821 }
12822 
12823 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
12824     const Instruction* instr) {
12825   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12826   VectorFormat vform = kFormatUndefined;
12827 
12828   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
12829     case STNT1B_z_p_bi_contiguous:
12830       vform = kFormatVnB;
12831       break;
12832     case STNT1D_z_p_bi_contiguous:
12833       vform = kFormatVnD;
12834       break;
12835     case STNT1H_z_p_bi_contiguous:
12836       vform = kFormatVnH;
12837       break;
12838     case STNT1W_z_p_bi_contiguous:
12839       vform = kFormatVnS;
12840       break;
12841     default:
12842       VIXL_UNIMPLEMENTED();
12843       break;
12844   }
12845   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12846   int vl = GetVectorLengthInBytes();
12847   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12848   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12849   LogicSVEAddressVector addr(base + offset);
12850   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12851   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12852 }
12853 
12854 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
12855     const Instruction* instr) {
12856   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12857   VectorFormat vform = kFormatUndefined;
12858 
12859   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
12860     case STNT1B_z_p_br_contiguous:
12861       vform = kFormatVnB;
12862       break;
12863     case STNT1D_z_p_br_contiguous:
12864       vform = kFormatVnD;
12865       break;
12866     case STNT1H_z_p_br_contiguous:
12867       vform = kFormatVnH;
12868       break;
12869     case STNT1W_z_p_br_contiguous:
12870       vform = kFormatVnS;
12871       break;
12872     default:
12873       VIXL_UNIMPLEMENTED();
12874       break;
12875   }
12876   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12877   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12878   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12879   LogicSVEAddressVector addr(base + offset);
12880   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12881   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12882 }
12883 
12884 void Simulator::VisitSVEContiguousStore_ScalarPlusImm(
12885     const Instruction* instr) {
12886   switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
12887     case ST1B_z_p_bi:
12888     case ST1D_z_p_bi:
12889     case ST1H_z_p_bi:
12890     case ST1W_z_p_bi: {
12891       int vl = GetVectorLengthInBytes();
12892       int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12893       int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
12894       VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
12895       int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12896       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12897       uint64_t offset =
12898           (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12899       VectorFormat vform =
12900           SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12901       LogicSVEAddressVector addr(base + offset);
12902       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12903       SVEStructuredStoreHelper(vform,
12904                                ReadPRegister(instr->GetPgLow8()),
12905                                instr->GetRt(),
12906                                addr);
12907       break;
12908     }
12909     default:
12910       VIXL_UNIMPLEMENTED();
12911       break;
12912   }
12913 }
12914 
12915 void Simulator::VisitSVEContiguousStore_ScalarPlusScalar(
12916     const Instruction* instr) {
12917   switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
12918     case ST1B_z_p_br:
12919     case ST1D_z_p_br:
12920     case ST1H_z_p_br:
12921     case ST1W_z_p_br: {
12922       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12923       uint64_t offset = ReadXRegister(instr->GetRm());
12924       offset <<= instr->ExtractBits(24, 23);
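      // Xm is scaled by the memory access size (bits 24:23); the element size
      // used for predication is taken separately from bits 22:21.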
12925       VectorFormat vform =
12926           SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
12927       LogicSVEAddressVector addr(base + offset);
12928       addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
12929       SVEStructuredStoreHelper(vform,
12930                                ReadPRegister(instr->GetPgLow8()),
12931                                instr->GetRt(),
12932                                addr);
12933       break;
12934     }
12935     default:
12936       VIXL_UNIMPLEMENTED();
12937       break;
12938   }
12939 }
12940 
12941 void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
12942     const Instruction* instr) {
12943   VectorFormat vform = instr->GetSVEVectorFormat();
12944   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12945   SimVRegister z_result;
12946 
12947   switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
12948     case CPY_z_p_v:
12949       dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0);
12950       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
12951       break;
12952     default:
12953       VIXL_UNIMPLEMENTED();
12954       break;
12955   }
12956 }
12957 
12958 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm(
12959     const Instruction* instr) {
12960   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
12961     case ST2B_z_p_bi_contiguous:
12962     case ST2D_z_p_bi_contiguous:
12963     case ST2H_z_p_bi_contiguous:
12964     case ST2W_z_p_bi_contiguous:
12965     case ST3B_z_p_bi_contiguous:
12966     case ST3D_z_p_bi_contiguous:
12967     case ST3H_z_p_bi_contiguous:
12968     case ST3W_z_p_bi_contiguous:
12969     case ST4B_z_p_bi_contiguous:
12970     case ST4D_z_p_bi_contiguous:
12971     case ST4H_z_p_bi_contiguous:
12972     case ST4W_z_p_bi_contiguous: {
12973       int vl = GetVectorLengthInBytes();
12974       int msz = instr->ExtractBits(24, 23);
12975       int reg_count = instr->ExtractBits(22, 21) + 1;
12976       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12977       LogicSVEAddressVector addr(
12978           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12979       addr.SetMsizeInBytesLog2(msz);
12980       addr.SetRegCount(reg_count);
12981       SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12982                                ReadPRegister(instr->GetPgLow8()),
12983                                instr->GetRt(),
12984                                addr);
12985       break;
12986     }
12987     default:
12988       VIXL_UNIMPLEMENTED();
12989       break;
12990   }
12991 }
12992 
12993 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
12994     const Instruction* instr) {
12995   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
12996     case ST2B_z_p_br_contiguous:
12997     case ST2D_z_p_br_contiguous:
12998     case ST2H_z_p_br_contiguous:
12999     case ST2W_z_p_br_contiguous:
13000     case ST3B_z_p_br_contiguous:
13001     case ST3D_z_p_br_contiguous:
13002     case ST3H_z_p_br_contiguous:
13003     case ST3W_z_p_br_contiguous:
13004     case ST4B_z_p_br_contiguous:
13005     case ST4D_z_p_br_contiguous:
13006     case ST4H_z_p_br_contiguous:
13007     case ST4W_z_p_br_contiguous: {
13008       int msz = instr->ExtractBits(24, 23);
13009       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
13010       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
13011       LogicSVEAddressVector addr(
13012           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
13013       addr.SetMsizeInBytesLog2(msz);
13014       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
13015       SVEStructuredStoreHelper(vform,
13016                                ReadPRegister(instr->GetPgLow8()),
13017                                instr->GetRt(),
13018                                addr);
13019       break;
13020     }
13021     default:
13022       VIXL_UNIMPLEMENTED();
13023       break;
13024   }
13025 }
13026 
13027 void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
13028   switch (instr->Mask(SVEStorePredicateRegisterMask)) {
13029     case STR_p_bi: {
13030       SimPRegister& pt = ReadPRegister(instr->GetPt());
13031       int pl = GetPredicateLengthInBytes();
13032       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
13033       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
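      // The 9-bit signed immediate is split across bits [21:16] and [12:10]
      // and scales by the predicate length in bytes (VL / 8), so the store
      // covers exactly one predicate register's worth of memory.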
13034       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
13035       uint64_t address = base + multiplier * pl;
13036       for (int i = 0; i < pl; i++) {
13037         MemWrite(address + i, pt.GetLane<uint8_t>(i));
13038       }
13039       LogPWrite(instr->GetPt(), address);
13040       break;
13041     }
13042     default:
13043       VIXL_UNIMPLEMENTED();
13044       break;
13045   }
13046 }
13047 
13048 void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
13049   switch (instr->Mask(SVEStoreVectorRegisterMask)) {
13050     case STR_z_bi: {
13051       SimVRegister& zt = ReadVRegister(instr->GetRt());
13052       int vl = GetVectorLengthInBytes();
13053       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
13054       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
13055       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
13056       uint64_t address = base + multiplier * vl;
13057       for (int i = 0; i < vl; i++) {
13058         MemWrite(address + i, zt.GetLane<uint8_t>(i));
13059       }
13060       LogZWrite(instr->GetRt(), address);
13061       break;
13062     }
13063     default:
13064       VIXL_UNIMPLEMENTED();
13065       break;
13066   }
13067 }
13068 
13069 void Simulator::VisitSVEMulIndex(const Instruction* instr) {
13070   VectorFormat vform = instr->GetSVEVectorFormat();
13071   SimVRegister& zda = ReadVRegister(instr->GetRd());
13072   SimVRegister& zn = ReadVRegister(instr->GetRn());
13073   std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex();
13074   SimVRegister zm = ReadVRegister(zm_and_index.first);
13075   int index = zm_and_index.second;
13076 
13077   SimVRegister temp;
13078   dup_elements_to_segments(vform, temp, zm, index);
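  // Indexed dot products select one element per 128-bit segment: the element
  // at `index` within each segment of zm is broadcast across that segment
  // before the dot-product operations below.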
13079 
13080   switch (form_hash_) {
13081     case "sdot_z_zzzi_d"_h:
13082     case "sdot_z_zzzi_s"_h:
13083       sdot(vform, zda, zn, temp);
13084       break;
13085     case "udot_z_zzzi_d"_h:
13086     case "udot_z_zzzi_s"_h:
13087       udot(vform, zda, zn, temp);
13088       break;
13089     case "sudot_z_zzzi_s"_h:
13090       usdot(vform, zda, temp, zn);
13091       break;
13092     case "usdot_z_zzzi_s"_h:
13093       usdot(vform, zda, zn, temp);
13094       break;
13095     default:
13096       VIXL_UNIMPLEMENTED();
13097       break;
13098   }
13099 }
13100 
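// SMMLA/UMMLA/USMMLA treat each 128-bit segment of the destination as a 2x2
// matrix of 32-bit accumulators, multiplying a 2x8 matrix of bytes from the
// first source by an 8x2 matrix of bytes from the second. USMMLA multiplies
// unsigned first-source bytes by signed second-source bytes, hence only
// m_signed is set for it below.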
13101 void Simulator::SimulateMatrixMul(const Instruction* instr) {
13102   VectorFormat vform = kFormatVnS;
13103   SimVRegister& dn = ReadVRegister(instr->GetRd());
13104   SimVRegister& n = ReadVRegister(instr->GetRn());
13105   SimVRegister& m = ReadVRegister(instr->GetRm());
13106 
13107   bool n_signed = false;
13108   bool m_signed = false;
13109   switch (form_hash_) {
13110     case "smmla_asimdsame2_g"_h:
13111       vform = kFormat4S;
13112       VIXL_FALLTHROUGH();
13113     case "smmla_z_zzz"_h:
13114       n_signed = m_signed = true;
13115       break;
13116     case "ummla_asimdsame2_g"_h:
13117       vform = kFormat4S;
13118       VIXL_FALLTHROUGH();
13119     case "ummla_z_zzz"_h:
13120       // Nothing to do.
13121       break;
13122     case "usmmla_asimdsame2_g"_h:
13123       vform = kFormat4S;
13124       VIXL_FALLTHROUGH();
13125     case "usmmla_z_zzz"_h:
13126       m_signed = true;
13127       break;
13128     default:
13129       VIXL_UNIMPLEMENTED();
13130       break;
13131   }
13132   matmul(vform, dn, n, m, n_signed, m_signed);
13133 }
13134 
13135 void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
13136   VectorFormat vform = instr->GetSVEVectorFormat();
13137   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13138   SimVRegister& zn = ReadVRegister(instr->GetRn());
13139   SimVRegister& zm = ReadVRegister(instr->GetRm());
13140 
13141   switch (form_hash_) {
13142     case "fmmla_z_zzz_s"_h:
13143     case "fmmla_z_zzz_d"_h:
13144       fmatmul(vform, zdn, zn, zm);
13145       break;
13146     default:
13147       VIXL_UNIMPLEMENTED();
13148       break;
13149   }
13150 }
13151 
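// Roughly: BRKA sets pd elements to true up to and including the first true
// active element of pn, and BRKB up to but not including it. Bit 4 of the
// encoding selects merging rather than zeroing predication, and bit 22 the
// flag-setting (BRKAS/BRKBS) forms.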
13152 void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) {
13153   SimPRegister& pd = ReadPRegister(instr->GetPd());
13154   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13155   SimPRegister& pn = ReadPRegister(instr->GetPn());
13156   SimPRegister result;
13157 
13158   switch (instr->Mask(SVEPartitionBreakConditionMask)) {
13159     case BRKAS_p_p_p_z:
13160     case BRKA_p_p_p:
13161       brka(result, pg, pn);
13162       break;
13163     case BRKBS_p_p_p_z:
13164     case BRKB_p_p_p:
13165       brkb(result, pg, pn);
13166       break;
13167     default:
13168       VIXL_UNIMPLEMENTED();
13169       break;
13170   }
13171 
13172   if (instr->ExtractBit(4) == 1) {
13173     mov_merging(pd, pg, result);
13174   } else {
13175     mov_zeroing(pd, pg, result);
13176   }
13177 
13178   // Set flag if needed.
13179   if (instr->ExtractBit(22) == 1) {
13180     PredTest(kFormatVnB, pg, pd);
13181   }
13182 }
13183 
13184 void Simulator::VisitSVEPropagateBreakToNextPartition(
13185     const Instruction* instr) {
13186   SimPRegister& pdm = ReadPRegister(instr->GetPd());
13187   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13188   SimPRegister& pn = ReadPRegister(instr->GetPn());
13189 
13190   switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
13191     case BRKNS_p_p_pp:
13192     case BRKN_p_p_pp:
13193       brkn(pdm, pg, pn);
13194       break;
13195     default:
13196       VIXL_UNIMPLEMENTED();
13197       break;
13198   }
13199 
13200   // Set flag if needed.
13201   if (instr->ExtractBit(22) == 1) {
13202     // Note that this ignores `pg`.
13203     PredTest(kFormatVnB, GetPTrue(), pdm);
13204   }
13205 }
13206 
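// PUNPKLO/PUNPKHI widen the low or high half of pn so that each source
// predicate bit governs an element of twice the size. The implementation
// expands pn into a byte-per-lane vector, interleaves it with zero using
// zip1/zip2, then packs the result back into pd.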
13207 void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) {
13208   SimPRegister& pd = ReadPRegister(instr->GetPd());
13209   SimPRegister& pn = ReadPRegister(instr->GetPn());
13210 
13211   SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
13212   SimVRegister zero;
13213   dup_immediate(kFormatVnB, zero, 0);
13214 
13215   switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
13216     case PUNPKHI_p_p:
13217       zip2(kFormatVnB, temp, temp, zero);
13218       break;
13219     case PUNPKLO_p_p:
13220       zip1(kFormatVnB, temp, temp, zero);
13221       break;
13222     default:
13223       VIXL_UNIMPLEMENTED();
13224       break;
13225   }
13226   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
13227 }
13228 
13229 void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) {
13230   VectorFormat vform = instr->GetSVEVectorFormat();
13231   SimPRegister& pd = ReadPRegister(instr->GetPd());
13232   SimPRegister& pn = ReadPRegister(instr->GetPn());
13233   SimPRegister& pm = ReadPRegister(instr->GetPm());
13234 
13235   SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn);
13236   SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm);
13237 
13238   switch (instr->Mask(SVEPermutePredicateElementsMask)) {
13239     case TRN1_p_pp:
13240       trn1(vform, temp0, temp0, temp1);
13241       break;
13242     case TRN2_p_pp:
13243       trn2(vform, temp0, temp0, temp1);
13244       break;
13245     case UZP1_p_pp:
13246       uzp1(vform, temp0, temp0, temp1);
13247       break;
13248     case UZP2_p_pp:
13249       uzp2(vform, temp0, temp0, temp1);
13250       break;
13251     case ZIP1_p_pp:
13252       zip1(vform, temp0, temp0, temp1);
13253       break;
13254     case ZIP2_p_pp:
13255       zip2(vform, temp0, temp0, temp1);
13256       break;
13257     default:
13258       VIXL_UNIMPLEMENTED();
13259       break;
13260   }
13261   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0);
13262 }
13263 
13264 void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) {
13265   switch (instr->Mask(SVEReversePredicateElementsMask)) {
13266     case REV_p_p: {
13267       VectorFormat vform = instr->GetSVEVectorFormat();
13268       SimPRegister& pn = ReadPRegister(instr->GetPn());
13269       SimPRegister& pd = ReadPRegister(instr->GetPd());
13270       SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
13271       rev(vform, temp, temp);
13272       Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
13273       break;
13274     }
13275     default:
13276       VIXL_UNIMPLEMENTED();
13277       break;
13278   }
13279 }
13280 
13281 void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
13282   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13283   // Second source register "Zm" is encoded where "Zn" would usually be.
13284   SimVRegister& zm = ReadVRegister(instr->GetRn());
13285 
13286   int index = instr->GetSVEExtractImmediate();
13287   int vl = GetVectorLengthInBytes();
13288   index = (index >= vl) ? 0 : index;
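  // An out-of-range index is treated as zero, which makes the ext() call
  // below return the first source unchanged.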
13289 
13290   switch (instr->Mask(SVEPermuteVectorExtractMask)) {
13291     case EXT_z_zi_des:
13292       ext(kFormatVnB, zdn, zdn, zm, index);
13293       break;
13294     default:
13295       VIXL_UNIMPLEMENTED();
13296       break;
13297   }
13298 }
13299 
13300 void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) {
13301   VectorFormat vform = instr->GetSVEVectorFormat();
13302   SimVRegister& zd = ReadVRegister(instr->GetRd());
13303   SimVRegister& zn = ReadVRegister(instr->GetRn());
13304   SimVRegister& zm = ReadVRegister(instr->GetRm());
13305 
13306   switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
13307     case TRN1_z_zz:
13308       trn1(vform, zd, zn, zm);
13309       break;
13310     case TRN2_z_zz:
13311       trn2(vform, zd, zn, zm);
13312       break;
13313     case UZP1_z_zz:
13314       uzp1(vform, zd, zn, zm);
13315       break;
13316     case UZP2_z_zz:
13317       uzp2(vform, zd, zn, zm);
13318       break;
13319     case ZIP1_z_zz:
13320       zip1(vform, zd, zn, zm);
13321       break;
13322     case ZIP2_z_zz:
13323       zip2(vform, zd, zn, zm);
13324       break;
13325     default:
13326       VIXL_UNIMPLEMENTED();
13327       break;
13328   }
13329 }
13330 
13331 void Simulator::VisitSVEConditionallyBroadcastElementToVector(
13332     const Instruction* instr) {
13333   VectorFormat vform = instr->GetSVEVectorFormat();
13334   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13335   SimVRegister& zm = ReadVRegister(instr->GetRn());
13336   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13337 
13338   int active_offset = -1;
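  // clast() finds the last active element of zm: an offset of 0 selects that
  // element (CLASTB) and an offset of 1 the element after it (CLASTA).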
13339   switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
13340     case CLASTA_z_p_zz:
13341       active_offset = 1;
13342       break;
13343     case CLASTB_z_p_zz:
13344       active_offset = 0;
13345       break;
13346     default:
13347       VIXL_UNIMPLEMENTED();
13348       break;
13349   }
13350 
13351   if (active_offset >= 0) {
13352     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13353     if (value.first) {
13354       dup_immediate(vform, zdn, value.second);
13355     } else {
13356       // Trigger a line of trace for the operation, even though it doesn't
13357       // change the register value.
13358       mov(vform, zdn, zdn);
13359     }
13360   }
13361 }
13362 
13363 void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar(
13364     const Instruction* instr) {
13365   VectorFormat vform = instr->GetSVEVectorFormat();
13366   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13367   SimVRegister& zm = ReadVRegister(instr->GetRn());
13368   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13369 
13370   int active_offset = -1;
13371   switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
13372     case CLASTA_v_p_z:
13373       active_offset = 1;
13374       break;
13375     case CLASTB_v_p_z:
13376       active_offset = 0;
13377       break;
13378     default:
13379       VIXL_UNIMPLEMENTED();
13380       break;
13381   }
13382 
13383   if (active_offset >= 0) {
13384     LogicVRegister dst(vdn);
13385     uint64_t src1_value = dst.Uint(vform, 0);
13386     std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset);
13387     dup_immediate(vform, vdn, 0);
13388     dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value);
13389   }
13390 }
13391 
13392 void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister(
13393     const Instruction* instr) {
13394   VectorFormat vform = instr->GetSVEVectorFormat();
13395   SimVRegister& zm = ReadVRegister(instr->GetRn());
13396   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13397 
13398   int active_offset = -1;
13399   switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
13400     case CLASTA_r_p_z:
13401       active_offset = 1;
13402       break;
13403     case CLASTB_r_p_z:
13404       active_offset = 0;
13405       break;
13406     default:
13407       VIXL_UNIMPLEMENTED();
13408       break;
13409   }
13410 
13411   if (active_offset >= 0) {
13412     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13413     uint64_t masked_src = ReadXRegister(instr->GetRd()) &
13414                           GetUintMask(LaneSizeInBitsFromFormat(vform));
13415     WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src);
13416   }
13417 }
13418 
13419 void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister(
13420     const Instruction* instr) {
13421   VectorFormat vform = instr->GetSVEVectorFormat();
13422   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13423   SimVRegister& zm = ReadVRegister(instr->GetRn());
13424   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13425 
13426   int active_offset = -1;
13427   switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
13428     case LASTA_v_p_z:
13429       active_offset = 1;
13430       break;
13431     case LASTB_v_p_z:
13432       active_offset = 0;
13433       break;
13434     default:
13435       VIXL_UNIMPLEMENTED();
13436       break;
13437   }
13438 
13439   if (active_offset >= 0) {
13440     LogicVRegister dst(vdn);
13441     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13442     dup_immediate(vform, vdn, 0);
13443     dst.SetUint(vform, 0, value.second);
13444   }
13445 }
13446 
13447 void Simulator::VisitSVEExtractElementToGeneralRegister(
13448     const Instruction* instr) {
13449   VectorFormat vform = instr->GetSVEVectorFormat();
13450   SimVRegister& zm = ReadVRegister(instr->GetRn());
13451   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13452 
13453   int active_offset = -1;
13454   switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
13455     case LASTA_r_p_z:
13456       active_offset = 1;
13457       break;
13458     case LASTB_r_p_z:
13459       active_offset = 0;
13460       break;
13461     default:
13462       VIXL_UNIMPLEMENTED();
13463       break;
13464   }
13465 
13466   if (active_offset >= 0) {
13467     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13468     WriteXRegister(instr->GetRd(), value.second);
13469   }
13470 }
13471 
13472 void Simulator::VisitSVECompressActiveElements(const Instruction* instr) {
13473   VectorFormat vform = instr->GetSVEVectorFormat();
13474   SimVRegister& zd = ReadVRegister(instr->GetRd());
13475   SimVRegister& zn = ReadVRegister(instr->GetRn());
13476   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13477 
13478   switch (instr->Mask(SVECompressActiveElementsMask)) {
13479     case COMPACT_z_p_z:
13480       compact(vform, zd, pg, zn);
13481       break;
13482     default:
13483       VIXL_UNIMPLEMENTED();
13484       break;
13485   }
13486 }
13487 
13488 void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated(
13489     const Instruction* instr) {
13490   VectorFormat vform = instr->GetSVEVectorFormat();
13491   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13492   SimVRegister z_result;
13493 
13494   switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
13495     case CPY_z_p_r:
13496       dup_immediate(vform,
13497                     z_result,
13498                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13499       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
13500       break;
13501     default:
13502       VIXL_UNIMPLEMENTED();
13503       break;
13504   }
13505 }
13506 
13507 void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) {
13508   VectorFormat vform = instr->GetSVEVectorFormat();
13509   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
13510   SimVRegister& zd = ReadVRegister(instr->GetRd());
13511 
13512   SimVRegister result;
13513   switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
13514     case CPY_z_p_i: {
13515       // Use unsigned arithmetic to avoid undefined behaviour during the shift.
13516       uint64_t imm8 = instr->GetImmSVEIntWideSigned();
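      // Bit 13 is the optional "LSL #8" flag, so the byte immediate can also
      // encode multiples of 256.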
13517       dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
13518       break;
13519     }
13520     default:
13521       VIXL_UNIMPLEMENTED();
13522       break;
13523   }
13524 
13525   if (instr->ExtractBit(14) != 0) {
13526     mov_merging(vform, zd, pg, result);
13527   } else {
13528     mov_zeroing(vform, zd, pg, result);
13529   }
13530 }
13531 
13532 void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
13533   SimVRegister& zd = ReadVRegister(instr->GetRd());
13534   SimVRegister& zn = ReadVRegister(instr->GetRn());
13535   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13536   SimVRegister result;
13537 
13538   // In NEON, the chunk size in which elements are REVersed is given by
13539   // the instruction mnemonic, and the element size is attached to the
13540   // register. SVE swaps these two around; the mapping to the logic
13541   // functions below accounts for this.
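  // For example, REVB with .S elements reverses the bytes within each 32-bit
  // element, which maps to rev32() over byte lanes below.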
13542   VectorFormat chunk_form = instr->GetSVEVectorFormat();
13543   VectorFormat element_form = kFormatUndefined;
13544 
13545   switch (instr->Mask(SVEReverseWithinElementsMask)) {
13546     case RBIT_z_p_z:
13547       rbit(chunk_form, result, zn);
13548       break;
13549     case REVB_z_z:
13550       VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
13551                   (chunk_form == kFormatVnD));
13552       element_form = kFormatVnB;
13553       break;
13554     case REVH_z_z:
13555       VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
13556       element_form = kFormatVnH;
13557       break;
13558     case REVW_z_z:
13559       VIXL_ASSERT(chunk_form == kFormatVnD);
13560       element_form = kFormatVnS;
13561       break;
13562     default:
13563       VIXL_UNIMPLEMENTED();
13564       break;
13565   }
13566 
13567   if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
13568     VIXL_ASSERT(element_form != kFormatUndefined);
13569     switch (chunk_form) {
13570       case kFormatVnH:
13571         rev16(element_form, result, zn);
13572         break;
13573       case kFormatVnS:
13574         rev32(element_form, result, zn);
13575         break;
13576       case kFormatVnD:
13577         rev64(element_form, result, zn);
13578         break;
13579       default:
13580         VIXL_UNIMPLEMENTED();
13581     }
13582   }
13583 
13584   mov_merging(chunk_form, zd, pg, result);
13585 }
13586 
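// SPLICE copies the elements of the first source from its first to its last
// active element into the low end of the result, then fills the remainder
// from the lowest elements of the second source. The destructive form uses
// zd itself as the first source; the constructive form uses the consecutive
// register pair zn, zn2.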
13587 void Simulator::VisitSVEVectorSplice(const Instruction* instr) {
13588   VectorFormat vform = instr->GetSVEVectorFormat();
13589   SimVRegister& zd = ReadVRegister(instr->GetRd());
13590   SimVRegister& zn = ReadVRegister(instr->GetRn());
13591   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13592   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13593 
13594   switch (form_hash_) {
13595     case "splice_z_p_zz_des"_h:
13596       splice(vform, zd, pg, zd, zn);
13597       break;
13598     case "splice_z_p_zz_con"_h:
13599       splice(vform, zd, pg, zn, zn2);
13600       break;
13601     default:
13602       VIXL_UNIMPLEMENTED();
13603       break;
13604   }
13605 }
13606 
13607 void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
13608   SimVRegister& zd = ReadVRegister(instr->GetRd());
13609   switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
13610     case DUP_z_r:
13611       dup_immediate(instr->GetSVEVectorFormat(),
13612                     zd,
13613                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13614       break;
13615     default:
13616       VIXL_UNIMPLEMENTED();
13617       break;
13618   }
13619 }
13620 
13621 void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
13622   SimVRegister& zd = ReadVRegister(instr->GetRd());
13623   VectorFormat vform = instr->GetSVEVectorFormat();
13624   switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
13625     case INSR_z_v:
13626       insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
13627       break;
13628     default:
13629       VIXL_UNIMPLEMENTED();
13630       break;
13631   }
13632 }
13633 
13634 void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
13635   SimVRegister& zd = ReadVRegister(instr->GetRd());
13636   VectorFormat vform = instr->GetSVEVectorFormat();
13637   switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
13638     case INSR_z_r:
13639       insr(vform, zd, ReadXRegister(instr->GetRn()));
13640       break;
13641     default:
13642       VIXL_UNIMPLEMENTED();
13643       break;
13644   }
13645 }
13646 
13647 void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
13648   SimVRegister& zd = ReadVRegister(instr->GetRd());
13649   switch (instr->Mask(SVEBroadcastIndexElementMask)) {
13650     case DUP_z_zi: {
13651       std::pair<int, int> index_and_lane_size =
13652           instr->GetSVEPermuteIndexAndLaneSizeLog2();
13653       int index = index_and_lane_size.first;
13654       int lane_size_in_bytes_log_2 = index_and_lane_size.second;
13655       VectorFormat vform =
13656           SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2);
13657       if ((index < 0) || (index >= LaneCountFromFormat(vform))) {
13658         // Out of bounds; set the destination register to zero.
13659         dup_immediate(kFormatVnD, zd, 0);
13660       } else {
13661         dup_element(vform, zd, ReadVRegister(instr->GetRn()), index);
13662       }
13663       return;
13664     }
13665     default:
13666       VIXL_UNIMPLEMENTED();
13667       break;
13668   }
13669 }
13670 
13671 void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) {
13672   SimVRegister& zd = ReadVRegister(instr->GetRd());
13673   VectorFormat vform = instr->GetSVEVectorFormat();
13674   switch (instr->Mask(SVEReverseVectorElementsMask)) {
13675     case REV_z_z:
13676       rev(vform, zd, ReadVRegister(instr->GetRn()));
13677       break;
13678     default:
13679       VIXL_UNIMPLEMENTED();
13680       break;
13681   }
13682 }
13683 
13684 void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
13685   SimVRegister& zd = ReadVRegister(instr->GetRd());
13686   VectorFormat vform = instr->GetSVEVectorFormat();
13687   switch (instr->Mask(SVEUnpackVectorElementsMask)) {
13688     case SUNPKHI_z_z:
13689       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend);
13690       break;
13691     case SUNPKLO_z_z:
13692       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend);
13693       break;
13694     case UUNPKHI_z_z:
13695       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend);
13696       break;
13697     case UUNPKLO_z_z:
13698       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend);
13699       break;
13700     default:
13701       VIXL_UNIMPLEMENTED();
13702       break;
13703   }
13704 }
13705 
13706 void Simulator::VisitSVETableLookup(const Instruction* instr) {
13707   VectorFormat vform = instr->GetSVEVectorFormat();
13708   SimVRegister& zd = ReadVRegister(instr->GetRd());
13709   SimVRegister& zn = ReadVRegister(instr->GetRn());
13710   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13711   SimVRegister& zm = ReadVRegister(instr->GetRm());
13712 
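  // TBL zeroes result elements whose index is out of range, whereas TBX
  // leaves them unchanged; the two-source form indexes into the concatenated
  // pair zn:zn2.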
13713   switch (form_hash_) {
13714     case "tbl_z_zz_1"_h:
13715       tbl(vform, zd, zn, zm);
13716       break;
13717     case "tbl_z_zz_2"_h:
13718       tbl(vform, zd, zn, zn2, zm);
13719       break;
13720     case "tbx_z_zz"_h:
13721       tbx(vform, zd, zn, zm);
13722       break;
13723     default:
13724       VIXL_UNIMPLEMENTED();
13725       break;
13726   }
13727 }
13728 
13729 void Simulator::VisitSVEPredicateCount(const Instruction* instr) {
13730   VectorFormat vform = instr->GetSVEVectorFormat();
13731   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13732   SimPRegister& pn = ReadPRegister(instr->GetPn());
13733 
13734   switch (instr->Mask(SVEPredicateCountMask)) {
13735     case CNTP_r_p_p: {
13736       WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn));
13737       break;
13738     }
13739     default:
13740       VIXL_UNIMPLEMENTED();
13741       break;
13742   }
13743 }
13744 
13745 void Simulator::VisitSVEPredicateLogical(const Instruction* instr) {
13746   Instr op = instr->Mask(SVEPredicateLogicalMask);
13747   SimPRegister& pd = ReadPRegister(instr->GetPd());
13748   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13749   SimPRegister& pn = ReadPRegister(instr->GetPn());
13750   SimPRegister& pm = ReadPRegister(instr->GetPm());
13751   SimPRegister result;
13752   switch (op) {
13753     case ANDS_p_p_pp_z:
13754     case AND_p_p_pp_z:
13755     case BICS_p_p_pp_z:
13756     case BIC_p_p_pp_z:
13757     case EORS_p_p_pp_z:
13758     case EOR_p_p_pp_z:
13759     case NANDS_p_p_pp_z:
13760     case NAND_p_p_pp_z:
13761     case NORS_p_p_pp_z:
13762     case NOR_p_p_pp_z:
13763     case ORNS_p_p_pp_z:
13764     case ORN_p_p_pp_z:
13765     case ORRS_p_p_pp_z:
13766     case ORR_p_p_pp_z:
13767       SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op),
13768                                 result,
13769                                 pn,
13770                                 pm);
13771       break;
13772     case SEL_p_p_pp:
13773       sel(pd, pg, pn, pm);
13774       return;
13775     default:
13776       VIXL_UNIMPLEMENTED();
13777       break;
13778   }
13779 
13780   mov_zeroing(pd, pg, result);
13781   if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) {
13782     PredTest(kFormatVnB, pg, pd);
13783   }
13784 }
13785 
13786 void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) {
13787   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13788   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13789   switch (instr->Mask(SVEPredicateFirstActiveMask)) {
13790     case PFIRST_p_p_p:
13791       pfirst(pdn, pg, pdn);
13792       // TODO: Is this broken when pg == pdn?
13793       PredTest(kFormatVnB, pg, pdn);
13794       break;
13795     default:
13796       VIXL_UNIMPLEMENTED();
13797       break;
13798   }
13799 }
13800 
13801 void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) {
13802   // This group only contains PTRUE{S}, and there are no unallocated encodings.
13803   VIXL_STATIC_ASSERT(
13804       SVEPredicateInitializeMask ==
13805       (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
13806   VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
13807               (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
13808 
13809   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13810   VectorFormat vform = instr->GetSVEVectorFormat();
13811 
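  // The constraint field names a pattern (POW2, VL1-VL256, MUL4, MUL3 or ALL)
  // that limits how many leading elements are set to true.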
13812   ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint());
13813   if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn);
13814 }
13815 
13816 void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) {
13817   // This group only contains PNEXT, and there are no unallocated encodings.
13818   VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
13819   VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
13820 
13821   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13822   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13823   VectorFormat vform = instr->GetSVEVectorFormat();
13824 
13825   pnext(vform, pdn, pg, pdn);
13826   // TODO: Is this broken when pg == pdn?
13827   PredTest(vform, pg, pdn);
13828 }
13829 
13830 void Simulator::VisitSVEPredicateReadFromFFR_Predicated(
13831     const Instruction* instr) {
13832   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13833   LogicPRegister pg(ReadPRegister(instr->GetPn()));
13834   FlagsUpdate flags = LeaveFlags;
13835   switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
13836     case RDFFR_p_p_f:
13837       // Do nothing.
13838       break;
13839     case RDFFRS_p_p_f:
13840       flags = SetFlags;
13841       break;
13842     default:
13843       VIXL_UNIMPLEMENTED();
13844       break;
13845   }
13846 
13847   LogicPRegister ffr(ReadFFR());
13848   mov_zeroing(pd, pg, ffr);
13849 
13850   if (flags == SetFlags) {
13851     PredTest(kFormatVnB, pg, pd);
13852   }
13853 }
13854 
13855 void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated(
13856     const Instruction* instr) {
13857   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13858   LogicPRegister ffr(ReadFFR());
13859   switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
13860     case RDFFR_p_f:
13861       mov(pd, ffr);
13862       break;
13863     default:
13864       VIXL_UNIMPLEMENTED();
13865       break;
13866   }
13867 }
13868 
13869 void Simulator::VisitSVEPredicateTest(const Instruction* instr) {
13870   switch (instr->Mask(SVEPredicateTestMask)) {
13871     case PTEST_p_p:
13872       PredTest(kFormatVnB,
13873                ReadPRegister(instr->ExtractBits(13, 10)),
13874                ReadPRegister(instr->GetPn()));
13875       break;
13876     default:
13877       VIXL_UNIMPLEMENTED();
13878       break;
13879   }
13880 }
13881 
13882 void Simulator::VisitSVEPredicateZero(const Instruction* instr) {
13883   switch (instr->Mask(SVEPredicateZeroMask)) {
13884     case PFALSE_p:
13885       pfalse(ReadPRegister(instr->GetPd()));
13886       break;
13887     default:
13888       VIXL_UNIMPLEMENTED();
13889       break;
13890   }
13891 }
13892 
13893 void Simulator::VisitSVEPropagateBreak(const Instruction* instr) {
13894   SimPRegister& pd = ReadPRegister(instr->GetPd());
13895   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13896   SimPRegister& pn = ReadPRegister(instr->GetPn());
13897   SimPRegister& pm = ReadPRegister(instr->GetPm());
13898 
13899   bool set_flags = false;
13900   switch (instr->Mask(SVEPropagateBreakMask)) {
13901     case BRKPAS_p_p_pp:
13902       set_flags = true;
13903       VIXL_FALLTHROUGH();
13904     case BRKPA_p_p_pp:
13905       brkpa(pd, pg, pn, pm);
13906       break;
13907     case BRKPBS_p_p_pp:
13908       set_flags = true;
13909       VIXL_FALLTHROUGH();
13910     case BRKPB_p_p_pp:
13911       brkpb(pd, pg, pn, pm);
13912       break;
13913     default:
13914       VIXL_UNIMPLEMENTED();
13915       break;
13916   }
13917 
13918   if (set_flags) {
13919     PredTest(kFormatVnB, pg, pd);
13920   }
13921 }
13922 
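// ADDVL adds a signed multiple of the vector length in bytes to a base
// register and ADDPL a multiple of the predicate length (VL / 8); both are
// commonly used to carve out SVE spill space on the stack. RDVL, below,
// materialises a multiple of VL directly.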
void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) {
  uint64_t length = 0;
  switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
    case ADDPL_r_ri:
      length = GetPredicateLengthInBytes();
      break;
    case ADDVL_r_ri:
      length = GetVectorLengthInBytes();
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }
  uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
  WriteXRegister(instr->GetRd(),
                 base + (length * instr->GetImmSVEVLScale()),
                 LogRegWrites,
                 Reg31IsStackPointer);
}

void Simulator::VisitSVEStackFrameSize(const Instruction* instr) {
  int64_t scale = instr->GetImmSVEVLScale();

  switch (instr->Mask(SVEStackFrameSizeMask)) {
    case RDVL_r_i:
      WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale);
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }
}

void Simulator::VisitSVEVectorSelect(const Instruction* instr) {
  // The only instruction in this group is `sel`, and there are no unused
  // encodings.
  VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz);

  VectorFormat vform = instr->GetSVEVectorFormat();
  SimVRegister& zd = ReadVRegister(instr->GetRd());
  SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  SimVRegister& zn = ReadVRegister(instr->GetRn());
  SimVRegister& zm = ReadVRegister(instr->GetRm());

  sel(vform, zd, pg, zn, zm);
}

void Simulator::VisitSVEFFRInitialise(const Instruction* instr) {
  switch (instr->Mask(SVEFFRInitialiseMask)) {
    case SETFFR_f: {
      LogicPRegister ffr(ReadFFR());
      ffr.SetAllBits();
      break;
    }
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
}

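// WRFFR copies a predicate register into the FFR. The result is only defined
// for "monotonic" predicates (once a lane is inactive, every later lane is
// also inactive), so the loop below aborts if that property does not hold.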
void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) {
  switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
    case WRFFR_f_p: {
      SimPRegister pn(ReadPRegister(instr->GetPn()));
      bool last_active = true;
      for (unsigned i = 0; i < pn.GetSizeInBits(); i++) {
        bool active = pn.GetBit(i);
        if (active && !last_active) {
          // `pn` is non-monotonic. This is UNPREDICTABLE.
          VIXL_ABORT();
        }
        last_active = active;
      }
      mov(ReadFFR(), pn);
      break;
    }
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }
}

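// For the scalar-plus-immediate contiguous loads, the immediate is a signed
// multiple of the whole transfer size: offset = imm * (VL / esize) * msize
// bytes, computed below as (imm * VL) / (1 << (esize_log2 - msize_log2)).
// For example (illustrative), LD1SB into S-sized elements with a 32-byte VL
// advances by imm * 8 bytes.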
void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) {
  bool is_signed;
  switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
    case LD1B_z_p_bi_u8:
    case LD1B_z_p_bi_u16:
    case LD1B_z_p_bi_u32:
    case LD1B_z_p_bi_u64:
    case LD1H_z_p_bi_u16:
    case LD1H_z_p_bi_u32:
    case LD1H_z_p_bi_u64:
    case LD1W_z_p_bi_u32:
    case LD1W_z_p_bi_u64:
    case LD1D_z_p_bi_u64:
      is_signed = false;
      break;
    case LD1SB_z_p_bi_s16:
    case LD1SB_z_p_bi_s32:
    case LD1SB_z_p_bi_s64:
    case LD1SH_z_p_bi_s32:
    case LD1SH_z_p_bi_s64:
    case LD1SW_z_p_bi_s64:
      is_signed = true;
      break;
    default:
      // This encoding group is complete, so no other values should be possible.
      VIXL_UNREACHABLE();
      is_signed = false;
      break;
  }

  int vl = GetVectorLengthInBytes();
  int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
  VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
  int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  uint64_t offset =
      (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  LogicSVEAddressVector addr(base + offset);
  addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  SVEStructuredLoadHelper(vform,
                          ReadPRegister(instr->GetPgLow8()),
                          instr->GetRt(),
                          addr,
                          is_signed);
}

void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar(
    const Instruction* instr) {
  bool is_signed;
  switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
    case LD1B_z_p_br_u8:
    case LD1B_z_p_br_u16:
    case LD1B_z_p_br_u32:
    case LD1B_z_p_br_u64:
    case LD1H_z_p_br_u16:
    case LD1H_z_p_br_u32:
    case LD1H_z_p_br_u64:
    case LD1W_z_p_br_u32:
    case LD1W_z_p_br_u64:
    case LD1D_z_p_br_u64:
      is_signed = false;
      break;
    case LD1SB_z_p_br_s16:
    case LD1SB_z_p_br_s32:
    case LD1SB_z_p_br_s64:
    case LD1SH_z_p_br_s32:
    case LD1SH_z_p_br_s64:
    case LD1SW_z_p_br_s64:
      is_signed = true;
      break;
    default:
      // This encoding group is complete, so no other values should be possible.
      VIXL_UNREACHABLE();
      is_signed = false;
      break;
  }

  int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
  VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  uint64_t offset = ReadXRegister(instr->GetRm());
  offset <<= msize_in_bytes_log2;
  LogicSVEAddressVector addr(base + offset);
  addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  SVEStructuredLoadHelper(vform,
                          ReadPRegister(instr->GetPgLow8()),
                          instr->GetRt(),
                          addr,
                          is_signed);
}

void Simulator::DoUnreachable(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kUnreachableOpcode));

  fprintf(stream_,
          "Hit UNREACHABLE marker at pc=%p.\n",
          reinterpret_cast<const void*>(instr));
  abort();
}

void Simulator::Simulate_XdSP_XnSP_Xm(const Instruction* instr) {
  VIXL_ASSERT(form_hash_ == Hash("irg_64i_dp_2src"));
  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  uint64_t rm = ReadXRegister(instr->GetRm());
  uint64_t tag = GenerateRandomTag(rm & 0xffff);
  uint64_t new_val = GetAddressWithAllocationTag(rn, tag);
  WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
}

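// ADDG/SUBG add or subtract a multiple of the 16-byte MTE tag granule and
// insert a new allocation tag derived from the source tag and the encoded tag
// offset.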
void Simulator::SimulateMTEAddSubTag(const Instruction* instr) {
  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  uint64_t rn_tag = GetAllocationTagFromAddress(rn);
  uint64_t tag_offset = instr->ExtractBits(13, 10);
  // TODO: implement GCR_EL1.Exclude to provide a tag exclusion list.
  uint64_t new_tag = ChooseNonExcludedTag(rn_tag, tag_offset);

  uint64_t offset = instr->ExtractBits(21, 16) * kMTETagGranuleInBytes;
  int carry = 0;
  if (form_hash_ == Hash("subg_64_addsub_immtags")) {
    offset = ~offset;
    carry = 1;
  } else {
    VIXL_ASSERT(form_hash_ == Hash("addg_64_addsub_immtags"));
  }
  uint64_t new_val =
      AddWithCarry(kXRegSize, /* set_flags = */ false, rn, offset, carry);
  new_val = GetAddressWithAllocationTag(new_val, new_tag);
  WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
}

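// GMI (tag mask insert) sets the bit corresponding to the allocation tag of
// the address in Xn within the exclusion mask in Xm, so the result can be fed
// back into IRG to exclude that tag.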
void Simulator::SimulateMTETagMaskInsert(const Instruction* instr) {
  VIXL_ASSERT(form_hash_ == Hash("gmi_64g_dp_2src"));
  uint64_t mask = ReadXRegister(instr->GetRm());
  uint64_t tag = GetAllocationTagFromAddress(
      ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
  uint64_t mask_bit = 1 << tag;
  WriteXRegister(instr->GetRd(), mask | mask_bit);
}

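// SUBP/SUBPS compute the signed difference of the 56-bit address parts of Xn
// and Xm, ignoring the tag bits; SUBPS also sets the NZCV flags.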
void Simulator::SimulateMTESubPointer(const Instruction* instr) {
  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  uint64_t rm = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);

  VIXL_ASSERT((form_hash_ == Hash("subps_64s_dp_2src")) ||
              (form_hash_ == Hash("subp_64s_dp_2src")));
  bool set_flags = (form_hash_ == Hash("subps_64s_dp_2src"));

  rn = ExtractSignedBitfield64(55, 0, rn);
  rm = ExtractSignedBitfield64(55, 0, rm);
  uint64_t new_val = AddWithCarry(kXRegSize, set_flags, rn, ~rm, 1);
  WriteXRegister(instr->GetRd(), new_val);
}

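// STGP stores a pair of X registers and updates the allocation tag of the
// (16-byte-aligned) granule being written.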
void Simulator::SimulateMTEStoreTagPair(const Instruction* instr) {
  uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  uint64_t rt = ReadXRegister(instr->GetRt());
  uint64_t rt2 = ReadXRegister(instr->GetRt2());
  int offset = instr->GetImmLSPair() * static_cast<int>(kMTETagGranuleInBytes);

  AddrMode addr_mode = Offset;
  switch (form_hash_) {
    case Hash("stgp_64_ldstpair_off"):
      // Default is the offset mode.
      break;
    case Hash("stgp_64_ldstpair_post"):
      addr_mode = PostIndex;
      break;
    case Hash("stgp_64_ldstpair_pre"):
      addr_mode = PreIndex;
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }

  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
  if (!IsAligned(address, kMTETagGranuleInBytes)) {
    VIXL_ALIGNMENT_EXCEPTION();
  }

  int tag = GetAllocationTagFromAddress(rn);
  meta_data_.SetMTETag(address, tag);

  MemWrite<uint64_t>(address, rt);
  MemWrite<uint64_t>(address + kXRegSizeInBytes, rt2);
}

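// STG/ST2G store the allocation tag of Xt for one or two granules, and the
// STZG/STZ2G forms additionally zero the tagged memory. The offset, pre-index
// and post-index addressing forms are distinguished by form hash below.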
void Simulator::SimulateMTEStoreTag(const Instruction* instr) {
  uint64_t rt = ReadXRegister(instr->GetRt(), Reg31IsStackPointer);
  int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);

  AddrMode addr_mode = Offset;
  switch (form_hash_) {
    case Hash("st2g_64soffset_ldsttags"):
    case Hash("stg_64soffset_ldsttags"):
    case Hash("stz2g_64soffset_ldsttags"):
    case Hash("stzg_64soffset_ldsttags"):
      // Default is the offset mode.
      break;
    case Hash("st2g_64spost_ldsttags"):
    case Hash("stg_64spost_ldsttags"):
    case Hash("stz2g_64spost_ldsttags"):
    case Hash("stzg_64spost_ldsttags"):
      addr_mode = PostIndex;
      break;
    case Hash("st2g_64spre_ldsttags"):
    case Hash("stg_64spre_ldsttags"):
    case Hash("stz2g_64spre_ldsttags"):
    case Hash("stzg_64spre_ldsttags"):
      addr_mode = PreIndex;
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }

  bool is_pair = false;
  switch (form_hash_) {
    case Hash("st2g_64soffset_ldsttags"):
    case Hash("st2g_64spost_ldsttags"):
    case Hash("st2g_64spre_ldsttags"):
    case Hash("stz2g_64soffset_ldsttags"):
    case Hash("stz2g_64spost_ldsttags"):
    case Hash("stz2g_64spre_ldsttags"):
      is_pair = true;
      break;
    default:
      break;
  }

  bool is_zeroing = false;
  switch (form_hash_) {
    case Hash("stz2g_64soffset_ldsttags"):
    case Hash("stz2g_64spost_ldsttags"):
    case Hash("stz2g_64spre_ldsttags"):
    case Hash("stzg_64soffset_ldsttags"):
    case Hash("stzg_64spost_ldsttags"):
    case Hash("stzg_64spre_ldsttags"):
      is_zeroing = true;
      break;
    default:
      break;
  }

  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);

  if (is_zeroing) {
    if (!IsAligned(address, kMTETagGranuleInBytes)) {
      VIXL_ALIGNMENT_EXCEPTION();
    }
    VIXL_STATIC_ASSERT(kMTETagGranuleInBytes >= sizeof(uint64_t));
    VIXL_STATIC_ASSERT(kMTETagGranuleInBytes % sizeof(uint64_t) == 0);

    size_t fill_size = kMTETagGranuleInBytes;
    if (is_pair) {
      fill_size += kMTETagGranuleInBytes;
    }

    size_t fill_offset = 0;
    while (fill_offset < fill_size) {
      MemWrite<uint64_t>(address + fill_offset, 0);
      fill_offset += sizeof(uint64_t);
    }
  }

  int tag = GetAllocationTagFromAddress(rt);
  meta_data_.SetMTETag(address, tag, instr);
  if (is_pair) {
    meta_data_.SetMTETag(address + kMTETagGranuleInBytes, tag, instr);
  }
}

void Simulator::SimulateMTELoadTag(const Instruction* instr) {
  uint64_t rt = ReadXRegister(instr->GetRt());
  int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);

  switch (form_hash_) {
    case Hash("ldg_64loffset_ldsttags"):
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }

  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, Offset);
  address = AlignDown(address, kMTETagGranuleInBytes);
  uint64_t tag = meta_data_.GetMTETag(address, instr);
  WriteXRegister(instr->GetRt(), GetAddressWithAllocationTag(rt, tag));
}

void Simulator::SimulateCpyFP(const Instruction* instr) {
  MOPSPHelper<"cpy"_h>(instr);
  LogSystemRegister(NZCV);
}

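// CPYP is the prologue of the CPY triplet: it decides the copy direction. If
// the buffers overlap such that a forward copy would overwrite source bytes
// before they are read, the source and destination registers are rebased to
// the end of the buffers and N is set so the main part copies backwards.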
void Simulator::SimulateCpyP(const Instruction* instr) {
  MOPSPHelper<"cpy"_h>(instr);

  int d = instr->GetRd();
  int n = instr->GetRn();
  int s = instr->GetRs();

  // Determine copy direction. For cases in which direction is implementation
  // defined, use forward.
  bool is_backwards = false;
  uint64_t xs = ReadXRegister(s);
  uint64_t xd = ReadXRegister(d);
  uint64_t xn = ReadXRegister(n);

  // Ignore the top byte of addresses for comparisons. We can use xn as is,
  // as it should have zero in bits 63:55.
  uint64_t xs_tbi = ExtractUnsignedBitfield64(55, 0, xs);
  uint64_t xd_tbi = ExtractUnsignedBitfield64(55, 0, xd);
  VIXL_ASSERT(ExtractUnsignedBitfield64(63, 55, xn) == 0);
  if ((xs_tbi < xd_tbi) && ((xs_tbi + xn) > xd_tbi)) {
    is_backwards = true;
    WriteXRegister(s, xs + xn);
    WriteXRegister(d, xd + xn);
  }

  ReadNzcv().SetN(is_backwards ? 1 : 0);
  LogSystemRegister(NZCV);
}

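// CPYM is the main part of the CPY triplet: it copies Xn bytes one at a time,
// in the direction recorded in the N flag by the prologue, and leaves Xn == 0
// when done.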
void Simulator::SimulateCpyM(const Instruction* instr) {
  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
  VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "cpy"_h));

  int d = instr->GetRd();
  int n = instr->GetRn();
  int s = instr->GetRs();

  uint64_t xd = ReadXRegister(d);
  uint64_t xn = ReadXRegister(n);
  uint64_t xs = ReadXRegister(s);
  bool is_backwards = ReadN();

  int step = 1;
  if (is_backwards) {
    step = -1;
    xs--;
    xd--;
  }

  while (xn--) {
    uint8_t temp = MemRead<uint8_t>(xs);
    MemWrite<uint8_t>(xd, temp);
    LogMemTransfer(xd, xs, temp);
    xs += step;
    xd += step;
  }

  if (is_backwards) {
    xs++;
    xd++;
  }

  WriteXRegister(d, xd);
  WriteXRegister(n, 0);
  WriteXRegister(s, xs);
}

void Simulator::SimulateCpyE(const Instruction* instr) {
  USE(instr);
  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
  VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "cpy"_h));
  // This implementation does nothing in the epilogue; all copying is completed
  // in the "main" part.
}

void Simulator::SimulateSetP(const Instruction* instr) {
  MOPSPHelper<"set"_h>(instr);
  LogSystemRegister(NZCV);
}

void Simulator::SimulateSetM(const Instruction* instr) {
  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
  VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "set"_h));

  uint64_t xd = ReadXRegister(instr->GetRd());
  uint64_t xn = ReadXRegister(instr->GetRn());
  uint64_t xs = ReadXRegister(instr->GetRs());

  while (xn--) {
    LogWrite(instr->GetRs(), GetPrintRegPartial(kPrintRegLaneSizeB), xd);
    MemWrite<uint8_t>(xd++, xs);
  }
  WriteXRegister(instr->GetRd(), xd);
  WriteXRegister(instr->GetRn(), 0);
}

void Simulator::SimulateSetE(const Instruction* instr) {
  USE(instr);
  VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
  VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "set"_h));
  // This implementation does nothing in the epilogue; all setting is completed
  // in the "main" part.
}

void Simulator::SimulateSetGP(const Instruction* instr) {
  MOPSPHelper<"setg"_h>(instr);

  uint64_t xd = ReadXRegister(instr->GetRd());
  uint64_t xn = ReadXRegister(instr->GetRn());

  if ((xn > 0) && !IsAligned(xd, kMTETagGranuleInBytes)) {
    VIXL_ALIGNMENT_EXCEPTION();
  }

  if (!IsAligned(xn, kMTETagGranuleInBytes)) {
    VIXL_ALIGNMENT_EXCEPTION();
  }

  LogSystemRegister(NZCV);
}

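// SETGM tags every 16-byte granule of the destination with the allocation tag
// of Xd, then delegates to SimulateSetM() for the underlying byte fill.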
void Simulator::SimulateSetGM(const Instruction* instr) {
  uint64_t xd = ReadXRegister(instr->GetRd());
  uint64_t xn = ReadXRegister(instr->GetRn());

  int tag = GetAllocationTagFromAddress(xd);
  while (xn) {
    meta_data_.SetMTETag(xd, tag);
    xd += 16;
    xn -= 16;
  }
  SimulateSetM(instr);
}

void Simulator::DoTrace(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kTraceOpcode));

  // Read the arguments encoded inline in the instruction stream.
  uint32_t parameters;
  uint32_t command;

  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
  memcpy(&command, instr + kTraceCommandOffset, sizeof(command));

  switch (command) {
    case TRACE_ENABLE:
      SetTraceParameters(GetTraceParameters() | parameters);
      break;
    case TRACE_DISABLE:
      SetTraceParameters(GetTraceParameters() & ~parameters);
      break;
    default:
      VIXL_UNREACHABLE();
  }

  WritePc(instr->GetInstructionAtOffset(kTraceLength));
}


void Simulator::DoLog(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kLogOpcode));

  // Read the arguments encoded inline in the instruction stream.
  uint32_t parameters;

  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));

  // We don't support a one-shot LOG_DISASM.
  VIXL_ASSERT((parameters & LOG_DISASM) == 0);
  // Print the requested information.
  if (parameters & LOG_SYSREGS) PrintSystemRegisters();
  if (parameters & LOG_REGS) PrintRegisters();
  if (parameters & LOG_VREGS) PrintVRegisters();

  WritePc(instr->GetInstructionAtOffset(kLogLength));
}


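// A worked example of the splitting performed below (illustrative): the format
// string "x: %d, y: %d" becomes "x: \0%d, y: \0%d", so the literal prefix
// "x: " is printed first and the chunks "%d, y: " and "%d" are then each
// passed to the host printf() with one simulated register argument.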
void Simulator::DoPrintf(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kPrintfOpcode));

  // Read the arguments encoded inline in the instruction stream.
  uint32_t arg_count;
  uint32_t arg_pattern_list;
  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
  memcpy(&arg_count, instr + kPrintfArgCountOffset, sizeof(arg_count));
  memcpy(&arg_pattern_list,
         instr + kPrintfArgPatternListOffset,
         sizeof(arg_pattern_list));

  VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
  VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);

  // We need to call the host printf function with a set of arguments defined by
  // arg_pattern_list. Because we don't know the types and sizes of the
  // arguments, this is very difficult to do in a robust and portable way. To
  // work around the problem, we pick apart the format string, and print one
  // format placeholder at a time.

  // Allocate space for the format string. We take a copy, so we can modify it.
  // Leave enough space for one extra character per expected argument (plus the
  // '\0' termination).
  const char* format_base = ReadRegister<const char*>(0);
  VIXL_ASSERT(format_base != NULL);
  size_t length = strlen(format_base) + 1;
  char* const format = allocator_.New<char[]>(length + arg_count);
  // A list of chunks, each with exactly one format placeholder.
  const char* chunks[kPrintfMaxArgCount];

  // Copy the format string and search for format placeholders.
  uint32_t placeholder_count = 0;
  char* format_scratch = format;
  for (size_t i = 0; i < length; i++) {
    if (format_base[i] != '%') {
      *format_scratch++ = format_base[i];
    } else {
      if (format_base[i + 1] == '%') {
        // Ignore explicit "%%" sequences.
        *format_scratch++ = format_base[i];
        i++;
        // Chunks after the first are passed as format strings to printf, so we
        // need to escape '%' characters in those chunks.
        if (placeholder_count > 0) *format_scratch++ = format_base[i];
      } else {
        VIXL_CHECK(placeholder_count < arg_count);
        // Insert '\0' before placeholders, and store their locations.
        *format_scratch++ = '\0';
        chunks[placeholder_count++] = format_scratch;
        *format_scratch++ = format_base[i];
      }
    }
  }
  VIXL_CHECK(placeholder_count == arg_count);

  // Finally, call printf with each chunk, passing the appropriate register
  // argument. Normally, printf returns the number of bytes transmitted, so we
  // can emulate a single printf call by adding the result from each chunk. If
  // any call returns a negative (error) value, though, just return that value.

  printf("%s", clr_printf);

  // Because '\0' is inserted before each placeholder, the first string in
  // 'format' contains no format placeholders and should be printed literally.
  int result = printf("%s", format);
  int pcs_r = 1;  // Start at x1. x0 holds the format string.
  int pcs_f = 0;  // Start at d0.
  if (result >= 0) {
    for (uint32_t i = 0; i < placeholder_count; i++) {
      int part_result = -1;

      uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
      arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
      switch (arg_pattern) {
        case kPrintfArgW:
          part_result = printf(chunks[i], ReadWRegister(pcs_r++));
          break;
        case kPrintfArgX:
          part_result = printf(chunks[i], ReadXRegister(pcs_r++));
          break;
        case kPrintfArgD:
          part_result = printf(chunks[i], ReadDRegister(pcs_f++));
          break;
        default:
          VIXL_UNREACHABLE();
      }

      if (part_result < 0) {
        // Handle error values.
        result = part_result;
        break;
      }

      result += part_result;
    }
  }

  printf("%s", clr_normal);

  // Printf returns its result in x0 (just like the C library's printf).
  WriteXRegister(0, result);

  // The printf parameters are inlined in the code, so skip them.
  WritePc(instr->GetInstructionAtOffset(kPrintfLength));

  // Set LR as if we'd just called a native printf function.
  WriteLr(ReadPc());
  allocator_.DeleteArray(format);
}


#ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
void Simulator::DoRuntimeCall(const Instruction* instr) {
  VIXL_STATIC_ASSERT(kRuntimeCallAddressSize == sizeof(uintptr_t));
  // The appropriate `Simulator::SimulateRuntimeCall()` wrapper and the function
  // to call are passed inline in the assembly.
  uintptr_t call_wrapper_address =
      MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset);
  uintptr_t function_address =
      MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset);
  RuntimeCallType call_type = static_cast<RuntimeCallType>(
      MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
  auto runtime_call_wrapper =
      reinterpret_cast<void (*)(Simulator*, uintptr_t)>(call_wrapper_address);

  if (call_type == kCallRuntime) {
    WriteRegister(kLinkRegCode,
                  instr->GetInstructionAtOffset(kRuntimeCallLength));
  }
  runtime_call_wrapper(this, function_address);
  // Read the return address from `lr` and write it into `pc`.
  WritePc(ReadRegister<Instruction*>(kLinkRegCode));
}
#else
void Simulator::DoRuntimeCall(const Instruction* instr) {
  USE(instr);
  VIXL_UNREACHABLE();
}
#endif


void Simulator::DoConfigureCPUFeatures(const Instruction* instr) {
  VIXL_ASSERT(instr->Mask(ExceptionMask) == HLT);

  typedef ConfigureCPUFeaturesElementType ElementType;
  VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <
              std::numeric_limits<ElementType>::max());

  // k{Set,Enable,Disable}CPUFeatures have the same parameter encoding.

  size_t element_size = sizeof(ElementType);
  size_t offset = kConfigureCPUFeaturesListOffset;

  // Read the kNone-terminated list of features.
  CPUFeatures parameters;
  while (true) {
    ElementType feature = MemRead<ElementType>(instr + offset);
    offset += element_size;
    if (feature == static_cast<ElementType>(CPUFeatures::kNone)) break;
    parameters.Combine(static_cast<CPUFeatures::Feature>(feature));
  }

  switch (instr->GetImmException()) {
    case kSetCPUFeaturesOpcode:
      SetCPUFeatures(parameters);
      break;
    case kEnableCPUFeaturesOpcode:
      GetCPUFeatures()->Combine(parameters);
      break;
    case kDisableCPUFeaturesOpcode:
      GetCPUFeatures()->Remove(parameters);
      break;
    default:
      VIXL_UNREACHABLE();
      break;
  }

  WritePc(instr->GetInstructionAtOffset(AlignUp(offset, kInstructionSize)));
}


void Simulator::DoSaveCPUFeatures(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kSaveCPUFeaturesOpcode));
  USE(instr);

  saved_cpu_features_.push_back(*GetCPUFeatures());
}


void Simulator::DoRestoreCPUFeatures(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kRestoreCPUFeaturesOpcode));
  USE(instr);

  SetCPUFeatures(saved_cpu_features_.back());
  saved_cpu_features_.pop_back();
}

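// Wrap the host mmap() so that the simulated PROT_BTI and PROT_MTE protections
// can be requested: the extra bits are stripped before calling the host, and
// for PROT_MTE the new mapping is tagged in the simulator's metadata and a
// tagged address is returned.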
void* Simulator::Mmap(
    void* address, size_t length, int prot, int flags, int fd, off_t offset) {
  // The underlying system `mmap` in the simulated environment doesn't
  // recognize PROT_BTI and PROT_MTE. Although the kernel probably just ignores
  // any bits it doesn't know about, it is safer to mask those protections out
  // before calling it.
  int internal_prot = prot;
  prot &= ~(PROT_BTI | PROT_MTE);

  uint64_t address2 = reinterpret_cast<uint64_t>(
      mmap(address, length, prot, flags, fd, offset));

  if (internal_prot & PROT_MTE) {
    // The address returned by `mmap` isn't tagged.
    int tag = static_cast<int>(GenerateRandomTag());
    SetGranuleTag(address2, tag, length);
    address2 = GetAddressWithAllocationTag(address2, tag);
  }

  return reinterpret_cast<void*>(address2);
}


int Simulator::Munmap(void* address, size_t length, int prot) {
  if (prot & PROT_MTE) {
    // Untag the address since `munmap` doesn't recognize the memory tagging
    // managed by the Simulator.
    address = AddressUntag(address);
    CleanGranuleTag(reinterpret_cast<char*>(address), length);
  }

  return munmap(address, length);
}


}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64