• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <errno.h>
30 #include <unistd.h>
31 
32 #include <cmath>
33 #include <cstring>
34 #include <limits>
35 
36 #include "simulator-aarch64.h"
37 
38 namespace vixl {
39 namespace aarch64 {
40 
41 using vixl::internal::SimFloat16;
42 
43 const Instruction* Simulator::kEndOfSimAddress = NULL;
44 
SetBits(int msb,int lsb,uint32_t bits)45 void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
46   int width = msb - lsb + 1;
47   VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
48 
49   bits <<= lsb;
50   uint32_t mask = ((1 << width) - 1) << lsb;
51   VIXL_ASSERT((mask & write_ignore_mask_) == 0);
52 
53   value_ = (value_ & ~mask) | (bits & mask);
54 }
55 
56 
DefaultValueFor(SystemRegister id)57 SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
58   switch (id) {
59     case NZCV:
60       return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
61     case FPCR:
62       return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
63     default:
64       VIXL_UNREACHABLE();
65       return SimSystemRegister();
66   }
67 }
68 
69 const Simulator::FormToVisitorFnMap Simulator::FORM_TO_VISITOR = {
70     DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
71     SIM_AUD_VISITOR_MAP(Simulator),
72     {"smlal_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
73     {"smlsl_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
74     {"smull_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
75     {"sqdmlal_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
76     {"sqdmlsl_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
77     {"sqdmull_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
78     {"umlal_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
79     {"umlsl_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
80     {"umull_asimdelem_l", &Simulator::SimulateNEONMulByElementLong},
81     {"fcmla_asimdelem_c_h", &Simulator::SimulateNEONComplexMulByElement},
82     {"fcmla_asimdelem_c_s", &Simulator::SimulateNEONComplexMulByElement},
83     {"fmlal2_asimdelem_lh", &Simulator::SimulateNEONFPMulByElementLong},
84     {"fmlal_asimdelem_lh", &Simulator::SimulateNEONFPMulByElementLong},
85     {"fmlsl2_asimdelem_lh", &Simulator::SimulateNEONFPMulByElementLong},
86     {"fmlsl_asimdelem_lh", &Simulator::SimulateNEONFPMulByElementLong},
87     {"fmla_asimdelem_rh_h", &Simulator::SimulateNEONFPMulByElement},
88     {"fmls_asimdelem_rh_h", &Simulator::SimulateNEONFPMulByElement},
89     {"fmulx_asimdelem_rh_h", &Simulator::SimulateNEONFPMulByElement},
90     {"fmul_asimdelem_rh_h", &Simulator::SimulateNEONFPMulByElement},
91     {"fmla_asimdelem_r_sd", &Simulator::SimulateNEONFPMulByElement},
92     {"fmls_asimdelem_r_sd", &Simulator::SimulateNEONFPMulByElement},
93     {"fmulx_asimdelem_r_sd", &Simulator::SimulateNEONFPMulByElement},
94     {"fmul_asimdelem_r_sd", &Simulator::SimulateNEONFPMulByElement},
95     {"sdot_asimdelem_d", &Simulator::SimulateNEONDotProdByElement},
96     {"udot_asimdelem_d", &Simulator::SimulateNEONDotProdByElement},
97     {"adclb_z_zzz", &Simulator::SimulateSVEAddSubCarry},
98     {"adclt_z_zzz", &Simulator::SimulateSVEAddSubCarry},
99     {"addhnb_z_zz", &Simulator::SimulateSVEAddSubHigh},
100     {"addhnt_z_zz", &Simulator::SimulateSVEAddSubHigh},
101     {"addp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
102     {"bcax_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
103     {"bdep_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
104     {"bext_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
105     {"bgrp_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
106     {"bsl1n_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
107     {"bsl2n_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
108     {"bsl_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
109     {"cadd_z_zz", &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
110     {"cdot_z_zzz", &Simulator::SimulateSVEComplexDotProduct},
111     {"cdot_z_zzzi_d", &Simulator::SimulateSVEComplexDotProduct},
112     {"cdot_z_zzzi_s", &Simulator::SimulateSVEComplexDotProduct},
113     {"cmla_z_zzz", &Simulator::SimulateSVEComplexIntMulAdd},
114     {"cmla_z_zzzi_h", &Simulator::SimulateSVEComplexIntMulAdd},
115     {"cmla_z_zzzi_s", &Simulator::SimulateSVEComplexIntMulAdd},
116     {"eor3_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
117     {"eorbt_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
118     {"eortb_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
119     {"ext_z_zi_con", &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm},
120     {"faddp_z_p_zz", &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
121     {"fcvtlt_z_p_z_h2s", &Simulator::SimulateSVEFPConvertLong},
122     {"fcvtlt_z_p_z_s2d", &Simulator::SimulateSVEFPConvertLong},
123     {"fcvtnt_z_p_z_d2s", &Simulator::Simulate_ZdS_PgM_ZnD},
124     {"fcvtnt_z_p_z_s2h", &Simulator::Simulate_ZdH_PgM_ZnS},
125     {"fcvtx_z_p_z_d2s", &Simulator::Simulate_ZdS_PgM_ZnD},
126     {"fcvtxnt_z_p_z_d2s", &Simulator::Simulate_ZdS_PgM_ZnD},
127     {"flogb_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT},
128     {"fmaxnmp_z_p_zz", &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
129     {"fmaxp_z_p_zz", &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
130     {"fminnmp_z_p_zz", &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
131     {"fminp_z_p_zz", &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
132     {"fmlalb_z_zzz", &Simulator::Simulate_ZdaS_ZnH_ZmH},
133     {"fmlalb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
134     {"fmlalt_z_zzz", &Simulator::Simulate_ZdaS_ZnH_ZmH},
135     {"fmlalt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
136     {"fmlslb_z_zzz", &Simulator::Simulate_ZdaS_ZnH_ZmH},
137     {"fmlslb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
138     {"fmlslt_z_zzz", &Simulator::Simulate_ZdaS_ZnH_ZmH},
139     {"fmlslt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
140     {"histcnt_z_p_zz", &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT},
141     {"histseg_z_zz", &Simulator::Simulate_ZdB_ZnB_ZmB},
142     {"ldnt1b_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
143     {"ldnt1b_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
144     {"ldnt1d_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
145     {"ldnt1h_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
146     {"ldnt1h_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
147     {"ldnt1sb_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
148     {"ldnt1sb_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
149     {"ldnt1sh_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
150     {"ldnt1sh_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
151     {"ldnt1sw_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
152     {"ldnt1w_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
153     {"ldnt1w_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
154     {"match_p_p_zz", &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
155     {"mla_z_zzzi_d", &Simulator::SimulateSVEMlaMlsIndex},
156     {"mla_z_zzzi_h", &Simulator::SimulateSVEMlaMlsIndex},
157     {"mla_z_zzzi_s", &Simulator::SimulateSVEMlaMlsIndex},
158     {"mls_z_zzzi_d", &Simulator::SimulateSVEMlaMlsIndex},
159     {"mls_z_zzzi_h", &Simulator::SimulateSVEMlaMlsIndex},
160     {"mls_z_zzzi_s", &Simulator::SimulateSVEMlaMlsIndex},
161     {"mul_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
162     {"mul_z_zzi_d", &Simulator::SimulateSVEMulIndex},
163     {"mul_z_zzi_h", &Simulator::SimulateSVEMulIndex},
164     {"mul_z_zzi_s", &Simulator::SimulateSVEMulIndex},
165     {"nbsl_z_zzz", &Simulator::SimulateSVEBitwiseTernary},
166     {"nmatch_p_p_zz", &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
167     {"pmul_z_zz", &Simulator::Simulate_ZdB_ZnB_ZmB},
168     {"pmullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
169     {"pmullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
170     {"raddhnb_z_zz", &Simulator::SimulateSVEAddSubHigh},
171     {"raddhnt_z_zz", &Simulator::SimulateSVEAddSubHigh},
172     {"rshrnb_z_zi", &Simulator::SimulateSVENarrow},
173     {"rshrnt_z_zi", &Simulator::SimulateSVENarrow},
174     {"rsubhnb_z_zz", &Simulator::SimulateSVEAddSubHigh},
175     {"rsubhnt_z_zz", &Simulator::SimulateSVEAddSubHigh},
176     {"saba_z_zzz", &Simulator::Simulate_ZdaT_ZnT_ZmT},
177     {"sabalb_z_zzz", &Simulator::SimulateSVEInterleavedArithLong},
178     {"sabalt_z_zzz", &Simulator::SimulateSVEInterleavedArithLong},
179     {"sabdlb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
180     {"sabdlt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
181     {"sadalp_z_p_z", &Simulator::Simulate_ZdaT_PgM_ZnTb},
182     {"saddlb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
183     {"saddlbt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
184     {"saddlt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
185     {"saddwb_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
186     {"saddwt_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
187     {"sbclb_z_zzz", &Simulator::SimulateSVEAddSubCarry},
188     {"sbclt_z_zzz", &Simulator::SimulateSVEAddSubCarry},
189     {"shadd_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
190     {"shrnb_z_zi", &Simulator::SimulateSVENarrow},
191     {"shrnt_z_zi", &Simulator::SimulateSVENarrow},
192     {"shsub_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
193     {"shsubr_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
194     {"sli_z_zzi", &Simulator::Simulate_ZdT_ZnT_const},
195     {"smaxp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
196     {"sminp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
197     {"smlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
198     {"smlalb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
199     {"smlalb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
200     {"smlalt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
201     {"smlalt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
202     {"smlalt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
203     {"smlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
204     {"smlslb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
205     {"smlslb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
206     {"smlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
207     {"smlslt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
208     {"smlslt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
209     {"smulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
210     {"smullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
211     {"smullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
212     {"smullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
213     {"smullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
214     {"smullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
215     {"smullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
216     {"splice_z_p_zz_con", &Simulator::VisitSVEVectorSplice},
217     {"sqabs_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT},
218     {"sqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
219     {"sqcadd_z_zz", &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
220     {"sqdmlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
221     {"sqdmlalb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
222     {"sqdmlalb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
223     {"sqdmlalbt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
224     {"sqdmlalt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
225     {"sqdmlalt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
226     {"sqdmlalt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
227     {"sqdmlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
228     {"sqdmlslb_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
229     {"sqdmlslb_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
230     {"sqdmlslbt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
231     {"sqdmlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
232     {"sqdmlslt_z_zzzi_d", &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
233     {"sqdmlslt_z_zzzi_s", &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
234     {"sqdmulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
235     {"sqdmulh_z_zzi_d", &Simulator::SimulateSVESaturatingMulHighIndex},
236     {"sqdmulh_z_zzi_h", &Simulator::SimulateSVESaturatingMulHighIndex},
237     {"sqdmulh_z_zzi_s", &Simulator::SimulateSVESaturatingMulHighIndex},
238     {"sqdmullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
239     {"sqdmullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
240     {"sqdmullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
241     {"sqdmullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
242     {"sqdmullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
243     {"sqdmullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
244     {"sqneg_z_p_z", &Simulator::Simulate_ZdT_PgM_ZnT},
245     {"sqrdcmlah_z_zzz", &Simulator::SimulateSVEComplexIntMulAdd},
246     {"sqrdcmlah_z_zzzi_h", &Simulator::SimulateSVEComplexIntMulAdd},
247     {"sqrdcmlah_z_zzzi_s", &Simulator::SimulateSVEComplexIntMulAdd},
248     {"sqrdmlah_z_zzz", &Simulator::SimulateSVESaturatingMulAddHigh},
249     {"sqrdmlah_z_zzzi_d", &Simulator::SimulateSVESaturatingMulAddHigh},
250     {"sqrdmlah_z_zzzi_h", &Simulator::SimulateSVESaturatingMulAddHigh},
251     {"sqrdmlah_z_zzzi_s", &Simulator::SimulateSVESaturatingMulAddHigh},
252     {"sqrdmlsh_z_zzz", &Simulator::SimulateSVESaturatingMulAddHigh},
253     {"sqrdmlsh_z_zzzi_d", &Simulator::SimulateSVESaturatingMulAddHigh},
254     {"sqrdmlsh_z_zzzi_h", &Simulator::SimulateSVESaturatingMulAddHigh},
255     {"sqrdmlsh_z_zzzi_s", &Simulator::SimulateSVESaturatingMulAddHigh},
256     {"sqrdmulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
257     {"sqrdmulh_z_zzi_d", &Simulator::SimulateSVESaturatingMulHighIndex},
258     {"sqrdmulh_z_zzi_h", &Simulator::SimulateSVESaturatingMulHighIndex},
259     {"sqrdmulh_z_zzi_s", &Simulator::SimulateSVESaturatingMulHighIndex},
260     {"sqrshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
261     {"sqrshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
262     {"sqrshrnb_z_zi", &Simulator::SimulateSVENarrow},
263     {"sqrshrnt_z_zi", &Simulator::SimulateSVENarrow},
264     {"sqrshrunb_z_zi", &Simulator::SimulateSVENarrow},
265     {"sqrshrunt_z_zi", &Simulator::SimulateSVENarrow},
266     {"sqshl_z_p_zi", &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
267     {"sqshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
268     {"sqshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
269     {"sqshlu_z_p_zi", &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
270     {"sqshrnb_z_zi", &Simulator::SimulateSVENarrow},
271     {"sqshrnt_z_zi", &Simulator::SimulateSVENarrow},
272     {"sqshrunb_z_zi", &Simulator::SimulateSVENarrow},
273     {"sqshrunt_z_zi", &Simulator::SimulateSVENarrow},
274     {"sqsub_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
275     {"sqsubr_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
276     {"sqxtnb_z_zz", &Simulator::SimulateSVENarrow},
277     {"sqxtnt_z_zz", &Simulator::SimulateSVENarrow},
278     {"sqxtunb_z_zz", &Simulator::SimulateSVENarrow},
279     {"sqxtunt_z_zz", &Simulator::SimulateSVENarrow},
280     {"srhadd_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
281     {"sri_z_zzi", &Simulator::Simulate_ZdT_ZnT_const},
282     {"srshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
283     {"srshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
284     {"srshr_z_p_zi", &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
285     {"srsra_z_zi", &Simulator::Simulate_ZdaT_ZnT_const},
286     {"sshllb_z_zi", &Simulator::SimulateSVEShiftLeftImm},
287     {"sshllt_z_zi", &Simulator::SimulateSVEShiftLeftImm},
288     {"ssra_z_zi", &Simulator::Simulate_ZdaT_ZnT_const},
289     {"ssublb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
290     {"ssublbt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
291     {"ssublt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
292     {"ssubltb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
293     {"ssubwb_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
294     {"ssubwt_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
295     {"stnt1b_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
296     {"stnt1b_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
297     {"stnt1d_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
298     {"stnt1h_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
299     {"stnt1h_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
300     {"stnt1w_z_p_ar_d_64_unscaled", &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
301     {"stnt1w_z_p_ar_s_x32_unscaled", &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
302     {"subhnb_z_zz", &Simulator::SimulateSVEAddSubHigh},
303     {"subhnt_z_zz", &Simulator::SimulateSVEAddSubHigh},
304     {"suqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
305     {"tbl_z_zz_2", &Simulator::VisitSVETableLookup},
306     {"tbx_z_zz", &Simulator::VisitSVETableLookup},
307     {"uaba_z_zzz", &Simulator::Simulate_ZdaT_ZnT_ZmT},
308     {"uabalb_z_zzz", &Simulator::SimulateSVEInterleavedArithLong},
309     {"uabalt_z_zzz", &Simulator::SimulateSVEInterleavedArithLong},
310     {"uabdlb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
311     {"uabdlt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
312     {"uadalp_z_p_z", &Simulator::Simulate_ZdaT_PgM_ZnTb},
313     {"uaddlb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
314     {"uaddlt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
315     {"uaddwb_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
316     {"uaddwt_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
317     {"uhadd_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
318     {"uhsub_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
319     {"uhsubr_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
320     {"umaxp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
321     {"uminp_z_p_zz", &Simulator::SimulateSVEIntArithPair},
322     {"umlalb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
323     {"umlalb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
324     {"umlalb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
325     {"umlalt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
326     {"umlalt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
327     {"umlalt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
328     {"umlslb_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
329     {"umlslb_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
330     {"umlslb_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
331     {"umlslt_z_zzz", &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
332     {"umlslt_z_zzzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
333     {"umlslt_z_zzzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
334     {"umulh_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmT},
335     {"umullb_z_zz", &Simulator::SimulateSVEIntMulLongVec},
336     {"umullb_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
337     {"umullb_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
338     {"umullt_z_zz", &Simulator::SimulateSVEIntMulLongVec},
339     {"umullt_z_zzi_d", &Simulator::SimulateSVESaturatingIntMulLongIdx},
340     {"umullt_z_zzi_s", &Simulator::SimulateSVESaturatingIntMulLongIdx},
341     {"uqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
342     {"uqrshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
343     {"uqrshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
344     {"uqrshrnb_z_zi", &Simulator::SimulateSVENarrow},
345     {"uqrshrnt_z_zi", &Simulator::SimulateSVENarrow},
346     {"uqshl_z_p_zi", &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
347     {"uqshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
348     {"uqshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
349     {"uqshrnb_z_zi", &Simulator::SimulateSVENarrow},
350     {"uqshrnt_z_zi", &Simulator::SimulateSVENarrow},
351     {"uqsub_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
352     {"uqsubr_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
353     {"uqxtnb_z_zz", &Simulator::SimulateSVENarrow},
354     {"uqxtnt_z_zz", &Simulator::SimulateSVENarrow},
355     {"urecpe_z_p_z", &Simulator::Simulate_ZdS_PgM_ZnS},
356     {"urhadd_z_p_zz", &Simulator::SimulateSVEHalvingAddSub},
357     {"urshl_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
358     {"urshlr_z_p_zz", &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
359     {"urshr_z_p_zi", &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
360     {"ursqrte_z_p_z", &Simulator::Simulate_ZdS_PgM_ZnS},
361     {"ursra_z_zi", &Simulator::Simulate_ZdaT_ZnT_const},
362     {"ushllb_z_zi", &Simulator::SimulateSVEShiftLeftImm},
363     {"ushllt_z_zi", &Simulator::SimulateSVEShiftLeftImm},
364     {"usqadd_z_p_zz", &Simulator::SimulateSVESaturatingArithmetic},
365     {"usra_z_zi", &Simulator::Simulate_ZdaT_ZnT_const},
366     {"usublb_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
367     {"usublt_z_zz", &Simulator::SimulateSVEInterleavedArithLong},
368     {"usubwb_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
369     {"usubwt_z_zz", &Simulator::Simulate_ZdT_ZnT_ZmTb},
370     {"whilege_p_p_rr", &Simulator::VisitSVEIntCompareScalarCountAndLimit},
371     {"whilegt_p_p_rr", &Simulator::VisitSVEIntCompareScalarCountAndLimit},
372     {"whilehi_p_p_rr", &Simulator::VisitSVEIntCompareScalarCountAndLimit},
373     {"whilehs_p_p_rr", &Simulator::VisitSVEIntCompareScalarCountAndLimit},
374     {"whilerw_p_rr", &Simulator::Simulate_PdT_Xn_Xm},
375     {"whilewr_p_rr", &Simulator::Simulate_PdT_Xn_Xm},
376     {"xar_z_zzi", &Simulator::SimulateSVEExclusiveOrRotate},
377     {"smmla_z_zzz", &Simulator::SimulateMatrixMul},
378     {"ummla_z_zzz", &Simulator::SimulateMatrixMul},
379     {"usmmla_z_zzz", &Simulator::SimulateMatrixMul},
380     {"smmla_asimdsame2_g", &Simulator::SimulateMatrixMul},
381     {"ummla_asimdsame2_g", &Simulator::SimulateMatrixMul},
382     {"usmmla_asimdsame2_g", &Simulator::SimulateMatrixMul},
383     {"fmmla_z_zzz_s", &Simulator::SimulateSVEFPMatrixMul},
384     {"fmmla_z_zzz_d", &Simulator::SimulateSVEFPMatrixMul},
385     {"ld1row_z_p_bi_u32",
386       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
387     {"ld1row_z_p_br_contiguous",
388       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
389     {"ld1rod_z_p_bi_u64",
390       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
391     {"ld1rod_z_p_br_contiguous",
392       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
393     {"ld1rob_z_p_bi_u8",
394       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
395     {"ld1rob_z_p_br_contiguous",
396       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
397     {"ld1roh_z_p_bi_u16",
398       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
399     {"ld1roh_z_p_br_contiguous",
400       &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
401     {"usdot_z_zzz_s", &Simulator::VisitSVEIntMulAddUnpredicated},
402     {"sudot_z_zzzi_s", &Simulator::VisitSVEMulIndex},
403     {"usdot_z_zzzi_s", &Simulator::VisitSVEMulIndex},
404     {"usdot_asimdsame2_d", &Simulator::VisitNEON3SameExtra},
405     {"sudot_asimdelem_d", &Simulator::SimulateNEONDotProdByElement},
406     {"usdot_asimdelem_d", &Simulator::SimulateNEONDotProdByElement},
407 };
408 
GetFormToVisitorFnMap()409 const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
410   return &FORM_TO_VISITOR;
411 }
412 
413 #ifndef PANDA_BUILD
Simulator(Decoder * decoder,FILE * stream,SimStack::Allocated stack)414 Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
415     : memory_(std::move(stack)),
416       last_instr_(NULL),
417       cpu_features_auditor_(decoder, CPUFeatures::All()) {
418 #else
419 Simulator::Simulator(PandaAllocator* allocator, Decoder* decoder, SimStack::Allocated stack, FILE* stream)
420     : memory_(std::move(stack)),
421       last_instr_(NULL),
422       allocator_(allocator),
423       cpu_features_auditor_(decoder, CPUFeatures::All()),
424       saved_cpu_features_(allocator_.Adapter()) {
425 #endif
426   // Ensure that shift operations act as the simulator expects.
427   VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
428   VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);
429 
430   // Set up a placeholder pipe for CanReadMemory.
431   VIXL_CHECK(pipe(placeholder_pipe_fd_) == 0);
432 
433   // Set up the decoder.
434   decoder_ = decoder;
435   decoder_->AppendVisitor(this);
436 
437   stream_ = stream;
438 
439 #ifndef PANDA_BUILD
440   print_disasm_ = new PrintDisassembler(stream_);
441 #else
442   print_disasm_ = allocator_.New<PrintDisassembler>(allocator, stream_);
443 #endif
444   // The Simulator and Disassembler share the same available list, held by the
445   // auditor. The Disassembler only annotates instructions with features that
446   // are _not_ available, so registering the auditor should have no effect
447   // unless the simulator is about to abort (due to missing features). In
448   // practice, this means that with trace enabled, the simulator will crash just
449   // after the disassembler prints the instruction, with the missing features
450   // enumerated.
451   print_disasm_->RegisterCPUFeaturesAuditor(&cpu_features_auditor_);
452 
453   SetColouredTrace(false);
454   trace_parameters_ = LOG_NONE;
455 
456   // We have to configure the SVE vector register length before calling
457   // ResetState().
458   SetVectorLengthInBits(kZRegMinSize);
459 
460   ResetState();
461 
462   // Print a warning about exclusive-access instructions, but only the first
463   // time they are encountered. This warning can be silenced using
464   // SilenceExclusiveAccessWarning().
465   print_exclusive_access_warning_ = true;
466 
467   guard_pages_ = false;
468 
469   // Initialize the common state of RNDR and RNDRRS.
470   uint16_t seed[3] = {11, 22, 33};
471   VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
472   memcpy(rand_state_, seed, sizeof(rand_state_));
473 
474   // Initialize all bits of pseudo predicate register to true.
475   LogicPRegister ones(pregister_all_true_);
476   ones.SetAllBits();
477 }
478 
479 void Simulator::ResetSystemRegisters() {
480   // Reset the system registers.
481   nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
482   fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
483   ResetFFR();
484 }
485 
486 void Simulator::ResetRegisters() {
487   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
488     WriteXRegister(i, 0xbadbeef);
489   }
490   // Returning to address 0 exits the Simulator.
491   WriteLr(kEndOfSimAddress);
492 }
493 
494 void Simulator::ResetVRegisters() {
495   // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
496   VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0);
497   int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes;
498   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
499     VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes());
500     vregisters_[i].NotifyAccessAsZ();
501     for (int lane = 0; lane < lane_count; lane++) {
502       // Encode the register number and (D-sized) lane into each NaN, to
503       // make them easier to trace.
504       uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) |
505                           (0x0000000000000001 * lane);
506       VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
507       VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
508       vregisters_[i].Insert(lane, nan_bits);
509     }
510   }
511 }
512 
513 void Simulator::ResetPRegisters() {
514   VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
515   int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes;
516   // Ensure the register configuration fits in this bit encoding.
517   VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX);
518   VIXL_ASSERT(lane_count <= UINT8_MAX);
519   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
520     VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes());
521     for (int lane = 0; lane < lane_count; lane++) {
522       // Encode the register number and (H-sized) lane into each lane slot.
523       uint16_t bits = (0x0100 * lane) | i;
524       pregisters_[i].Insert(lane, bits);
525     }
526   }
527 }
528 
529 void Simulator::ResetFFR() {
530   VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
531   int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes;
532   ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes)));
533 }
534 
535 void Simulator::ResetState() {
536   ResetSystemRegisters();
537   ResetRegisters();
538   ResetVRegisters();
539   ResetPRegisters();
540 
541   WriteSp(memory_.GetStack().GetBase());
542 
543   pc_ = NULL;
544   pc_modified_ = false;
545 
546   // BTI state.
547   btype_ = DefaultBType;
548   next_btype_ = DefaultBType;
549 }
550 
551 void Simulator::SetVectorLengthInBits(unsigned vector_length) {
552   VIXL_ASSERT((vector_length >= kZRegMinSize) &&
553               (vector_length <= kZRegMaxSize));
554   VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
555   vector_length_ = vector_length;
556 
557   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
558     vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes());
559   }
560   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
561     pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes());
562   }
563 
564   ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes());
565 
566   ResetVRegisters();
567   ResetPRegisters();
568   ResetFFR();
569 }
570 
571 Simulator::~Simulator() {
572   // The decoder may outlive the simulator.
573   decoder_->RemoveVisitor(print_disasm_);
574 #ifndef VIXL_USE_PANDA_ALLOC
575   delete print_disasm_;
576 #endif
577   close(placeholder_pipe_fd_[0]);
578   close(placeholder_pipe_fd_[1]);
579 }
580 
581 
582 void Simulator::Run() {
583   // Flush any written registers before executing anything, so that
584   // manually-set registers are logged _before_ the first instruction.
585   LogAllWrittenRegisters();
586 
587   while (pc_ != kEndOfSimAddress) {
588     ExecuteInstruction();
589   }
590 }
591 
592 
593 void Simulator::RunFrom(const Instruction* first) {
594   WritePc(first, NoBranchLog);
595   Run();
596 }
597 
598 
599 // clang-format off
600 const char* Simulator::xreg_names[] = {"x0",  "x1",  "x2",  "x3",  "x4",  "x5",
601                                        "x6",  "x7",  "x8",  "x9",  "x10", "x11",
602                                        "x12", "x13", "x14", "x15", "x16", "x17",
603                                        "x18", "x19", "x20", "x21", "x22", "x23",
604                                        "x24", "x25", "x26", "x27", "x28", "x29",
605                                        "lr",  "xzr", "sp"};
606 
607 const char* Simulator::wreg_names[] = {"w0",  "w1",  "w2",  "w3",  "w4",  "w5",
608                                        "w6",  "w7",  "w8",  "w9",  "w10", "w11",
609                                        "w12", "w13", "w14", "w15", "w16", "w17",
610                                        "w18", "w19", "w20", "w21", "w22", "w23",
611                                        "w24", "w25", "w26", "w27", "w28", "w29",
612                                        "w30", "wzr", "wsp"};
613 
614 const char* Simulator::breg_names[] = {"b0",  "b1",  "b2",  "b3",  "b4",  "b5",
615                                        "b6",  "b7",  "b8",  "b9",  "b10", "b11",
616                                        "b12", "b13", "b14", "b15", "b16", "b17",
617                                        "b18", "b19", "b20", "b21", "b22", "b23",
618                                        "b24", "b25", "b26", "b27", "b28", "b29",
619                                        "b30", "b31"};
620 
621 const char* Simulator::hreg_names[] = {"h0",  "h1",  "h2",  "h3",  "h4",  "h5",
622                                        "h6",  "h7",  "h8",  "h9",  "h10", "h11",
623                                        "h12", "h13", "h14", "h15", "h16", "h17",
624                                        "h18", "h19", "h20", "h21", "h22", "h23",
625                                        "h24", "h25", "h26", "h27", "h28", "h29",
626                                        "h30", "h31"};
627 
628 const char* Simulator::sreg_names[] = {"s0",  "s1",  "s2",  "s3",  "s4",  "s5",
629                                        "s6",  "s7",  "s8",  "s9",  "s10", "s11",
630                                        "s12", "s13", "s14", "s15", "s16", "s17",
631                                        "s18", "s19", "s20", "s21", "s22", "s23",
632                                        "s24", "s25", "s26", "s27", "s28", "s29",
633                                        "s30", "s31"};
634 
635 const char* Simulator::dreg_names[] = {"d0",  "d1",  "d2",  "d3",  "d4",  "d5",
636                                        "d6",  "d7",  "d8",  "d9",  "d10", "d11",
637                                        "d12", "d13", "d14", "d15", "d16", "d17",
638                                        "d18", "d19", "d20", "d21", "d22", "d23",
639                                        "d24", "d25", "d26", "d27", "d28", "d29",
640                                        "d30", "d31"};
641 
642 const char* Simulator::vreg_names[] = {"v0",  "v1",  "v2",  "v3",  "v4",  "v5",
643                                        "v6",  "v7",  "v8",  "v9",  "v10", "v11",
644                                        "v12", "v13", "v14", "v15", "v16", "v17",
645                                        "v18", "v19", "v20", "v21", "v22", "v23",
646                                        "v24", "v25", "v26", "v27", "v28", "v29",
647                                        "v30", "v31"};
648 
649 const char* Simulator::zreg_names[] = {"z0",  "z1",  "z2",  "z3",  "z4",  "z5",
650                                        "z6",  "z7",  "z8",  "z9",  "z10", "z11",
651                                        "z12", "z13", "z14", "z15", "z16", "z17",
652                                        "z18", "z19", "z20", "z21", "z22", "z23",
653                                        "z24", "z25", "z26", "z27", "z28", "z29",
654                                        "z30", "z31"};
655 
656 const char* Simulator::preg_names[] = {"p0",  "p1",  "p2",  "p3",  "p4",  "p5",
657                                        "p6",  "p7",  "p8",  "p9",  "p10", "p11",
658                                        "p12", "p13", "p14", "p15"};
659 // clang-format on
660 
661 
662 const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
663   // If the code represents the stack pointer, index the name after zr.
664   if ((code == kSPRegInternalCode) ||
665       ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
666     code = kZeroRegCode + 1;
667   }
668   VIXL_ASSERT(code < ArrayLength(wreg_names));
669   return wreg_names[code];
670 }
671 
672 
673 const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
674   // If the code represents the stack pointer, index the name after zr.
675   if ((code == kSPRegInternalCode) ||
676       ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
677     code = kZeroRegCode + 1;
678   }
679   VIXL_ASSERT(code < ArrayLength(xreg_names));
680   return xreg_names[code];
681 }
682 
683 
684 const char* Simulator::BRegNameForCode(unsigned code) {
685   VIXL_ASSERT(code < kNumberOfVRegisters);
686   return breg_names[code];
687 }
688 
689 
690 const char* Simulator::HRegNameForCode(unsigned code) {
691   VIXL_ASSERT(code < kNumberOfVRegisters);
692   return hreg_names[code];
693 }
694 
695 
696 const char* Simulator::SRegNameForCode(unsigned code) {
697   VIXL_ASSERT(code < kNumberOfVRegisters);
698   return sreg_names[code];
699 }
700 
701 
702 const char* Simulator::DRegNameForCode(unsigned code) {
703   VIXL_ASSERT(code < kNumberOfVRegisters);
704   return dreg_names[code];
705 }
706 
707 
708 const char* Simulator::VRegNameForCode(unsigned code) {
709   VIXL_ASSERT(code < kNumberOfVRegisters);
710   return vreg_names[code];
711 }
712 
713 
714 const char* Simulator::ZRegNameForCode(unsigned code) {
715   VIXL_ASSERT(code < kNumberOfZRegisters);
716   return zreg_names[code];
717 }
718 
719 
720 const char* Simulator::PRegNameForCode(unsigned code) {
721   VIXL_ASSERT(code < kNumberOfPRegisters);
722   return preg_names[code];
723 }
724 
725 SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) {
726   SimVRegister ones, result;
727   dup_immediate(kFormatVnB, ones, 0xff);
728   mov_zeroing(kFormatVnB, result, pg, ones);
729   return result;
730 }
731 
732 void Simulator::ExtractFromSimVRegister(VectorFormat vform,
733                                         SimPRegister& pd,
734                                         SimVRegister vreg) {
735   SimVRegister zero;
736   dup_immediate(kFormatVnB, zero, 0);
737   SVEIntCompareVectorsHelper(ne,
738                              vform,
739                              pd,
740                              GetPTrue(),
741                              vreg,
742                              zero,
743                              false,
744                              LeaveFlags);
745 }
746 
747 #define COLOUR(colour_code) "\033[0;" colour_code "m"
748 #define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
749 #define COLOUR_HIGHLIGHT "\033[43m"
750 #define NORMAL ""
751 #define GREY "30"
752 #define RED "31"
753 #define GREEN "32"
754 #define YELLOW "33"
755 #define BLUE "34"
756 #define MAGENTA "35"
757 #define CYAN "36"
758 #define WHITE "37"
759 void Simulator::SetColouredTrace(bool value) {
760   coloured_trace_ = value;
761 
762   clr_normal = value ? COLOUR(NORMAL) : "";
763   clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
764   clr_flag_value = value ? COLOUR(NORMAL) : "";
765   clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
766   clr_reg_value = value ? COLOUR(CYAN) : "";
767   clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
768   clr_vreg_value = value ? COLOUR(MAGENTA) : "";
769   clr_preg_name = value ? COLOUR_BOLD(GREEN) : "";
770   clr_preg_value = value ? COLOUR(GREEN) : "";
771   clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
772   clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
773   clr_warning_message = value ? COLOUR(YELLOW) : "";
774   clr_printf = value ? COLOUR(GREEN) : "";
775   clr_branch_marker = value ? COLOUR(GREY) COLOUR_HIGHLIGHT : "";
776 
777   if (value) {
778     print_disasm_->SetCPUFeaturesPrefix("// Needs: " COLOUR_BOLD(RED));
779     print_disasm_->SetCPUFeaturesSuffix(COLOUR(NORMAL));
780   } else {
781     print_disasm_->SetCPUFeaturesPrefix("// Needs: ");
782     print_disasm_->SetCPUFeaturesSuffix("");
783   }
784 }
785 
786 
787 void Simulator::SetTraceParameters(int parameters) {
788   bool disasm_before = trace_parameters_ & LOG_DISASM;
789   trace_parameters_ = parameters;
790   bool disasm_after = trace_parameters_ & LOG_DISASM;
791 
792   if (disasm_before != disasm_after) {
793     if (disasm_after) {
794       decoder_->InsertVisitorBefore(print_disasm_, this);
795     } else {
796       decoder_->RemoveVisitor(print_disasm_);
797     }
798   }
799 }
800 
801 // Helpers ---------------------------------------------------------------------
802 uint64_t Simulator::AddWithCarry(unsigned reg_size,
803                                  bool set_flags,
804                                  uint64_t left,
805                                  uint64_t right,
806                                  int carry_in) {
807   std::pair<uint64_t, uint8_t> result_and_flags =
808       AddWithCarry(reg_size, left, right, carry_in);
809   if (set_flags) {
810     uint8_t flags = result_and_flags.second;
811     ReadNzcv().SetN((flags >> 3) & 1);
812     ReadNzcv().SetZ((flags >> 2) & 1);
813     ReadNzcv().SetC((flags >> 1) & 1);
814     ReadNzcv().SetV((flags >> 0) & 1);
815     LogSystemRegister(NZCV);
816   }
817   return result_and_flags.first;
818 }
819 
820 std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size,
821                                                      uint64_t left,
822                                                      uint64_t right,
823                                                      int carry_in) {
824   VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
825   VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
826 
827   uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
828   uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
829   uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
830 
831   left &= reg_mask;
832   right &= reg_mask;
833   uint64_t result = (left + right + carry_in) & reg_mask;
834 
835   // NZCV bits, ordered N in bit 3 to V in bit 0.
836   uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0;
837   nzcv |= CalcZFlag(result) ? 4 : 0;
838 
839   // Compute the C flag by comparing the result to the max unsigned integer.
840   uint64_t max_uint_2op = max_uint - carry_in;
841   bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
842   nzcv |= C ? 2 : 0;
843 
844   // Overflow iff the sign bit is the same for the two inputs and different
845   // for the result.
846   uint64_t left_sign = left & sign_mask;
847   uint64_t right_sign = right & sign_mask;
848   uint64_t result_sign = result & sign_mask;
849   bool V = (left_sign == right_sign) && (left_sign != result_sign);
850   nzcv |= V ? 1 : 0;
851 
852   return std::make_pair(result, nzcv);
853 }
854 
855 using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
856 
857 vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) {
858   std::pair<uint64_t, uint8_t> sum_lo =
859       AddWithCarry(kXRegSize, x.second, y.second, 0);
860   int carry_in = (sum_lo.second & 0x2) >> 1;  // C flag in NZCV result.
861   std::pair<uint64_t, uint8_t> sum_hi =
862       AddWithCarry(kXRegSize, x.first, y.first, carry_in);
863   return std::make_pair(sum_hi.first, sum_lo.first);
864 }
865 
866 vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
867   // Negate the integer value. Throw an assertion when the input is INT128_MIN.
868   VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
869   x.first = ~x.first;
870   x.second = ~x.second;
871   return Add128(x, {0, 1});
872 }
873 
874 vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
875   bool neg_result = false;
876   if ((x >> 63) == 1) {
877     x = -x;
878     neg_result = !neg_result;
879   }
880   if ((y >> 63) == 1) {
881     y = -y;
882     neg_result = !neg_result;
883   }
884 
885   uint64_t x_lo = x & 0xffffffff;
886   uint64_t x_hi = x >> 32;
887   uint64_t y_lo = y & 0xffffffff;
888   uint64_t y_hi = y >> 32;
889 
890   uint64_t t1 = x_lo * y_hi;
891   uint64_t t2 = x_hi * y_lo;
892   vixl_uint128_t a = std::make_pair(0, x_lo * y_lo);
893   vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32);
894   vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32);
895   vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0);
896 
897   vixl_uint128_t result = Add128(a, b);
898   result = Add128(result, c);
899   result = Add128(result, d);
900   return neg_result ? std::make_pair(-result.first - 1, -result.second)
901                     : result;
902 }
903 
904 int64_t Simulator::ShiftOperand(unsigned reg_size,
905                                 uint64_t uvalue,
906                                 Shift shift_type,
907                                 unsigned amount) const {
908   VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
909               (reg_size == kSRegSize) || (reg_size == kDRegSize));
910   if (amount > 0) {
911     uint64_t mask = GetUintMask(reg_size);
912     bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
913     // The behavior is undefined in c++ if the shift amount greater than or
914     // equal to the register lane size. Work out the shifted result based on
915     // architectural behavior before performing the c++ type shfit operations.
916     switch (shift_type) {
917       case LSL:
918         if (amount >= reg_size) {
919           return UINT64_C(0);
920         }
921         uvalue <<= amount;
922         break;
923       case LSR:
924         if (amount >= reg_size) {
925           return UINT64_C(0);
926         }
927         uvalue >>= amount;
928         break;
929       case ASR:
930         if (amount >= reg_size) {
931           return is_negative ? ~UINT64_C(0) : UINT64_C(0);
932         }
933         uvalue >>= amount;
934         if (is_negative) {
935           // Simulate sign-extension to 64 bits.
936           uvalue |= ~UINT64_C(0) << (reg_size - amount);
937         }
938         break;
939       case ROR: {
940         uvalue = RotateRight(uvalue, amount, reg_size);
941         break;
942       }
943       default:
944         VIXL_UNIMPLEMENTED();
945         return 0;
946     }
947     uvalue &= mask;
948   }
949 
950   int64_t result;
951   memcpy(&result, &uvalue, sizeof(result));
952   return result;
953 }
954 
955 
956 int64_t Simulator::ExtendValue(unsigned reg_size,
957                                int64_t value,
958                                Extend extend_type,
959                                unsigned left_shift) const {
960   switch (extend_type) {
961     case UXTB:
962       value &= kByteMask;
963       break;
964     case UXTH:
965       value &= kHalfWordMask;
966       break;
967     case UXTW:
968       value &= kWordMask;
969       break;
970     case SXTB:
971       value &= kByteMask;
972       if ((value & 0x80) != 0) {
973         value |= ~UINT64_C(0) << 8;
974       }
975       break;
976     case SXTH:
977       value &= kHalfWordMask;
978       if ((value & 0x8000) != 0) {
979         value |= ~UINT64_C(0) << 16;
980       }
981       break;
982     case SXTW:
983       value &= kWordMask;
984       if ((value & 0x80000000) != 0) {
985         value |= ~UINT64_C(0) << 32;
986       }
987       break;
988     case UXTX:
989     case SXTX:
990       break;
991     default:
992       VIXL_UNREACHABLE();
993   }
994   return ShiftOperand(reg_size, value, LSL, left_shift);
995 }
996 
997 
998 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
999   AssertSupportedFPCR();
1000 
1001   // TODO: This assumes that the C++ implementation handles comparisons in the
1002   // way that we expect (as per AssertSupportedFPCR()).
1003   bool process_exception = false;
1004   if ((IsNaN(val0) != 0) || (IsNaN(val1) != 0)) {
1005     ReadNzcv().SetRawValue(FPUnorderedFlag);
1006     if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
1007         (trap == EnableTrap)) {
1008       process_exception = true;
1009     }
1010   } else if (val0 < val1) {
1011     ReadNzcv().SetRawValue(FPLessThanFlag);
1012   } else if (val0 > val1) {
1013     ReadNzcv().SetRawValue(FPGreaterThanFlag);
1014   } else if (val0 == val1) {
1015     ReadNzcv().SetRawValue(FPEqualFlag);
1016   } else {
1017     VIXL_UNREACHABLE();
1018   }
1019   LogSystemRegister(NZCV);
1020   if (process_exception) FPProcessException();
1021 }
1022 
1023 
1024 uint64_t Simulator::ComputeMemOperandAddress(const MemOperand& mem_op) const {
1025   VIXL_ASSERT(mem_op.IsValid());
1026   int64_t base = ReadRegister<int64_t>(mem_op.GetBaseRegister());
1027   if (mem_op.IsImmediateOffset()) {
1028     return base + mem_op.GetOffset();
1029   } else {
1030     VIXL_ASSERT(mem_op.GetRegisterOffset().IsValid());
1031     int64_t offset = ReadRegister<int64_t>(mem_op.GetRegisterOffset());
1032     unsigned shift_amount = mem_op.GetShiftAmount();
1033     if (mem_op.GetShift() != NO_SHIFT) {
1034       offset = ShiftOperand(kXRegSize, offset, mem_op.GetShift(), shift_amount);
1035     }
1036     if (mem_op.GetExtend() != NO_EXTEND) {
1037       offset = ExtendValue(kXRegSize, offset, mem_op.GetExtend(), shift_amount);
1038     }
1039     return static_cast<uint64_t>(base + offset);
1040   }
1041 }
1042 
1043 
1044 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
1045     unsigned reg_size, unsigned lane_size) {
1046   VIXL_ASSERT(reg_size >= lane_size);
1047 
1048   uint32_t format = 0;
1049   if (reg_size != lane_size) {
1050     switch (reg_size) {
1051       default:
1052         VIXL_UNREACHABLE();
1053         break;
1054       case kQRegSizeInBytes:
1055         format = kPrintRegAsQVector;
1056         break;
1057       case kDRegSizeInBytes:
1058         format = kPrintRegAsDVector;
1059         break;
1060     }
1061   }
1062 
1063   switch (lane_size) {
1064     default:
1065       VIXL_UNREACHABLE();
1066       break;
1067     case kQRegSizeInBytes:
1068       format |= kPrintReg1Q;
1069       break;
1070     case kDRegSizeInBytes:
1071       format |= kPrintReg1D;
1072       break;
1073     case kSRegSizeInBytes:
1074       format |= kPrintReg1S;
1075       break;
1076     case kHRegSizeInBytes:
1077       format |= kPrintReg1H;
1078       break;
1079     case kBRegSizeInBytes:
1080       format |= kPrintReg1B;
1081       break;
1082   }
1083   // These sizes would be duplicate case labels.
1084   VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
1085   VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
1086   VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
1087   VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
1088 
1089   return static_cast<PrintRegisterFormat>(format);
1090 }
1091 
1092 
1093 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
1094     VectorFormat vform) {
1095   switch (vform) {
1096     default:
1097       VIXL_UNREACHABLE();
1098       return kPrintReg16B;
1099     case kFormat16B:
1100       return kPrintReg16B;
1101     case kFormat8B:
1102       return kPrintReg8B;
1103     case kFormat8H:
1104       return kPrintReg8H;
1105     case kFormat4H:
1106       return kPrintReg4H;
1107     case kFormat4S:
1108       return kPrintReg4S;
1109     case kFormat2S:
1110       return kPrintReg2S;
1111     case kFormat2D:
1112       return kPrintReg2D;
1113     case kFormat1D:
1114       return kPrintReg1D;
1115 
1116     case kFormatB:
1117       return kPrintReg1B;
1118     case kFormatH:
1119       return kPrintReg1H;
1120     case kFormatS:
1121       return kPrintReg1S;
1122     case kFormatD:
1123       return kPrintReg1D;
1124 
1125     case kFormatVnB:
1126       return kPrintRegVnB;
1127     case kFormatVnH:
1128       return kPrintRegVnH;
1129     case kFormatVnS:
1130       return kPrintRegVnS;
1131     case kFormatVnD:
1132       return kPrintRegVnD;
1133   }
1134 }
1135 
1136 
1137 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
1138     VectorFormat vform) {
1139   switch (vform) {
1140     default:
1141       VIXL_UNREACHABLE();
1142       return kPrintReg16B;
1143     case kFormat8H:
1144       return kPrintReg8HFP;
1145     case kFormat4H:
1146       return kPrintReg4HFP;
1147     case kFormat4S:
1148       return kPrintReg4SFP;
1149     case kFormat2S:
1150       return kPrintReg2SFP;
1151     case kFormat2D:
1152       return kPrintReg2DFP;
1153     case kFormat1D:
1154       return kPrintReg1DFP;
1155     case kFormatH:
1156       return kPrintReg1HFP;
1157     case kFormatS:
1158       return kPrintReg1SFP;
1159     case kFormatD:
1160       return kPrintReg1DFP;
1161   }
1162 }
1163 
1164 void Simulator::PrintRegisters() {
1165   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1166     if (i == kSpRegCode) i = kSPRegInternalCode;
1167     PrintRegister(i);
1168   }
1169 }
1170 
1171 void Simulator::PrintVRegisters() {
1172   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1173     PrintVRegister(i);
1174   }
1175 }
1176 
1177 void Simulator::PrintZRegisters() {
1178   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
1179     PrintZRegister(i);
1180   }
1181 }
1182 
1183 void Simulator::PrintWrittenRegisters() {
1184   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
1185     if (registers_[i].WrittenSinceLastLog()) {
1186       if (i == kSpRegCode) i = kSPRegInternalCode;
1187       PrintRegister(i);
1188     }
1189   }
1190 }
1191 
1192 void Simulator::PrintWrittenVRegisters() {
1193   bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
1194   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
1195     if (vregisters_[i].WrittenSinceLastLog()) {
1196       // Z registers are initialised in the constructor before the user can
1197       // configure the CPU features, so we must also check for SVE here.
1198       if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
1199         PrintZRegister(i);
1200       } else {
1201         PrintVRegister(i);
1202       }
1203     }
1204   }
1205 }
1206 
1207 void Simulator::PrintWrittenPRegisters() {
1208   // P registers are initialised in the constructor before the user can
1209   // configure the CPU features, so we must check for SVE here.
1210   if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return;
1211   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
1212     if (pregisters_[i].WrittenSinceLastLog()) {
1213       PrintPRegister(i);
1214     }
1215   }
1216   if (ReadFFR().WrittenSinceLastLog()) PrintFFR();
1217 }
1218 
1219 void Simulator::PrintSystemRegisters() {
1220   PrintSystemRegister(NZCV);
1221   PrintSystemRegister(FPCR);
1222 }
1223 
1224 void Simulator::PrintRegisterValue(const uint8_t* value,
1225                                    int value_size,
1226                                    PrintRegisterFormat format) {
1227   int print_width = GetPrintRegSizeInBytes(format);
1228   VIXL_ASSERT(print_width <= value_size);
1229   for (int i = value_size - 1; i >= print_width; i--) {
1230     // Pad with spaces so that values align vertically.
1231     fprintf(stream_, "  ");
1232     // If we aren't explicitly printing a partial value, ensure that the
1233     // unprinted bits are zero.
1234     VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0));
1235   }
1236   fprintf(stream_, "0x");
1237   for (int i = print_width - 1; i >= 0; i--) {
1238     fprintf(stream_, "%02x", value[i]);
1239   }
1240 }
1241 
1242 void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value,
1243                                                 uint16_t lane_mask,
1244                                                 PrintRegisterFormat format) {
1245   VIXL_ASSERT((format & kPrintRegAsFP) != 0);
1246   int lane_size = GetPrintRegLaneSizeInBytes(format);
1247   fprintf(stream_, " (");
1248   bool last_inactive = false;
1249   const char* sep = "";
1250   for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") {
1251     bool access = (lane_mask & (1 << (i * lane_size))) != 0;
1252     if (access) {
1253       // Read the lane as a double, so we can format all FP types in the same
1254       // way. We squash NaNs, and a double can exactly represent any other value
1255       // that the smaller types can represent, so this is lossless.
1256       double element;
1257       switch (lane_size) {
1258         case kHRegSizeInBytes: {
1259           Float16 element_fp16;
1260           VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes);
1261           memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16));
1262           element = FPToDouble(element_fp16, kUseDefaultNaN);
1263           break;
1264         }
1265         case kSRegSizeInBytes: {
1266           float element_fp32;
1267           memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32));
1268           element = static_cast<double>(element_fp32);
1269           break;
1270         }
1271         case kDRegSizeInBytes: {
1272           memcpy(&element, &value[i * lane_size], sizeof(element));
1273           break;
1274         }
1275         default:
1276           VIXL_UNREACHABLE();
1277           fprintf(stream_, "{UnknownFPValue}");
1278           continue;
1279       }
1280       if (IsNaN(element)) {
1281         // The fprintf behaviour for NaNs is implementation-defined. Always
1282         // print "nan", so that traces are consistent.
1283         fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal);
1284       } else {
1285         fprintf(stream_,
1286                 "%s%s%#.4g%s",
1287                 sep,
1288                 clr_vreg_value,
1289                 element,
1290                 clr_normal);
1291       }
1292       last_inactive = false;
1293     } else if (!last_inactive) {
1294       // Replace each contiguous sequence of inactive lanes with "...".
1295       fprintf(stream_, "%s...", sep);
1296       last_inactive = true;
1297     }
1298   }
1299   fprintf(stream_, ")");
1300 }
1301 
1302 void Simulator::PrintRegister(int code,
1303                               PrintRegisterFormat format,
1304                               const char* suffix) {
1305   VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) ||
1306               (static_cast<unsigned>(code) == kSPRegInternalCode));
1307   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar);
1308   VIXL_ASSERT((format & kPrintRegAsFP) == 0);
1309 
1310   SimRegister* reg;
1311   SimRegister zero;
1312   if (code == kZeroRegCode) {
1313     reg = &zero;
1314   } else {
1315     // registers_[31] holds the SP.
1316     VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
1317     reg = &registers_[code % kNumberOfRegisters];
1318   }
1319 
1320   // We trace register writes as whole register values, implying that any
1321   // unprinted bits are all zero:
1322   //   "#       x{code}: 0x{-----value----}"
1323   //   "#       w{code}:         0x{-value}"
1324   // Stores trace partial register values, implying nothing about the unprinted
1325   // bits:
1326   //   "# x{code}<63:0>: 0x{-----value----}"
1327   //   "# x{code}<31:0>:         0x{-value}"
1328   //   "# x{code}<15:0>:             0x{--}"
1329   //   "#  x{code}<7:0>:               0x{}"
1330 
1331   bool is_partial = (format & kPrintRegPartial) != 0;
1332   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1333   std::stringstream name;
1334   if (is_partial) {
1335     name << XRegNameForCode(code) << GetPartialRegSuffix(format);
1336   } else {
1337     // Notify the register that it has been logged, but only if we're printing
1338     // all of it.
1339     reg->NotifyRegisterLogged();
1340     switch (print_reg_size) {
1341       case kWRegSize:
1342         name << WRegNameForCode(code);
1343         break;
1344       case kXRegSize:
1345         name << XRegNameForCode(code);
1346         break;
1347       default:
1348         VIXL_UNREACHABLE();
1349         return;
1350     }
1351   }
1352 
1353   fprintf(stream_,
1354           "# %s%*s: %s",
1355           clr_reg_name,
1356           kPrintRegisterNameFieldWidth,
1357           name.str().c_str(),
1358           clr_reg_value);
1359   PrintRegisterValue(*reg, format);
1360   fprintf(stream_, "%s%s", clr_normal, suffix);
1361 }
1362 
1363 void Simulator::PrintVRegister(int code,
1364                                PrintRegisterFormat format,
1365                                const char* suffix) {
1366   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters);
1367   VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) ||
1368               ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) ||
1369               ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector));
1370 
1371   // We trace register writes as whole register values, implying that any
1372   // unprinted bits are all zero:
1373   //   "#        v{code}: 0x{-------------value------------}"
1374   //   "#        d{code}:                 0x{-----value----}"
1375   //   "#        s{code}:                         0x{-value}"
1376   //   "#        h{code}:                             0x{--}"
1377   //   "#        b{code}:                               0x{}"
1378   // Stores trace partial register values, implying nothing about the unprinted
1379   // bits:
1380   //   "# v{code}<127:0>: 0x{-------------value------------}"
1381   //   "#  v{code}<63:0>:                 0x{-----value----}"
1382   //   "#  v{code}<31:0>:                         0x{-value}"
1383   //   "#  v{code}<15:0>:                             0x{--}"
1384   //   "#   v{code}<7:0>:                               0x{}"
1385 
1386   bool is_partial = ((format & kPrintRegPartial) != 0);
1387   std::stringstream name;
1388   unsigned print_reg_size = GetPrintRegSizeInBits(format);
1389   if (is_partial) {
1390     name << VRegNameForCode(code) << GetPartialRegSuffix(format);
1391   } else {
1392     // Notify the register that it has been logged, but only if we're printing
1393     // all of it.
1394     vregisters_[code].NotifyRegisterLogged();
1395     switch (print_reg_size) {
1396       case kBRegSize:
1397         name << BRegNameForCode(code);
1398         break;
1399       case kHRegSize:
1400         name << HRegNameForCode(code);
1401         break;
1402       case kSRegSize:
1403         name << SRegNameForCode(code);
1404         break;
1405       case kDRegSize:
1406         name << DRegNameForCode(code);
1407         break;
1408       case kQRegSize:
1409         name << VRegNameForCode(code);
1410         break;
1411       default:
1412         VIXL_UNREACHABLE();
1413         return;
1414     }
1415   }
1416 
1417   fprintf(stream_,
1418           "# %s%*s: %s",
1419           clr_vreg_name,
1420           kPrintRegisterNameFieldWidth,
1421           name.str().c_str(),
1422           clr_vreg_value);
1423   PrintRegisterValue(vregisters_[code], format);
1424   fprintf(stream_, "%s", clr_normal);
1425   if ((format & kPrintRegAsFP) != 0) {
1426     PrintRegisterValueFPAnnotations(vregisters_[code], format);
1427   }
1428   fprintf(stream_, "%s", suffix);
1429 }
1430 
1431 void Simulator::PrintVRegistersForStructuredAccess(int rt_code,
1432                                                    int reg_count,
1433                                                    uint16_t focus_mask,
1434                                                    PrintRegisterFormat format) {
1435   bool print_fp = (format & kPrintRegAsFP) != 0;
1436   // Suppress FP formatting, so we can specify the lanes we're interested in.
1437   PrintRegisterFormat format_no_fp =
1438       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1439 
1440   for (int r = 0; r < reg_count; r++) {
1441     int code = (rt_code + r) % kNumberOfVRegisters;
1442     PrintVRegister(code, format_no_fp, "");
1443     if (print_fp) {
1444       PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format);
1445     }
1446     fprintf(stream_, "\n");
1447   }
1448 }
1449 
1450 void Simulator::PrintZRegistersForStructuredAccess(int rt_code,
1451                                                    int q_index,
1452                                                    int reg_count,
1453                                                    uint16_t focus_mask,
1454                                                    PrintRegisterFormat format) {
1455   bool print_fp = (format & kPrintRegAsFP) != 0;
1456   // Suppress FP formatting, so we can specify the lanes we're interested in.
1457   PrintRegisterFormat format_no_fp =
1458       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
1459 
1460   PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format);
1461 
1462   const unsigned size = kQRegSizeInBytes;
1463   unsigned byte_index = q_index * size;
1464   const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index;
1465   VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes());
1466 
1467   for (int r = 0; r < reg_count; r++) {
1468     int code = (rt_code + r) % kNumberOfZRegisters;
1469     PrintPartialZRegister(code, q_index, format_no_fp, "");
1470     if (print_fp) {
1471       PrintRegisterValueFPAnnotations(value, focus_mask, format_q);
1472     }
1473     fprintf(stream_, "\n");
1474   }
1475 }
1476 
1477 void Simulator::PrintZRegister(int code, PrintRegisterFormat format) {
1478   // We're going to print the register in parts, so force a partial format.
1479   format = GetPrintRegPartial(format);
1480   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1481   int vl = GetVectorLengthInBits();
1482   VIXL_ASSERT((vl % kQRegSize) == 0);
1483   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1484     PrintPartialZRegister(code, i, format);
1485   }
1486   vregisters_[code].NotifyRegisterLogged();
1487 }
1488 
1489 void Simulator::PrintPRegister(int code, PrintRegisterFormat format) {
1490   // We're going to print the register in parts, so force a partial format.
1491   format = GetPrintRegPartial(format);
1492   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1493   int vl = GetVectorLengthInBits();
1494   VIXL_ASSERT((vl % kQRegSize) == 0);
1495   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1496     PrintPartialPRegister(code, i, format);
1497   }
1498   pregisters_[code].NotifyRegisterLogged();
1499 }
1500 
1501 void Simulator::PrintFFR(PrintRegisterFormat format) {
1502   // We're going to print the register in parts, so force a partial format.
1503   format = GetPrintRegPartial(format);
1504   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1505   int vl = GetVectorLengthInBits();
1506   VIXL_ASSERT((vl % kQRegSize) == 0);
1507   SimPRegister& ffr = ReadFFR();
1508   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
1509     PrintPartialPRegister("FFR", ffr, i, format);
1510   }
1511   ffr.NotifyRegisterLogged();
1512 }
1513 
1514 void Simulator::PrintPartialZRegister(int code,
1515                                       int q_index,
1516                                       PrintRegisterFormat format,
1517                                       const char* suffix) {
1518   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters);
1519   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1520   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1521   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1522 
1523   // We _only_ trace partial Z register values in Q-sized chunks, because
1524   // they're often too large to reasonably fit on a single line. Each line
1525   // implies nothing about the unprinted bits.
1526   //   "# z{code}<127:0>: 0x{-------------value------------}"
1527 
1528   format = GetPrintRegAsQChunkOfSVE(format);
1529 
1530   const unsigned size = kQRegSizeInBytes;
1531   unsigned byte_index = q_index * size;
1532   const uint8_t* value = vregisters_[code].GetBytes() + byte_index;
1533   VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes());
1534 
1535   int lsb = q_index * kQRegSize;
1536   int msb = lsb + kQRegSize - 1;
1537   std::stringstream name;
1538   name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>';
1539 
1540   fprintf(stream_,
1541           "# %s%*s: %s",
1542           clr_vreg_name,
1543           kPrintRegisterNameFieldWidth,
1544           name.str().c_str(),
1545           clr_vreg_value);
1546   PrintRegisterValue(value, size, format);
1547   fprintf(stream_, "%s", clr_normal);
1548   if ((format & kPrintRegAsFP) != 0) {
1549     PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format);
1550   }
1551   fprintf(stream_, "%s", suffix);
1552 }
1553 
1554 void Simulator::PrintPartialPRegister(const char* name,
1555                                       const SimPRegister& reg,
1556                                       int q_index,
1557                                       PrintRegisterFormat format,
1558                                       const char* suffix) {
1559   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
1560   VIXL_ASSERT((format & kPrintRegPartial) != 0);
1561   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
1562 
1563   // We don't currently use the format for anything here.
1564   USE(format);
1565 
1566   // We _only_ trace partial P register values, because they're often too large
1567   // to reasonably fit on a single line. Each line implies nothing about the
1568   // unprinted bits.
1569   //
1570   // We print values in binary, with spaces between each bit, in order for the
1571   // bits to align with the Z register bytes that they predicate.
1572   //   "# {name}<15:0>: 0b{-------------value------------}"
1573 
1574   int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit;
1575   int lsb = q_index * print_size_in_bits;
1576   int msb = lsb + print_size_in_bits - 1;
1577   std::stringstream prefix;
1578   prefix << name << '<' << msb << ':' << lsb << '>';
1579 
1580   fprintf(stream_,
1581           "# %s%*s: %s0b",
1582           clr_preg_name,
1583           kPrintRegisterNameFieldWidth,
1584           prefix.str().c_str(),
1585           clr_preg_value);
1586   for (int i = msb; i >= lsb; i--) {
1587     fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0');
1588   }
1589   fprintf(stream_, "%s%s", clr_normal, suffix);
1590 }
1591 
1592 void Simulator::PrintPartialPRegister(int code,
1593                                       int q_index,
1594                                       PrintRegisterFormat format,
1595                                       const char* suffix) {
1596   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters);
1597   PrintPartialPRegister(PRegNameForCode(code),
1598                         pregisters_[code],
1599                         q_index,
1600                         format,
1601                         suffix);
1602 }
1603 
1604 void Simulator::PrintSystemRegister(SystemRegister id) {
1605   switch (id) {
1606     case NZCV:
1607       fprintf(stream_,
1608               "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
1609               clr_flag_name,
1610               clr_flag_value,
1611               ReadNzcv().GetN(),
1612               ReadNzcv().GetZ(),
1613               ReadNzcv().GetC(),
1614               ReadNzcv().GetV(),
1615               clr_normal);
1616       break;
1617     case FPCR: {
1618       static const char* rmode[] = {"0b00 (Round to Nearest)",
1619                                     "0b01 (Round towards Plus Infinity)",
1620                                     "0b10 (Round towards Minus Infinity)",
1621                                     "0b11 (Round towards Zero)"};
1622       VIXL_ASSERT(ReadFpcr().GetRMode() < ArrayLength(rmode));
1623       fprintf(stream_,
1624               "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
1625               clr_flag_name,
1626               clr_flag_value,
1627               ReadFpcr().GetAHP(),
1628               ReadFpcr().GetDN(),
1629               ReadFpcr().GetFZ(),
1630               rmode[ReadFpcr().GetRMode()],
1631               clr_normal);
1632       break;
1633     }
1634     default:
1635       VIXL_UNREACHABLE();
1636   }
1637 }
1638 
1639 uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
1640                                        uint16_t future_access_mask,
1641                                        int struct_element_count,
1642                                        int lane_size_in_bytes,
1643                                        const char* op,
1644                                        uintptr_t address,
1645                                        int reg_size_in_bytes) {
1646   // We assume that at least one lane is accessed.
1647   VIXL_ASSERT(access_mask != 0);
1648   VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) ||
1649               (reg_size_in_bytes == kQRegSizeInBytes));
1650   bool started_annotation = false;
1651   // Indent to match the register field, the fixed formatting, and the value
1652   // prefix ("0x"): "# {name}: 0x"
1653   fprintf(stream_, "# %*s    ", kPrintRegisterNameFieldWidth, "");
1654   // First, annotate the lanes (byte by byte).
1655   for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
1656     bool access = (access_mask & (1 << lane)) != 0;
1657     bool future = (future_access_mask & (1 << lane)) != 0;
1658     if (started_annotation) {
1659       // If we've started an annotation, draw a horizontal line in addition to
1660       // any other symbols.
1661       if (access) {
1662         fprintf(stream_, "─╨");
1663       } else if (future) {
1664         fprintf(stream_, "─║");
1665       } else {
1666         fprintf(stream_, "──");
1667       }
1668     } else {
1669       if (access) {
1670         started_annotation = true;
1671         fprintf(stream_, " ╙");
1672       } else if (future) {
1673         fprintf(stream_, " ║");
1674       } else {
1675         fprintf(stream_, "  ");
1676       }
1677     }
1678   }
1679   VIXL_ASSERT(started_annotation);
1680   fprintf(stream_, "─ 0x");
1681   int lane_size_in_nibbles = lane_size_in_bytes * 2;
1682   // Print the most-significant struct element first.
1683   const char* sep = "";
1684   for (int i = struct_element_count - 1; i >= 0; i--) {
1685     int offset = lane_size_in_bytes * i;
1686     uint64_t element = MemReadUint(lane_size_in_bytes, address + offset);
1687     fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, element);
1688     sep = "'";
1689   }
1690   fprintf(stream_,
1691           " %s %s0x%016" PRIxPTR "%s\n",
1692           op,
1693           clr_memory_address,
1694           address,
1695           clr_normal);
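  // Return the lanes that still have to be printed, clearing the ones handled
  // above, so that callers can consume the access mask lane by lane.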
1696   return future_access_mask & ~access_mask;
1697 }
1698 
1699 void Simulator::PrintAccess(int code,
1700                             PrintRegisterFormat format,
1701                             const char* op,
1702                             uintptr_t address) {
1703   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1704   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1705   if ((format & kPrintRegPartial) == 0) {
1706     registers_[code].NotifyRegisterLogged();
1707   }
1708   // Scalar-format accesses use a simple format:
1709   //   "# {reg}: 0x{value} -> {address}"
1710 
1711   // Suppress the newline, so the access annotation goes on the same line.
1712   PrintRegister(code, format, "");
1713   fprintf(stream_,
1714           " %s %s0x%016" PRIxPTR "%s\n",
1715           op,
1716           clr_memory_address,
1717           address,
1718           clr_normal);
1719 }
1720 
1721 void Simulator::PrintVAccess(int code,
1722                              PrintRegisterFormat format,
1723                              const char* op,
1724                              uintptr_t address) {
1725   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1726 
1727   // Scalar-format accesses use a simple format:
1728   //   "# v{code}: 0x{value} -> {address}"
1729 
1730   // Suppress the newline, so the access annotation goes on the same line.
1731   PrintVRegister(code, format, "");
1732   fprintf(stream_,
1733           " %s %s0x%016" PRIxPTR "%s\n",
1734           op,
1735           clr_memory_address,
1736           address,
1737           clr_normal);
1738 }
1739 
1740 void Simulator::PrintVStructAccess(int rt_code,
1741                                    int reg_count,
1742                                    PrintRegisterFormat format,
1743                                    const char* op,
1744                                    uintptr_t address) {
1745   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1746 
1747   // For example:
1748   //   "# v{code}: 0x{value}"
1749   //   "#     ...: 0x{value}"
1750   //   "#              ║   ╙─ {struct_value} -> {lowest_address}"
1751   //   "#              ╙───── {struct_value} -> {highest_address}"
1752 
1753   uint16_t lane_mask = GetPrintRegLaneMask(format);
1754   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1755 
1756   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
1757   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
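  // The lane at byte offset i of each register is accessed at
  // (address + i * reg_count), because the reg_count registers are interleaved
  // lane by lane in memory.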
1758   for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) {
1759     uint16_t access_mask = 1 << i;
1760     VIXL_ASSERT((lane_mask & access_mask) != 0);
1761     lane_mask = PrintPartialAccess(access_mask,
1762                                    lane_mask,
1763                                    reg_count,
1764                                    lane_size_in_bytes,
1765                                    op,
1766                                    address + (i * reg_count));
1767   }
1768 }
1769 
1770 void Simulator::PrintVSingleStructAccess(int rt_code,
1771                                          int reg_count,
1772                                          int lane,
1773                                          PrintRegisterFormat format,
1774                                          const char* op,
1775                                          uintptr_t address) {
1776   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1777 
1778   // For example:
1779   //   "# v{code}: 0x{value}"
1780   //   "#     ...: 0x{value}"
1781   //   "#              ╙───── {struct_value} -> {address}"
1782 
1783   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1784   uint16_t lane_mask = 1 << (lane * lane_size_in_bytes);
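  // The mask has one bit per byte; setting the bit for the lane's lowest byte
  // marks that single lane as accessed.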
1785   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1786   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1787 }
1788 
1789 void Simulator::PrintVReplicatingStructAccess(int rt_code,
1790                                               int reg_count,
1791                                               PrintRegisterFormat format,
1792                                               const char* op,
1793                                               uintptr_t address) {
1794   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1795 
1796   // For example:
1797   //   "# v{code}: 0x{value}"
1798   //   "#     ...: 0x{value}"
1799   //   "#            ╙─╨─╨─╨─ {struct_value} -> {address}"
1800 
1801   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
1802   uint16_t lane_mask = GetPrintRegLaneMask(format);
1803   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
1804   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
1805 }
1806 
1807 void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) {
1808   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1809 
1810   // Scalar-format accesses are split into separate chunks, each of which uses a
1811   // simple format:
1812   //   "#   z{code}<127:0>: 0x{value} -> {address}"
1813   //   "# z{code}<255:128>: 0x{value} -> {address + 16}"
1814   //   "# z{code}<383:256>: 0x{value} -> {address + 32}"
1815   // etc
1816 
1817   int vl = GetVectorLengthInBits();
1818   VIXL_ASSERT((vl % kQRegSize) == 0);
1819   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1820     // Suppress the newline, so the access annotation goes on the same line.
1821     PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, "");
1822     fprintf(stream_,
1823             " %s %s0x%016" PRIxPTR "%s\n",
1824             op,
1825             clr_memory_address,
1826             address,
1827             clr_normal);
1828     address += kQRegSizeInBytes;
1829   }
1830 }
1831 
1832 void Simulator::PrintZStructAccess(int rt_code,
1833                                    int reg_count,
1834                                    const LogicPRegister& pg,
1835                                    PrintRegisterFormat format,
1836                                    int msize_in_bytes,
1837                                    const char* op,
1838                                    const LogicSVEAddressVector& addr) {
1839   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1840 
1841   // For example:
1842   //   "# z{code}<255:128>: 0x{value}"
1843   //   "#     ...<255:128>: 0x{value}"
1844   //   "#                       ║   ╙─ {struct_value} -> {first_address}"
1845   //   "#                       ╙───── {struct_value} -> {last_address}"
1846 
1847   // We're going to print the register in parts, so force a partial format.
1848   bool skip_inactive_chunks = (format & kPrintRegPartial) != 0;
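  // If the caller passed a partial format, Q-sized chunks with no active lanes
  // are skipped entirely (see the `pred == 0` checks below).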
1849   format = GetPrintRegPartial(format);
1850 
1851   int esize_in_bytes = GetPrintRegLaneSizeInBytes(format);
1852   int vl = GetVectorLengthInBits();
1853   VIXL_ASSERT((vl % kQRegSize) == 0);
1854   int lanes_per_q = kQRegSizeInBytes / esize_in_bytes;
1855   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1856     uint16_t pred =
1857         pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format);
1858     if ((pred == 0) && skip_inactive_chunks) continue;
1859 
1860     PrintZRegistersForStructuredAccess(rt_code,
1861                                        q_index,
1862                                        reg_count,
1863                                        pred,
1864                                        format);
1865     if (pred == 0) {
1866       // This register chunk has no active lanes. The loop below would print
1867       // nothing, so leave a blank line to keep structures grouped together.
1868       fprintf(stream_, "#\n");
1869       continue;
1870     }
1871     for (int i = 0; i < lanes_per_q; i++) {
1872       uint16_t access = 1 << (i * esize_in_bytes);
1873       int lane = (q_index * lanes_per_q) + i;
1874       // Skip inactive lanes.
1875       if ((pred & access) == 0) continue;
1876       pred = PrintPartialAccess(access,
1877                                 pred,
1878                                 reg_count,
1879                                 msize_in_bytes,
1880                                 op,
1881                                 addr.GetStructAddress(lane));
1882     }
1883   }
1884 
1885   // We print the whole register, even for stores.
1886   for (int i = 0; i < reg_count; i++) {
1887     vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged();
1888   }
1889 }
1890 
1891 void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) {
1892   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
1893 
1894   // Scalar-format accesses are split into separate chunks, each of which uses a
1895   // simple format:
1896   //   "#  p{code}<15:0>: 0b{value} -> {address}"
1897   //   "# p{code}<31:16>: 0b{value} -> {address + 2}"
1898   //   "# p{code}<47:32>: 0b{value} -> {address + 4}"
1899   // etc
1900 
1901   int vl = GetVectorLengthInBits();
1902   VIXL_ASSERT((vl % kQRegSize) == 0);
1903   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
1904     // Suppress the newline, so the access annotation goes on the same line.
1905     PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, "");
1906     fprintf(stream_,
1907             " %s %s0x%016" PRIxPTR "%s\n",
1908             op,
1909             clr_memory_address,
1910             address,
1911             clr_normal);
1912     address += kQRegSizeInBytes;
1913   }
1914 }
1915 
1916 void Simulator::PrintRead(int rt_code,
1917                           PrintRegisterFormat format,
1918                           uintptr_t address) {
1919   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1920   registers_[rt_code].NotifyRegisterLogged();
1921   PrintAccess(rt_code, format, "<-", address);
1922 }
1923 
1924 void Simulator::PrintExtendingRead(int rt_code,
1925                                    PrintRegisterFormat format,
1926                                    int access_size_in_bytes,
1927                                    uintptr_t address) {
1928   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
1929   if (access_size_in_bytes == reg_size_in_bytes) {
1930     // There is no extension here, so print a simple load.
1931     PrintRead(rt_code, format, address);
1932     return;
1933   }
1934   VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes);
1935 
1936   // For sign- and zero-extension, make it clear that the resulting register
1937   // value is different from what is loaded from memory.
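  // For example (illustrative): a sign-extending byte load prints the whole
  // extended register value, then annotates only the single byte that was read
  // from memory.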
1938   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1939   registers_[rt_code].NotifyRegisterLogged();
1940   PrintRegister(rt_code, format);
1941   PrintPartialAccess(1,
1942                      0,
1943                      1,
1944                      access_size_in_bytes,
1945                      "<-",
1946                      address,
1947                      kXRegSizeInBytes);
1948 }
1949 
1950 void Simulator::PrintVRead(int rt_code,
1951                            PrintRegisterFormat format,
1952                            uintptr_t address) {
1953   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1954   vregisters_[rt_code].NotifyRegisterLogged();
1955   PrintVAccess(rt_code, format, "<-", address);
1956 }
1957 
1958 void Simulator::PrintWrite(int rt_code,
1959                            PrintRegisterFormat format,
1960                            uintptr_t address) {
1961   // Because this trace doesn't represent a change to the source register's
1962   // value, only print the relevant part of the value.
1963   format = GetPrintRegPartial(format);
1964   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1965   registers_[rt_code].NotifyRegisterLogged();
1966   PrintAccess(rt_code, format, "->", address);
1967 }
1968 
1969 void Simulator::PrintVWrite(int rt_code,
1970                             PrintRegisterFormat format,
1971                             uintptr_t address) {
1972   // Because this trace doesn't represent a change to the source register's
1973   // value, only print the relevant part of the value.
1974   format = GetPrintRegPartial(format);
1975   // It only makes sense to write scalar values here. Vectors are handled by
1976   // PrintVStructAccess.
1977   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
1978   PrintVAccess(rt_code, format, "->", address);
1979 }
1980 
1981 void Simulator::PrintTakenBranch(const Instruction* target) {
1982   fprintf(stream_,
1983           "# %sBranch%s to 0x%016" PRIx64 ".\n",
1984           clr_branch_marker,
1985           clr_normal,
1986           reinterpret_cast<uint64_t>(target));
1987 }
1988 
1989 // Visitors---------------------------------------------------------------------
1990 
1991 
1992 void Simulator::Visit(Metadata* metadata, const Instruction* instr) {
1993   VIXL_ASSERT(metadata->count("form") > 0);
1994   const std::string& form = (*metadata)["form"];
1995   const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap();
1996   if ((fv->count(form) > 0) && fv->at(form)) {
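    // Record a hash of the form so that visitors shared between several
    // encodings (the Hash("...") switches below) can tell them apart.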
1997     form_hash_ = Hash(form.c_str());
1998     fv->at(form)(this, instr);
1999   } else {
2000     VisitUnimplemented(instr);
2001   }
2002 }
2003 
2004 void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2005   VectorFormat vform = instr->GetSVEVectorFormat();
2006   SimPRegister& pd = ReadPRegister(instr->GetPd());
2007   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2008   SimVRegister& zm = ReadVRegister(instr->GetRm());
2009   SimVRegister& zn = ReadVRegister(instr->GetRn());
2010 
2011   switch (form_hash_) {
2012     case Hash("match_p_p_zz"):
2013       match(vform, pd, zn, zm, /* negate_match = */ false);
2014       break;
2015     case Hash("nmatch_p_p_zz"):
2016       match(vform, pd, zn, zm, /* negate_match = */ true);
2017       break;
2018     default:
2019       VIXL_UNIMPLEMENTED();
2020   }
2021   mov_zeroing(pd, pg, pd);
2022   PredTest(vform, pg, pd);
2023 }
2024 
2025 void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) {
2026   VectorFormat vform = instr->GetSVEVectorFormat();
2027   SimPRegister& pd = ReadPRegister(instr->GetPd());
2028   uint64_t src1 = ReadXRegister(instr->GetRn());
2029   uint64_t src2 = ReadXRegister(instr->GetRm());
2030 
2031   uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1);
2032   absdiff >>= LaneSizeInBytesLog2FromFormat(vform);
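  // For example (illustrative): with D-sized lanes, addresses 0x1000 and
  // 0x1010 give absdiff = 16 >> 3 = 2, so at most lanes 0 and 1 are made
  // active below.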
2033 
2034   bool no_conflict = false;
2035   switch (form_hash_) {
2036     case Hash("whilerw_p_rr"):
2037       no_conflict = (absdiff == 0);
2038       break;
2039     case Hash("whilewr_p_rr"):
2040       no_conflict = (absdiff == 0) || (src2 <= src1);
2041       break;
2042     default:
2043       VIXL_UNIMPLEMENTED();
2044   }
2045 
2046   LogicPRegister dst(pd);
2047   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2048     dst.SetActive(vform,
2049                   i,
2050                   no_conflict || (static_cast<uint64_t>(i) < absdiff));
2051   }
2052 
2053   PredTest(vform, GetPTrue(), pd);
2054 }
2055 
2056 void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) {
2057   VIXL_ASSERT(form_hash_ == Hash("ext_z_zi_con"));
2058 
2059   SimVRegister& zd = ReadVRegister(instr->GetRd());
2060   SimVRegister& zn = ReadVRegister(instr->GetRn());
2061   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
2062 
2063   int index = instr->GetSVEExtractImmediate();
2064   int vl = GetVectorLengthInBytes();
2065   index = (index >= vl) ? 0 : index;
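  // For example (illustrative): with a 32-byte vector length, an index of 3
  // makes zd bytes 3..31 of zn followed by bytes 0..2 of zn2, while an index
  // of 32 or more is treated as 0, leaving zd a copy of zn.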
2066 
2067   ext(kFormatVnB, zd, zn, zn2, index);
2068 }
2069 
2070 void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) {
2071   SimVRegister& zd = ReadVRegister(instr->GetRd());
2072   SimVRegister& zm = ReadVRegister(instr->GetRm());
2073   SimVRegister& zn = ReadVRegister(instr->GetRn());
2074 
2075   switch (form_hash_) {
2076     case Hash("histseg_z_zz"):
2077       if (instr->GetSVEVectorFormat() == kFormatVnB) {
2078         histogram(kFormatVnB,
2079                   zd,
2080                   GetPTrue(),
2081                   zn,
2082                   zm,
2083                   /* do_segmented = */ true);
2084       } else {
2085         VIXL_UNIMPLEMENTED();
2086       }
2087       break;
2088     case Hash("pmul_z_zz"):
2089       pmul(kFormatVnB, zd, zn, zm);
2090       break;
2091     default:
2092       VIXL_UNIMPLEMENTED();
2093   }
2094 }
2095 
2096 void Simulator::SimulateSVEMulIndex(const Instruction* instr) {
2097   VectorFormat vform = instr->GetSVEVectorFormat();
2098   SimVRegister& zd = ReadVRegister(instr->GetRd());
2099   SimVRegister& zn = ReadVRegister(instr->GetRn());
2100 
2101   // The encoding for B- and H-sized lanes is redefined so that it encodes
2102   // the most significant bit of the index for H-sized lanes; B-sized lanes
2103   // are not supported.
2104   if (vform == kFormatVnB) vform = kFormatVnH;
2105 
2106   VIXL_ASSERT((form_hash_ == Hash("mul_z_zzi_d")) ||
2107               (form_hash_ == Hash("mul_z_zzi_h")) ||
2108               (form_hash_ == Hash("mul_z_zzi_s")));
2109 
2110   SimVRegister temp;
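  // The index selects an element within each 128-bit segment of zm, so
  // duplicate that element across its segment and then reuse the plain vector
  // multiply. For example (illustrative), "mul z0.s, z1.s, z2.s[1]" multiplies
  // every S lane of z1 by element 1 of the corresponding segment of z2.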
2111   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2112   mul(vform, zd, zn, temp);
2113 }
2114 
2115 void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) {
2116   VectorFormat vform = instr->GetSVEVectorFormat();
2117   SimVRegister& zda = ReadVRegister(instr->GetRd());
2118   SimVRegister& zn = ReadVRegister(instr->GetRn());
2119 
2120   // The encoding for B- and H-sized lanes is redefined so that it encodes
2121   // the most significant bit of the index for H-sized lanes; B-sized lanes
2122   // are not supported.
2123   if (vform == kFormatVnB) vform = kFormatVnH;
2124 
2125   VIXL_ASSERT((form_hash_ == Hash("mla_z_zzzi_d")) ||
2126               (form_hash_ == Hash("mla_z_zzzi_h")) ||
2127               (form_hash_ == Hash("mla_z_zzzi_s")) ||
2128               (form_hash_ == Hash("mls_z_zzzi_d")) ||
2129               (form_hash_ == Hash("mls_z_zzzi_h")) ||
2130               (form_hash_ == Hash("mls_z_zzzi_s")));
2131 
2132   SimVRegister temp;
2133   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2134   if (instr->ExtractBit(10) == 0) {
2135     mla(vform, zda, zda, zn, temp);
2136   } else {
2137     mls(vform, zda, zda, zn, temp);
2138   }
2139 }
2140 
2141 void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) {
2142   VectorFormat vform = instr->GetSVEVectorFormat();
2143   SimVRegister& zd = ReadVRegister(instr->GetRd());
2144   SimVRegister& zn = ReadVRegister(instr->GetRn());
2145 
2146   // The encoding for B- and H-sized lanes is redefined so that it encodes
2147   // the most significant bit of the index for H-sized lanes; B-sized lanes
2148   // are not supported.
2149   if (vform == kFormatVnB) {
2150     vform = kFormatVnH;
2151   }
2152 
2153   SimVRegister temp;
2154   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
2155   switch (form_hash_) {
2156     case Hash("sqdmulh_z_zzi_h"):
2157     case Hash("sqdmulh_z_zzi_s"):
2158     case Hash("sqdmulh_z_zzi_d"):
2159       sqdmulh(vform, zd, zn, temp);
2160       break;
2161     case Hash("sqrdmulh_z_zzi_h"):
2162     case Hash("sqrdmulh_z_zzi_s"):
2163     case Hash("sqrdmulh_z_zzi_d"):
2164       sqrdmulh(vform, zd, zn, temp);
2165       break;
2166     default:
2167       VIXL_UNIMPLEMENTED();
2168   }
2169 }
2170 
2171 void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
2172   VectorFormat vform = instr->GetSVEVectorFormat();
2173   SimVRegister& zd = ReadVRegister(instr->GetRd());
2174   SimVRegister& zn = ReadVRegister(instr->GetRn());
2175 
2176   SimVRegister temp, zm_idx, zn_b, zn_t;
2177   // Instead of calling the indexed form of the instruction logic, we call the
2178   // vector form, which reuses the existing function logic without modification.
2179   // Select the specified elements based on the index input and then pack them
2180   // into the corresponding positions.
2181   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2182   dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
2183   pack_even_elements(vform_half, zm_idx, temp);
2184 
2185   pack_even_elements(vform_half, zn_b, zn);
2186   pack_odd_elements(vform_half, zn_t, zn);
2187 
2188   switch (form_hash_) {
2189     case Hash("smullb_z_zzi_s"):
2190     case Hash("smullb_z_zzi_d"):
2191       smull(vform, zd, zn_b, zm_idx);
2192       break;
2193     case Hash("smullt_z_zzi_s"):
2194     case Hash("smullt_z_zzi_d"):
2195       smull(vform, zd, zn_t, zm_idx);
2196       break;
2197     case Hash("sqdmullb_z_zzi_d"):
2198       sqdmull(vform, zd, zn_b, zm_idx);
2199       break;
2200     case Hash("sqdmullt_z_zzi_d"):
2201       sqdmull(vform, zd, zn_t, zm_idx);
2202       break;
2203     case Hash("umullb_z_zzi_s"):
2204     case Hash("umullb_z_zzi_d"):
2205       umull(vform, zd, zn_b, zm_idx);
2206       break;
2207     case Hash("umullt_z_zzi_s"):
2208     case Hash("umullt_z_zzi_d"):
2209       umull(vform, zd, zn_t, zm_idx);
2210       break;
2211     case Hash("sqdmullb_z_zzi_s"):
2212       sqdmull(vform, zd, zn_b, zm_idx);
2213       break;
2214     case Hash("sqdmullt_z_zzi_s"):
2215       sqdmull(vform, zd, zn_t, zm_idx);
2216       break;
2217     case Hash("smlalb_z_zzzi_s"):
2218     case Hash("smlalb_z_zzzi_d"):
2219       smlal(vform, zd, zn_b, zm_idx);
2220       break;
2221     case Hash("smlalt_z_zzzi_s"):
2222     case Hash("smlalt_z_zzzi_d"):
2223       smlal(vform, zd, zn_t, zm_idx);
2224       break;
2225     case Hash("smlslb_z_zzzi_s"):
2226     case Hash("smlslb_z_zzzi_d"):
2227       smlsl(vform, zd, zn_b, zm_idx);
2228       break;
2229     case Hash("smlslt_z_zzzi_s"):
2230     case Hash("smlslt_z_zzzi_d"):
2231       smlsl(vform, zd, zn_t, zm_idx);
2232       break;
2233     case Hash("umlalb_z_zzzi_s"):
2234     case Hash("umlalb_z_zzzi_d"):
2235       umlal(vform, zd, zn_b, zm_idx);
2236       break;
2237     case Hash("umlalt_z_zzzi_s"):
2238     case Hash("umlalt_z_zzzi_d"):
2239       umlal(vform, zd, zn_t, zm_idx);
2240       break;
2241     case Hash("umlslb_z_zzzi_s"):
2242     case Hash("umlslb_z_zzzi_d"):
2243       umlsl(vform, zd, zn_b, zm_idx);
2244       break;
2245     case Hash("umlslt_z_zzzi_s"):
2246     case Hash("umlslt_z_zzzi_d"):
2247       umlsl(vform, zd, zn_t, zm_idx);
2248       break;
2249     default:
2250       VIXL_UNIMPLEMENTED();
2251   }
2252 }
2253 
2254 void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) {
2255   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2256   SimVRegister& zd = ReadVRegister(instr->GetRd());
2257   SimVRegister& zn = ReadVRegister(instr->GetRn());
2258   SimVRegister result, zd_b;
2259 
2260   pack_even_elements(kFormatVnH, zd_b, zd);
2261 
2262   switch (form_hash_) {
2263     case Hash("fcvtnt_z_p_z_s2h"):
2264       fcvt(kFormatVnH, kFormatVnS, result, pg, zn);
2265       pack_even_elements(kFormatVnH, result, result);
2266       zip1(kFormatVnH, result, zd_b, result);
2267       break;
2268     default:
2269       VIXL_UNIMPLEMENTED();
2270   }
2271   mov_merging(kFormatVnS, zd, pg, result);
2272 }
2273 
2274 void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) {
2275   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2276   SimVRegister& zd = ReadVRegister(instr->GetRd());
2277   SimVRegister& zn = ReadVRegister(instr->GetRn());
2278   SimVRegister result, zero, zd_b;
2279 
2280   zero.Clear();
2281   pack_even_elements(kFormatVnS, zd_b, zd);
2282 
2283   switch (form_hash_) {
2284     case Hash("fcvtnt_z_p_z_d2s"):
2285       fcvt(kFormatVnS, kFormatVnD, result, pg, zn);
2286       pack_even_elements(kFormatVnS, result, result);
2287       zip1(kFormatVnS, result, zd_b, result);
2288       break;
2289     case Hash("fcvtx_z_p_z_d2s"):
2290       fcvtxn(kFormatVnS, result, zn);
2291       zip1(kFormatVnS, result, result, zero);
2292       break;
2293     case Hash("fcvtxnt_z_p_z_d2s"):
2294       fcvtxn(kFormatVnS, result, zn);
2295       zip1(kFormatVnS, result, zd_b, result);
2296       break;
2297     default:
2298       VIXL_UNIMPLEMENTED();
2299   }
2300   mov_merging(kFormatVnD, zd, pg, result);
2301 }
2302 
2303 void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) {
2304   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2305   SimVRegister& zd = ReadVRegister(instr->GetRd());
2306   SimVRegister& zn = ReadVRegister(instr->GetRn());
2307   SimVRegister result;
2308 
2309   switch (form_hash_) {
2310     case Hash("fcvtlt_z_p_z_h2s"):
2311       ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes);
2312       fcvt(kFormatVnS, kFormatVnH, zd, pg, result);
2313       break;
2314     case Hash("fcvtlt_z_p_z_s2d"):
2315       ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes);
2316       fcvt(kFormatVnD, kFormatVnS, zd, pg, result);
2317       break;
2318     default:
2319       VIXL_UNIMPLEMENTED();
2320   }
2321 }
2322 
2323 void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
2324   VectorFormat vform = instr->GetSVEVectorFormat();
2325   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2326   SimVRegister& zd = ReadVRegister(instr->GetRd());
2327   SimVRegister& zn = ReadVRegister(instr->GetRn());
2328   SimVRegister result;
2329 
2330   if (vform != kFormatVnS) {
2331     VIXL_UNIMPLEMENTED();
2332   }
2333 
2334   switch (form_hash_) {
2335     case Hash("urecpe_z_p_z"):
2336       urecpe(vform, result, zn);
2337       break;
2338     case Hash("ursqrte_z_p_z"):
2339       ursqrte(vform, result, zn);
2340       break;
2341     default:
2342       VIXL_UNIMPLEMENTED();
2343   }
2344   mov_merging(vform, zd, pg, result);
2345 }
2346 
2347 void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) {
2348   VectorFormat vform = instr->GetSVEVectorFormat();
2349   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2350   SimVRegister& zd = ReadVRegister(instr->GetRd());
2351   SimVRegister& zn = ReadVRegister(instr->GetRn());
2352   SimVRegister result;
2353 
2354   switch (form_hash_) {
2355     case Hash("flogb_z_p_z"):
2356       vform = instr->GetSVEVectorFormat(17);
2357       flogb(vform, result, zn);
2358       break;
2359     case Hash("sqabs_z_p_z"):
2360       abs(vform, result, zn).SignedSaturate(vform);
2361       break;
2362     case Hash("sqneg_z_p_z"):
2363       neg(vform, result, zn).SignedSaturate(vform);
2364       break;
2365     default:
2366       VIXL_UNIMPLEMENTED();
2367   }
2368   mov_merging(vform, zd, pg, result);
2369 }
2370 
2371 void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) {
2372   VectorFormat vform = instr->GetSVEVectorFormat();
2373   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2374   SimVRegister& zd = ReadVRegister(instr->GetRd());
2375   SimVRegister& zm = ReadVRegister(instr->GetRm());
2376   SimVRegister& zn = ReadVRegister(instr->GetRn());
2377   SimVRegister result;
2378 
2379   VIXL_ASSERT(form_hash_ == Hash("histcnt_z_p_zz"));
2380   if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
2381     histogram(vform, result, pg, zn, zm);
2382     mov_zeroing(vform, zd, pg, result);
2383   } else {
2384     VIXL_UNIMPLEMENTED();
2385   }
2386 }
2387 
2388 void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
2389   VectorFormat vform = instr->GetSVEVectorFormat();
2390   SimVRegister& zd = ReadVRegister(instr->GetRd());
2391   SimVRegister& zm = ReadVRegister(instr->GetRm());
2392   SimVRegister& zn = ReadVRegister(instr->GetRn());
2393   SimVRegister result;
2394   bool do_bext = false;
2395 
2396   switch (form_hash_) {
2397     case Hash("bdep_z_zz"):
2398       bdep(vform, zd, zn, zm);
2399       break;
2400     case Hash("bext_z_zz"):
2401       do_bext = true;
2402       VIXL_FALLTHROUGH();
2403     case Hash("bgrp_z_zz"):
2404       bgrp(vform, zd, zn, zm, do_bext);
2405       break;
2406     case Hash("eorbt_z_zz"):
2407       rotate_elements_right(vform, result, zm, 1);
2408       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2409       mov_alternating(vform, zd, result, 0);
2410       break;
2411     case Hash("eortb_z_zz"):
2412       rotate_elements_right(vform, result, zm, -1);
2413       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
2414       mov_alternating(vform, zd, result, 1);
2415       break;
2416     case Hash("mul_z_zz"):
2417       mul(vform, zd, zn, zm);
2418       break;
2419     case Hash("smulh_z_zz"):
2420       smulh(vform, zd, zn, zm);
2421       break;
2422     case Hash("sqdmulh_z_zz"):
2423       sqdmulh(vform, zd, zn, zm);
2424       break;
2425     case Hash("sqrdmulh_z_zz"):
2426       sqrdmulh(vform, zd, zn, zm);
2427       break;
2428     case Hash("umulh_z_zz"):
2429       umulh(vform, zd, zn, zm);
2430       break;
2431     default:
2432       VIXL_UNIMPLEMENTED();
2433   }
2434 }
2435 
2436 void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) {
2437   VectorFormat vform = instr->GetSVEVectorFormat();
2438   SimVRegister& zd = ReadVRegister(instr->GetRd());
2439   SimVRegister& zm = ReadVRegister(instr->GetRm());
2440   SimVRegister& zn = ReadVRegister(instr->GetRn());
2441 
2442   SimVRegister zm_b, zm_t;
2443   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2444   pack_even_elements(vform_half, zm_b, zm);
2445   pack_odd_elements(vform_half, zm_t, zm);
2446 
2447   switch (form_hash_) {
2448     case Hash("saddwb_z_zz"):
2449       saddw(vform, zd, zn, zm_b);
2450       break;
2451     case Hash("saddwt_z_zz"):
2452       saddw(vform, zd, zn, zm_t);
2453       break;
2454     case Hash("ssubwb_z_zz"):
2455       ssubw(vform, zd, zn, zm_b);
2456       break;
2457     case Hash("ssubwt_z_zz"):
2458       ssubw(vform, zd, zn, zm_t);
2459       break;
2460     case Hash("uaddwb_z_zz"):
2461       uaddw(vform, zd, zn, zm_b);
2462       break;
2463     case Hash("uaddwt_z_zz"):
2464       uaddw(vform, zd, zn, zm_t);
2465       break;
2466     case Hash("usubwb_z_zz"):
2467       usubw(vform, zd, zn, zm_b);
2468       break;
2469     case Hash("usubwt_z_zz"):
2470       usubw(vform, zd, zn, zm_t);
2471       break;
2472     default:
2473       VIXL_UNIMPLEMENTED();
2474   }
2475 }
2476 
2477 void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) {
2478   SimVRegister& zd = ReadVRegister(instr->GetRd());
2479   SimVRegister& zn = ReadVRegister(instr->GetRn());
2480 
2481   std::pair<int, int> shift_and_lane_size =
2482       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2483   int lane_size = shift_and_lane_size.second;
2484   VIXL_ASSERT((lane_size >= 0) &&
2485               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2486   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2487   int shift_dist = shift_and_lane_size.first;
2488 
2489   switch (form_hash_) {
2490     case Hash("sli_z_zzi"):
2491       // For left shifts the decoded value is (lane size in bits) minus the
2492       // shift distance, so convert it here.
2493       shift_dist = (8 << lane_size) - shift_dist;
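      // For example (illustrative): for H-sized lanes (lane_size == 1), a
      // decoded value of 13 becomes a left shift of 16 - 13 = 3.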
2494       sli(vform, zd, zn, shift_dist);
2495       break;
2496     case Hash("sri_z_zzi"):
2497       sri(vform, zd, zn, shift_dist);
2498       break;
2499     default:
2500       VIXL_UNIMPLEMENTED();
2501   }
2502 }
2503 
2504 void Simulator::SimulateSVENarrow(const Instruction* instr) {
2505   SimVRegister& zd = ReadVRegister(instr->GetRd());
2506   SimVRegister& zn = ReadVRegister(instr->GetRn());
2507   SimVRegister result;
2508 
2509   std::pair<int, int> shift_and_lane_size =
2510       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2511   int lane_size = shift_and_lane_size.second;
2512   VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
2513               (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)));
2514   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
2515   int right_shift_dist = shift_and_lane_size.first;
2516   bool top = false;
2517 
2518   switch (form_hash_) {
2519     case Hash("sqxtnt_z_zz"):
2520       top = true;
2521       VIXL_FALLTHROUGH();
2522     case Hash("sqxtnb_z_zz"):
2523       sqxtn(vform, result, zn);
2524       break;
2525     case Hash("sqxtunt_z_zz"):
2526       top = true;
2527       VIXL_FALLTHROUGH();
2528     case Hash("sqxtunb_z_zz"):
2529       sqxtun(vform, result, zn);
2530       break;
2531     case Hash("uqxtnt_z_zz"):
2532       top = true;
2533       VIXL_FALLTHROUGH();
2534     case Hash("uqxtnb_z_zz"):
2535       uqxtn(vform, result, zn);
2536       break;
2537     case Hash("rshrnt_z_zi"):
2538       top = true;
2539       VIXL_FALLTHROUGH();
2540     case Hash("rshrnb_z_zi"):
2541       rshrn(vform, result, zn, right_shift_dist);
2542       break;
2543     case Hash("shrnt_z_zi"):
2544       top = true;
2545       VIXL_FALLTHROUGH();
2546     case Hash("shrnb_z_zi"):
2547       shrn(vform, result, zn, right_shift_dist);
2548       break;
2549     case Hash("sqrshrnt_z_zi"):
2550       top = true;
2551       VIXL_FALLTHROUGH();
2552     case Hash("sqrshrnb_z_zi"):
2553       sqrshrn(vform, result, zn, right_shift_dist);
2554       break;
2555     case Hash("sqrshrunt_z_zi"):
2556       top = true;
2557       VIXL_FALLTHROUGH();
2558     case Hash("sqrshrunb_z_zi"):
2559       sqrshrun(vform, result, zn, right_shift_dist);
2560       break;
2561     case Hash("sqshrnt_z_zi"):
2562       top = true;
2563       VIXL_FALLTHROUGH();
2564     case Hash("sqshrnb_z_zi"):
2565       sqshrn(vform, result, zn, right_shift_dist);
2566       break;
2567     case Hash("sqshrunt_z_zi"):
2568       top = true;
2569       VIXL_FALLTHROUGH();
2570     case Hash("sqshrunb_z_zi"):
2571       sqshrun(vform, result, zn, right_shift_dist);
2572       break;
2573     case Hash("uqrshrnt_z_zi"):
2574       top = true;
2575       VIXL_FALLTHROUGH();
2576     case Hash("uqrshrnb_z_zi"):
2577       uqrshrn(vform, result, zn, right_shift_dist);
2578       break;
2579     case Hash("uqshrnt_z_zi"):
2580       top = true;
2581       VIXL_FALLTHROUGH();
2582     case Hash("uqshrnb_z_zi"):
2583       uqshrn(vform, result, zn, right_shift_dist);
2584       break;
2585     default:
2586       VIXL_UNIMPLEMENTED();
2587   }
2588 
2589   if (top) {
2590     // Keep even elements, replace odd elements with the results.
2591     xtn(vform, zd, zd);
2592     zip1(vform, zd, zd, result);
2593   } else {
2594     // Zero odd elements, replace even elements with the results.
2595     SimVRegister zero;
2596     zero.Clear();
2597     zip1(vform, zd, result, zero);
2598   }
2599 }
2600 
2601 void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) {
2602   VectorFormat vform = instr->GetSVEVectorFormat();
2603   SimVRegister& zd = ReadVRegister(instr->GetRd());
2604   SimVRegister& zm = ReadVRegister(instr->GetRm());
2605   SimVRegister& zn = ReadVRegister(instr->GetRn());
2606   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2607 
2608   // Construct temporary registers containing the even (bottom) and odd (top)
2609   // elements.
2610   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2611   pack_even_elements(vform_half, zn_b, zn);
2612   pack_even_elements(vform_half, zm_b, zm);
2613   pack_odd_elements(vform_half, zn_t, zn);
2614   pack_odd_elements(vform_half, zm_t, zm);
2615 
2616   switch (form_hash_) {
2617     case Hash("sabdlb_z_zz"):
2618       sabdl(vform, zd, zn_b, zm_b);
2619       break;
2620     case Hash("sabdlt_z_zz"):
2621       sabdl(vform, zd, zn_t, zm_t);
2622       break;
2623     case Hash("saddlb_z_zz"):
2624       saddl(vform, zd, zn_b, zm_b);
2625       break;
2626     case Hash("saddlbt_z_zz"):
2627       saddl(vform, zd, zn_b, zm_t);
2628       break;
2629     case Hash("saddlt_z_zz"):
2630       saddl(vform, zd, zn_t, zm_t);
2631       break;
2632     case Hash("ssublb_z_zz"):
2633       ssubl(vform, zd, zn_b, zm_b);
2634       break;
2635     case Hash("ssublbt_z_zz"):
2636       ssubl(vform, zd, zn_b, zm_t);
2637       break;
2638     case Hash("ssublt_z_zz"):
2639       ssubl(vform, zd, zn_t, zm_t);
2640       break;
2641     case Hash("ssubltb_z_zz"):
2642       ssubl(vform, zd, zn_t, zm_b);
2643       break;
2644     case Hash("uabdlb_z_zz"):
2645       uabdl(vform, zd, zn_b, zm_b);
2646       break;
2647     case Hash("uabdlt_z_zz"):
2648       uabdl(vform, zd, zn_t, zm_t);
2649       break;
2650     case Hash("uaddlb_z_zz"):
2651       uaddl(vform, zd, zn_b, zm_b);
2652       break;
2653     case Hash("uaddlt_z_zz"):
2654       uaddl(vform, zd, zn_t, zm_t);
2655       break;
2656     case Hash("usublb_z_zz"):
2657       usubl(vform, zd, zn_b, zm_b);
2658       break;
2659     case Hash("usublt_z_zz"):
2660       usubl(vform, zd, zn_t, zm_t);
2661       break;
2662     case Hash("sabalb_z_zzz"):
2663       sabal(vform, zd, zn_b, zm_b);
2664       break;
2665     case Hash("sabalt_z_zzz"):
2666       sabal(vform, zd, zn_t, zm_t);
2667       break;
2668     case Hash("uabalb_z_zzz"):
2669       uabal(vform, zd, zn_b, zm_b);
2670       break;
2671     case Hash("uabalt_z_zzz"):
2672       uabal(vform, zd, zn_t, zm_t);
2673       break;
2674     default:
2675       VIXL_UNIMPLEMENTED();
2676   }
2677 }
2678 
2679 void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
2680   VectorFormat vform = instr->GetSVEVectorFormat();
2681   SimVRegister& zd = ReadVRegister(instr->GetRd());
2682   SimVRegister& zm = ReadVRegister(instr->GetRm());
2683   SimVRegister& zn = ReadVRegister(instr->GetRn());
2684   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2685   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2686   pack_even_elements(vform_half, zn_b, zn);
2687   pack_even_elements(vform_half, zm_b, zm);
2688   pack_odd_elements(vform_half, zn_t, zn);
2689   pack_odd_elements(vform_half, zm_t, zm);
2690 
2691   switch (form_hash_) {
2692     case Hash("pmullb_z_zz"):
2693       // '00' is reserved for Q-sized lane.
2694       if (vform == kFormatVnB) {
2695         VIXL_UNIMPLEMENTED();
2696       }
2697       pmull(vform, zd, zn_b, zm_b);
2698       break;
2699     case Hash("pmullt_z_zz"):
2700       // '00' is reserved for Q-sized lane.
2701       if (vform == kFormatVnB) {
2702         VIXL_UNIMPLEMENTED();
2703       }
2704       pmull(vform, zd, zn_t, zm_t);
2705       break;
2706     case Hash("smullb_z_zz"):
2707       smull(vform, zd, zn_b, zm_b);
2708       break;
2709     case Hash("smullt_z_zz"):
2710       smull(vform, zd, zn_t, zm_t);
2711       break;
2712     case Hash("sqdmullb_z_zz"):
2713       sqdmull(vform, zd, zn_b, zm_b);
2714       break;
2715     case Hash("sqdmullt_z_zz"):
2716       sqdmull(vform, zd, zn_t, zm_t);
2717       break;
2718     case Hash("umullb_z_zz"):
2719       umull(vform, zd, zn_b, zm_b);
2720       break;
2721     case Hash("umullt_z_zz"):
2722       umull(vform, zd, zn_t, zm_t);
2723       break;
2724     default:
2725       VIXL_UNIMPLEMENTED();
2726   }
2727 }
2728 
2729 void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) {
2730   SimVRegister& zd = ReadVRegister(instr->GetRd());
2731   SimVRegister& zm = ReadVRegister(instr->GetRm());
2732   SimVRegister& zn = ReadVRegister(instr->GetRn());
2733   SimVRegister result;
2734   bool top = false;
2735 
2736   VectorFormat vform_src = instr->GetSVEVectorFormat();
2737   if (vform_src == kFormatVnB) {
2738     VIXL_UNIMPLEMENTED();
2739   }
2740   VectorFormat vform = VectorFormatHalfWidth(vform_src);
2741 
2742   switch (form_hash_) {
2743     case Hash("addhnt_z_zz"):
2744       top = true;
2745       VIXL_FALLTHROUGH();
2746     case Hash("addhnb_z_zz"):
2747       addhn(vform, result, zn, zm);
2748       break;
2749     case Hash("raddhnt_z_zz"):
2750       top = true;
2751       VIXL_FALLTHROUGH();
2752     case Hash("raddhnb_z_zz"):
2753       raddhn(vform, result, zn, zm);
2754       break;
2755     case Hash("rsubhnt_z_zz"):
2756       top = true;
2757       VIXL_FALLTHROUGH();
2758     case Hash("rsubhnb_z_zz"):
2759       rsubhn(vform, result, zn, zm);
2760       break;
2761     case Hash("subhnt_z_zz"):
2762       top = true;
2763       VIXL_FALLTHROUGH();
2764     case Hash("subhnb_z_zz"):
2765       subhn(vform, result, zn, zm);
2766       break;
2767     default:
2768       VIXL_UNIMPLEMENTED();
2769   }
2770 
2771   if (top) {
2772     // Keep even elements, replace odd elements with the results.
2773     xtn(vform, zd, zd);
2774     zip1(vform, zd, zd, result);
2775   } else {
2776     // Zero odd elements, replace even elements with the results.
2777     SimVRegister zero;
2778     zero.Clear();
2779     zip1(vform, zd, result, zero);
2780   }
2781 }
2782 
2783 void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) {
2784   SimVRegister& zd = ReadVRegister(instr->GetRd());
2785   SimVRegister& zn = ReadVRegister(instr->GetRn());
2786   SimVRegister zn_b, zn_t;
2787 
2788   std::pair<int, int> shift_and_lane_size =
2789       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
2790   int lane_size = shift_and_lane_size.second;
2791   VIXL_ASSERT((lane_size >= 0) &&
2792               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
2793   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1);
2794   int right_shift_dist = shift_and_lane_size.first;
2795   int left_shift_dist = (8 << lane_size) - right_shift_dist;
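  // The decoded immediate is treated as a right-shift distance, so the
  // left-shift amount is recovered as the lane size in bits minus that value;
  // e.g. for .B source lanes (8 bits), a decoded distance of 5 corresponds to
  // a left shift of 3.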
2796 
2797   // Construct temporary registers containing the even (bottom) and odd (top)
2798   // elements.
2799   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2800   pack_even_elements(vform_half, zn_b, zn);
2801   pack_odd_elements(vform_half, zn_t, zn);
2802 
2803   switch (form_hash_) {
2804     case Hash("sshllb_z_zi"):
2805       sshll(vform, zd, zn_b, left_shift_dist);
2806       break;
2807     case Hash("sshllt_z_zi"):
2808       sshll(vform, zd, zn_t, left_shift_dist);
2809       break;
2810     case Hash("ushllb_z_zi"):
2811       ushll(vform, zd, zn_b, left_shift_dist);
2812       break;
2813     case Hash("ushllt_z_zi"):
2814       ushll(vform, zd, zn_t, left_shift_dist);
2815       break;
2816     default:
2817       VIXL_UNIMPLEMENTED();
2818   }
2819 }
2820 
2821 void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) {
2822   VectorFormat vform = instr->GetSVEVectorFormat();
2823   SimVRegister& zda = ReadVRegister(instr->GetRd());
2824   SimVRegister& zn = ReadVRegister(instr->GetRn());
2825   unsigned zm_code = instr->GetRm();
2826   int index = -1;
2827   bool is_mla = false;
2828 
2829   switch (form_hash_) {
2830     case Hash("sqrdmlah_z_zzz"):
2831       is_mla = true;
2832       VIXL_FALLTHROUGH();
2833     case Hash("sqrdmlsh_z_zzz"):
2834       // Nothing to do.
2835       break;
2836     case Hash("sqrdmlah_z_zzzi_h"):
2837       is_mla = true;
2838       VIXL_FALLTHROUGH();
2839     case Hash("sqrdmlsh_z_zzzi_h"):
2840       vform = kFormatVnH;
2841       index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
2842       zm_code = instr->ExtractBits(18, 16);
2843       break;
2844     case Hash("sqrdmlah_z_zzzi_s"):
2845       is_mla = true;
2846       VIXL_FALLTHROUGH();
2847     case Hash("sqrdmlsh_z_zzzi_s"):
2848       vform = kFormatVnS;
2849       index = instr->ExtractBits(20, 19);
2850       zm_code = instr->ExtractBits(18, 16);
2851       break;
2852     case Hash("sqrdmlah_z_zzzi_d"):
2853       is_mla = true;
2854       VIXL_FALLTHROUGH();
2855     case Hash("sqrdmlsh_z_zzzi_d"):
2856       vform = kFormatVnD;
2857       index = instr->ExtractBit(20);
2858       zm_code = instr->ExtractBits(19, 16);
2859       break;
2860     default:
2861       VIXL_UNIMPLEMENTED();
2862   }
2863 
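  // For the indexed forms the Zm field is narrower (Z0-Z7 for the H and S
  // variants, Z0-Z15 for D), and the spare bits of the field hold the element
  // index; dup_elements_to_segments() below broadcasts the selected element
  // within each 128-bit segment before the multiply.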
2864   SimVRegister& zm = ReadVRegister(zm_code);
2865   SimVRegister zm_idx;
2866   if (index >= 0) {
2867     dup_elements_to_segments(vform, zm_idx, zm, index);
2868   }
2869 
2870   if (is_mla) {
2871     sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm);
2872   } else {
2873     sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm);
2874   }
2875 }
2876 
2877 void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
2878   SimVRegister& zda = ReadVRegister(instr->GetRd());
2879   SimVRegister& zn = ReadVRegister(instr->GetRn());
2880   SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
2881 
2882   SimVRegister temp, zm_idx, zn_b, zn_t;
2883   Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
2884   dup_elements_to_segments(kFormatVnS, temp, zm, index);
2885   pack_even_elements(kFormatVnS, zm_idx, temp);
2886   pack_even_elements(kFormatVnS, zn_b, zn);
2887   pack_odd_elements(kFormatVnS, zn_t, zn);
2888 
2889   switch (form_hash_) {
2890     case Hash("sqdmlalb_z_zzzi_d"):
2891       sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
2892       break;
2893     case Hash("sqdmlalt_z_zzzi_d"):
2894       sqdmlal(kFormatVnD, zda, zn_t, zm_idx);
2895       break;
2896     case Hash("sqdmlslb_z_zzzi_d"):
2897       sqdmlsl(kFormatVnD, zda, zn_b, zm_idx);
2898       break;
2899     case Hash("sqdmlslt_z_zzzi_d"):
2900       sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
2901       break;
2902     default:
2903       VIXL_UNIMPLEMENTED();
2904   }
2905 }
2906 
2907 void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
2908   SimVRegister& zda = ReadVRegister(instr->GetRd());
2909   SimVRegister& zm = ReadVRegister(instr->GetRm());
2910   SimVRegister& zn = ReadVRegister(instr->GetRn());
2911 
2912   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
2913   pack_even_elements(kFormatVnH, zn_b, zn);
2914   pack_even_elements(kFormatVnH, zm_b, zm);
2915   pack_odd_elements(kFormatVnH, zn_t, zn);
2916   pack_odd_elements(kFormatVnH, zm_t, zm);
2917 
2918   switch (form_hash_) {
2919     case Hash("fmlalb_z_zzz"):
2920       fmlal(kFormatVnS, zda, zn_b, zm_b);
2921       break;
2922     case Hash("fmlalt_z_zzz"):
2923       fmlal(kFormatVnS, zda, zn_t, zm_t);
2924       break;
2925     case Hash("fmlslb_z_zzz"):
2926       fmlsl(kFormatVnS, zda, zn_b, zm_b);
2927       break;
2928     case Hash("fmlslt_z_zzz"):
2929       fmlsl(kFormatVnS, zda, zn_t, zm_t);
2930       break;
2931     default:
2932       VIXL_UNIMPLEMENTED();
2933   }
2934 }
2935 
2936 void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
2937   SimVRegister& zda = ReadVRegister(instr->GetRd());
2938   SimVRegister& zn = ReadVRegister(instr->GetRn());
2939   SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
2940 
2941   SimVRegister temp, zm_idx, zn_b, zn_t;
2942   Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
2943   dup_elements_to_segments(kFormatVnH, temp, zm, index);
2944   pack_even_elements(kFormatVnH, zm_idx, temp);
2945   pack_even_elements(kFormatVnH, zn_b, zn);
2946   pack_odd_elements(kFormatVnH, zn_t, zn);
2947 
2948   switch (form_hash_) {
2949     case Hash("fmlalb_z_zzzi_s"):
2950       fmlal(kFormatVnS, zda, zn_b, zm_idx);
2951       break;
2952     case Hash("fmlalt_z_zzzi_s"):
2953       fmlal(kFormatVnS, zda, zn_t, zm_idx);
2954       break;
2955     case Hash("fmlslb_z_zzzi_s"):
2956       fmlsl(kFormatVnS, zda, zn_b, zm_idx);
2957       break;
2958     case Hash("fmlslt_z_zzzi_s"):
2959       fmlsl(kFormatVnS, zda, zn_t, zm_idx);
2960       break;
2961     case Hash("sqdmlalb_z_zzzi_s"):
2962       sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
2963       break;
2964     case Hash("sqdmlalt_z_zzzi_s"):
2965       sqdmlal(kFormatVnS, zda, zn_t, zm_idx);
2966       break;
2967     case Hash("sqdmlslb_z_zzzi_s"):
2968       sqdmlsl(kFormatVnS, zda, zn_b, zm_idx);
2969       break;
2970     case Hash("sqdmlslt_z_zzzi_s"):
2971       sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
2972       break;
2973     default:
2974       VIXL_UNIMPLEMENTED();
2975   }
2976 }
2977 
2978 void Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) {
2979   VectorFormat vform = instr->GetSVEVectorFormat();
2980   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
2981   SimVRegister& zda = ReadVRegister(instr->GetRd());
2982   SimVRegister& zn = ReadVRegister(instr->GetRn());
2983   SimVRegister result;
2984 
2985   switch (form_hash_) {
2986     case Hash("sadalp_z_p_z"):
2987       sadalp(vform, result, zn);
2988       break;
2989     case Hash("uadalp_z_p_z"):
2990       uadalp(vform, result, zn);
2991       break;
2992     default:
2993       VIXL_UNIMPLEMENTED();
2994   }
2995   mov_merging(vform, zda, pg, result);
2996 }
2997 
2998 void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) {
2999   VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD;
3000   SimVRegister& zda = ReadVRegister(instr->GetRd());
3001   SimVRegister& zm = ReadVRegister(instr->GetRm());
3002   SimVRegister& zn = ReadVRegister(instr->GetRn());
3003 
3004   SimVRegister not_zn;
3005   not_(vform, not_zn, zn);
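  // sbclb/sbclt reuse the add-with-carry helper with the inverted operand:
  // in two's complement, a - b - borrow == a + ~b + carry, so passing ~zn to
  // adcl() gives the subtract-with-carry behaviour.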
3006 
3007   switch (form_hash_) {
3008     case Hash("adclb_z_zzz"):
3009       adcl(vform, zda, zn, zm, /* top = */ false);
3010       break;
3011     case Hash("adclt_z_zzz"):
3012       adcl(vform, zda, zn, zm, /* top = */ true);
3013       break;
3014     case Hash("sbclb_z_zzz"):
3015       adcl(vform, zda, not_zn, zm, /* top = */ false);
3016       break;
3017     case Hash("sbclt_z_zzz"):
3018       adcl(vform, zda, not_zn, zm, /* top = */ true);
3019       break;
3020     default:
3021       VIXL_UNIMPLEMENTED();
3022   }
3023 }
3024 
3025 void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) {
3026   VectorFormat vform = instr->GetSVEVectorFormat();
3027   SimVRegister& zda = ReadVRegister(instr->GetRd());
3028   SimVRegister& zm = ReadVRegister(instr->GetRm());
3029   SimVRegister& zn = ReadVRegister(instr->GetRn());
3030 
3031   switch (form_hash_) {
3032     case Hash("saba_z_zzz"):
3033       saba(vform, zda, zn, zm);
3034       break;
3035     case Hash("uaba_z_zzz"):
3036       uaba(vform, zda, zn, zm);
3037       break;
3038     default:
3039       VIXL_UNIMPLEMENTED();
3040   }
3041 }
3042 
3043 void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) {
3044   SimVRegister& zda = ReadVRegister(instr->GetRd());
3045   SimVRegister& zn = ReadVRegister(instr->GetRn());
3046   int rot = instr->ExtractBits(11, 10) * 90;
3047   // vform and zm are only valid for the vector form of the instruction.
3048   VectorFormat vform = instr->GetSVEVectorFormat();
3049   SimVRegister& zm = ReadVRegister(instr->GetRm());
3050 
3051   // Inputs for the indexed form of the instruction.
3052   SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16));
3053   SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16));
3054   int idx_h = instr->ExtractBits(20, 19);
3055   int idx_s = instr->ExtractBit(20);
3056 
3057   switch (form_hash_) {
3058     case Hash("cmla_z_zzz"):
3059       cmla(vform, zda, zda, zn, zm, rot);
3060       break;
3061     case Hash("cmla_z_zzzi_h"):
3062       cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3063       break;
3064     case Hash("cmla_z_zzzi_s"):
3065       cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3066       break;
3067     case Hash("sqrdcmlah_z_zzz"):
3068       sqrdcmlah(vform, zda, zda, zn, zm, rot);
3069       break;
3070     case Hash("sqrdcmlah_z_zzzi_h"):
3071       sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
3072       break;
3073     case Hash("sqrdcmlah_z_zzzi_s"):
3074       sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
3075       break;
3076     default:
3077       VIXL_UNIMPLEMENTED();
3078   }
3079 }
3080 
3081 void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) {
3082   SimVRegister& zd = ReadVRegister(instr->GetRd());
3083   SimVRegister& zn = ReadVRegister(instr->GetRn());
3084 
3085   std::pair<int, int> shift_and_lane_size =
3086       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3087   int lane_size = shift_and_lane_size.second;
3088   VIXL_ASSERT((lane_size >= 0) &&
3089               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
3090   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3091   int shift_dist = shift_and_lane_size.first;
3092 
3093   switch (form_hash_) {
3094     case Hash("srsra_z_zi"):
3095       srsra(vform, zd, zn, shift_dist);
3096       break;
3097     case Hash("ssra_z_zi"):
3098       ssra(vform, zd, zn, shift_dist);
3099       break;
3100     case Hash("ursra_z_zi"):
3101       ursra(vform, zd, zn, shift_dist);
3102       break;
3103     case Hash("usra_z_zi"):
3104       usra(vform, zd, zn, shift_dist);
3105       break;
3106     default:
3107       VIXL_UNIMPLEMENTED();
3108   }
3109 }
3110 
3111 void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
3112   VectorFormat vform = instr->GetSVEVectorFormat();
3113   SimVRegister& zda = ReadVRegister(instr->GetRd());
3114   SimVRegister& zm = ReadVRegister(instr->GetRm());
3115   SimVRegister& zn = ReadVRegister(instr->GetRn());
3116 
3117   SimVRegister zero, zn_b, zm_b, zn_t, zm_t;
3118   zero.Clear();
3119 
3120   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3121   uzp1(vform_half, zn_b, zn, zero);
3122   uzp1(vform_half, zm_b, zm, zero);
3123   uzp2(vform_half, zn_t, zn, zero);
3124   uzp2(vform_half, zm_t, zm, zero);
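  // uzp1/uzp2 with a zero operand extract the even (bottom) and odd (top)
  // elements respectively, leaving them packed in the low half of each
  // temporary; this matches the pack_even/pack_odd helpers used by the other
  // widening forms.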
3125 
3126   switch (form_hash_) {
3127     case Hash("smlalb_z_zzz"):
3128       smlal(vform, zda, zn_b, zm_b);
3129       break;
3130     case Hash("smlalt_z_zzz"):
3131       smlal(vform, zda, zn_t, zm_t);
3132       break;
3133     case Hash("smlslb_z_zzz"):
3134       smlsl(vform, zda, zn_b, zm_b);
3135       break;
3136     case Hash("smlslt_z_zzz"):
3137       smlsl(vform, zda, zn_t, zm_t);
3138       break;
3139     case Hash("sqdmlalb_z_zzz"):
3140       sqdmlal(vform, zda, zn_b, zm_b);
3141       break;
3142     case Hash("sqdmlalbt_z_zzz"):
3143       sqdmlal(vform, zda, zn_b, zm_t);
3144       break;
3145     case Hash("sqdmlalt_z_zzz"):
3146       sqdmlal(vform, zda, zn_t, zm_t);
3147       break;
3148     case Hash("sqdmlslb_z_zzz"):
3149       sqdmlsl(vform, zda, zn_b, zm_b);
3150       break;
3151     case Hash("sqdmlslbt_z_zzz"):
3152       sqdmlsl(vform, zda, zn_b, zm_t);
3153       break;
3154     case Hash("sqdmlslt_z_zzz"):
3155       sqdmlsl(vform, zda, zn_t, zm_t);
3156       break;
3157     case Hash("umlalb_z_zzz"):
3158       umlal(vform, zda, zn_b, zm_b);
3159       break;
3160     case Hash("umlalt_z_zzz"):
3161       umlal(vform, zda, zn_t, zm_t);
3162       break;
3163     case Hash("umlslb_z_zzz"):
3164       umlsl(vform, zda, zn_b, zm_b);
3165       break;
3166     case Hash("umlslt_z_zzz"):
3167       umlsl(vform, zda, zn_t, zm_t);
3168       break;
3169     default:
3170       VIXL_UNIMPLEMENTED();
3171   }
3172 }
3173 
3174 void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
3175   VectorFormat vform = instr->GetSVEVectorFormat();
3176   SimVRegister& zda = ReadVRegister(instr->GetRd());
3177   SimVRegister& zn = ReadVRegister(instr->GetRn());
3178   int rot = instr->ExtractBits(11, 10) * 90;
3179   unsigned zm_code = instr->GetRm();
3180   int index = -1;
3181 
3182   switch (form_hash_) {
3183     case Hash("cdot_z_zzz"):
3184       // Nothing to do.
3185       break;
3186     case Hash("cdot_z_zzzi_s"):
3187       index = zm_code >> 3;
3188       zm_code &= 0x7;
3189       break;
3190     case Hash("cdot_z_zzzi_d"):
3191       index = zm_code >> 4;
3192       zm_code &= 0xf;
3193       break;
3194     default:
3195       VIXL_UNIMPLEMENTED();
3196   }
3197 
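  // For the indexed forms the five-bit Rm field is split: the low bits name
  // Zm (Z0-Z7 for .S, Z0-Z15 for .D) and the high bits give the element
  // index, which is then replicated to every 128-bit segment.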
3198   SimVRegister temp;
3199   SimVRegister& zm = ReadVRegister(zm_code);
3200   if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
3201   cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
3202 }
3203 
3204 void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
3205   VectorFormat vform = kFormatVnD;
3206   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3207   SimVRegister& zm = ReadVRegister(instr->GetRm());
3208   SimVRegister& zk = ReadVRegister(instr->GetRn());
3209   SimVRegister temp;
3210 
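  // The three-input bitwise ops are built from two-input primitives, e.g.
  // bcax computes zdn ^ (zm & ~zk) and eor3 computes zdn ^ zm ^ zk; bsl
  // selects bits from its data operands under the zk mask, roughly
  // (zdn & zk) | (zm & ~zk) for the plain form.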
3211   switch (form_hash_) {
3212     case Hash("bcax_z_zzz"):
3213       bic(vform, temp, zm, zk);
3214       eor(vform, zdn, temp, zdn);
3215       break;
3216     case Hash("bsl1n_z_zzz"):
3217       not_(vform, temp, zdn);
3218       bsl(vform, zdn, zk, temp, zm);
3219       break;
3220     case Hash("bsl2n_z_zzz"):
3221       not_(vform, temp, zm);
3222       bsl(vform, zdn, zk, zdn, temp);
3223       break;
3224     case Hash("bsl_z_zzz"):
3225       bsl(vform, zdn, zk, zdn, zm);
3226       break;
3227     case Hash("eor3_z_zzz"):
3228       eor(vform, temp, zdn, zm);
3229       eor(vform, zdn, temp, zk);
3230       break;
3231     case Hash("nbsl_z_zzz"):
3232       bsl(vform, zdn, zk, zdn, zm);
3233       not_(vform, zdn, zdn);
3234       break;
3235     default:
3236       VIXL_UNIMPLEMENTED();
3237   }
3238 }
3239 
3240 void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) {
3241   VectorFormat vform = instr->GetSVEVectorFormat();
3242   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3243   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3244   SimVRegister& zm = ReadVRegister(instr->GetRn());
3245   SimVRegister result;
3246 
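  // Halving forms compute the sum or difference at full precision and then
  // shift right by one, e.g. shadd gives (a + b) >> 1 with signed semantics;
  // the rounding variants (srhadd/urhadd) in effect compute (a + b + 1) >> 1.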
3247   switch (form_hash_) {
3248     case Hash("shadd_z_p_zz"):
3249       add(vform, result, zdn, zm).Halve(vform);
3250       break;
3251     case Hash("shsub_z_p_zz"):
3252       sub(vform, result, zdn, zm).Halve(vform);
3253       break;
3254     case Hash("shsubr_z_p_zz"):
3255       sub(vform, result, zm, zdn).Halve(vform);
3256       break;
3257     case Hash("srhadd_z_p_zz"):
3258       add(vform, result, zdn, zm).Halve(vform).Round(vform);
3259       break;
3260     case Hash("uhadd_z_p_zz"):
3261       add(vform, result, zdn, zm).Uhalve(vform);
3262       break;
3263     case Hash("uhsub_z_p_zz"):
3264       sub(vform, result, zdn, zm).Uhalve(vform);
3265       break;
3266     case Hash("uhsubr_z_p_zz"):
3267       sub(vform, result, zm, zdn).Uhalve(vform);
3268       break;
3269     case Hash("urhadd_z_p_zz"):
3270       add(vform, result, zdn, zm).Uhalve(vform).Round(vform);
3271       break;
3272     default:
3273       VIXL_UNIMPLEMENTED();
3274       break;
3275   }
3276   mov_merging(vform, zdn, pg, result);
3277 }
3278 
3279 void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) {
3280   VectorFormat vform = instr->GetSVEVectorFormat();
3281   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3282   SimVRegister& zm = ReadVRegister(instr->GetRn());
3283   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3284   SimVRegister result;
3285 
3286   switch (form_hash_) {
3287     case Hash("sqadd_z_p_zz"):
3288       add(vform, result, zdn, zm).SignedSaturate(vform);
3289       break;
3290     case Hash("sqsub_z_p_zz"):
3291       sub(vform, result, zdn, zm).SignedSaturate(vform);
3292       break;
3293     case Hash("sqsubr_z_p_zz"):
3294       sub(vform, result, zm, zdn).SignedSaturate(vform);
3295       break;
3296     case Hash("suqadd_z_p_zz"):
3297       suqadd(vform, result, zdn, zm);
3298       break;
3299     case Hash("uqadd_z_p_zz"):
3300       add(vform, result, zdn, zm).UnsignedSaturate(vform);
3301       break;
3302     case Hash("uqsub_z_p_zz"):
3303       sub(vform, result, zdn, zm).UnsignedSaturate(vform);
3304       break;
3305     case Hash("uqsubr_z_p_zz"):
3306       sub(vform, result, zm, zdn).UnsignedSaturate(vform);
3307       break;
3308     case Hash("usqadd_z_p_zz"):
3309       usqadd(vform, result, zdn, zm);
3310       break;
3311     default:
3312       VIXL_UNIMPLEMENTED();
3313       break;
3314   }
3315   mov_merging(vform, zdn, pg, result);
3316 }
3317 
3318 void Simulator::SimulateSVEIntArithPair(const Instruction* instr) {
3319   VectorFormat vform = instr->GetSVEVectorFormat();
3320   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3321   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3322   SimVRegister& zm = ReadVRegister(instr->GetRn());
3323   SimVRegister result;
3324 
3325   switch (form_hash_) {
3326     case Hash("addp_z_p_zz"):
3327       addp(vform, result, zdn, zm);
3328       break;
3329     case Hash("smaxp_z_p_zz"):
3330       smaxp(vform, result, zdn, zm);
3331       break;
3332     case Hash("sminp_z_p_zz"):
3333       sminp(vform, result, zdn, zm);
3334       break;
3335     case Hash("umaxp_z_p_zz"):
3336       umaxp(vform, result, zdn, zm);
3337       break;
3338     case Hash("uminp_z_p_zz"):
3339       uminp(vform, result, zdn, zm);
3340       break;
3341     default:
3342       VIXL_UNIMPLEMENTED();
3343       break;
3344   }
3345   mov_merging(vform, zdn, pg, result);
3346 }
3347 
3348 void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) {
3349   VectorFormat vform = instr->GetSVEVectorFormat();
3350   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3351   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3352   SimVRegister& zm = ReadVRegister(instr->GetRn());
3353   SimVRegister result;
3354 
3355   switch (form_hash_) {
3356     case Hash("faddp_z_p_zz"):
3357       faddp(vform, result, zdn, zm);
3358       break;
3359     case Hash("fmaxnmp_z_p_zz"):
3360       fmaxnmp(vform, result, zdn, zm);
3361       break;
3362     case Hash("fmaxp_z_p_zz"):
3363       fmaxp(vform, result, zdn, zm);
3364       break;
3365     case Hash("fminnmp_z_p_zz"):
3366       fminnmp(vform, result, zdn, zm);
3367       break;
3368     case Hash("fminp_z_p_zz"):
3369       fminp(vform, result, zdn, zm);
3370       break;
3371     default:
3372       VIXL_UNIMPLEMENTED();
3373   }
3374   mov_merging(vform, zdn, pg, result);
3375 }
3376 
3377 void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) {
3378   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3379   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3380 
3381   std::pair<int, int> shift_and_lane_size =
3382       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
3383   unsigned lane_size = shift_and_lane_size.second;
3384   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3385   int right_shift_dist = shift_and_lane_size.first;
3386   int left_shift_dist = (8 << lane_size) - right_shift_dist;
3387   SimVRegister result;
3388 
3389   switch (form_hash_) {
3390     case Hash("sqshl_z_p_zi"):
3391       sqshl(vform, result, zdn, left_shift_dist);
3392       break;
3393     case Hash("sqshlu_z_p_zi"):
3394       sqshlu(vform, result, zdn, left_shift_dist);
3395       break;
3396     case Hash("srshr_z_p_zi"):
3397       sshr(vform, result, zdn, right_shift_dist).Round(vform);
3398       break;
3399     case Hash("uqshl_z_p_zi"):
3400       uqshl(vform, result, zdn, left_shift_dist);
3401       break;
3402     case Hash("urshr_z_p_zi"):
3403       ushr(vform, result, zdn, right_shift_dist).Round(vform);
3404       break;
3405     default:
3406       VIXL_UNIMPLEMENTED();
3407   }
3408   mov_merging(vform, zdn, pg, result);
3409 }
3410 
3411 void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) {
3412   VIXL_ASSERT(form_hash_ == Hash("xar_z_zzi"));
3413 
3414   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3415   SimVRegister& zm = ReadVRegister(instr->GetRn());
3416 
3417   std::pair<int, int> shift_and_lane_size =
3418       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
3419   unsigned lane_size = shift_and_lane_size.second;
3420   VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
3421   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
3422   int shift_dist = shift_and_lane_size.first;
3423   eor(vform, zdn, zdn, zm);
3424   ror(vform, zdn, zdn, shift_dist);
3425 }
3426 
3427 void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) {
3428   VectorFormat vform = instr->GetSVEVectorFormat();
3429   SimVRegister& zdn = ReadVRegister(instr->GetRd());
3430   SimVRegister& zm = ReadVRegister(instr->GetRn());
3431   int rot = (instr->ExtractBit(10) == 0) ? 90 : 270;
3432 
3433   switch (form_hash_) {
3434     case Hash("cadd_z_zz"):
3435       cadd(vform, zdn, zdn, zm, rot);
3436       break;
3437     case Hash("sqcadd_z_zz"):
3438       cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true);
3439       break;
3440     default:
3441       VIXL_UNIMPLEMENTED();
3442   }
3443 }
3444 
3445 void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) {
3446   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3447   SimVRegister& zn = ReadVRegister(instr->GetRn());
3448   uint64_t xm = ReadXRegister(instr->GetRm());
3449 
3450   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3451   int msize = -1;
3452   bool is_signed = false;
3453 
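  // msize is the log2 of each memory element's size in bytes (0 = byte,
  // 3 = doubleword); is_signed selects sign-extension of the loaded value
  // into its 64-bit destination lane. Addresses are formed from the 64-bit
  // bases in zn plus the scalar offset in xm.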
3454   switch (form_hash_) {
3455     case Hash("ldnt1b_z_p_ar_d_64_unscaled"):
3456       msize = 0;
3457       break;
3458     case Hash("ldnt1d_z_p_ar_d_64_unscaled"):
3459       msize = 3;
3460       break;
3461     case Hash("ldnt1h_z_p_ar_d_64_unscaled"):
3462       msize = 1;
3463       break;
3464     case Hash("ldnt1sb_z_p_ar_d_64_unscaled"):
3465       msize = 0;
3466       is_signed = true;
3467       break;
3468     case Hash("ldnt1sh_z_p_ar_d_64_unscaled"):
3469       msize = 1;
3470       is_signed = true;
3471       break;
3472     case Hash("ldnt1sw_z_p_ar_d_64_unscaled"):
3473       msize = 2;
3474       is_signed = true;
3475       break;
3476     case Hash("ldnt1w_z_p_ar_d_64_unscaled"):
3477       msize = 2;
3478       break;
3479     default:
3480       VIXL_UNIMPLEMENTED();
3481   }
3482   addr.SetMsizeInBytesLog2(msize);
3483   SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed);
3484 }
3485 
3486 void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) {
3487   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3488   SimVRegister& zn = ReadVRegister(instr->GetRn());
3489   uint64_t xm = ReadXRegister(instr->GetRm());
3490 
3491   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
3492   VIXL_ASSERT((form_hash_ == Hash("stnt1b_z_p_ar_d_64_unscaled")) ||
3493               (form_hash_ == Hash("stnt1d_z_p_ar_d_64_unscaled")) ||
3494               (form_hash_ == Hash("stnt1h_z_p_ar_d_64_unscaled")) ||
3495               (form_hash_ == Hash("stnt1w_z_p_ar_d_64_unscaled")));
3496 
3497   addr.SetMsizeInBytesLog2(
3498       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3499   SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr);
3500 }
3501 
3502 void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) {
3503   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3504   SimVRegister& zn = ReadVRegister(instr->GetRn());
3505   uint64_t xm = ReadXRegister(instr->GetRm());
3506 
3507   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3508   int msize = -1;
3509   bool is_signed = false;
3510 
3511   switch (form_hash_) {
3512     case Hash("ldnt1b_z_p_ar_s_x32_unscaled"):
3513       msize = 0;
3514       break;
3515     case Hash("ldnt1h_z_p_ar_s_x32_unscaled"):
3516       msize = 1;
3517       break;
3518     case Hash("ldnt1sb_z_p_ar_s_x32_unscaled"):
3519       msize = 0;
3520       is_signed = true;
3521       break;
3522     case Hash("ldnt1sh_z_p_ar_s_x32_unscaled"):
3523       msize = 1;
3524       is_signed = true;
3525       break;
3526     case Hash("ldnt1w_z_p_ar_s_x32_unscaled"):
3527       msize = 2;
3528       break;
3529     default:
3530       VIXL_UNIMPLEMENTED();
3531   }
3532   addr.SetMsizeInBytesLog2(msize);
3533   SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed);
3534 }
3535 
3536 void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) {
3537   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
3538   SimVRegister& zn = ReadVRegister(instr->GetRn());
3539   uint64_t xm = ReadXRegister(instr->GetRm());
3540 
3541   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
3542   VIXL_ASSERT((form_hash_ == Hash("stnt1b_z_p_ar_s_x32_unscaled")) ||
3543               (form_hash_ == Hash("stnt1h_z_p_ar_s_x32_unscaled")) ||
3544               (form_hash_ == Hash("stnt1w_z_p_ar_s_x32_unscaled")));
3545 
3546   addr.SetMsizeInBytesLog2(
3547       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
3548   SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr);
3549 }
3550 
3551 void Simulator::VisitReserved(const Instruction* instr) {
3552   // UDF is the only instruction in this group, and the Decoder is precise here.
3553   VIXL_ASSERT(instr->Mask(ReservedMask) == UDF);
3554 
3555   printf("UDF (permanently undefined) instruction at %p: 0x%08" PRIx32 "\n",
3556          reinterpret_cast<const void*>(instr),
3557          instr->GetInstructionBits());
3558   VIXL_ABORT_WITH_MSG("UNDEFINED (UDF)\n");
3559 }
3560 
3561 
3562 void Simulator::VisitUnimplemented(const Instruction* instr) {
3563   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
3564          reinterpret_cast<const void*>(instr),
3565          instr->GetInstructionBits());
3566   VIXL_UNIMPLEMENTED();
3567 }
3568 
3569 
3570 void Simulator::VisitUnallocated(const Instruction* instr) {
3571   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
3572          reinterpret_cast<const void*>(instr),
3573          instr->GetInstructionBits());
3574   VIXL_UNIMPLEMENTED();
3575 }
3576 
3577 
3578 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
3579   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
3580               (instr->Mask(PCRelAddressingMask) == ADRP));
3581 
3582   WriteRegister(instr->GetRd(), instr->GetImmPCOffsetTarget());
3583 }
3584 
3585 
3586 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
3587   switch (instr->Mask(UnconditionalBranchMask)) {
3588     case BL:
3589       WriteLr(instr->GetNextInstruction());
3590       VIXL_FALLTHROUGH();
3591     case B:
3592       WritePc(instr->GetImmPCOffsetTarget());
3593       break;
3594     default:
3595       VIXL_UNREACHABLE();
3596   }
3597 }
3598 
3599 
3600 void Simulator::VisitConditionalBranch(const Instruction* instr) {
3601   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
3602   if (ConditionPassed(instr->GetConditionBranch())) {
3603     WritePc(instr->GetImmPCOffsetTarget());
3604   }
3605 }
3606 
3607 BType Simulator::GetBTypeFromInstruction(const Instruction* instr) const {
3608   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3609     case BLR:
3610     case BLRAA:
3611     case BLRAB:
3612     case BLRAAZ:
3613     case BLRABZ:
3614       return BranchAndLink;
3615     case BR:
3616     case BRAA:
3617     case BRAB:
3618     case BRAAZ:
3619     case BRABZ:
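      // x16 and x17 are the intra-procedure-call registers (IP0/IP1);
      // indirect branches through them, or branches from a non-guarded page,
      // are tracked as a distinct branch type for the BTI checks.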
3620       if ((instr->GetRn() == 16) || (instr->GetRn() == 17) ||
3621           !PcIsInGuardedPage()) {
3622         return BranchFromUnguardedOrToIP;
3623       }
3624       return BranchFromGuardedNotToIP;
3625   }
3626   return DefaultBType;
3627 }
3628 
3629 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
3630   bool authenticate = false;
3631   bool link = false;
3632   uint64_t addr = ReadXRegister(instr->GetRn());
3633   uint64_t context = 0;
3634 
3635   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
3636     case BLR:
3637       link = true;
3638       VIXL_FALLTHROUGH();
3639     case BR:
3640     case RET:
3641       break;
3642 
3643     case BLRAAZ:
3644     case BLRABZ:
3645       link = true;
3646       VIXL_FALLTHROUGH();
3647     case BRAAZ:
3648     case BRABZ:
3649       authenticate = true;
3650       break;
3651 
3652     case BLRAA:
3653     case BLRAB:
3654       link = true;
3655       VIXL_FALLTHROUGH();
3656     case BRAA:
3657     case BRAB:
3658       authenticate = true;
3659       context = ReadXRegister(instr->GetRd());
3660       break;
3661 
3662     case RETAA:
3663     case RETAB:
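      // RETAA and RETAB implicitly authenticate the link register, using the
      // current stack pointer as the PAC modifier (the context read below).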
3664       authenticate = true;
3665       addr = ReadXRegister(kLinkRegCode);
3666       context = ReadXRegister(31, Reg31IsStackPointer);
3667       break;
3668     default:
3669       VIXL_UNREACHABLE();
3670   }
3671 
3672   if (link) {
3673     WriteLr(instr->GetNextInstruction());
3674   }
3675 
3676   if (authenticate) {
3677     PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
3678     addr = AuthPAC(addr, context, key, kInstructionPointer);
3679 
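    // AuthPAC is expected to report a failed authentication by placing a
    // non-zero error code in the two bits just below the top PAC bit, which
    // is what the check below looks for.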
3680     int error_lsb = GetTopPACBit(addr, kInstructionPointer) - 2;
3681     if (((addr >> error_lsb) & 0x3) != 0x0) {
3682       VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
3683     }
3684   }
3685 
3686   WritePc(Instruction::Cast(addr));
3687   WriteNextBType(GetBTypeFromInstruction(instr));
3688 }
3689 
3690 
3691 void Simulator::VisitTestBranch(const Instruction* instr) {
3692   unsigned bit_pos =
3693       (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40();
3694   bool bit_zero = ((ReadXRegister(instr->GetRt()) >> bit_pos) & 1) == 0;
3695   bool take_branch = false;
3696   switch (instr->Mask(TestBranchMask)) {
3697     case TBZ:
3698       take_branch = bit_zero;
3699       break;
3700     case TBNZ:
3701       take_branch = !bit_zero;
3702       break;
3703     default:
3704       VIXL_UNIMPLEMENTED();
3705   }
3706   if (take_branch) {
3707     WritePc(instr->GetImmPCOffsetTarget());
3708   }
3709 }
3710 
3711 
3712 void Simulator::VisitCompareBranch(const Instruction* instr) {
3713   unsigned rt = instr->GetRt();
3714   bool take_branch = false;
3715   switch (instr->Mask(CompareBranchMask)) {
3716     case CBZ_w:
3717       take_branch = (ReadWRegister(rt) == 0);
3718       break;
3719     case CBZ_x:
3720       take_branch = (ReadXRegister(rt) == 0);
3721       break;
3722     case CBNZ_w:
3723       take_branch = (ReadWRegister(rt) != 0);
3724       break;
3725     case CBNZ_x:
3726       take_branch = (ReadXRegister(rt) != 0);
3727       break;
3728     default:
3729       VIXL_UNIMPLEMENTED();
3730   }
3731   if (take_branch) {
3732     WritePc(instr->GetImmPCOffsetTarget());
3733   }
3734 }
3735 
3736 
3737 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
3738   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3739   bool set_flags = instr->GetFlagsUpdate();
3740   int64_t new_val = 0;
3741   Instr operation = instr->Mask(AddSubOpMask);
3742 
3743   switch (operation) {
3744     case ADD:
3745     case ADDS: {
3746       new_val = AddWithCarry(reg_size,
3747                              set_flags,
3748                              ReadRegister(reg_size,
3749                                           instr->GetRn(),
3750                                           instr->GetRnMode()),
3751                              op2);
3752       break;
3753     }
3754     case SUB:
3755     case SUBS: {
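      // In two's complement, Rn - op2 == Rn + ~op2 + 1, so subtraction
      // reuses AddWithCarry with the operand inverted and the carry-in set.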
3756       new_val = AddWithCarry(reg_size,
3757                              set_flags,
3758                              ReadRegister(reg_size,
3759                                           instr->GetRn(),
3760                                           instr->GetRnMode()),
3761                              ~op2,
3762                              1);
3763       break;
3764     }
3765     default:
3766       VIXL_UNREACHABLE();
3767   }
3768 
3769   WriteRegister(reg_size,
3770                 instr->GetRd(),
3771                 new_val,
3772                 LogRegWrites,
3773                 instr->GetRdMode());
3774 }
3775 
3776 
3777 void Simulator::VisitAddSubShifted(const Instruction* instr) {
3778   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3779   int64_t op2 = ShiftOperand(reg_size,
3780                              ReadRegister(reg_size, instr->GetRm()),
3781                              static_cast<Shift>(instr->GetShiftDP()),
3782                              instr->GetImmDPShift());
3783   AddSubHelper(instr, op2);
3784 }
3785 
3786 
3787 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
3788   int64_t op2 = instr->GetImmAddSub()
3789                 << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
3790   AddSubHelper(instr, op2);
3791 }
3792 
3793 
3794 void Simulator::VisitAddSubExtended(const Instruction* instr) {
3795   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3796   int64_t op2 = ExtendValue(reg_size,
3797                             ReadRegister(reg_size, instr->GetRm()),
3798                             static_cast<Extend>(instr->GetExtendMode()),
3799                             instr->GetImmExtendShift());
3800   AddSubHelper(instr, op2);
3801 }
3802 
3803 
3804 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
3805   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3806   int64_t op2 = ReadRegister(reg_size, instr->GetRm());
3807   int64_t new_val;
3808 
3809   if ((instr->Mask(AddSubOpMask) == SUB) ||
3810       (instr->Mask(AddSubOpMask) == SUBS)) {
3811     op2 = ~op2;
3812   }
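  // ADC computes Rn + Rm + C. With the operand inverted, the same helper
  // yields Rn + ~Rm + C == Rn - Rm - (1 - C), i.e. the SBC borrow semantics.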
3813 
3814   new_val = AddWithCarry(reg_size,
3815                          instr->GetFlagsUpdate(),
3816                          ReadRegister(reg_size, instr->GetRn()),
3817                          op2,
3818                          ReadC());
3819 
3820   WriteRegister(reg_size, instr->GetRd(), new_val);
3821 }
3822 
3823 
3824 void Simulator::VisitRotateRightIntoFlags(const Instruction* instr) {
3825   switch (instr->Mask(RotateRightIntoFlagsMask)) {
3826     case RMIF: {
3827       uint64_t value = ReadRegister<uint64_t>(instr->GetRn());
3828       unsigned shift = instr->GetImmRMIFRotation();
3829       unsigned mask = instr->GetNzcv();
3830       uint64_t rotated = RotateRight(value, shift, kXRegSize);
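      // The low four bits of the rotated value are candidate N, Z, C and V
      // values; only the flags selected by the mask are replaced, the others
      // keep their previous values.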
3831 
3832       ReadNzcv().SetFlags((rotated & mask) | (ReadNzcv().GetFlags() & ~mask));
3833       break;
3834     }
3835   }
3836 }
3837 
3838 
3839 void Simulator::VisitEvaluateIntoFlags(const Instruction* instr) {
3840   uint32_t value = ReadRegister<uint32_t>(instr->GetRn());
3841   unsigned msb = (instr->Mask(EvaluateIntoFlagsMask) == SETF16) ? 15 : 7;
3842 
3843   unsigned sign_bit = (value >> msb) & 1;
3844   unsigned overflow_bit = (value >> (msb + 1)) & 1;
3845   ReadNzcv().SetN(sign_bit);
3846   ReadNzcv().SetZ((value << (31 - msb)) == 0);
3847   ReadNzcv().SetV(sign_bit ^ overflow_bit);
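  // N, Z and V are derived from the low 8 or 16 bits of the source as if a
  // flag-setting operation of that width had produced them; the C flag is
  // left unchanged.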
3848 }
3849 
3850 
3851 void Simulator::VisitLogicalShifted(const Instruction* instr) {
3852   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3853   Shift shift_type = static_cast<Shift>(instr->GetShiftDP());
3854   unsigned shift_amount = instr->GetImmDPShift();
3855   int64_t op2 = ShiftOperand(reg_size,
3856                              ReadRegister(reg_size, instr->GetRm()),
3857                              shift_type,
3858                              shift_amount);
3859   if (instr->Mask(NOT) == NOT) {
3860     op2 = ~op2;
3861   }
3862   LogicalHelper(instr, op2);
3863 }
3864 
3865 
3866 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
3867   if (instr->GetImmLogical() == 0) {
3868     VIXL_UNIMPLEMENTED();
3869   } else {
3870     LogicalHelper(instr, instr->GetImmLogical());
3871   }
3872 }
3873 
3874 
3875 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
3876   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3877   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
3878   int64_t result = 0;
3879   bool update_flags = false;
3880 
3881   // Switch on the logical operation, stripping out the NOT bit, as it has a
3882   // different meaning for logical immediate instructions.
3883   switch (instr->Mask(LogicalOpMask & ~NOT)) {
3884     case ANDS:
3885       update_flags = true;
3886       VIXL_FALLTHROUGH();
3887     case AND:
3888       result = op1 & op2;
3889       break;
3890     case ORR:
3891       result = op1 | op2;
3892       break;
3893     case EOR:
3894       result = op1 ^ op2;
3895       break;
3896     default:
3897       VIXL_UNIMPLEMENTED();
3898   }
3899 
3900   if (update_flags) {
3901     ReadNzcv().SetN(CalcNFlag(result, reg_size));
3902     ReadNzcv().SetZ(CalcZFlag(result));
3903     ReadNzcv().SetC(0);
3904     ReadNzcv().SetV(0);
3905     LogSystemRegister(NZCV);
3906   }
3907 
3908   WriteRegister(reg_size,
3909                 instr->GetRd(),
3910                 result,
3911                 LogRegWrites,
3912                 instr->GetRdMode());
3913 }
3914 
3915 
3916 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
3917   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3918   ConditionalCompareHelper(instr, ReadRegister(reg_size, instr->GetRm()));
3919 }
3920 
3921 
3922 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
3923   ConditionalCompareHelper(instr, instr->GetImmCondCmp());
3924 }
3925 
3926 
3927 void Simulator::ConditionalCompareHelper(const Instruction* instr,
3928                                          int64_t op2) {
3929   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
3930   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
3931 
3932   if (ConditionPassed(instr->GetCondition())) {
3933     // If the condition passes, set the status flags to the result of comparing
3934     // the operands.
3935     if (instr->Mask(ConditionalCompareMask) == CCMP) {
3936       AddWithCarry(reg_size, true, op1, ~op2, 1);
3937     } else {
3938       VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
3939       AddWithCarry(reg_size, true, op1, op2, 0);
3940     }
3941   } else {
3942     // If the condition fails, set the status flags to the nzcv immediate.
3943     ReadNzcv().SetFlags(instr->GetNzcv());
3944     LogSystemRegister(NZCV);
3945   }
3946 }
3947 
3948 
3949 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
3950   int offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
3951   LoadStoreHelper(instr, offset, Offset);
3952 }
3953 
3954 
3955 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
3956   LoadStoreHelper(instr, instr->GetImmLS(), Offset);
3957 }
3958 
3959 
3960 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
3961   LoadStoreHelper(instr, instr->GetImmLS(), PreIndex);
3962 }
3963 
3964 
3965 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
3966   LoadStoreHelper(instr, instr->GetImmLS(), PostIndex);
3967 }
3968 
3969 
3970 template <typename T1, typename T2>
3971 void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
3972   unsigned rt = instr->GetRt();
3973   unsigned rn = instr->GetRn();
3974 
3975   unsigned element_size = sizeof(T2);
3976   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
3977   int offset = instr->GetImmLS();
3978   address += offset;
3979 
3980   // Verify that the address is available to the host.
3981   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
3982 
3983   // Check that the access does not cross a 16-byte boundary.
3984   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
3985     VIXL_ALIGNMENT_EXCEPTION();
3986   }
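  // For example, a 4-byte access at 0x100c stays within the 16-byte granule
  // starting at 0x1000 and is allowed, whereas one at 0x100e would span the
  // granules at 0x1000 and 0x1010 and triggers the alignment exception.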
3987 
3988   WriteRegister<T1>(rt, static_cast<T1>(MemRead<T2>(address)));
3989 
3990   // Approximate load-acquire by issuing a full barrier after the load.
3991   __sync_synchronize();
3992 
3993   LogRead(rt, GetPrintRegisterFormat(element_size), address);
3994 }
3995 
3996 
3997 template <typename T>
3998 void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
3999   unsigned rt = instr->GetRt();
4000   unsigned rn = instr->GetRn();
4001 
4002   unsigned element_size = sizeof(T);
4003   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4004   int offset = instr->GetImmLS();
4005   address += offset;
4006 
4007   // Verify that the address is available to the host.
4008   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4009 
4010   // Check that the access does not cross a 16-byte boundary.
4011   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
4012     VIXL_ALIGNMENT_EXCEPTION();
4013   }
4014 
4015   // Approximate store-release by issuing a full barrier before the store.
4016   __sync_synchronize();
4017 
4018   MemWrite<T>(address, ReadRegister<T>(rt));
4019 
4020   LogWrite(rt, GetPrintRegisterFormat(element_size), address);
4021 }
4022 
4023 
4024 void Simulator::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
4025   switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
4026     case LDAPURB:
4027       LoadAcquireRCpcUnscaledOffsetHelper<uint8_t, uint8_t>(instr);
4028       break;
4029     case LDAPURH:
4030       LoadAcquireRCpcUnscaledOffsetHelper<uint16_t, uint16_t>(instr);
4031       break;
4032     case LDAPUR_w:
4033       LoadAcquireRCpcUnscaledOffsetHelper<uint32_t, uint32_t>(instr);
4034       break;
4035     case LDAPUR_x:
4036       LoadAcquireRCpcUnscaledOffsetHelper<uint64_t, uint64_t>(instr);
4037       break;
4038     case LDAPURSB_w:
4039       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int8_t>(instr);
4040       break;
4041     case LDAPURSB_x:
4042       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int8_t>(instr);
4043       break;
4044     case LDAPURSH_w:
4045       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int16_t>(instr);
4046       break;
4047     case LDAPURSH_x:
4048       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int16_t>(instr);
4049       break;
4050     case LDAPURSW:
4051       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int32_t>(instr);
4052       break;
4053     case STLURB:
4054       StoreReleaseUnscaledOffsetHelper<uint8_t>(instr);
4055       break;
4056     case STLURH:
4057       StoreReleaseUnscaledOffsetHelper<uint16_t>(instr);
4058       break;
4059     case STLUR_w:
4060       StoreReleaseUnscaledOffsetHelper<uint32_t>(instr);
4061       break;
4062     case STLUR_x:
4063       StoreReleaseUnscaledOffsetHelper<uint64_t>(instr);
4064       break;
4065   }
4066 }
4067 
4068 
4069 void Simulator::VisitLoadStorePAC(const Instruction* instr) {
4070   unsigned dst = instr->GetRt();
4071   unsigned addr_reg = instr->GetRn();
4072 
4073   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
4074 
4075   PACKey key = (instr->ExtractBit(23) == 0) ? kPACKeyDA : kPACKeyDB;
4076   address = AuthPAC(address, 0, key, kDataPointer);
4077 
4078   int error_lsb = GetTopPACBit(address, kInstructionPointer) - 2;
4079   if (((address >> error_lsb) & 0x3) != 0x0) {
4080     VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
4081   }
4082 
4083 
4084   if ((addr_reg == 31) && ((address % 16) != 0)) {
4085     // When the base register is SP the stack pointer is required to be
4086     // quadword aligned prior to the address calculation and write-backs.
4087     // Misalignment will cause a stack alignment fault.
4088     VIXL_ALIGNMENT_EXCEPTION();
4089   }
4090 
4091   int64_t offset = instr->GetImmLSPAC();
4092   address += offset;
4093 
4094   if (instr->Mask(LoadStorePACPreBit) == LoadStorePACPreBit) {
4095     // Pre-index mode.
4096     VIXL_ASSERT(offset != 0);
4097     WriteXRegister(addr_reg, address, LogRegWrites, Reg31IsStackPointer);
4098   }
4099 
4100   uintptr_t addr_ptr = static_cast<uintptr_t>(address);
4101 
4102   // Verify that the calculated address is available to the host.
4103   VIXL_ASSERT(address == addr_ptr);
4104 
4105   WriteXRegister(dst, MemRead<uint64_t>(addr_ptr), NoRegLog);
4106   unsigned access_size = 1 << 3;
4107   LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
4108 }
4109 
4110 
4111 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
4112   Extend ext = static_cast<Extend>(instr->GetExtendMode());
4113   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
4114   unsigned shift_amount = instr->GetImmShiftLS() * instr->GetSizeLS();
4115 
4116   int64_t offset =
4117       ExtendValue(kXRegSize, ReadXRegister(instr->GetRm()), ext, shift_amount);
4118   LoadStoreHelper(instr, offset, Offset);
4119 }
4120 
4121 
4122 void Simulator::LoadStoreHelper(const Instruction* instr,
4123                                 int64_t offset,
4124                                 AddrMode addrmode) {
4125   unsigned srcdst = instr->GetRt();
4126   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4127 
4128   bool rt_is_vreg = false;
4129   int extend_to_size = 0;
4130   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
4131   switch (op) {
4132     case LDRB_w:
4133       WriteWRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4134       extend_to_size = kWRegSizeInBytes;
4135       break;
4136     case LDRH_w:
4137       WriteWRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4138       extend_to_size = kWRegSizeInBytes;
4139       break;
4140     case LDR_w:
4141       WriteWRegister(srcdst, MemRead<uint32_t>(address), NoRegLog);
4142       extend_to_size = kWRegSizeInBytes;
4143       break;
4144     case LDR_x:
4145       WriteXRegister(srcdst, MemRead<uint64_t>(address), NoRegLog);
4146       extend_to_size = kXRegSizeInBytes;
4147       break;
4148     case LDRSB_w:
4149       WriteWRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4150       extend_to_size = kWRegSizeInBytes;
4151       break;
4152     case LDRSH_w:
4153       WriteWRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4154       extend_to_size = kWRegSizeInBytes;
4155       break;
4156     case LDRSB_x:
4157       WriteXRegister(srcdst, MemRead<int8_t>(address), NoRegLog);
4158       extend_to_size = kXRegSizeInBytes;
4159       break;
4160     case LDRSH_x:
4161       WriteXRegister(srcdst, MemRead<int16_t>(address), NoRegLog);
4162       extend_to_size = kXRegSizeInBytes;
4163       break;
4164     case LDRSW_x:
4165       WriteXRegister(srcdst, MemRead<int32_t>(address), NoRegLog);
4166       extend_to_size = kXRegSizeInBytes;
4167       break;
4168     case LDR_b:
4169       WriteBRegister(srcdst, MemRead<uint8_t>(address), NoRegLog);
4170       rt_is_vreg = true;
4171       break;
4172     case LDR_h:
4173       WriteHRegister(srcdst, MemRead<uint16_t>(address), NoRegLog);
4174       rt_is_vreg = true;
4175       break;
4176     case LDR_s:
4177       WriteSRegister(srcdst, MemRead<float>(address), NoRegLog);
4178       rt_is_vreg = true;
4179       break;
4180     case LDR_d:
4181       WriteDRegister(srcdst, MemRead<double>(address), NoRegLog);
4182       rt_is_vreg = true;
4183       break;
4184     case LDR_q:
4185       WriteQRegister(srcdst, MemRead<qreg_t>(address), NoRegLog);
4186       rt_is_vreg = true;
4187       break;
4188 
4189     case STRB_w:
4190       MemWrite<uint8_t>(address, ReadWRegister(srcdst));
4191       break;
4192     case STRH_w:
4193       MemWrite<uint16_t>(address, ReadWRegister(srcdst));
4194       break;
4195     case STR_w:
4196       MemWrite<uint32_t>(address, ReadWRegister(srcdst));
4197       break;
4198     case STR_x:
4199       MemWrite<uint64_t>(address, ReadXRegister(srcdst));
4200       break;
4201     case STR_b:
4202       MemWrite<uint8_t>(address, ReadBRegister(srcdst));
4203       rt_is_vreg = true;
4204       break;
4205     case STR_h:
4206       MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst));
4207       rt_is_vreg = true;
4208       break;
4209     case STR_s:
4210       MemWrite<float>(address, ReadSRegister(srcdst));
4211       rt_is_vreg = true;
4212       break;
4213     case STR_d:
4214       MemWrite<double>(address, ReadDRegister(srcdst));
4215       rt_is_vreg = true;
4216       break;
4217     case STR_q:
4218       MemWrite<qreg_t>(address, ReadQRegister(srcdst));
4219       rt_is_vreg = true;
4220       break;
4221 
4222     // Ignore prfm hint instructions.
4223     case PRFM:
4224       break;
4225 
4226     default:
4227       VIXL_UNIMPLEMENTED();
4228   }
4229 
4230   // Print a detailed trace (including the memory address).
4231   bool extend = (extend_to_size != 0);
4232   unsigned access_size = 1 << instr->GetSizeLS();
4233   unsigned result_size = extend ? extend_to_size : access_size;
4234   PrintRegisterFormat print_format =
4235       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4236                  : GetPrintRegisterFormatForSize(result_size);
4237 
4238   if (instr->IsLoad()) {
4239     if (rt_is_vreg) {
4240       LogVRead(srcdst, print_format, address);
4241     } else {
4242       LogExtendingRead(srcdst, print_format, access_size, address);
4243     }
4244   } else if (instr->IsStore()) {
4245     if (rt_is_vreg) {
4246       LogVWrite(srcdst, print_format, address);
4247     } else {
4248       LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address);
4249     }
4250   } else {
4251     VIXL_ASSERT(op == PRFM);
4252   }
4253 
4254   local_monitor_.MaybeClear();
4255 }
4256 
4257 
4258 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
4259   LoadStorePairHelper(instr, Offset);
4260 }
4261 
4262 
4263 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
4264   LoadStorePairHelper(instr, PreIndex);
4265 }
4266 
4267 
4268 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
4269   LoadStorePairHelper(instr, PostIndex);
4270 }
4271 
4272 
4273 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
4274   LoadStorePairHelper(instr, Offset);
4275 }
4276 
4277 
4278 void Simulator::LoadStorePairHelper(const Instruction* instr,
4279                                     AddrMode addrmode) {
4280   unsigned rt = instr->GetRt();
4281   unsigned rt2 = instr->GetRt2();
4282   int element_size = 1 << instr->GetSizeLSPair();
4283   int64_t offset = instr->GetImmLSPair() * element_size;
4284   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
4285   uintptr_t address2 = address + element_size;
4286 
4287   LoadStorePairOp op =
4288       static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));
4289 
4290   // 'rt' and 'rt2' can only be aliased for stores.
4291   VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));
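  // Loading both elements of a pair into the same register is CONSTRAINED
  // UNPREDICTABLE in the architecture, so only stores may alias here.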
4292 
4293   bool rt_is_vreg = false;
4294   bool sign_extend = false;
4295   switch (op) {
4296     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
4297     // will print a more detailed log.
4298     case LDP_w: {
4299       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4300       WriteWRegister(rt2, MemRead<uint32_t>(address2), NoRegLog);
4301       break;
4302     }
4303     case LDP_s: {
4304       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
4305       WriteSRegister(rt2, MemRead<float>(address2), NoRegLog);
4306       rt_is_vreg = true;
4307       break;
4308     }
4309     case LDP_x: {
4310       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4311       WriteXRegister(rt2, MemRead<uint64_t>(address2), NoRegLog);
4312       break;
4313     }
4314     case LDP_d: {
4315       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
4316       WriteDRegister(rt2, MemRead<double>(address2), NoRegLog);
4317       rt_is_vreg = true;
4318       break;
4319     }
4320     case LDP_q: {
4321       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
4322       WriteQRegister(rt2, MemRead<qreg_t>(address2), NoRegLog);
4323       rt_is_vreg = true;
4324       break;
4325     }
4326     case LDPSW_x: {
4327       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
4328       WriteXRegister(rt2, MemRead<int32_t>(address2), NoRegLog);
4329       sign_extend = true;
4330       break;
4331     }
4332     case STP_w: {
4333       MemWrite<uint32_t>(address, ReadWRegister(rt));
4334       MemWrite<uint32_t>(address2, ReadWRegister(rt2));
4335       break;
4336     }
4337     case STP_s: {
4338       MemWrite<float>(address, ReadSRegister(rt));
4339       MemWrite<float>(address2, ReadSRegister(rt2));
4340       rt_is_vreg = true;
4341       break;
4342     }
4343     case STP_x: {
4344       MemWrite<uint64_t>(address, ReadXRegister(rt));
4345       MemWrite<uint64_t>(address2, ReadXRegister(rt2));
4346       break;
4347     }
4348     case STP_d: {
4349       MemWrite<double>(address, ReadDRegister(rt));
4350       MemWrite<double>(address2, ReadDRegister(rt2));
4351       rt_is_vreg = true;
4352       break;
4353     }
4354     case STP_q: {
4355       MemWrite<qreg_t>(address, ReadQRegister(rt));
4356       MemWrite<qreg_t>(address2, ReadQRegister(rt2));
4357       rt_is_vreg = true;
4358       break;
4359     }
4360     default:
4361       VIXL_UNREACHABLE();
4362   }
4363 
4364   // Print a detailed trace (including the memory address).
4365   unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size;
4366   PrintRegisterFormat print_format =
4367       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
4368                  : GetPrintRegisterFormatForSize(result_size);
4369 
4370   if (instr->IsLoad()) {
4371     if (rt_is_vreg) {
4372       LogVRead(rt, print_format, address);
4373       LogVRead(rt2, print_format, address2);
4374     } else if (sign_extend) {
4375       LogExtendingRead(rt, print_format, element_size, address);
4376       LogExtendingRead(rt2, print_format, element_size, address2);
4377     } else {
4378       LogRead(rt, print_format, address);
4379       LogRead(rt2, print_format, address2);
4380     }
4381   } else {
4382     if (rt_is_vreg) {
4383       LogVWrite(rt, print_format, address);
4384       LogVWrite(rt2, print_format, address2);
4385     } else {
4386       LogWrite(rt, print_format, address);
4387       LogWrite(rt2, print_format, address2);
4388     }
4389   }
4390 
4391   local_monitor_.MaybeClear();
4392 }
4393 
4394 
4395 template <typename T>
4396 void Simulator::CompareAndSwapHelper(const Instruction* instr) {
4397   unsigned rs = instr->GetRs();
4398   unsigned rt = instr->GetRt();
4399   unsigned rn = instr->GetRn();
4400 
4401   unsigned element_size = sizeof(T);
4402   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4403 
4404   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4405 
4406   bool is_acquire = instr->ExtractBit(22) == 1;
4407   bool is_release = instr->ExtractBit(15) == 1;
4408 
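  // CAS atomically compares the value at [rn] with rs and, if they are equal,
  // stores rt to memory; rs always receives the value that was read. For
  // example (illustrative): `casal w0, w1, [x2]` loads [x2] into w0 and writes
  // w1 back only if the loaded value equalled the original w0.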
4409   T comparevalue = ReadRegister<T>(rs);
4410   T newvalue = ReadRegister<T>(rt);
4411 
4412   // The architecture permits the data read to clear any exclusive monitors
4413   // associated with that location, even if the compare subsequently fails.
4414   local_monitor_.Clear();
4415 
4416   T data = MemRead<T>(address);
4417   if (is_acquire) {
4418     // Approximate load-acquire by issuing a full barrier after the load.
4419     __sync_synchronize();
4420   }
4421 
4422   if (data == comparevalue) {
4423     if (is_release) {
4424       // Approximate store-release by issuing a full barrier before the store.
4425       __sync_synchronize();
4426     }
4427     MemWrite<T>(address, newvalue);
4428     LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
4429   }
4430   WriteRegister<T>(rs, data, NoRegLog);
4431   LogRead(rs, GetPrintRegisterFormatForSize(element_size), address);
4432 }
4433 
4434 
4435 template <typename T>
4436 void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
4437   VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
4438   unsigned rs = instr->GetRs();
4439   unsigned rt = instr->GetRt();
4440   unsigned rn = instr->GetRn();
4441 
4442   VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
4443 
4444   unsigned element_size = sizeof(T);
4445   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4446 
4447   CheckIsValidUnalignedAtomicAccess(rn, address, element_size * 2);
4448 
4449   uint64_t address2 = address + element_size;
4450 
4451   bool is_acquire = instr->ExtractBit(22) == 1;
4452   bool is_release = instr->ExtractBit(15) == 1;
4453 
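  // CASP compares the even-numbered register pair {rs, rs+1} with the pair of
  // memory words at [rn] and, if both match, stores {rt, rt+1}; {rs, rs+1}
  // always receive the values that were read.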
4454   T comparevalue_high = ReadRegister<T>(rs + 1);
4455   T comparevalue_low = ReadRegister<T>(rs);
4456   T newvalue_high = ReadRegister<T>(rt + 1);
4457   T newvalue_low = ReadRegister<T>(rt);
4458 
4459   // The architecture permits the data read to clear any exclusive monitors
4460   // associated with that location, even if the compare subsequently fails.
4461   local_monitor_.Clear();
4462 
4463   T data_low = MemRead<T>(address);
4464   T data_high = MemRead<T>(address2);
4465 
4466   if (is_acquire) {
4467     // Approximate load-acquire by issuing a full barrier after the load.
4468     __sync_synchronize();
4469   }
4470 
4471   bool same =
4472       (data_high == comparevalue_high) && (data_low == comparevalue_low);
4473   if (same) {
4474     if (is_release) {
4475       // Approximate store-release by issuing a full barrier before the store.
4476       __sync_synchronize();
4477     }
4478 
4479     MemWrite<T>(address, newvalue_low);
4480     MemWrite<T>(address2, newvalue_high);
4481   }
4482 
4483   WriteRegister<T>(rs + 1, data_high, NoRegLog);
4484   WriteRegister<T>(rs, data_low, NoRegLog);
4485 
4486   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4487   LogRead(rs, format, address);
4488   LogRead(rs + 1, format, address2);
4489 
4490   if (same) {
4491     LogWrite(rt, format, address);
4492     LogWrite(rt + 1, format, address2);
4493   }
4494 }
4495 
4496 bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
4497   // To simulate fault-tolerant loads, we need to know what host addresses we
4498   // can access without generating a real fault. One way to do that is to
4499   // attempt to `write()` the memory to a placeholder pipe[1]. This is more
4500   // portable and less intrusive than using (global) signal handlers.
4501   //
4502   // [1]: https://stackoverflow.com/questions/7134590
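  //
  // For example, a caller could use it as a guard (a hypothetical usage
  // sketch; in this simulator it primarily backs SVE fault-tolerant loads):
  //
  //   if (CanReadMemory(addr, access_size)) {
  //     // Perform the whole access.
  //   } else {
  //     // Suppress or retry the faulting part of the access.
  //   }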
4503 
4504   size_t written = 0;
4505   bool can_read = true;
4506   // `write` will normally transfer the whole buffer in one call, but it is
4507   // allowed to handle only part of the operation, so wrap it in a loop.
4508   while (can_read && (written < size)) {
4509     ssize_t result = write(placeholder_pipe_fd_[1],
4510                            reinterpret_cast<void*>(address + written),
4511                            size - written);
4512     if (result > 0) {
4513       written += result;
4514     } else {
4515       switch (result) {
4516         case -EPERM:
4517         case -EFAULT:
4518           // The address range is not accessible.
4519           // `write` is supposed to return -EFAULT in this case, but in practice
4520           // it seems to return -EPERM, so we accept that too.
4521           can_read = false;
4522           break;
4523         case -EINTR:
4524           // The call was interrupted by a signal. Just try again.
4525           break;
4526         default:
4527           // Any other error is fatal.
4528           VIXL_ABORT();
4529       }
4530     }
4531   }
4532   // Drain the read side of the pipe. If we don't do this, we'll leak memory as
4533   // the placeholder data is buffered. As before, we expect to drain the whole
4534   // write in one invocation, but cannot guarantee that, so we wrap it in a
4535   // loop. This function is primarily intended to implement SVE fault-tolerant
4536   // loads, so the maximum Z register size is a good default buffer size.
4537   char buffer[kZRegMaxSizeInBytes];
4538   while (written > 0) {
4539     ssize_t result = read(placeholder_pipe_fd_[0],
4540                           reinterpret_cast<void*>(buffer),
4541                           sizeof(buffer));
4542     // `read` blocks, and returns 0 only at EOF. We should not hit EOF until
4543     // we've read everything that was written, so treat 0 as an error.
4544     if (result > 0) {
4545       VIXL_ASSERT(static_cast<size_t>(result) <= written);
4546       written -= result;
4547     } else {
4548       // For -EINTR, just try again. We can't handle any other error.
4549       VIXL_CHECK(result == -EINTR);
4550     }
4551   }
4552 
4553   return can_read;
4554 }
4555 
4556 void Simulator::PrintExclusiveAccessWarning() {
4557   if (print_exclusive_access_warning_) {
4558     fprintf(stderr,
4559             "%sWARNING:%s VIXL simulator support for "
4560             "load-/store-/clear-exclusive "
4561             "instructions is limited. Refer to the README for details.%s\n",
4562             clr_warning,
4563             clr_warning_message,
4564             clr_normal);
4565     print_exclusive_access_warning_ = false;
4566   }
4567 }
4568 
4569 void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
4570   LoadStoreExclusive op =
4571       static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));
4572 
4573   switch (op) {
4574     case CAS_w:
4575     case CASA_w:
4576     case CASL_w:
4577     case CASAL_w:
4578       CompareAndSwapHelper<uint32_t>(instr);
4579       break;
4580     case CAS_x:
4581     case CASA_x:
4582     case CASL_x:
4583     case CASAL_x:
4584       CompareAndSwapHelper<uint64_t>(instr);
4585       break;
4586     case CASB:
4587     case CASAB:
4588     case CASLB:
4589     case CASALB:
4590       CompareAndSwapHelper<uint8_t>(instr);
4591       break;
4592     case CASH:
4593     case CASAH:
4594     case CASLH:
4595     case CASALH:
4596       CompareAndSwapHelper<uint16_t>(instr);
4597       break;
4598     case CASP_w:
4599     case CASPA_w:
4600     case CASPL_w:
4601     case CASPAL_w:
4602       CompareAndSwapPairHelper<uint32_t>(instr);
4603       break;
4604     case CASP_x:
4605     case CASPA_x:
4606     case CASPL_x:
4607     case CASPAL_x:
4608       CompareAndSwapPairHelper<uint64_t>(instr);
4609       break;
4610     default:
4611       PrintExclusiveAccessWarning();
4612 
4613       unsigned rs = instr->GetRs();
4614       unsigned rt = instr->GetRt();
4615       unsigned rt2 = instr->GetRt2();
4616       unsigned rn = instr->GetRn();
4617 
4618       bool is_exclusive = !instr->GetLdStXNotExclusive();
4619       bool is_acquire_release =
4620           !is_exclusive || instr->GetLdStXAcquireRelease();
4621       bool is_load = instr->GetLdStXLoad();
4622       bool is_pair = instr->GetLdStXPair();
4623 
4624       unsigned element_size = 1 << instr->GetLdStXSizeLog2();
4625       unsigned access_size = is_pair ? element_size * 2 : element_size;
4626       uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4627 
4628       CheckIsValidUnalignedAtomicAccess(rn, address, access_size);
4629 
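      // Load-exclusive marks the local monitor for the accessed range; a
      // later store-exclusive to that range succeeds (writing 0 to rs) only
      // if the monitor is still set, otherwise it writes 1 and leaves memory
      // unchanged. A typical guest sequence is a retry loop, for example
      // (illustrative):
      //
      //   retry:
      //     ldxr  w0, [x2]
      //     add   w0, w0, #1
      //     stxr  w1, w0, [x2]
      //     cbnz  w1, retry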
4630       if (is_load) {
4631         if (is_exclusive) {
4632           local_monitor_.MarkExclusive(address, access_size);
4633         } else {
4634           // Any non-exclusive load can clear the local monitor as a side
4635           // effect. We don't need to do this, but it is useful to stress the
4636           // simulated code.
4637           local_monitor_.Clear();
4638         }
4639 
4640         // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
4641         // We will print a more detailed log.
4642         unsigned reg_size = 0;
4643         switch (op) {
4644           case LDXRB_w:
4645           case LDAXRB_w:
4646           case LDARB_w:
4647           case LDLARB:
4648             WriteWRegister(rt, MemRead<uint8_t>(address), NoRegLog);
4649             reg_size = kWRegSizeInBytes;
4650             break;
4651           case LDXRH_w:
4652           case LDAXRH_w:
4653           case LDARH_w:
4654           case LDLARH:
4655             WriteWRegister(rt, MemRead<uint16_t>(address), NoRegLog);
4656             reg_size = kWRegSizeInBytes;
4657             break;
4658           case LDXR_w:
4659           case LDAXR_w:
4660           case LDAR_w:
4661           case LDLAR_w:
4662             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4663             reg_size = kWRegSizeInBytes;
4664             break;
4665           case LDXR_x:
4666           case LDAXR_x:
4667           case LDAR_x:
4668           case LDLAR_x:
4669             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4670             reg_size = kXRegSizeInBytes;
4671             break;
4672           case LDXP_w:
4673           case LDAXP_w:
4674             WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
4675             WriteWRegister(rt2,
4676                            MemRead<uint32_t>(address + element_size),
4677                            NoRegLog);
4678             reg_size = kWRegSizeInBytes;
4679             break;
4680           case LDXP_x:
4681           case LDAXP_x:
4682             WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
4683             WriteXRegister(rt2,
4684                            MemRead<uint64_t>(address + element_size),
4685                            NoRegLog);
4686             reg_size = kXRegSizeInBytes;
4687             break;
4688           default:
4689             VIXL_UNREACHABLE();
4690         }
4691 
4692         if (is_acquire_release) {
4693           // Approximate load-acquire by issuing a full barrier after the load.
4694           __sync_synchronize();
4695         }
4696 
4697         PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
4698         LogExtendingRead(rt, format, element_size, address);
4699         if (is_pair) {
4700           LogExtendingRead(rt2, format, element_size, address + element_size);
4701         }
4702       } else {
4703         if (is_acquire_release) {
4704           // Approximate store-release by issuing a full barrier before the
4705           // store.
4706           __sync_synchronize();
4707         }
4708 
4709         bool do_store = true;
4710         if (is_exclusive) {
4711           do_store = local_monitor_.IsExclusive(address, access_size) &&
4712                      global_monitor_.IsExclusive(address, access_size);
4713           WriteWRegister(rs, do_store ? 0 : 1);
4714 
4715           //  - All exclusive stores explicitly clear the local monitor.
4716           local_monitor_.Clear();
4717         } else {
4718           //  - Any other store can clear the local monitor as a side effect.
4719           local_monitor_.MaybeClear();
4720         }
4721 
4722         if (do_store) {
4723           switch (op) {
4724             case STXRB_w:
4725             case STLXRB_w:
4726             case STLRB_w:
4727             case STLLRB:
4728               MemWrite<uint8_t>(address, ReadWRegister(rt));
4729               break;
4730             case STXRH_w:
4731             case STLXRH_w:
4732             case STLRH_w:
4733             case STLLRH:
4734               MemWrite<uint16_t>(address, ReadWRegister(rt));
4735               break;
4736             case STXR_w:
4737             case STLXR_w:
4738             case STLR_w:
4739             case STLLR_w:
4740               MemWrite<uint32_t>(address, ReadWRegister(rt));
4741               break;
4742             case STXR_x:
4743             case STLXR_x:
4744             case STLR_x:
4745             case STLLR_x:
4746               MemWrite<uint64_t>(address, ReadXRegister(rt));
4747               break;
4748             case STXP_w:
4749             case STLXP_w:
4750               MemWrite<uint32_t>(address, ReadWRegister(rt));
4751               MemWrite<uint32_t>(address + element_size, ReadWRegister(rt2));
4752               break;
4753             case STXP_x:
4754             case STLXP_x:
4755               MemWrite<uint64_t>(address, ReadXRegister(rt));
4756               MemWrite<uint64_t>(address + element_size, ReadXRegister(rt2));
4757               break;
4758             default:
4759               VIXL_UNREACHABLE();
4760           }
4761 
4762           PrintRegisterFormat format =
4763               GetPrintRegisterFormatForSize(element_size);
4764           LogWrite(rt, format, address);
4765           if (is_pair) {
4766             LogWrite(rt2, format, address + element_size);
4767           }
4768         }
4769       }
4770   }
4771 }
4772 
4773 template <typename T>
4774 void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
4775   unsigned rs = instr->GetRs();
4776   unsigned rt = instr->GetRt();
4777   unsigned rn = instr->GetRn();
4778 
4779   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
4780   bool is_release = instr->ExtractBit(22) == 1;
4781 
4782   unsigned element_size = sizeof(T);
4783   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4784 
4785   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4786 
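  // These are the LD<op> atomics: rt receives the original value at [rn], and
  // memory is updated with (old <op> rs). For example (illustrative):
  // `ldadd w1, w0, [x2]` loads [x2] into w0 and stores ([x2] + w1) back.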
4787   T value = ReadRegister<T>(rs);
4788 
4789   T data = MemRead<T>(address);
4790 
4791   if (is_acquire) {
4792     // Approximate load-acquire by issuing a full barrier after the load.
4793     __sync_synchronize();
4794   }
4795 
4796   T result = 0;
4797   switch (instr->Mask(AtomicMemorySimpleOpMask)) {
4798     case LDADDOp:
4799       result = data + value;
4800       break;
4801     case LDCLROp:
4802       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4803       result = data & ~value;
4804       break;
4805     case LDEOROp:
4806       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4807       result = data ^ value;
4808       break;
4809     case LDSETOp:
4810       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
4811       result = data | value;
4812       break;
4813 
4814     // The signed/unsigned distinction is handled via the templated type T.
4815     case LDSMAXOp:
4816     case LDUMAXOp:
4817       result = (data > value) ? data : value;
4818       break;
4819     case LDSMINOp:
4820     case LDUMINOp:
4821       result = (data > value) ? value : data;
4822       break;
4823   }
4824 
4825   if (is_release) {
4826     // Approximate store-release by issuing a full barrier before the store.
4827     __sync_synchronize();
4828   }
4829 
4830   MemWrite<T>(address, result);
4831   WriteRegister<T>(rt, data, NoRegLog);
4832 
4833   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4834   LogRead(rt, format, address);
4835   LogWrite(rs, format, address);
4836 }
4837 
4838 template <typename T>
4839 void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
4840   unsigned rs = instr->GetRs();
4841   unsigned rt = instr->GetRt();
4842   unsigned rn = instr->GetRn();
4843 
4844   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
4845   bool is_release = instr->ExtractBit(22) == 1;
4846 
4847   unsigned element_size = sizeof(T);
4848   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4849 
4850   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4851 
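  // SWP atomically exchanges rs with the value at [rn]; rt receives the old
  // memory value. For example (illustrative): `swpal w1, w0, [x2]` stores w1
  // to [x2] and returns the previous contents of [x2] in w0.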
4852   T data = MemRead<T>(address);
4853   if (is_acquire) {
4854     // Approximate load-acquire by issuing a full barrier after the load.
4855     __sync_synchronize();
4856   }
4857 
4858   if (is_release) {
4859     // Approximate store-release by issuing a full barrier before the store.
4860     __sync_synchronize();
4861   }
4862   MemWrite<T>(address, ReadRegister<T>(rs));
4863 
4864   WriteRegister<T>(rt, data);
4865 
4866   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
4867   LogRead(rt, format, address);
4868   LogWrite(rs, format, address);
4869 }
4870 
4871 template <typename T>
4872 void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
4873   unsigned rt = instr->GetRt();
4874   unsigned rn = instr->GetRn();
4875 
4876   unsigned element_size = sizeof(T);
4877   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
4878 
4879   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
4880 
4881   WriteRegister<T>(rt, MemRead<T>(address));
4882 
4883   // Approximate load-acquire by issuing a full barrier after the load.
4884   __sync_synchronize();
4885 
4886   LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
4887 }
4888 
4889 #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
4890   V(LDADD)                                \
4891   V(LDCLR)                                \
4892   V(LDEOR)                                \
4893   V(LDSET)                                \
4894   V(LDUMAX)                               \
4895   V(LDUMIN)
4896 
4897 #define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
4898   V(LDSMAX)                              \
4899   V(LDSMIN)
4900 
4901 void Simulator::VisitAtomicMemory(const Instruction* instr) {
4902   switch (instr->Mask(AtomicMemoryMask)) {
4903 // clang-format off
4904 #define SIM_FUNC_B(A) \
4905     case A##B:        \
4906     case A##AB:       \
4907     case A##LB:       \
4908     case A##ALB:
4909 #define SIM_FUNC_H(A) \
4910     case A##H:        \
4911     case A##AH:       \
4912     case A##LH:       \
4913     case A##ALH:
4914 #define SIM_FUNC_w(A) \
4915     case A##_w:       \
4916     case A##A_w:      \
4917     case A##L_w:      \
4918     case A##AL_w:
4919 #define SIM_FUNC_x(A) \
4920     case A##_x:       \
4921     case A##A_x:      \
4922     case A##L_x:      \
4923     case A##AL_x:
4924 
4925     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
4926       AtomicMemorySimpleHelper<uint8_t>(instr);
4927       break;
4928     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
4929       AtomicMemorySimpleHelper<int8_t>(instr);
4930       break;
4931     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
4932       AtomicMemorySimpleHelper<uint16_t>(instr);
4933       break;
4934     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
4935       AtomicMemorySimpleHelper<int16_t>(instr);
4936       break;
4937     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
4938       AtomicMemorySimpleHelper<uint32_t>(instr);
4939       break;
4940     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
4941       AtomicMemorySimpleHelper<int32_t>(instr);
4942       break;
4943     ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
4944       AtomicMemorySimpleHelper<uint64_t>(instr);
4945       break;
4946     ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
4947       AtomicMemorySimpleHelper<int64_t>(instr);
4948       break;
4949     // clang-format on
4950 
4951     case SWPB:
4952     case SWPAB:
4953     case SWPLB:
4954     case SWPALB:
4955       AtomicMemorySwapHelper<uint8_t>(instr);
4956       break;
4957     case SWPH:
4958     case SWPAH:
4959     case SWPLH:
4960     case SWPALH:
4961       AtomicMemorySwapHelper<uint16_t>(instr);
4962       break;
4963     case SWP_w:
4964     case SWPA_w:
4965     case SWPL_w:
4966     case SWPAL_w:
4967       AtomicMemorySwapHelper<uint32_t>(instr);
4968       break;
4969     case SWP_x:
4970     case SWPA_x:
4971     case SWPL_x:
4972     case SWPAL_x:
4973       AtomicMemorySwapHelper<uint64_t>(instr);
4974       break;
4975     case LDAPRB:
4976       LoadAcquireRCpcHelper<uint8_t>(instr);
4977       break;
4978     case LDAPRH:
4979       LoadAcquireRCpcHelper<uint16_t>(instr);
4980       break;
4981     case LDAPR_w:
4982       LoadAcquireRCpcHelper<uint32_t>(instr);
4983       break;
4984     case LDAPR_x:
4985       LoadAcquireRCpcHelper<uint64_t>(instr);
4986       break;
4987   }
4988 }
4989 
4990 
4991 void Simulator::VisitLoadLiteral(const Instruction* instr) {
4992   unsigned rt = instr->GetRt();
4993   uint64_t address = instr->GetLiteralAddress<uint64_t>();
4994 
4995   // Verify that the calculated address is available to the host.
4996   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
4997 
4998   switch (instr->Mask(LoadLiteralMask)) {
4999     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
5000     // print a more detailed log.
5001     case LDR_w_lit:
5002       WriteWRegister(rt, MemRead<uint32_t>(address), NoRegLog);
5003       LogRead(rt, kPrintWReg, address);
5004       break;
5005     case LDR_x_lit:
5006       WriteXRegister(rt, MemRead<uint64_t>(address), NoRegLog);
5007       LogRead(rt, kPrintXReg, address);
5008       break;
5009     case LDR_s_lit:
5010       WriteSRegister(rt, MemRead<float>(address), NoRegLog);
5011       LogVRead(rt, kPrintSRegFP, address);
5012       break;
5013     case LDR_d_lit:
5014       WriteDRegister(rt, MemRead<double>(address), NoRegLog);
5015       LogVRead(rt, kPrintDRegFP, address);
5016       break;
5017     case LDR_q_lit:
5018       WriteQRegister(rt, MemRead<qreg_t>(address), NoRegLog);
5019       LogVRead(rt, kPrintReg1Q, address);
5020       break;
5021     case LDRSW_x_lit:
5022       WriteXRegister(rt, MemRead<int32_t>(address), NoRegLog);
5023       LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
5024       break;
5025 
5026     // Ignore prfm hint instructions.
5027     case PRFM_lit:
5028       break;
5029 
5030     default:
5031       VIXL_UNREACHABLE();
5032   }
5033 
5034   local_monitor_.MaybeClear();
5035 }
5036 
5037 
5038 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
5039                                        int64_t offset,
5040                                        AddrMode addrmode) {
5041   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
5042 
5043   if ((addr_reg == 31) && ((address % 16) != 0)) {
5044     // When the base register is SP, the stack pointer is required to be
5045     // quadword-aligned prior to the address calculation and write-backs.
5046     // Misalignment will cause a stack alignment fault.
5047     VIXL_ALIGNMENT_EXCEPTION();
5048   }
5049 
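  // The three addressing modes differ only in when the offset is applied:
  //   Offset:    access (base + offset); the base register is not updated.
  //   PreIndex:  base += offset first, then access the updated base.
  //   PostIndex: access the original base, then base += offset.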
5050   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
5051     VIXL_ASSERT(offset != 0);
5052     // Only pre-index should log the register update here. For post-index, the
5053     // update will be printed automatically by LogWrittenRegisters _after_ the
5054     // memory access itself is logged.
5055     RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
5056     WriteXRegister(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
5057   }
5058 
5059   if ((addrmode == Offset) || (addrmode == PreIndex)) {
5060     address += offset;
5061   }
5062 
5063   // Verify that the calculated address is available to the host.
5064   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
5065 
5066   return static_cast<uintptr_t>(address);
5067 }
5068 
5069 
5070 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
5071   MoveWideImmediateOp mov_op =
5072       static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
5073   int64_t new_xn_val = 0;
5074 
5075   bool is_64_bits = instr->GetSixtyFourBits() == 1;
5076   // Shift is limited for W operations.
5077   VIXL_ASSERT(is_64_bits || (instr->GetShiftMoveWide() < 2));
5078 
5079   // Get the shifted immediate.
5080   int64_t shift = instr->GetShiftMoveWide() * 16;
5081   int64_t shifted_imm16 = static_cast<int64_t>(instr->GetImmMoveWide())
5082                           << shift;
5083 
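  // For example (illustrative): `movz x0, #0x1234, lsl #16` writes
  // 0x12340000; a following `movk x0, #0x5678, lsl #32` replaces only bits
  // [47:32], giving 0x0000567812340000; `movn w0, #0` writes 0xffffffff.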
5084   // Compute the new value.
5085   switch (mov_op) {
5086     case MOVN_w:
5087     case MOVN_x: {
5088       new_xn_val = ~shifted_imm16;
5089       if (!is_64_bits) new_xn_val &= kWRegMask;
5090       break;
5091     }
5092     case MOVK_w:
5093     case MOVK_x: {
5094       unsigned reg_code = instr->GetRd();
5095       int64_t prev_xn_val =
5096           is_64_bits ? ReadXRegister(reg_code) : ReadWRegister(reg_code);
5097       new_xn_val = (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
5098       break;
5099     }
5100     case MOVZ_w:
5101     case MOVZ_x: {
5102       new_xn_val = shifted_imm16;
5103       break;
5104     }
5105     default:
5106       VIXL_UNREACHABLE();
5107   }
5108 
5109   // Update the destination register.
5110   WriteXRegister(instr->GetRd(), new_xn_val);
5111 }
5112 
5113 
5114 void Simulator::VisitConditionalSelect(const Instruction* instr) {
5115   uint64_t new_val = ReadXRegister(instr->GetRn());
5116 
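  // If the condition holds, the result is Rn; otherwise it is Rm (CSEL),
  // Rm + 1 (CSINC), ~Rm (CSINV) or -Rm (CSNEG).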
5117   if (ConditionFailed(static_cast<Condition>(instr->GetCondition()))) {
5118     new_val = ReadXRegister(instr->GetRm());
5119     switch (instr->Mask(ConditionalSelectMask)) {
5120       case CSEL_w:
5121       case CSEL_x:
5122         break;
5123       case CSINC_w:
5124       case CSINC_x:
5125         new_val++;
5126         break;
5127       case CSINV_w:
5128       case CSINV_x:
5129         new_val = ~new_val;
5130         break;
5131       case CSNEG_w:
5132       case CSNEG_x:
5133         new_val = -new_val;
5134         break;
5135       default:
5136         VIXL_UNIMPLEMENTED();
5137     }
5138   }
5139   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5140   WriteRegister(reg_size, instr->GetRd(), new_val);
5141 }
5142 
5143 
5144 #define PAUTH_MODES_REGISTER_CONTEXT(V) \
5145   V(IA, kPACKeyIA, kInstructionPointer) \
5146   V(IB, kPACKeyIB, kInstructionPointer) \
5147   V(DA, kPACKeyDA, kDataPointer)        \
5148   V(DB, kPACKeyDB, kDataPointer)
5149 
5150 #define PAUTH_MODES_ZERO_CONTEXT(V)      \
5151   V(IZA, kPACKeyIA, kInstructionPointer) \
5152   V(IZB, kPACKeyIB, kInstructionPointer) \
5153   V(DZA, kPACKeyDA, kDataPointer)        \
5154   V(DZB, kPACKeyDB, kDataPointer)
5155 
5156 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
5157   unsigned dst = instr->GetRd();
5158   unsigned src = instr->GetRn();
5159 
5160   switch (instr->Mask(DataProcessing1SourceMask)) {
5161 #define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D)          \
5162   case PAC##SUFFIX: {                               \
5163     uint64_t mod = ReadXRegister(src);              \
5164     uint64_t ptr = ReadXRegister(dst);              \
5165     WriteXRegister(dst, AddPAC(ptr, mod, KEY, D));  \
5166     break;                                          \
5167   }                                                 \
5168   case AUT##SUFFIX: {                               \
5169     uint64_t mod = ReadXRegister(src);              \
5170     uint64_t ptr = ReadXRegister(dst);              \
5171     WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
5172     break;                                          \
5173   }
5174 
5175     PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
5176 #undef DEFINE_PAUTH_FUNCS
5177 
5178 #define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D)          \
5179   case PAC##SUFFIX: {                               \
5180     if (src != kZeroRegCode) {                      \
5181       VIXL_UNIMPLEMENTED();                         \
5182     }                                               \
5183     uint64_t ptr = ReadXRegister(dst);              \
5184     WriteXRegister(dst, AddPAC(ptr, 0x0, KEY, D));  \
5185     break;                                          \
5186   }                                                 \
5187   case AUT##SUFFIX: {                               \
5188     if (src != kZeroRegCode) {                      \
5189       VIXL_UNIMPLEMENTED();                         \
5190     }                                               \
5191     uint64_t ptr = ReadXRegister(dst);              \
5192     WriteXRegister(dst, AuthPAC(ptr, 0x0, KEY, D)); \
5193     break;                                          \
5194   }
5195 
5196     PAUTH_MODES_ZERO_CONTEXT(DEFINE_PAUTH_FUNCS)
5197 #undef DEFINE_PAUTH_FUNCS
5198 
5199     case XPACI:
5200       if (src != kZeroRegCode) {
5201         VIXL_UNIMPLEMENTED();
5202       }
5203       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
5204       break;
5205     case XPACD:
5206       if (src != kZeroRegCode) {
5207         VIXL_UNIMPLEMENTED();
5208       }
5209       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
5210       break;
5211     case RBIT_w:
5212       WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
5213       break;
5214     case RBIT_x:
5215       WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
5216       break;
5217     case REV16_w:
5218       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
5219       break;
5220     case REV16_x:
5221       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
5222       break;
5223     case REV_w:
5224       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
5225       break;
5226     case REV32_x:
5227       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
5228       break;
5229     case REV_x:
5230       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
5231       break;
5232     case CLZ_w:
5233       WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
5234       break;
5235     case CLZ_x:
5236       WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
5237       break;
5238     case CLS_w:
5239       WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
5240       break;
5241     case CLS_x:
5242       WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
5243       break;
5244     default:
5245       VIXL_UNIMPLEMENTED();
5246   }
5247 }
5248 
5249 
5250 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
5251   VIXL_ASSERT((n > 32) && (n <= 64));
5252   for (unsigned i = (n - 1); i >= 32; i--) {
5253     if (((data >> i) & 1) != 0) {
5254       uint64_t polysh32 = (uint64_t)poly << (i - 32);
5255       uint64_t mask = (UINT64_C(1) << i) - 1;
5256       data = ((data & mask) ^ polysh32);
5257     }
5258   }
5259   return data & 0xffffffff;
5260 }
5261 
5262 
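// The AArch64 CRC32* instructions specify a bit-reflected CRC, so the
// accumulator and data are bit-reversed before being combined and reduced
// modulo the generator polynomial (using Poly32Mod2 above), and the 32-bit
// remainder is bit-reversed again to form the new accumulator.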
5263 template <typename T>
5264 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
5265   unsigned size = sizeof(val) * 8;  // Number of bits in type T.
5266   VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
5267   uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
5268   uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
5269   return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
5270 }
5271 
5272 
5273 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
5274   // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
5275   // the CRC of each 32-bit word sequentially.
5276   acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
5277   return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
5278 }
5279 
5280 
5281 void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
5282   Shift shift_op = NO_SHIFT;
5283   int64_t result = 0;
5284   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5285 
5286   switch (instr->Mask(DataProcessing2SourceMask)) {
5287     case SDIV_w: {
5288       int32_t rn = ReadWRegister(instr->GetRn());
5289       int32_t rm = ReadWRegister(instr->GetRm());
5290       if ((rn == kWMinInt) && (rm == -1)) {
5291         result = kWMinInt;
5292       } else if (rm == 0) {
5293         // Division by zero can be trapped, but not on A-class processors.
5294         result = 0;
5295       } else {
5296         result = rn / rm;
5297       }
5298       break;
5299     }
5300     case SDIV_x: {
5301       int64_t rn = ReadXRegister(instr->GetRn());
5302       int64_t rm = ReadXRegister(instr->GetRm());
5303       if ((rn == kXMinInt) && (rm == -1)) {
5304         result = kXMinInt;
5305       } else if (rm == 0) {
5306         // Division by zero can be trapped, but not on A-class processors.
5307         result = 0;
5308       } else {
5309         result = rn / rm;
5310       }
5311       break;
5312     }
5313     case UDIV_w: {
5314       uint32_t rn = static_cast<uint32_t>(ReadWRegister(instr->GetRn()));
5315       uint32_t rm = static_cast<uint32_t>(ReadWRegister(instr->GetRm()));
5316       if (rm == 0) {
5317         // Division by zero can be trapped, but not on A-class processors.
5318         result = 0;
5319       } else {
5320         result = rn / rm;
5321       }
5322       break;
5323     }
5324     case UDIV_x: {
5325       uint64_t rn = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5326       uint64_t rm = static_cast<uint64_t>(ReadXRegister(instr->GetRm()));
5327       if (rm == 0) {
5328         // Division by zero can be trapped, but not on A-class processors.
5329         result = 0;
5330       } else {
5331         result = rn / rm;
5332       }
5333       break;
5334     }
5335     case LSLV_w:
5336     case LSLV_x:
5337       shift_op = LSL;
5338       break;
5339     case LSRV_w:
5340     case LSRV_x:
5341       shift_op = LSR;
5342       break;
5343     case ASRV_w:
5344     case ASRV_x:
5345       shift_op = ASR;
5346       break;
5347     case RORV_w:
5348     case RORV_x:
5349       shift_op = ROR;
5350       break;
5351     case PACGA: {
5352       uint64_t dst = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
5353       uint64_t src = static_cast<uint64_t>(
5354           ReadXRegister(instr->GetRm(), Reg31IsStackPointer));
5355       uint64_t code = ComputePAC(dst, src, kPACKeyGA);
5356       result = code & 0xffffffff00000000;
5357       break;
5358     }
5359     case CRC32B: {
5360       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5361       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5362       result = Crc32Checksum(acc, val, CRC32_POLY);
5363       break;
5364     }
5365     case CRC32H: {
5366       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5367       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5368       result = Crc32Checksum(acc, val, CRC32_POLY);
5369       break;
5370     }
5371     case CRC32W: {
5372       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5373       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5374       result = Crc32Checksum(acc, val, CRC32_POLY);
5375       break;
5376     }
5377     case CRC32X: {
5378       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5379       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5380       result = Crc32Checksum(acc, val, CRC32_POLY);
5381       reg_size = kWRegSize;
5382       break;
5383     }
5384     case CRC32CB: {
5385       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5386       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
5387       result = Crc32Checksum(acc, val, CRC32C_POLY);
5388       break;
5389     }
5390     case CRC32CH: {
5391       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5392       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
5393       result = Crc32Checksum(acc, val, CRC32C_POLY);
5394       break;
5395     }
5396     case CRC32CW: {
5397       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5398       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
5399       result = Crc32Checksum(acc, val, CRC32C_POLY);
5400       break;
5401     }
5402     case CRC32CX: {
5403       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
5404       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
5405       result = Crc32Checksum(acc, val, CRC32C_POLY);
5406       reg_size = kWRegSize;
5407       break;
5408     }
5409     default:
5410       VIXL_UNIMPLEMENTED();
5411   }
5412 
5413   if (shift_op != NO_SHIFT) {
5414     // The shift distance is encoded in the least-significant five (W) or six
5415     // (X) bits of the register.
5416     int mask = (instr->GetSixtyFourBits() == 1) ? 0x3f : 0x1f;
5417     unsigned shift = ReadWRegister(instr->GetRm()) & mask;
5418     result = ShiftOperand(reg_size,
5419                           ReadRegister(reg_size, instr->GetRn()),
5420                           shift_op,
5421                           shift);
5422   }
5423   WriteRegister(reg_size, instr->GetRd(), result);
5424 }
5425 
5426 
5427 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
5428   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5429 
5430   uint64_t result = 0;
5431   // Extract and sign- or zero-extend 32-bit arguments for widening operations.
5432   uint64_t rn_u32 = ReadRegister<uint32_t>(instr->GetRn());
5433   uint64_t rm_u32 = ReadRegister<uint32_t>(instr->GetRm());
5434   int64_t rn_s32 = ReadRegister<int32_t>(instr->GetRn());
5435   int64_t rm_s32 = ReadRegister<int32_t>(instr->GetRm());
5436   uint64_t rn_u64 = ReadXRegister(instr->GetRn());
5437   uint64_t rm_u64 = ReadXRegister(instr->GetRm());
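  // For example (illustrative): SMADDL computes Xd = Xa + sxtw(Wn) * sxtw(Wm)
  // using the sign-extended values above, while UMULH returns the top 64 bits
  // of the unsigned 128-bit product Xn * Xm.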
5438   switch (instr->Mask(DataProcessing3SourceMask)) {
5439     case MADD_w:
5440     case MADD_x:
5441       result = ReadXRegister(instr->GetRa()) + (rn_u64 * rm_u64);
5442       break;
5443     case MSUB_w:
5444     case MSUB_x:
5445       result = ReadXRegister(instr->GetRa()) - (rn_u64 * rm_u64);
5446       break;
5447     case SMADDL_x:
5448       result = ReadXRegister(instr->GetRa()) +
5449                static_cast<uint64_t>(rn_s32 * rm_s32);
5450       break;
5451     case SMSUBL_x:
5452       result = ReadXRegister(instr->GetRa()) -
5453                static_cast<uint64_t>(rn_s32 * rm_s32);
5454       break;
5455     case UMADDL_x:
5456       result = ReadXRegister(instr->GetRa()) + (rn_u32 * rm_u32);
5457       break;
5458     case UMSUBL_x:
5459       result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32);
5460       break;
5461     case UMULH_x:
5462       result =
5463           internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()),
5464                                      ReadRegister<uint64_t>(instr->GetRm()));
5465       break;
5466     case SMULH_x:
5467       result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()),
5468                                           ReadXRegister(instr->GetRm()));
5469       break;
5470     default:
5471       VIXL_UNIMPLEMENTED();
5472   }
5473   WriteRegister(reg_size, instr->GetRd(), result);
5474 }
5475 
5476 
5477 void Simulator::VisitBitfield(const Instruction* instr) {
5478   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
5479   int64_t reg_mask = instr->GetSixtyFourBits() ? kXRegMask : kWRegMask;
5480   int R = instr->GetImmR();
5481   int S = instr->GetImmS();
5482 
5483   if (instr->GetSixtyFourBits() != instr->GetBitN()) {
5484     VisitUnallocated(instr);
5485   }
5486 
5487   if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) {
5488     VisitUnallocated(instr);
5489   }
5490 
5491   int diff = S - R;
5492   uint64_t mask;
5493   if (diff >= 0) {
5494     mask = ~UINT64_C(0) >> (64 - (diff + 1));
5495     mask = (static_cast<unsigned>(diff) < (reg_size - 1)) ? mask : reg_mask;
5496   } else {
5497     mask = ~UINT64_C(0) >> (64 - (S + 1));
5498     mask = RotateRight(mask, R, reg_size);
5499     diff += reg_size;
5500   }
5501 
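  // For example (illustrative): `ubfx w0, w1, #4, #8` is UBFM with R = 4 and
  // S = 11, so diff = 7 and mask = 0xff; the rotated source below then yields
  // bits [11:4] of w1, zero-extended.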
5502   // inzero indicates whether the extracted bitfield is inserted into the
5503   // existing destination register value or into zero.
5504   // If extend is true, the extracted bitfield is sign-extended.
5505   bool inzero = false;
5506   bool extend = false;
5507   switch (instr->Mask(BitfieldMask)) {
5508     case BFM_x:
5509     case BFM_w:
5510       break;
5511     case SBFM_x:
5512     case SBFM_w:
5513       inzero = true;
5514       extend = true;
5515       break;
5516     case UBFM_x:
5517     case UBFM_w:
5518       inzero = true;
5519       break;
5520     default:
5521       VIXL_UNIMPLEMENTED();
5522   }
5523 
5524   uint64_t dst = inzero ? 0 : ReadRegister(reg_size, instr->GetRd());
5525   uint64_t src = ReadRegister(reg_size, instr->GetRn());
5526   // Rotate source bitfield into place.
5527   uint64_t result = RotateRight(src, R, reg_size);
5528   // Determine the sign extension.
5529   uint64_t topbits = (diff == 63) ? 0 : (~UINT64_C(0) << (diff + 1));
5530   uint64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
5531 
5532   // Merge sign extension, dest/zero and bitfield.
5533   result = signbits | (result & mask) | (dst & ~mask);
5534 
5535   WriteRegister(reg_size, instr->GetRd(), result);
5536 }
5537 
5538 
5539 void Simulator::VisitExtract(const Instruction* instr) {
5540   unsigned lsb = instr->GetImmS();
5541   unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize;
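  // EXTR extracts a reg_size-bit field from the concatenation Rn:Rm, starting
  // at bit `lsb` of Rm. For example (illustrative): `extr w0, w1, w2, #8`
  // sets w0 to (w2 >> 8) | (w1 << 24).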
5542   uint64_t low_res =
5543       static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb;
5544   uint64_t high_res = (lsb == 0)
5545                           ? 0
5546                           : ReadRegister<uint64_t>(reg_size, instr->GetRn())
5547                                 << (reg_size - lsb);
5548   WriteRegister(reg_size, instr->GetRd(), low_res | high_res);
5549 }
5550 
5551 
5552 void Simulator::VisitFPImmediate(const Instruction* instr) {
5553   AssertSupportedFPCR();
5554   unsigned dest = instr->GetRd();
5555   switch (instr->Mask(FPImmediateMask)) {
5556     case FMOV_h_imm:
5557       WriteHRegister(dest, Float16ToRawbits(instr->GetImmFP16()));
5558       break;
5559     case FMOV_s_imm:
5560       WriteSRegister(dest, instr->GetImmFP32());
5561       break;
5562     case FMOV_d_imm:
5563       WriteDRegister(dest, instr->GetImmFP64());
5564       break;
5565     default:
5566       VIXL_UNREACHABLE();
5567   }
5568 }
5569 
5570 
5571 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
5572   AssertSupportedFPCR();
5573 
5574   unsigned dst = instr->GetRd();
5575   unsigned src = instr->GetRn();
5576 
5577   FPRounding round = ReadRMode();
5578 
5579   switch (instr->Mask(FPIntegerConvertMask)) {
5580     case FCVTAS_wh:
5581       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieAway));
5582       break;
5583     case FCVTAS_xh:
5584       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieAway));
5585       break;
5586     case FCVTAS_ws:
5587       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieAway));
5588       break;
5589     case FCVTAS_xs:
5590       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieAway));
5591       break;
5592     case FCVTAS_wd:
5593       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieAway));
5594       break;
5595     case FCVTAS_xd:
5596       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieAway));
5597       break;
5598     case FCVTAU_wh:
5599       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieAway));
5600       break;
5601     case FCVTAU_xh:
5602       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieAway));
5603       break;
5604     case FCVTAU_ws:
5605       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieAway));
5606       break;
5607     case FCVTAU_xs:
5608       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieAway));
5609       break;
5610     case FCVTAU_wd:
5611       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieAway));
5612       break;
5613     case FCVTAU_xd:
5614       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieAway));
5615       break;
5616     case FCVTMS_wh:
5617       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPNegativeInfinity));
5618       break;
5619     case FCVTMS_xh:
5620       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPNegativeInfinity));
5621       break;
5622     case FCVTMS_ws:
5623       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPNegativeInfinity));
5624       break;
5625     case FCVTMS_xs:
5626       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPNegativeInfinity));
5627       break;
5628     case FCVTMS_wd:
5629       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPNegativeInfinity));
5630       break;
5631     case FCVTMS_xd:
5632       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPNegativeInfinity));
5633       break;
5634     case FCVTMU_wh:
5635       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPNegativeInfinity));
5636       break;
5637     case FCVTMU_xh:
5638       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPNegativeInfinity));
5639       break;
5640     case FCVTMU_ws:
5641       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPNegativeInfinity));
5642       break;
5643     case FCVTMU_xs:
5644       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPNegativeInfinity));
5645       break;
5646     case FCVTMU_wd:
5647       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPNegativeInfinity));
5648       break;
5649     case FCVTMU_xd:
5650       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPNegativeInfinity));
5651       break;
5652     case FCVTPS_wh:
5653       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPPositiveInfinity));
5654       break;
5655     case FCVTPS_xh:
5656       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPPositiveInfinity));
5657       break;
5658     case FCVTPS_ws:
5659       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPPositiveInfinity));
5660       break;
5661     case FCVTPS_xs:
5662       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPPositiveInfinity));
5663       break;
5664     case FCVTPS_wd:
5665       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPPositiveInfinity));
5666       break;
5667     case FCVTPS_xd:
5668       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPPositiveInfinity));
5669       break;
5670     case FCVTPU_wh:
5671       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPPositiveInfinity));
5672       break;
5673     case FCVTPU_xh:
5674       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPPositiveInfinity));
5675       break;
5676     case FCVTPU_ws:
5677       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPPositiveInfinity));
5678       break;
5679     case FCVTPU_xs:
5680       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPPositiveInfinity));
5681       break;
5682     case FCVTPU_wd:
5683       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPPositiveInfinity));
5684       break;
5685     case FCVTPU_xd:
5686       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPPositiveInfinity));
5687       break;
5688     case FCVTNS_wh:
5689       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieEven));
5690       break;
5691     case FCVTNS_xh:
5692       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieEven));
5693       break;
5694     case FCVTNS_ws:
5695       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieEven));
5696       break;
5697     case FCVTNS_xs:
5698       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieEven));
5699       break;
5700     case FCVTNS_wd:
5701       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieEven));
5702       break;
5703     case FCVTNS_xd:
5704       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieEven));
5705       break;
5706     case FCVTNU_wh:
5707       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieEven));
5708       break;
5709     case FCVTNU_xh:
5710       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieEven));
5711       break;
5712     case FCVTNU_ws:
5713       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieEven));
5714       break;
5715     case FCVTNU_xs:
5716       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieEven));
5717       break;
5718     case FCVTNU_wd:
5719       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieEven));
5720       break;
5721     case FCVTNU_xd:
5722       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieEven));
5723       break;
5724     case FCVTZS_wh:
5725       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPZero));
5726       break;
5727     case FCVTZS_xh:
5728       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPZero));
5729       break;
5730     case FCVTZS_ws:
5731       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPZero));
5732       break;
5733     case FCVTZS_xs:
5734       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPZero));
5735       break;
5736     case FCVTZS_wd:
5737       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPZero));
5738       break;
5739     case FCVTZS_xd:
5740       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPZero));
5741       break;
5742     case FCVTZU_wh:
5743       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPZero));
5744       break;
5745     case FCVTZU_xh:
5746       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPZero));
5747       break;
5748     case FCVTZU_ws:
5749       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPZero));
5750       break;
5751     case FCVTZU_xs:
5752       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPZero));
5753       break;
5754     case FCVTZU_wd:
5755       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPZero));
5756       break;
5757     case FCVTZU_xd:
5758       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPZero));
5759       break;
5760     case FJCVTZS:
5761       WriteWRegister(dst, FPToFixedJS(ReadDRegister(src)));
5762       break;
5763     case FMOV_hw:
5764       WriteHRegister(dst, ReadWRegister(src) & kHRegMask);
5765       break;
5766     case FMOV_wh:
5767       WriteWRegister(dst, ReadHRegisterBits(src));
5768       break;
5769     case FMOV_xh:
5770       WriteXRegister(dst, ReadHRegisterBits(src));
5771       break;
5772     case FMOV_hx:
5773       WriteHRegister(dst, ReadXRegister(src) & kHRegMask);
5774       break;
5775     case FMOV_ws:
5776       WriteWRegister(dst, ReadSRegisterBits(src));
5777       break;
5778     case FMOV_xd:
5779       WriteXRegister(dst, ReadDRegisterBits(src));
5780       break;
5781     case FMOV_sw:
5782       WriteSRegisterBits(dst, ReadWRegister(src));
5783       break;
5784     case FMOV_dx:
5785       WriteDRegisterBits(dst, ReadXRegister(src));
5786       break;
5787     case FMOV_d1_x:
5788       LogicVRegister(ReadVRegister(dst))
5789           .SetUint(kFormatD, 1, ReadXRegister(src));
5790       break;
5791     case FMOV_x_d1:
5792       WriteXRegister(dst, LogicVRegister(ReadVRegister(src)).Uint(kFormatD, 1));
5793       break;
5794 
5795     // A 32-bit input can be handled in the same way as a 64-bit input, since
5796     // the sign- or zero-extension will not affect the conversion.
5797     case SCVTF_dx:
5798       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), 0, round));
5799       break;
5800     case SCVTF_dw:
5801       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), 0, round));
5802       break;
5803     case UCVTF_dx:
5804       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), 0, round));
5805       break;
5806     case UCVTF_dw: {
5807       WriteDRegister(dst,
5808                      UFixedToDouble(ReadRegister<uint32_t>(src), 0, round));
5809       break;
5810     }
5811     case SCVTF_sx:
5812       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), 0, round));
5813       break;
5814     case SCVTF_sw:
5815       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), 0, round));
5816       break;
5817     case UCVTF_sx:
5818       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), 0, round));
5819       break;
5820     case UCVTF_sw: {
5821       WriteSRegister(dst, UFixedToFloat(ReadRegister<uint32_t>(src), 0, round));
5822       break;
5823     }
5824     case SCVTF_hx:
5825       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), 0, round));
5826       break;
5827     case SCVTF_hw:
5828       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), 0, round));
5829       break;
5830     case UCVTF_hx:
5831       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), 0, round));
5832       break;
5833     case UCVTF_hw: {
5834       WriteHRegister(dst,
5835                      UFixedToFloat16(ReadRegister<uint32_t>(src), 0, round));
5836       break;
5837     }
5838 
5839     default:
5840       VIXL_UNREACHABLE();
5841   }
5842 }
5843 
5844 
5845 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
5846   AssertSupportedFPCR();
5847 
5848   unsigned dst = instr->GetRd();
5849   unsigned src = instr->GetRn();
5850   int fbits = 64 - instr->GetFPScale();
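  // For example (illustrative): with fbits == 8, `scvtf s0, w1, #8` produces
  // w1 / 256.0, and `fcvtzs w1, s0, #8` produces s0 * 256 rounded towards
  // zero.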
5851 
5852   FPRounding round = ReadRMode();
5853 
5854   switch (instr->Mask(FPFixedPointConvertMask)) {
5855     // A 32-bit input can be handled in the same way as a 64-bit input, since
5856     // the sign- or zero-extension will not affect the conversion.
5857     case SCVTF_dx_fixed:
5858       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), fbits, round));
5859       break;
5860     case SCVTF_dw_fixed:
5861       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), fbits, round));
5862       break;
5863     case UCVTF_dx_fixed:
5864       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), fbits, round));
5865       break;
5866     case UCVTF_dw_fixed: {
5867       WriteDRegister(dst,
5868                      UFixedToDouble(ReadRegister<uint32_t>(src), fbits, round));
5869       break;
5870     }
5871     case SCVTF_sx_fixed:
5872       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), fbits, round));
5873       break;
5874     case SCVTF_sw_fixed:
5875       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), fbits, round));
5876       break;
5877     case UCVTF_sx_fixed:
5878       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), fbits, round));
5879       break;
5880     case UCVTF_sw_fixed: {
5881       WriteSRegister(dst,
5882                      UFixedToFloat(ReadRegister<uint32_t>(src), fbits, round));
5883       break;
5884     }
5885     case SCVTF_hx_fixed:
5886       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), fbits, round));
5887       break;
5888     case SCVTF_hw_fixed:
5889       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), fbits, round));
5890       break;
5891     case UCVTF_hx_fixed:
5892       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), fbits, round));
5893       break;
5894     case UCVTF_hw_fixed: {
5895       WriteHRegister(dst,
5896                      UFixedToFloat16(ReadRegister<uint32_t>(src),
5897                                      fbits,
5898                                      round));
5899       break;
5900     }
5901     case FCVTZS_xd_fixed:
5902       WriteXRegister(dst,
5903                      FPToInt64(ReadDRegister(src) * std::pow(2.0, fbits),
5904                                FPZero));
5905       break;
5906     case FCVTZS_wd_fixed:
5907       WriteWRegister(dst,
5908                      FPToInt32(ReadDRegister(src) * std::pow(2.0, fbits),
5909                                FPZero));
5910       break;
5911     case FCVTZU_xd_fixed:
5912       WriteXRegister(dst,
5913                      FPToUInt64(ReadDRegister(src) * std::pow(2.0, fbits),
5914                                 FPZero));
5915       break;
5916     case FCVTZU_wd_fixed:
5917       WriteWRegister(dst,
5918                      FPToUInt32(ReadDRegister(src) * std::pow(2.0, fbits),
5919                                 FPZero));
5920       break;
5921     case FCVTZS_xs_fixed:
5922       WriteXRegister(dst,
5923                      FPToInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
5924                                FPZero));
5925       break;
5926     case FCVTZS_ws_fixed:
5927       WriteWRegister(dst,
5928                      FPToInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
5929                                FPZero));
5930       break;
5931     case FCVTZU_xs_fixed:
5932       WriteXRegister(dst,
5933                      FPToUInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
5934                                 FPZero));
5935       break;
5936     case FCVTZU_ws_fixed:
5937       WriteWRegister(dst,
5938                      FPToUInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
5939                                 FPZero));
5940       break;
5941     case FCVTZS_xh_fixed: {
5942       double output =
5943           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5944       WriteXRegister(dst, FPToInt64(output, FPZero));
5945       break;
5946     }
5947     case FCVTZS_wh_fixed: {
5948       double output =
5949           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5950       WriteWRegister(dst, FPToInt32(output, FPZero));
5951       break;
5952     }
5953     case FCVTZU_xh_fixed: {
5954       double output =
5955           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5956       WriteXRegister(dst, FPToUInt64(output, FPZero));
5957       break;
5958     }
5959     case FCVTZU_wh_fixed: {
5960       double output =
5961           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
5962       WriteWRegister(dst, FPToUInt32(output, FPZero));
5963       break;
5964     }
5965     default:
5966       VIXL_UNREACHABLE();
5967   }
5968 }
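
// Editorial sketch, not called by the simulator: with `fbits` fractional
// bits, the FCVTZS_*_fixed forms above scale by 2^fbits before truncating
// towards zero (so 1.5 with fbits == 8 becomes 384), and the SCVTF_*_fixed
// forms divide by 2^fbits on the way back. A hypothetical scalar equivalent,
// ignoring the NaN handling and saturation performed by FPToInt64:
static inline int64_t SketchFcvtzsFixed(double value, int fbits) {
  return static_cast<int64_t>(value * std::pow(2.0, fbits));
}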
5969 
5970 
5971 void Simulator::VisitFPCompare(const Instruction* instr) {
5972   AssertSupportedFPCR();
5973 
5974   FPTrapFlags trap = DisableTrap;
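  // Editorial note: FPCompare sets NZCV to the architectural FCMP results:
  // 0110 for equal, 1000 for less than, 0010 for greater than, and 0011 when
  // either operand is NaN (unordered).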
5975   switch (instr->Mask(FPCompareMask)) {
5976     case FCMPE_h:
5977       trap = EnableTrap;
5978       VIXL_FALLTHROUGH();
5979     case FCMP_h:
5980       FPCompare(ReadHRegister(instr->GetRn()),
5981                 ReadHRegister(instr->GetRm()),
5982                 trap);
5983       break;
5984     case FCMPE_s:
5985       trap = EnableTrap;
5986       VIXL_FALLTHROUGH();
5987     case FCMP_s:
5988       FPCompare(ReadSRegister(instr->GetRn()),
5989                 ReadSRegister(instr->GetRm()),
5990                 trap);
5991       break;
5992     case FCMPE_d:
5993       trap = EnableTrap;
5994       VIXL_FALLTHROUGH();
5995     case FCMP_d:
5996       FPCompare(ReadDRegister(instr->GetRn()),
5997                 ReadDRegister(instr->GetRm()),
5998                 trap);
5999       break;
6000     case FCMPE_h_zero:
6001       trap = EnableTrap;
6002       VIXL_FALLTHROUGH();
6003     case FCMP_h_zero:
6004       FPCompare(ReadHRegister(instr->GetRn()), SimFloat16(0.0), trap);
6005       break;
6006     case FCMPE_s_zero:
6007       trap = EnableTrap;
6008       VIXL_FALLTHROUGH();
6009     case FCMP_s_zero:
6010       FPCompare(ReadSRegister(instr->GetRn()), 0.0f, trap);
6011       break;
6012     case FCMPE_d_zero:
6013       trap = EnableTrap;
6014       VIXL_FALLTHROUGH();
6015     case FCMP_d_zero:
6016       FPCompare(ReadDRegister(instr->GetRn()), 0.0, trap);
6017       break;
6018     default:
6019       VIXL_UNIMPLEMENTED();
6020   }
6021 }
6022 
6023 
6024 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
6025   AssertSupportedFPCR();
6026 
6027   FPTrapFlags trap = DisableTrap;
6028   switch (instr->Mask(FPConditionalCompareMask)) {
6029     case FCCMPE_h:
6030       trap = EnableTrap;
6031       VIXL_FALLTHROUGH();
6032     case FCCMP_h:
6033       if (ConditionPassed(instr->GetCondition())) {
6034         FPCompare(ReadHRegister(instr->GetRn()),
6035                   ReadHRegister(instr->GetRm()),
6036                   trap);
6037       } else {
6038         ReadNzcv().SetFlags(instr->GetNzcv());
6039         LogSystemRegister(NZCV);
6040       }
6041       break;
6042     case FCCMPE_s:
6043       trap = EnableTrap;
6044       VIXL_FALLTHROUGH();
6045     case FCCMP_s:
6046       if (ConditionPassed(instr->GetCondition())) {
6047         FPCompare(ReadSRegister(instr->GetRn()),
6048                   ReadSRegister(instr->GetRm()),
6049                   trap);
6050       } else {
6051         ReadNzcv().SetFlags(instr->GetNzcv());
6052         LogSystemRegister(NZCV);
6053       }
6054       break;
6055     case FCCMPE_d:
6056       trap = EnableTrap;
6057       VIXL_FALLTHROUGH();
6058     case FCCMP_d:
6059       if (ConditionPassed(instr->GetCondition())) {
6060         FPCompare(ReadDRegister(instr->GetRn()),
6061                   ReadDRegister(instr->GetRm()),
6062                   trap);
6063       } else {
6064         ReadNzcv().SetFlags(instr->GetNzcv());
6065         LogSystemRegister(NZCV);
6066       }
6067       break;
6068     default:
6069       VIXL_UNIMPLEMENTED();
6070   }
6071 }
6072 
6073 
6074 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
6075   AssertSupportedFPCR();
6076 
6077   Instr selected;
6078   if (ConditionPassed(instr->GetCondition())) {
6079     selected = instr->GetRn();
6080   } else {
6081     selected = instr->GetRm();
6082   }
6083 
6084   switch (instr->Mask(FPConditionalSelectMask)) {
6085     case FCSEL_h:
6086       WriteHRegister(instr->GetRd(), ReadHRegister(selected));
6087       break;
6088     case FCSEL_s:
6089       WriteSRegister(instr->GetRd(), ReadSRegister(selected));
6090       break;
6091     case FCSEL_d:
6092       WriteDRegister(instr->GetRd(), ReadDRegister(selected));
6093       break;
6094     default:
6095       VIXL_UNIMPLEMENTED();
6096   }
6097 }
6098 
6099 
6100 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
6101   AssertSupportedFPCR();
6102 
6103   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6104   VectorFormat vform;
6105   switch (instr->Mask(FPTypeMask)) {
6106     default:
6107       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6108     case FP64:
6109       vform = kFormatD;
6110       break;
6111     case FP32:
6112       vform = kFormatS;
6113       break;
6114     case FP16:
6115       vform = kFormatH;
6116       break;
6117   }
6118 
6119   SimVRegister& rd = ReadVRegister(instr->GetRd());
6120   SimVRegister& rn = ReadVRegister(instr->GetRn());
6121   bool inexact_exception = false;
6122   FrintMode frint_mode = kFrintToInteger;
6123 
6124   unsigned fd = instr->GetRd();
6125   unsigned fn = instr->GetRn();
6126 
6127   switch (instr->Mask(FPDataProcessing1SourceMask)) {
6128     case FMOV_h:
6129       WriteHRegister(fd, ReadHRegister(fn));
6130       return;
6131     case FMOV_s:
6132       WriteSRegister(fd, ReadSRegister(fn));
6133       return;
6134     case FMOV_d:
6135       WriteDRegister(fd, ReadDRegister(fn));
6136       return;
6137     case FABS_h:
6138     case FABS_s:
6139     case FABS_d:
6140       fabs_(vform, ReadVRegister(fd), ReadVRegister(fn));
6141       // Explicitly log the register update whilst we have type information.
6142       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6143       return;
6144     case FNEG_h:
6145     case FNEG_s:
6146     case FNEG_d:
6147       fneg(vform, ReadVRegister(fd), ReadVRegister(fn));
6148       // Explicitly log the register update whilst we have type information.
6149       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6150       return;
6151     case FCVT_ds:
6152       WriteDRegister(fd, FPToDouble(ReadSRegister(fn), ReadDN()));
6153       return;
6154     case FCVT_sd:
6155       WriteSRegister(fd, FPToFloat(ReadDRegister(fn), FPTieEven, ReadDN()));
6156       return;
6157     case FCVT_hs:
6158       WriteHRegister(fd,
6159                      Float16ToRawbits(
6160                          FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN())));
6161       return;
6162     case FCVT_sh:
6163       WriteSRegister(fd, FPToFloat(ReadHRegister(fn), ReadDN()));
6164       return;
6165     case FCVT_dh:
6166       WriteDRegister(fd, FPToDouble(ReadHRegister(fn), ReadDN()));
6167       return;
6168     case FCVT_hd:
6169       WriteHRegister(fd,
6170                      Float16ToRawbits(
6171                          FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN())));
6172       return;
6173     case FSQRT_h:
6174     case FSQRT_s:
6175     case FSQRT_d:
6176       fsqrt(vform, rd, rn);
6177       // Explicitly log the register update whilst we have type information.
6178       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6179       return;
6180     case FRINT32X_s:
6181     case FRINT32X_d:
6182       inexact_exception = true;
6183       frint_mode = kFrintToInt32;
6184       break;  // Use FPCR rounding mode.
6185     case FRINT64X_s:
6186     case FRINT64X_d:
6187       inexact_exception = true;
6188       frint_mode = kFrintToInt64;
6189       break;  // Use FPCR rounding mode.
6190     case FRINT32Z_s:
6191     case FRINT32Z_d:
6192       inexact_exception = true;
6193       frint_mode = kFrintToInt32;
6194       fpcr_rounding = FPZero;
6195       break;
6196     case FRINT64Z_s:
6197     case FRINT64Z_d:
6198       inexact_exception = true;
6199       frint_mode = kFrintToInt64;
6200       fpcr_rounding = FPZero;
6201       break;
6202     case FRINTI_h:
6203     case FRINTI_s:
6204     case FRINTI_d:
6205       break;  // Use FPCR rounding mode.
6206     case FRINTX_h:
6207     case FRINTX_s:
6208     case FRINTX_d:
6209       inexact_exception = true;
6210       break;
6211     case FRINTA_h:
6212     case FRINTA_s:
6213     case FRINTA_d:
6214       fpcr_rounding = FPTieAway;
6215       break;
6216     case FRINTM_h:
6217     case FRINTM_s:
6218     case FRINTM_d:
6219       fpcr_rounding = FPNegativeInfinity;
6220       break;
6221     case FRINTN_h:
6222     case FRINTN_s:
6223     case FRINTN_d:
6224       fpcr_rounding = FPTieEven;
6225       break;
6226     case FRINTP_h:
6227     case FRINTP_s:
6228     case FRINTP_d:
6229       fpcr_rounding = FPPositiveInfinity;
6230       break;
6231     case FRINTZ_h:
6232     case FRINTZ_s:
6233     case FRINTZ_d:
6234       fpcr_rounding = FPZero;
6235       break;
6236     default:
6237       VIXL_UNIMPLEMENTED();
6238   }
6239 
6240   // Only FRINT* instructions fall through the switch above.
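  // For example, a tie such as 2.5 rounds to 3.0 under FRINTA (ties away from
  // zero) but to 2.0 under FRINTN (ties to even).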
6241   frint(vform, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
6242   // Explicitly log the register update whilst we have type information.
6243   LogVRegister(fd, GetPrintRegisterFormatFP(vform));
6244 }
6245 
6246 
6247 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
6248   AssertSupportedFPCR();
6249 
6250   VectorFormat vform;
6251   switch (instr->Mask(FPTypeMask)) {
6252     default:
6253       VIXL_UNREACHABLE_OR_FALLTHROUGH();
6254     case FP64:
6255       vform = kFormatD;
6256       break;
6257     case FP32:
6258       vform = kFormatS;
6259       break;
6260     case FP16:
6261       vform = kFormatH;
6262       break;
6263   }
6264   SimVRegister& rd = ReadVRegister(instr->GetRd());
6265   SimVRegister& rn = ReadVRegister(instr->GetRn());
6266   SimVRegister& rm = ReadVRegister(instr->GetRm());
6267 
6268   switch (instr->Mask(FPDataProcessing2SourceMask)) {
6269     case FADD_h:
6270     case FADD_s:
6271     case FADD_d:
6272       fadd(vform, rd, rn, rm);
6273       break;
6274     case FSUB_h:
6275     case FSUB_s:
6276     case FSUB_d:
6277       fsub(vform, rd, rn, rm);
6278       break;
6279     case FMUL_h:
6280     case FMUL_s:
6281     case FMUL_d:
6282       fmul(vform, rd, rn, rm);
6283       break;
6284     case FNMUL_h:
6285     case FNMUL_s:
6286     case FNMUL_d:
6287       fnmul(vform, rd, rn, rm);
6288       break;
6289     case FDIV_h:
6290     case FDIV_s:
6291     case FDIV_d:
6292       fdiv(vform, rd, rn, rm);
6293       break;
6294     case FMAX_h:
6295     case FMAX_s:
6296     case FMAX_d:
6297       fmax(vform, rd, rn, rm);
6298       break;
6299     case FMIN_h:
6300     case FMIN_s:
6301     case FMIN_d:
6302       fmin(vform, rd, rn, rm);
6303       break;
6304     case FMAXNM_h:
6305     case FMAXNM_s:
6306     case FMAXNM_d:
6307       fmaxnm(vform, rd, rn, rm);
6308       break;
6309     case FMINNM_h:
6310     case FMINNM_s:
6311     case FMINNM_d:
6312       fminnm(vform, rd, rn, rm);
6313       break;
6314     default:
6315       VIXL_UNREACHABLE();
6316   }
6317   // Explicitly log the register update whilst we have type information.
6318   LogVRegister(instr->GetRd(), GetPrintRegisterFormatFP(vform));
6319 }
6320 
6321 
6322 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
6323   AssertSupportedFPCR();
6324 
6325   unsigned fd = instr->GetRd();
6326   unsigned fn = instr->GetRn();
6327   unsigned fm = instr->GetRm();
6328   unsigned fa = instr->GetRa();
6329 
6330   switch (instr->Mask(FPDataProcessing3SourceMask)) {
6331     // fd = fa +/- (fn * fm)
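    // FMADD:  fd =  fa + (fn * fm)    FMSUB:  fd =  fa - (fn * fm)
    // FNMADD: fd = -fa - (fn * fm)    FNMSUB: fd = -fa + (fn * fm)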
6332     case FMADD_h:
6333       WriteHRegister(fd,
6334                      FPMulAdd(ReadHRegister(fa),
6335                               ReadHRegister(fn),
6336                               ReadHRegister(fm)));
6337       break;
6338     case FMSUB_h:
6339       WriteHRegister(fd,
6340                      FPMulAdd(ReadHRegister(fa),
6341                               -ReadHRegister(fn),
6342                               ReadHRegister(fm)));
6343       break;
6344     case FMADD_s:
6345       WriteSRegister(fd,
6346                      FPMulAdd(ReadSRegister(fa),
6347                               ReadSRegister(fn),
6348                               ReadSRegister(fm)));
6349       break;
6350     case FMSUB_s:
6351       WriteSRegister(fd,
6352                      FPMulAdd(ReadSRegister(fa),
6353                               -ReadSRegister(fn),
6354                               ReadSRegister(fm)));
6355       break;
6356     case FMADD_d:
6357       WriteDRegister(fd,
6358                      FPMulAdd(ReadDRegister(fa),
6359                               ReadDRegister(fn),
6360                               ReadDRegister(fm)));
6361       break;
6362     case FMSUB_d:
6363       WriteDRegister(fd,
6364                      FPMulAdd(ReadDRegister(fa),
6365                               -ReadDRegister(fn),
6366                               ReadDRegister(fm)));
6367       break;
6368     // Negated variants of the above.
6369     case FNMADD_h:
6370       WriteHRegister(fd,
6371                      FPMulAdd(-ReadHRegister(fa),
6372                               -ReadHRegister(fn),
6373                               ReadHRegister(fm)));
6374       break;
6375     case FNMSUB_h:
6376       WriteHRegister(fd,
6377                      FPMulAdd(-ReadHRegister(fa),
6378                               ReadHRegister(fn),
6379                               ReadHRegister(fm)));
6380       break;
6381     case FNMADD_s:
6382       WriteSRegister(fd,
6383                      FPMulAdd(-ReadSRegister(fa),
6384                               -ReadSRegister(fn),
6385                               ReadSRegister(fm)));
6386       break;
6387     case FNMSUB_s:
6388       WriteSRegister(fd,
6389                      FPMulAdd(-ReadSRegister(fa),
6390                               ReadSRegister(fn),
6391                               ReadSRegister(fm)));
6392       break;
6393     case FNMADD_d:
6394       WriteDRegister(fd,
6395                      FPMulAdd(-ReadDRegister(fa),
6396                               -ReadDRegister(fn),
6397                               ReadDRegister(fm)));
6398       break;
6399     case FNMSUB_d:
6400       WriteDRegister(fd,
6401                      FPMulAdd(-ReadDRegister(fa),
6402                               ReadDRegister(fn),
6403                               ReadDRegister(fm)));
6404       break;
6405     default:
6406       VIXL_UNIMPLEMENTED();
6407   }
6408 }
6409 
6410 
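// Editorial note: for a two-operand FP instruction, if either input is NaN
// this writes the propagated NaN result to fd and returns true, so the
// caller can skip the main computation.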
6411 bool Simulator::FPProcessNaNs(const Instruction* instr) {
6412   unsigned fd = instr->GetRd();
6413   unsigned fn = instr->GetRn();
6414   unsigned fm = instr->GetRm();
6415   bool done = false;
6416 
6417   if (instr->Mask(FP64) == FP64) {
6418     double result = FPProcessNaNs(ReadDRegister(fn), ReadDRegister(fm));
6419     if (IsNaN(result)) {
6420       WriteDRegister(fd, result);
6421       done = true;
6422     }
6423   } else if (instr->Mask(FP32) == FP32) {
6424     float result = FPProcessNaNs(ReadSRegister(fn), ReadSRegister(fm));
6425     if (IsNaN(result)) {
6426       WriteSRegister(fd, result);
6427       done = true;
6428     }
6429   } else {
6430     VIXL_ASSERT(instr->Mask(FP16) == FP16);
6431     VIXL_UNIMPLEMENTED();
6432   }
6433 
6434   return done;
6435 }
6436 
6437 
6438 void Simulator::SysOp_W(int op, int64_t val) {
6439   switch (op) {
6440     case IVAU:
6441     case CVAC:
6442     case CVAU:
6443     case CVAP:
6444     case CVADP:
6445     case CIVAC: {
6446       // Perform a placeholder memory access to ensure that we have read access
6447       // to the specified address.
6448       volatile uint8_t y = MemRead<uint8_t>(val);
6449       USE(y);
6450       // TODO: Implement "case ZVA:".
6451       break;
6452     }
6453     default:
6454       VIXL_UNIMPLEMENTED();
6455   }
6456 }
6457 
6458 
6459 // clang-format off
6460 #define PAUTH_SYSTEM_MODES(V)                                     \
6461   V(A1716, 17, ReadXRegister(16),                      kPACKeyIA) \
6462   V(B1716, 17, ReadXRegister(16),                      kPACKeyIB) \
6463   V(AZ,    30, 0x00000000,                             kPACKeyIA) \
6464   V(BZ,    30, 0x00000000,                             kPACKeyIB) \
6465   V(ASP,   30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIA) \
6466   V(BSP,   30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIB)
6467 // clang-format on
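// Editorial note: each row above supplies the instruction suffix, the
// destination register, the modifier value, and the PAC key used when the
// PACI* and AUTI* cases are expanded in VisitSystem below.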
6468 
6469 
6470 void Simulator::VisitSystem(const Instruction* instr) {
6471   // Some system instructions hijack their Op and Cp fields to represent a
6472   // range of immediates instead of indicating a different instruction. This
6473   // makes the decoding tricky.
6474   if (instr->GetInstructionBits() == XPACLRI) {
6475     WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
6476   } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
6477     switch (instr->Mask(SystemPStateMask)) {
6478       case CFINV:
6479         ReadNzcv().SetC(!ReadC());
6480         break;
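      // Editorial note: AXFLAG and XAFLAG convert the FP condition flags
      // between the Arm format and the alternative "external" format; the
      // bit manipulations below follow the architectural definitions.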
6481       case AXFLAG:
6482         ReadNzcv().SetN(0);
6483         ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
6484         ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
6485         ReadNzcv().SetV(0);
6486         break;
6487       case XAFLAG: {
6488         // Can't set the flags in place due to the logical dependencies.
6489         uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
6490         uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
6491         uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
6492         uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
6493         ReadNzcv().SetN(n);
6494         ReadNzcv().SetZ(z);
6495         ReadNzcv().SetC(c);
6496         ReadNzcv().SetV(v);
6497         break;
6498       }
6499     }
6500   } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) {
6501     // Check that BType allows PACI[AB]SP instructions.
6502     if (PcIsInGuardedPage()) {
6503       Instr i = instr->Mask(SystemPAuthMask);
6504       if ((i == PACIASP) || (i == PACIBSP)) {
6505         switch (ReadBType()) {
6506           case BranchFromGuardedNotToIP:
6507           // TODO: This case depends on the value of SCTLR_EL1.BT0, which we
6508           // assume here to be zero. This allows execution of PACI[AB]SP when
6509           // BTYPE is BranchFromGuardedNotToIP (0b11).
6510           case DefaultBType:
6511           case BranchFromUnguardedOrToIP:
6512           case BranchAndLink:
6513             break;
6514         }
6515       }
6516     }
6517 
6518     switch (instr->Mask(SystemPAuthMask)) {
6519 #define DEFINE_PAUTH_FUNCS(SUFFIX, DST, MOD, KEY)                              \
6520   case PACI##SUFFIX:                                                           \
6521     WriteXRegister(DST,                                                        \
6522                    AddPAC(ReadXRegister(DST), MOD, KEY, kInstructionPointer)); \
6523     break;                                                                     \
6524   case AUTI##SUFFIX:                                                           \
6525     WriteXRegister(DST,                                                        \
6526                    AuthPAC(ReadXRegister(DST),                                 \
6527                            MOD,                                                \
6528                            KEY,                                                \
6529                            kInstructionPointer));                              \
6530     break;
6531 
6532       PAUTH_SYSTEM_MODES(DEFINE_PAUTH_FUNCS)
6533 #undef DEFINE_PAUTH_FUNCS
6534     }
6535   } else if (instr->Mask(SystemExclusiveMonitorFMask) ==
6536              SystemExclusiveMonitorFixed) {
6537     VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
6538     switch (instr->Mask(SystemExclusiveMonitorMask)) {
6539       case CLREX: {
6540         PrintExclusiveAccessWarning();
6541         ClearLocalMonitor();
6542         break;
6543       }
6544     }
6545   } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
6546     switch (instr->Mask(SystemSysRegMask)) {
6547       case MRS: {
6548         switch (instr->GetImmSystemRegister()) {
6549           case NZCV:
6550             WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
6551             break;
6552           case FPCR:
6553             WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
6554             break;
6555           case RNDR:
6556           case RNDRRS: {
6557             uint64_t high = jrand48(rand_state_);
6558             uint64_t low = jrand48(rand_state_);
6559             uint64_t rand_num = (high << 32) | (low & 0xffffffff);
6560             WriteXRegister(instr->GetRt(), rand_num);
6561             // Simulate successful random number generation.
6562             // TODO: Occasionally report failure, as a random number may not be
6563             // returned within a reasonable period of time.
6564             ReadNzcv().SetRawValue(NoFlag);
6565             LogSystemRegister(NZCV);
6566             break;
6567           }
6568           default:
6569             VIXL_UNIMPLEMENTED();
6570         }
6571         break;
6572       }
6573       case MSR: {
6574         switch (instr->GetImmSystemRegister()) {
6575           case NZCV:
6576             ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
6577             LogSystemRegister(NZCV);
6578             break;
6579           case FPCR:
6580             ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
6581             LogSystemRegister(FPCR);
6582             break;
6583           default:
6584             VIXL_UNIMPLEMENTED();
6585         }
6586         break;
6587       }
6588     }
6589   } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
6590     VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
6591     switch (instr->GetImmHint()) {
6592       case NOP:
6593       case ESB:
6594       case CSDB:
6595       case BTI_jc:
6596         break;
6597       case BTI:
6598         if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
6599           VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
6600         }
6601         break;
6602       case BTI_c:
6603         if (PcIsInGuardedPage() && (ReadBType() == BranchFromGuardedNotToIP)) {
6604           VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
6605         }
6606         break;
6607       case BTI_j:
6608         if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
6609           VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
6610         }
6611         break;
6612       default:
6613         VIXL_UNIMPLEMENTED();
6614     }
6615   } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
6616     __sync_synchronize();
6617   } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
6618     switch (instr->Mask(SystemSysMask)) {
6619       case SYS:
6620         SysOp_W(instr->GetSysOp(), ReadXRegister(instr->GetRt()));
6621         break;
6622       default:
6623         VIXL_UNIMPLEMENTED();
6624     }
6625   } else {
6626     VIXL_UNIMPLEMENTED();
6627   }
6628 }
6629 
6630 
6631 void Simulator::VisitException(const Instruction* instr) {
6632   switch (instr->Mask(ExceptionMask)) {
6633     case HLT:
6634       switch (instr->GetImmException()) {
6635         case kUnreachableOpcode:
6636           DoUnreachable(instr);
6637           return;
6638         case kTraceOpcode:
6639           DoTrace(instr);
6640           return;
6641         case kLogOpcode:
6642           DoLog(instr);
6643           return;
6644         case kPrintfOpcode:
6645           DoPrintf(instr);
6646           return;
6647         case kRuntimeCallOpcode:
6648           DoRuntimeCall(instr);
6649           return;
6650         case kSetCPUFeaturesOpcode:
6651         case kEnableCPUFeaturesOpcode:
6652         case kDisableCPUFeaturesOpcode:
6653           DoConfigureCPUFeatures(instr);
6654           return;
6655         case kSaveCPUFeaturesOpcode:
6656           DoSaveCPUFeatures(instr);
6657           return;
6658         case kRestoreCPUFeaturesOpcode:
6659           DoRestoreCPUFeatures(instr);
6660           return;
6661         default:
6662           HostBreakpoint();
6663           return;
6664       }
6665     case BRK:
6666       HostBreakpoint();
6667       return;
6668     default:
6669       VIXL_UNIMPLEMENTED();
6670   }
6671 }
6672 
6673 
6674 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
6675   VisitUnimplemented(instr);
6676 }
6677 
6678 
6679 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
6680   VisitUnimplemented(instr);
6681 }
6682 
6683 
6684 void Simulator::VisitCryptoAES(const Instruction* instr) {
6685   VisitUnimplemented(instr);
6686 }
6687 
6688 
6689 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
6690   NEONFormatDecoder nfd(instr);
6691   VectorFormat vf = nfd.GetVectorFormat();
6692 
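  // Editorial note: a NEONFormatMap pairs the instruction bits to sample with
  // the vector format selected by each value of those bits; the maps below
  // pick the destination formats for the pairwise-long and FCVT{L,N,XN} forms.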
6693   static const NEONFormatMap map_lp =
6694       {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}};
6695   VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
6696 
6697   static const NEONFormatMap map_fcvtl = {{22}, {NF_4S, NF_2D}};
6698   VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
6699 
6700   static const NEONFormatMap map_fcvtn = {{22, 30},
6701                                           {NF_4H, NF_8H, NF_2S, NF_4S}};
6702   VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
6703 
6704   SimVRegister& rd = ReadVRegister(instr->GetRd());
6705   SimVRegister& rn = ReadVRegister(instr->GetRn());
6706 
6707   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
6708     // These instructions all use a two-bit size field, except NOT and RBIT,
6709     // which use the field to encode the operation.
6710     switch (instr->Mask(NEON2RegMiscMask)) {
6711       case NEON_REV64:
6712         rev64(vf, rd, rn);
6713         break;
6714       case NEON_REV32:
6715         rev32(vf, rd, rn);
6716         break;
6717       case NEON_REV16:
6718         rev16(vf, rd, rn);
6719         break;
6720       case NEON_SUQADD:
6721         suqadd(vf, rd, rd, rn);
6722         break;
6723       case NEON_USQADD:
6724         usqadd(vf, rd, rd, rn);
6725         break;
6726       case NEON_CLS:
6727         cls(vf, rd, rn);
6728         break;
6729       case NEON_CLZ:
6730         clz(vf, rd, rn);
6731         break;
6732       case NEON_CNT:
6733         cnt(vf, rd, rn);
6734         break;
6735       case NEON_SQABS:
6736         abs(vf, rd, rn).SignedSaturate(vf);
6737         break;
6738       case NEON_SQNEG:
6739         neg(vf, rd, rn).SignedSaturate(vf);
6740         break;
6741       case NEON_CMGT_zero:
6742         cmp(vf, rd, rn, 0, gt);
6743         break;
6744       case NEON_CMGE_zero:
6745         cmp(vf, rd, rn, 0, ge);
6746         break;
6747       case NEON_CMEQ_zero:
6748         cmp(vf, rd, rn, 0, eq);
6749         break;
6750       case NEON_CMLE_zero:
6751         cmp(vf, rd, rn, 0, le);
6752         break;
6753       case NEON_CMLT_zero:
6754         cmp(vf, rd, rn, 0, lt);
6755         break;
6756       case NEON_ABS:
6757         abs(vf, rd, rn);
6758         break;
6759       case NEON_NEG:
6760         neg(vf, rd, rn);
6761         break;
6762       case NEON_SADDLP:
6763         saddlp(vf_lp, rd, rn);
6764         break;
6765       case NEON_UADDLP:
6766         uaddlp(vf_lp, rd, rn);
6767         break;
6768       case NEON_SADALP:
6769         sadalp(vf_lp, rd, rn);
6770         break;
6771       case NEON_UADALP:
6772         uadalp(vf_lp, rd, rn);
6773         break;
6774       case NEON_RBIT_NOT:
6775         vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
6776         switch (instr->GetFPType()) {
6777           case 0:
6778             not_(vf, rd, rn);
6779             break;
6780           case 1:
6781             rbit(vf, rd, rn);
6782             break;
6783           default:
6784             VIXL_UNIMPLEMENTED();
6785         }
6786         break;
6787     }
6788   } else {
6789     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
6790     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6791     bool inexact_exception = false;
6792     FrintMode frint_mode = kFrintToInteger;
6793 
6794     // These instructions all use a one-bit size field, except XTN, SQXTUN,
6795     // SHLL, SQXTN and UQXTN, which use a two-bit size field.
6796     switch (instr->Mask(NEON2RegMiscFPMask)) {
6797       case NEON_FABS:
6798         fabs_(fpf, rd, rn);
6799         return;
6800       case NEON_FNEG:
6801         fneg(fpf, rd, rn);
6802         return;
6803       case NEON_FSQRT:
6804         fsqrt(fpf, rd, rn);
6805         return;
6806       case NEON_FCVTL:
6807         if (instr->Mask(NEON_Q)) {
6808           fcvtl2(vf_fcvtl, rd, rn);
6809         } else {
6810           fcvtl(vf_fcvtl, rd, rn);
6811         }
6812         return;
6813       case NEON_FCVTN:
6814         if (instr->Mask(NEON_Q)) {
6815           fcvtn2(vf_fcvtn, rd, rn);
6816         } else {
6817           fcvtn(vf_fcvtn, rd, rn);
6818         }
6819         return;
6820       case NEON_FCVTXN:
6821         if (instr->Mask(NEON_Q)) {
6822           fcvtxn2(vf_fcvtn, rd, rn);
6823         } else {
6824           fcvtxn(vf_fcvtn, rd, rn);
6825         }
6826         return;
6827 
6828       // The following instructions break from the switch statement, rather
6829       // than return.
6830       case NEON_FRINT32X:
6831         inexact_exception = true;
6832         frint_mode = kFrintToInt32;
6833         break;  // Use FPCR rounding mode.
6834       case NEON_FRINT32Z:
6835         inexact_exception = true;
6836         frint_mode = kFrintToInt32;
6837         fpcr_rounding = FPZero;
6838         break;
6839       case NEON_FRINT64X:
6840         inexact_exception = true;
6841         frint_mode = kFrintToInt64;
6842         break;  // Use FPCR rounding mode.
6843       case NEON_FRINT64Z:
6844         inexact_exception = true;
6845         frint_mode = kFrintToInt64;
6846         fpcr_rounding = FPZero;
6847         break;
6848       case NEON_FRINTI:
6849         break;  // Use FPCR rounding mode.
6850       case NEON_FRINTX:
6851         inexact_exception = true;
6852         break;
6853       case NEON_FRINTA:
6854         fpcr_rounding = FPTieAway;
6855         break;
6856       case NEON_FRINTM:
6857         fpcr_rounding = FPNegativeInfinity;
6858         break;
6859       case NEON_FRINTN:
6860         fpcr_rounding = FPTieEven;
6861         break;
6862       case NEON_FRINTP:
6863         fpcr_rounding = FPPositiveInfinity;
6864         break;
6865       case NEON_FRINTZ:
6866         fpcr_rounding = FPZero;
6867         break;
6868 
6869       case NEON_FCVTNS:
6870         fcvts(fpf, rd, rn, FPTieEven);
6871         return;
6872       case NEON_FCVTNU:
6873         fcvtu(fpf, rd, rn, FPTieEven);
6874         return;
6875       case NEON_FCVTPS:
6876         fcvts(fpf, rd, rn, FPPositiveInfinity);
6877         return;
6878       case NEON_FCVTPU:
6879         fcvtu(fpf, rd, rn, FPPositiveInfinity);
6880         return;
6881       case NEON_FCVTMS:
6882         fcvts(fpf, rd, rn, FPNegativeInfinity);
6883         return;
6884       case NEON_FCVTMU:
6885         fcvtu(fpf, rd, rn, FPNegativeInfinity);
6886         return;
6887       case NEON_FCVTZS:
6888         fcvts(fpf, rd, rn, FPZero);
6889         return;
6890       case NEON_FCVTZU:
6891         fcvtu(fpf, rd, rn, FPZero);
6892         return;
6893       case NEON_FCVTAS:
6894         fcvts(fpf, rd, rn, FPTieAway);
6895         return;
6896       case NEON_FCVTAU:
6897         fcvtu(fpf, rd, rn, FPTieAway);
6898         return;
6899       case NEON_SCVTF:
6900         scvtf(fpf, rd, rn, 0, fpcr_rounding);
6901         return;
6902       case NEON_UCVTF:
6903         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
6904         return;
6905       case NEON_URSQRTE:
6906         ursqrte(fpf, rd, rn);
6907         return;
6908       case NEON_URECPE:
6909         urecpe(fpf, rd, rn);
6910         return;
6911       case NEON_FRSQRTE:
6912         frsqrte(fpf, rd, rn);
6913         return;
6914       case NEON_FRECPE:
6915         frecpe(fpf, rd, rn, fpcr_rounding);
6916         return;
6917       case NEON_FCMGT_zero:
6918         fcmp_zero(fpf, rd, rn, gt);
6919         return;
6920       case NEON_FCMGE_zero:
6921         fcmp_zero(fpf, rd, rn, ge);
6922         return;
6923       case NEON_FCMEQ_zero:
6924         fcmp_zero(fpf, rd, rn, eq);
6925         return;
6926       case NEON_FCMLE_zero:
6927         fcmp_zero(fpf, rd, rn, le);
6928         return;
6929       case NEON_FCMLT_zero:
6930         fcmp_zero(fpf, rd, rn, lt);
6931         return;
6932       default:
6933         if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
6934             (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
6935           switch (instr->Mask(NEON2RegMiscMask)) {
6936             case NEON_XTN:
6937               xtn(vf, rd, rn);
6938               return;
6939             case NEON_SQXTN:
6940               sqxtn(vf, rd, rn);
6941               return;
6942             case NEON_UQXTN:
6943               uqxtn(vf, rd, rn);
6944               return;
6945             case NEON_SQXTUN:
6946               sqxtun(vf, rd, rn);
6947               return;
6948             case NEON_SHLL:
6949               vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
6950               if (instr->Mask(NEON_Q)) {
6951                 shll2(vf, rd, rn);
6952               } else {
6953                 shll(vf, rd, rn);
6954               }
6955               return;
6956             default:
6957               VIXL_UNIMPLEMENTED();
6958           }
6959         } else {
6960           VIXL_UNIMPLEMENTED();
6961         }
6962     }
6963 
6964     // Only FRINT* instructions fall through the switch above.
6965     frint(fpf, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
6966   }
6967 }
6968 
6969 
6970 void Simulator::VisitNEON2RegMiscFP16(const Instruction* instr) {
6971   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
6972   NEONFormatDecoder nfd(instr);
6973   VectorFormat fpf = nfd.GetVectorFormat(&map_half);
6974 
6975   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
6976 
6977   SimVRegister& rd = ReadVRegister(instr->GetRd());
6978   SimVRegister& rn = ReadVRegister(instr->GetRn());
6979 
6980   switch (instr->Mask(NEON2RegMiscFP16Mask)) {
6981     case NEON_SCVTF_H:
6982       scvtf(fpf, rd, rn, 0, fpcr_rounding);
6983       return;
6984     case NEON_UCVTF_H:
6985       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
6986       return;
6987     case NEON_FCVTNS_H:
6988       fcvts(fpf, rd, rn, FPTieEven);
6989       return;
6990     case NEON_FCVTNU_H:
6991       fcvtu(fpf, rd, rn, FPTieEven);
6992       return;
6993     case NEON_FCVTPS_H:
6994       fcvts(fpf, rd, rn, FPPositiveInfinity);
6995       return;
6996     case NEON_FCVTPU_H:
6997       fcvtu(fpf, rd, rn, FPPositiveInfinity);
6998       return;
6999     case NEON_FCVTMS_H:
7000       fcvts(fpf, rd, rn, FPNegativeInfinity);
7001       return;
7002     case NEON_FCVTMU_H:
7003       fcvtu(fpf, rd, rn, FPNegativeInfinity);
7004       return;
7005     case NEON_FCVTZS_H:
7006       fcvts(fpf, rd, rn, FPZero);
7007       return;
7008     case NEON_FCVTZU_H:
7009       fcvtu(fpf, rd, rn, FPZero);
7010       return;
7011     case NEON_FCVTAS_H:
7012       fcvts(fpf, rd, rn, FPTieAway);
7013       return;
7014     case NEON_FCVTAU_H:
7015       fcvtu(fpf, rd, rn, FPTieAway);
7016       return;
7017     case NEON_FRINTI_H:
7018       frint(fpf, rd, rn, fpcr_rounding, false);
7019       return;
7020     case NEON_FRINTX_H:
7021       frint(fpf, rd, rn, fpcr_rounding, true);
7022       return;
7023     case NEON_FRINTA_H:
7024       frint(fpf, rd, rn, FPTieAway, false);
7025       return;
7026     case NEON_FRINTM_H:
7027       frint(fpf, rd, rn, FPNegativeInfinity, false);
7028       return;
7029     case NEON_FRINTN_H:
7030       frint(fpf, rd, rn, FPTieEven, false);
7031       return;
7032     case NEON_FRINTP_H:
7033       frint(fpf, rd, rn, FPPositiveInfinity, false);
7034       return;
7035     case NEON_FRINTZ_H:
7036       frint(fpf, rd, rn, FPZero, false);
7037       return;
7038     case NEON_FABS_H:
7039       fabs_(fpf, rd, rn);
7040       return;
7041     case NEON_FNEG_H:
7042       fneg(fpf, rd, rn);
7043       return;
7044     case NEON_FSQRT_H:
7045       fsqrt(fpf, rd, rn);
7046       return;
7047     case NEON_FRSQRTE_H:
7048       frsqrte(fpf, rd, rn);
7049       return;
7050     case NEON_FRECPE_H:
7051       frecpe(fpf, rd, rn, fpcr_rounding);
7052       return;
7053     case NEON_FCMGT_H_zero:
7054       fcmp_zero(fpf, rd, rn, gt);
7055       return;
7056     case NEON_FCMGE_H_zero:
7057       fcmp_zero(fpf, rd, rn, ge);
7058       return;
7059     case NEON_FCMEQ_H_zero:
7060       fcmp_zero(fpf, rd, rn, eq);
7061       return;
7062     case NEON_FCMLE_H_zero:
7063       fcmp_zero(fpf, rd, rn, le);
7064       return;
7065     case NEON_FCMLT_H_zero:
7066       fcmp_zero(fpf, rd, rn, lt);
7067       return;
7068     default:
7069       VIXL_UNIMPLEMENTED();
7070       return;
7071   }
7072 }
7073 
7074 
7075 void Simulator::VisitNEON3Same(const Instruction* instr) {
7076   NEONFormatDecoder nfd(instr);
7077   SimVRegister& rd = ReadVRegister(instr->GetRd());
7078   SimVRegister& rn = ReadVRegister(instr->GetRn());
7079   SimVRegister& rm = ReadVRegister(instr->GetRm());
7080 
7081   if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
7082     VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
7083     switch (instr->Mask(NEON3SameLogicalMask)) {
7084       case NEON_AND:
7085         and_(vf, rd, rn, rm);
7086         break;
7087       case NEON_ORR:
7088         orr(vf, rd, rn, rm);
7089         break;
7090       case NEON_ORN:
7091         orn(vf, rd, rn, rm);
7092         break;
7093       case NEON_EOR:
7094         eor(vf, rd, rn, rm);
7095         break;
7096       case NEON_BIC:
7097         bic(vf, rd, rn, rm);
7098         break;
7099       case NEON_BIF:
7100         bif(vf, rd, rn, rm);
7101         break;
7102       case NEON_BIT:
7103         bit(vf, rd, rn, rm);
7104         break;
7105       case NEON_BSL:
7106         bsl(vf, rd, rd, rn, rm);
7107         break;
7108       default:
7109         VIXL_UNIMPLEMENTED();
7110     }
7111   } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
7112     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7113     switch (instr->Mask(NEON3SameFPMask)) {
7114       case NEON_FADD:
7115         fadd(vf, rd, rn, rm);
7116         break;
7117       case NEON_FSUB:
7118         fsub(vf, rd, rn, rm);
7119         break;
7120       case NEON_FMUL:
7121         fmul(vf, rd, rn, rm);
7122         break;
7123       case NEON_FDIV:
7124         fdiv(vf, rd, rn, rm);
7125         break;
7126       case NEON_FMAX:
7127         fmax(vf, rd, rn, rm);
7128         break;
7129       case NEON_FMIN:
7130         fmin(vf, rd, rn, rm);
7131         break;
7132       case NEON_FMAXNM:
7133         fmaxnm(vf, rd, rn, rm);
7134         break;
7135       case NEON_FMINNM:
7136         fminnm(vf, rd, rn, rm);
7137         break;
7138       case NEON_FMLA:
7139         fmla(vf, rd, rd, rn, rm);
7140         break;
7141       case NEON_FMLS:
7142         fmls(vf, rd, rd, rn, rm);
7143         break;
7144       case NEON_FMULX:
7145         fmulx(vf, rd, rn, rm);
7146         break;
7147       case NEON_FACGE:
7148         fabscmp(vf, rd, rn, rm, ge);
7149         break;
7150       case NEON_FACGT:
7151         fabscmp(vf, rd, rn, rm, gt);
7152         break;
7153       case NEON_FCMEQ:
7154         fcmp(vf, rd, rn, rm, eq);
7155         break;
7156       case NEON_FCMGE:
7157         fcmp(vf, rd, rn, rm, ge);
7158         break;
7159       case NEON_FCMGT:
7160         fcmp(vf, rd, rn, rm, gt);
7161         break;
7162       case NEON_FRECPS:
7163         frecps(vf, rd, rn, rm);
7164         break;
7165       case NEON_FRSQRTS:
7166         frsqrts(vf, rd, rn, rm);
7167         break;
7168       case NEON_FABD:
7169         fabd(vf, rd, rn, rm);
7170         break;
7171       case NEON_FADDP:
7172         faddp(vf, rd, rn, rm);
7173         break;
7174       case NEON_FMAXP:
7175         fmaxp(vf, rd, rn, rm);
7176         break;
7177       case NEON_FMAXNMP:
7178         fmaxnmp(vf, rd, rn, rm);
7179         break;
7180       case NEON_FMINP:
7181         fminp(vf, rd, rn, rm);
7182         break;
7183       case NEON_FMINNMP:
7184         fminnmp(vf, rd, rn, rm);
7185         break;
7186       default:
7187         // FMLAL{2} and FMLSL{2} have special-case encodings.
7188         switch (instr->Mask(NEON3SameFHMMask)) {
7189           case NEON_FMLAL:
7190             fmlal(vf, rd, rn, rm);
7191             break;
7192           case NEON_FMLAL2:
7193             fmlal2(vf, rd, rn, rm);
7194             break;
7195           case NEON_FMLSL:
7196             fmlsl(vf, rd, rn, rm);
7197             break;
7198           case NEON_FMLSL2:
7199             fmlsl2(vf, rd, rn, rm);
7200             break;
7201           default:
7202             VIXL_UNIMPLEMENTED();
7203         }
7204     }
7205   } else {
7206     VectorFormat vf = nfd.GetVectorFormat();
7207     switch (instr->Mask(NEON3SameMask)) {
7208       case NEON_ADD:
7209         add(vf, rd, rn, rm);
7210         break;
7211       case NEON_ADDP:
7212         addp(vf, rd, rn, rm);
7213         break;
7214       case NEON_CMEQ:
7215         cmp(vf, rd, rn, rm, eq);
7216         break;
7217       case NEON_CMGE:
7218         cmp(vf, rd, rn, rm, ge);
7219         break;
7220       case NEON_CMGT:
7221         cmp(vf, rd, rn, rm, gt);
7222         break;
7223       case NEON_CMHI:
7224         cmp(vf, rd, rn, rm, hi);
7225         break;
7226       case NEON_CMHS:
7227         cmp(vf, rd, rn, rm, hs);
7228         break;
7229       case NEON_CMTST:
7230         cmptst(vf, rd, rn, rm);
7231         break;
7232       case NEON_MLS:
7233         mls(vf, rd, rd, rn, rm);
7234         break;
7235       case NEON_MLA:
7236         mla(vf, rd, rd, rn, rm);
7237         break;
7238       case NEON_MUL:
7239         mul(vf, rd, rn, rm);
7240         break;
7241       case NEON_PMUL:
7242         pmul(vf, rd, rn, rm);
7243         break;
7244       case NEON_SMAX:
7245         smax(vf, rd, rn, rm);
7246         break;
7247       case NEON_SMAXP:
7248         smaxp(vf, rd, rn, rm);
7249         break;
7250       case NEON_SMIN:
7251         smin(vf, rd, rn, rm);
7252         break;
7253       case NEON_SMINP:
7254         sminp(vf, rd, rn, rm);
7255         break;
7256       case NEON_SUB:
7257         sub(vf, rd, rn, rm);
7258         break;
7259       case NEON_UMAX:
7260         umax(vf, rd, rn, rm);
7261         break;
7262       case NEON_UMAXP:
7263         umaxp(vf, rd, rn, rm);
7264         break;
7265       case NEON_UMIN:
7266         umin(vf, rd, rn, rm);
7267         break;
7268       case NEON_UMINP:
7269         uminp(vf, rd, rn, rm);
7270         break;
7271       case NEON_SSHL:
7272         sshl(vf, rd, rn, rm);
7273         break;
7274       case NEON_USHL:
7275         ushl(vf, rd, rn, rm);
7276         break;
7277       case NEON_SABD:
7278         absdiff(vf, rd, rn, rm, true);
7279         break;
7280       case NEON_UABD:
7281         absdiff(vf, rd, rn, rm, false);
7282         break;
7283       case NEON_SABA:
7284         saba(vf, rd, rn, rm);
7285         break;
7286       case NEON_UABA:
7287         uaba(vf, rd, rn, rm);
7288         break;
7289       case NEON_UQADD:
7290         add(vf, rd, rn, rm).UnsignedSaturate(vf);
7291         break;
7292       case NEON_SQADD:
7293         add(vf, rd, rn, rm).SignedSaturate(vf);
7294         break;
7295       case NEON_UQSUB:
7296         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
7297         break;
7298       case NEON_SQSUB:
7299         sub(vf, rd, rn, rm).SignedSaturate(vf);
7300         break;
7301       case NEON_SQDMULH:
7302         sqdmulh(vf, rd, rn, rm);
7303         break;
7304       case NEON_SQRDMULH:
7305         sqrdmulh(vf, rd, rn, rm);
7306         break;
7307       case NEON_UQSHL:
7308         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
7309         break;
7310       case NEON_SQSHL:
7311         sshl(vf, rd, rn, rm).SignedSaturate(vf);
7312         break;
7313       case NEON_URSHL:
7314         ushl(vf, rd, rn, rm).Round(vf);
7315         break;
7316       case NEON_SRSHL:
7317         sshl(vf, rd, rn, rm).Round(vf);
7318         break;
7319       case NEON_UQRSHL:
7320         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
7321         break;
7322       case NEON_SQRSHL:
7323         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
7324         break;
7325       case NEON_UHADD:
7326         add(vf, rd, rn, rm).Uhalve(vf);
7327         break;
7328       case NEON_URHADD:
7329         add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
7330         break;
7331       case NEON_SHADD:
7332         add(vf, rd, rn, rm).Halve(vf);
7333         break;
7334       case NEON_SRHADD:
7335         add(vf, rd, rn, rm).Halve(vf).Round(vf);
7336         break;
7337       case NEON_UHSUB:
7338         sub(vf, rd, rn, rm).Uhalve(vf);
7339         break;
7340       case NEON_SHSUB:
7341         sub(vf, rd, rn, rm).Halve(vf);
7342         break;
7343       default:
7344         VIXL_UNIMPLEMENTED();
7345     }
7346   }
7347 }
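
// Editorial sketch, not called by the simulator: the saturating forms above
// (NEON_UQADD, for example) clamp at the lane limits instead of wrapping. A
// hypothetical scalar equivalent for 8-bit unsigned lanes:
static inline uint8_t SketchUqadd8(uint8_t a, uint8_t b) {
  unsigned sum = static_cast<unsigned>(a) + b;
  return (sum > 255) ? 255 : static_cast<uint8_t>(sum);
}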
7348 
7349 
7350 void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
7351   NEONFormatDecoder nfd(instr);
7352   SimVRegister& rd = ReadVRegister(instr->GetRd());
7353   SimVRegister& rn = ReadVRegister(instr->GetRn());
7354   SimVRegister& rm = ReadVRegister(instr->GetRm());
7355 
7356   VectorFormat vf = nfd.GetVectorFormat(nfd.FP16FormatMap());
7357   switch (instr->Mask(NEON3SameFP16Mask)) {
7358 #define SIM_FUNC(A, B) \
7359   case NEON_##A##_H:   \
7360     B(vf, rd, rn, rm); \
7361     break;
7362     SIM_FUNC(FMAXNM, fmaxnm);
7363     SIM_FUNC(FADD, fadd);
7364     SIM_FUNC(FMULX, fmulx);
7365     SIM_FUNC(FMAX, fmax);
7366     SIM_FUNC(FRECPS, frecps);
7367     SIM_FUNC(FMINNM, fminnm);
7368     SIM_FUNC(FSUB, fsub);
7369     SIM_FUNC(FMIN, fmin);
7370     SIM_FUNC(FRSQRTS, frsqrts);
7371     SIM_FUNC(FMAXNMP, fmaxnmp);
7372     SIM_FUNC(FADDP, faddp);
7373     SIM_FUNC(FMUL, fmul);
7374     SIM_FUNC(FMAXP, fmaxp);
7375     SIM_FUNC(FDIV, fdiv);
7376     SIM_FUNC(FMINNMP, fminnmp);
7377     SIM_FUNC(FABD, fabd);
7378     SIM_FUNC(FMINP, fminp);
7379 #undef SIM_FUNC
7380     case NEON_FMLA_H:
7381       fmla(vf, rd, rd, rn, rm);
7382       break;
7383     case NEON_FMLS_H:
7384       fmls(vf, rd, rd, rn, rm);
7385       break;
7386     case NEON_FCMEQ_H:
7387       fcmp(vf, rd, rn, rm, eq);
7388       break;
7389     case NEON_FCMGE_H:
7390       fcmp(vf, rd, rn, rm, ge);
7391       break;
7392     case NEON_FACGE_H:
7393       fabscmp(vf, rd, rn, rm, ge);
7394       break;
7395     case NEON_FCMGT_H:
7396       fcmp(vf, rd, rn, rm, gt);
7397       break;
7398     case NEON_FACGT_H:
7399       fabscmp(vf, rd, rn, rm, gt);
7400       break;
7401     default:
7402       VIXL_UNIMPLEMENTED();
7403       break;
7404   }
7405 }
7406 
7407 void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
7408   NEONFormatDecoder nfd(instr);
7409   SimVRegister& rd = ReadVRegister(instr->GetRd());
7410   SimVRegister& rn = ReadVRegister(instr->GetRn());
7411   SimVRegister& rm = ReadVRegister(instr->GetRm());
7412   int rot = 0;
7413   VectorFormat vf = nfd.GetVectorFormat();
7414 
7415   switch (form_hash_) {
7416     case Hash("fcmla_asimdsame2_c"):
7417       rot = instr->GetImmRotFcmlaVec();
7418       fcmla(vf, rd, rn, rm, rd, rot);
7419       break;
7420     case Hash("fcadd_asimdsame2_c"):
7421       rot = instr->GetImmRotFcadd();
7422       fcadd(vf, rd, rn, rm, rot);
7423       break;
7424     case Hash("sdot_asimdsame2_d"):
7425       sdot(vf, rd, rn, rm);
7426       break;
7427     case Hash("udot_asimdsame2_d"):
7428       udot(vf, rd, rn, rm);
7429       break;
7430     case Hash("usdot_asimdsame2_d"):
7431       usdot(vf, rd, rn, rm);
7432       break;
7433     case Hash("sqrdmlah_asimdsame2_only"):
7434       sqrdmlah(vf, rd, rn, rm);
7435       break;
7436     case Hash("sqrdmlsh_asimdsame2_only"):
7437       sqrdmlsh(vf, rd, rn, rm);
7438       break;
7439   }
7440 }
7441 
7442 
7443 void Simulator::VisitNEON3Different(const Instruction* instr) {
7444   NEONFormatDecoder nfd(instr);
7445   VectorFormat vf = nfd.GetVectorFormat();
7446   VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7447 
7448   SimVRegister& rd = ReadVRegister(instr->GetRd());
7449   SimVRegister& rn = ReadVRegister(instr->GetRn());
7450   SimVRegister& rm = ReadVRegister(instr->GetRm());
7451 
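  // Editorial note: in the cases below, the "L" forms widen both sources
  // before operating, the "W" forms widen only the second source, and the
  // "HN" forms narrow the result by keeping the high half of each lane.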
7452   switch (instr->Mask(NEON3DifferentMask)) {
7453     case NEON_PMULL:
7454       pmull(vf_l, rd, rn, rm);
7455       break;
7456     case NEON_PMULL2:
7457       pmull2(vf_l, rd, rn, rm);
7458       break;
7459     case NEON_UADDL:
7460       uaddl(vf_l, rd, rn, rm);
7461       break;
7462     case NEON_UADDL2:
7463       uaddl2(vf_l, rd, rn, rm);
7464       break;
7465     case NEON_SADDL:
7466       saddl(vf_l, rd, rn, rm);
7467       break;
7468     case NEON_SADDL2:
7469       saddl2(vf_l, rd, rn, rm);
7470       break;
7471     case NEON_USUBL:
7472       usubl(vf_l, rd, rn, rm);
7473       break;
7474     case NEON_USUBL2:
7475       usubl2(vf_l, rd, rn, rm);
7476       break;
7477     case NEON_SSUBL:
7478       ssubl(vf_l, rd, rn, rm);
7479       break;
7480     case NEON_SSUBL2:
7481       ssubl2(vf_l, rd, rn, rm);
7482       break;
7483     case NEON_SABAL:
7484       sabal(vf_l, rd, rn, rm);
7485       break;
7486     case NEON_SABAL2:
7487       sabal2(vf_l, rd, rn, rm);
7488       break;
7489     case NEON_UABAL:
7490       uabal(vf_l, rd, rn, rm);
7491       break;
7492     case NEON_UABAL2:
7493       uabal2(vf_l, rd, rn, rm);
7494       break;
7495     case NEON_SABDL:
7496       sabdl(vf_l, rd, rn, rm);
7497       break;
7498     case NEON_SABDL2:
7499       sabdl2(vf_l, rd, rn, rm);
7500       break;
7501     case NEON_UABDL:
7502       uabdl(vf_l, rd, rn, rm);
7503       break;
7504     case NEON_UABDL2:
7505       uabdl2(vf_l, rd, rn, rm);
7506       break;
7507     case NEON_SMLAL:
7508       smlal(vf_l, rd, rn, rm);
7509       break;
7510     case NEON_SMLAL2:
7511       smlal2(vf_l, rd, rn, rm);
7512       break;
7513     case NEON_UMLAL:
7514       umlal(vf_l, rd, rn, rm);
7515       break;
7516     case NEON_UMLAL2:
7517       umlal2(vf_l, rd, rn, rm);
7518       break;
7519     case NEON_SMLSL:
7520       smlsl(vf_l, rd, rn, rm);
7521       break;
7522     case NEON_SMLSL2:
7523       smlsl2(vf_l, rd, rn, rm);
7524       break;
7525     case NEON_UMLSL:
7526       umlsl(vf_l, rd, rn, rm);
7527       break;
7528     case NEON_UMLSL2:
7529       umlsl2(vf_l, rd, rn, rm);
7530       break;
7531     case NEON_SMULL:
7532       smull(vf_l, rd, rn, rm);
7533       break;
7534     case NEON_SMULL2:
7535       smull2(vf_l, rd, rn, rm);
7536       break;
7537     case NEON_UMULL:
7538       umull(vf_l, rd, rn, rm);
7539       break;
7540     case NEON_UMULL2:
7541       umull2(vf_l, rd, rn, rm);
7542       break;
7543     case NEON_SQDMLAL:
7544       sqdmlal(vf_l, rd, rn, rm);
7545       break;
7546     case NEON_SQDMLAL2:
7547       sqdmlal2(vf_l, rd, rn, rm);
7548       break;
7549     case NEON_SQDMLSL:
7550       sqdmlsl(vf_l, rd, rn, rm);
7551       break;
7552     case NEON_SQDMLSL2:
7553       sqdmlsl2(vf_l, rd, rn, rm);
7554       break;
7555     case NEON_SQDMULL:
7556       sqdmull(vf_l, rd, rn, rm);
7557       break;
7558     case NEON_SQDMULL2:
7559       sqdmull2(vf_l, rd, rn, rm);
7560       break;
7561     case NEON_UADDW:
7562       uaddw(vf_l, rd, rn, rm);
7563       break;
7564     case NEON_UADDW2:
7565       uaddw2(vf_l, rd, rn, rm);
7566       break;
7567     case NEON_SADDW:
7568       saddw(vf_l, rd, rn, rm);
7569       break;
7570     case NEON_SADDW2:
7571       saddw2(vf_l, rd, rn, rm);
7572       break;
7573     case NEON_USUBW:
7574       usubw(vf_l, rd, rn, rm);
7575       break;
7576     case NEON_USUBW2:
7577       usubw2(vf_l, rd, rn, rm);
7578       break;
7579     case NEON_SSUBW:
7580       ssubw(vf_l, rd, rn, rm);
7581       break;
7582     case NEON_SSUBW2:
7583       ssubw2(vf_l, rd, rn, rm);
7584       break;
7585     case NEON_ADDHN:
7586       addhn(vf, rd, rn, rm);
7587       break;
7588     case NEON_ADDHN2:
7589       addhn2(vf, rd, rn, rm);
7590       break;
7591     case NEON_RADDHN:
7592       raddhn(vf, rd, rn, rm);
7593       break;
7594     case NEON_RADDHN2:
7595       raddhn2(vf, rd, rn, rm);
7596       break;
7597     case NEON_SUBHN:
7598       subhn(vf, rd, rn, rm);
7599       break;
7600     case NEON_SUBHN2:
7601       subhn2(vf, rd, rn, rm);
7602       break;
7603     case NEON_RSUBHN:
7604       rsubhn(vf, rd, rn, rm);
7605       break;
7606     case NEON_RSUBHN2:
7607       rsubhn2(vf, rd, rn, rm);
7608       break;
7609     default:
7610       VIXL_UNIMPLEMENTED();
7611   }
7612 }
7613 
7614 
7615 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
7616   NEONFormatDecoder nfd(instr);
7617 
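  // Bit 30 is the Q bit; it selects between the 4H and 8H forms of the FP16
  // across-lanes instructions.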
7618   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
7619 
7620   SimVRegister& rd = ReadVRegister(instr->GetRd());
7621   SimVRegister& rn = ReadVRegister(instr->GetRn());
7622 
7623   if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
7624     VectorFormat vf = nfd.GetVectorFormat(&map_half);
7625     switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
7626       case NEON_FMAXV_H:
7627         fmaxv(vf, rd, rn);
7628         break;
7629       case NEON_FMINV_H:
7630         fminv(vf, rd, rn);
7631         break;
7632       case NEON_FMAXNMV_H:
7633         fmaxnmv(vf, rd, rn);
7634         break;
7635       case NEON_FMINNMV_H:
7636         fminnmv(vf, rd, rn);
7637         break;
7638       default:
7639         VIXL_UNIMPLEMENTED();
7640     }
7641   } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
7642     // The input operand's VectorFormat is passed for these instructions.
7643     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
7644 
7645     switch (instr->Mask(NEONAcrossLanesFPMask)) {
7646       case NEON_FMAXV:
7647         fmaxv(vf, rd, rn);
7648         break;
7649       case NEON_FMINV:
7650         fminv(vf, rd, rn);
7651         break;
7652       case NEON_FMAXNMV:
7653         fmaxnmv(vf, rd, rn);
7654         break;
7655       case NEON_FMINNMV:
7656         fminnmv(vf, rd, rn);
7657         break;
7658       default:
7659         VIXL_UNIMPLEMENTED();
7660     }
7661   } else {
7662     VectorFormat vf = nfd.GetVectorFormat();
7663 
7664     switch (instr->Mask(NEONAcrossLanesMask)) {
7665       case NEON_ADDV:
7666         addv(vf, rd, rn);
7667         break;
7668       case NEON_SMAXV:
7669         smaxv(vf, rd, rn);
7670         break;
7671       case NEON_SMINV:
7672         sminv(vf, rd, rn);
7673         break;
7674       case NEON_UMAXV:
7675         umaxv(vf, rd, rn);
7676         break;
7677       case NEON_UMINV:
7678         uminv(vf, rd, rn);
7679         break;
7680       case NEON_SADDLV:
7681         saddlv(vf, rd, rn);
7682         break;
7683       case NEON_UADDLV:
7684         uaddlv(vf, rd, rn);
7685         break;
7686       default:
7687         VIXL_UNIMPLEMENTED();
7688     }
7689   }
7690 }
7691 
7692 void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
7693   NEONFormatDecoder nfd(instr);
7694   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
7695 
7696   SimVRegister& rd = ReadVRegister(instr->GetRd());
7697   SimVRegister& rn = ReadVRegister(instr->GetRn());
7698 
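  // For H-sized elements (size == 1), the encoding restricts Rm to V0-V15 and
  // the M bit supplies an extra index bit; larger elements use the full Rm
  // field and an H:L index.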
7699   int rm_reg = instr->GetRm();
7700   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7701   if (instr->GetNEONSize() == 1) {
7702     rm_reg = instr->GetRmLow16();
7703     index = (index << 1) | instr->GetNEONM();
7704   }
7705   SimVRegister& rm = ReadVRegister(rm_reg);
7706 
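  // Broadcast the indexed element to a temporary register so the plain
  // three-operand widening helpers below can be reused. The index refers to
  // elements of the narrow source format, hence the half-width, Q-sized
  // format used for the duplication.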
7707   SimVRegister temp;
7708   VectorFormat indexform =
7709       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
7710   dup_element(indexform, temp, rm, index);
7711 
7712   bool is_2 = (instr->Mask(NEON_Q) != 0);
7713 
7714   switch (form_hash_) {
7715     case Hash("smull_asimdelem_l"):
7716       smull(vf, rd, rn, temp, is_2);
7717       break;
7718     case Hash("umull_asimdelem_l"):
7719       umull(vf, rd, rn, temp, is_2);
7720       break;
7721     case Hash("smlal_asimdelem_l"):
7722       smlal(vf, rd, rn, temp, is_2);
7723       break;
7724     case Hash("umlal_asimdelem_l"):
7725       umlal(vf, rd, rn, temp, is_2);
7726       break;
7727     case Hash("smlsl_asimdelem_l"):
7728       smlsl(vf, rd, rn, temp, is_2);
7729       break;
7730     case Hash("umlsl_asimdelem_l"):
7731       umlsl(vf, rd, rn, temp, is_2);
7732       break;
7733     case Hash("sqdmull_asimdelem_l"):
7734       sqdmull(vf, rd, rn, temp, is_2);
7735       break;
7736     case Hash("sqdmlal_asimdelem_l"):
7737       sqdmlal(vf, rd, rn, temp, is_2);
7738       break;
7739     case Hash("sqdmlsl_asimdelem_l"):
7740       sqdmlsl(vf, rd, rn, temp, is_2);
7741       break;
7742     default:
7743       VIXL_UNREACHABLE();
7744   }
7745 }
7746 
7747 void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) {
7748   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
7749   SimVRegister& rd = ReadVRegister(instr->GetRd());
7750   SimVRegister& rn = ReadVRegister(instr->GetRn());
7751   SimVRegister& rm = ReadVRegister(instr->GetRmLow16());
7752 
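  // These widening FP16 multiplies index an H-sized element, so the index is
  // the three bits H:L:M and Rm (read above) is restricted to V0-V15.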
7753   int index =
7754       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
7755 
7756   switch (form_hash_) {
7757     case Hash("fmlal_asimdelem_lh"):
7758       fmlal(vform, rd, rn, rm, index);
7759       break;
7760     case Hash("fmlal2_asimdelem_lh"):
7761       fmlal2(vform, rd, rn, rm, index);
7762       break;
7763     case Hash("fmlsl_asimdelem_lh"):
7764       fmlsl(vform, rd, rn, rm, index);
7765       break;
7766     case Hash("fmlsl2_asimdelem_lh"):
7767       fmlsl2(vform, rd, rn, rm, index);
7768       break;
7769     default:
7770       VIXL_UNREACHABLE();
7771   }
7772 }
7773 
7774 void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) {
7775   NEONFormatDecoder nfd(instr);
7776   static const NEONFormatMap map =
7777       {{23, 22, 30},
7778        {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
7779   VectorFormat vform = nfd.GetVectorFormat(&map);
7780 
7781   SimVRegister& rd = ReadVRegister(instr->GetRd());
7782   SimVRegister& rn = ReadVRegister(instr->GetRn());
7783 
7784   int rm_reg = instr->GetRm();
7785   int index =
7786       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
7787 
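  // The element index is encoded in H:L:M, but how many of those bits are
  // used depends on the element size: H elements use all three (and restrict
  // Rm to V0-V15), S elements use H:L, and D elements use H only.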
7788   if ((vform == kFormat4H) || (vform == kFormat8H)) {
7789     rm_reg &= 0xf;
7790   } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
7791     index >>= 1;
7792   } else {
7793     VIXL_ASSERT(vform == kFormat2D);
7794     VIXL_ASSERT(instr->GetNEONL() == 0);
7795     index >>= 2;
7796   }
7797 
7798   SimVRegister& rm = ReadVRegister(rm_reg);
7799 
7800   switch (form_hash_) {
7801     case Hash("fmul_asimdelem_rh_h"):
7802     case Hash("fmul_asimdelem_r_sd"):
7803       fmul(vform, rd, rn, rm, index);
7804       break;
7805     case Hash("fmla_asimdelem_rh_h"):
7806     case Hash("fmla_asimdelem_r_sd"):
7807       fmla(vform, rd, rn, rm, index);
7808       break;
7809     case Hash("fmls_asimdelem_rh_h"):
7810     case Hash("fmls_asimdelem_r_sd"):
7811       fmls(vform, rd, rn, rm, index);
7812       break;
7813     case Hash("fmulx_asimdelem_rh_h"):
7814     case Hash("fmulx_asimdelem_r_sd"):
7815       fmulx(vform, rd, rn, rm, index);
7816       break;
7817     default:
7818       VIXL_UNREACHABLE();
7819   }
7820 }
7821 
7822 void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) {
7823   VectorFormat vform = instr->GetNEONQ() ? kFormat8H : kFormat4H;
7824   SimVRegister& rd = ReadVRegister(instr->GetRd());
7825   SimVRegister& rn = ReadVRegister(instr->GetRn());
7826   SimVRegister& rm = ReadVRegister(instr->GetRm());
7827   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7828 
7829   switch (form_hash_) {
7830     case Hash("fcmla_asimdelem_c_s"):
7831       vform = kFormat4S;
7832       index >>= 1;
7833       VIXL_FALLTHROUGH();
7834     case Hash("fcmla_asimdelem_c_h"):
7835       fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca());
7836       break;
7837     default:
7838       VIXL_UNREACHABLE();
7839   }
7840 }
7841 
7842 void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) {
7843   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
7844 
7845   SimVRegister& rd = ReadVRegister(instr->GetRd());
7846   SimVRegister& rn = ReadVRegister(instr->GetRn());
7847   SimVRegister& rm = ReadVRegister(instr->GetRm());
7848   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7849 
7850   SimVRegister temp;
7851   // NEON indexed `dot` allows the index value to exceed the register size.
7852   // Promote the format to a Q-sized vector format before the duplication.
7853   dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index);
7854 
7855   switch (form_hash_) {
7856     case Hash("sdot_asimdelem_d"):
7857       sdot(vform, rd, rn, temp);
7858       break;
7859     case Hash("udot_asimdelem_d"):
7860       udot(vform, rd, rn, temp);
7861       break;
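    // SUDOT swaps the source operands so that the USDOT helper can be reused:
    // signed * unsigned rather than unsigned * signed.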
7862     case Hash("sudot_asimdelem_d"):
7863       usdot(vform, rd, temp, rn);
7864       break;
7865     case Hash("usdot_asimdelem_d"):
7866       usdot(vform, rd, rn, temp);
7867       break;
7868   }
7869 }
7870 
7871 void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
7872   NEONFormatDecoder nfd(instr);
7873   VectorFormat vform = nfd.GetVectorFormat();
7874 
7875   SimVRegister& rd = ReadVRegister(instr->GetRd());
7876   SimVRegister& rn = ReadVRegister(instr->GetRn());
7877 
7878   int rm_reg = instr->GetRm();
7879   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
7880 
7881   if ((vform == kFormat4H) || (vform == kFormat8H)) {
7882     rm_reg &= 0xf;
7883     index = (index << 1) | instr->GetNEONM();
7884   }
7885 
7886   SimVRegister& rm = ReadVRegister(rm_reg);
7887 
7888   switch (form_hash_) {
7889     case Hash("mul_asimdelem_r"):
7890       mul(vform, rd, rn, rm, index);
7891       break;
7892     case Hash("mla_asimdelem_r"):
7893       mla(vform, rd, rn, rm, index);
7894       break;
7895     case Hash("mls_asimdelem_r"):
7896       mls(vform, rd, rn, rm, index);
7897       break;
7898     case Hash("sqdmulh_asimdelem_r"):
7899       sqdmulh(vform, rd, rn, rm, index);
7900       break;
7901     case Hash("sqrdmulh_asimdelem_r"):
7902       sqrdmulh(vform, rd, rn, rm, index);
7903       break;
7904     case Hash("sqrdmlah_asimdelem_r"):
7905       sqrdmlah(vform, rd, rn, rm, index);
7906       break;
7907     case Hash("sqrdmlsh_asimdelem_r"):
7908       sqrdmlsh(vform, rd, rn, rm, index);
7909       break;
7910   }
7911 }
7912 
7913 
7914 void Simulator::VisitNEONCopy(const Instruction* instr) {
7915   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
7916   VectorFormat vf = nfd.GetVectorFormat();
7917 
7918   SimVRegister& rd = ReadVRegister(instr->GetRd());
7919   SimVRegister& rn = ReadVRegister(instr->GetRn());
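  // imm5 encodes both the element size (the position of its lowest set bit)
  // and the element index (the bits above that), so shift out the size marker
  // to recover the index.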
7920   int imm5 = instr->GetImmNEON5();
7921   int tz = CountTrailingZeros(imm5, 32);
7922   int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5);
7923 
7924   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
7925     int imm4 = instr->GetImmNEON4();
7926     int rn_index = ExtractSignedBitfield32(31, tz, imm4);
7927     ins_element(vf, rd, reg_index, rn, rn_index);
7928   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
7929     ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
7930   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
7931     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
7932     value &= MaxUintFromFormat(vf);
7933     WriteXRegister(instr->GetRd(), value);
7934   } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
7935     int64_t value = LogicVRegister(rn).Int(vf, reg_index);
7936     if (instr->GetNEONQ()) {
7937       WriteXRegister(instr->GetRd(), value);
7938     } else {
7939       WriteWRegister(instr->GetRd(), static_cast<int32_t>(value));
7940     }
7941   } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
7942     dup_element(vf, rd, rn, reg_index);
7943   } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
7944     dup_immediate(vf, rd, ReadXRegister(instr->GetRn()));
7945   } else {
7946     VIXL_UNIMPLEMENTED();
7947   }
7948 }
7949 
7950 
7951 void Simulator::VisitNEONExtract(const Instruction* instr) {
7952   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
7953   VectorFormat vf = nfd.GetVectorFormat();
7954   SimVRegister& rd = ReadVRegister(instr->GetRd());
7955   SimVRegister& rn = ReadVRegister(instr->GetRn());
7956   SimVRegister& rm = ReadVRegister(instr->GetRm());
7957   if (instr->Mask(NEONExtractMask) == NEON_EXT) {
7958     int index = instr->GetImmNEONExt();
7959     ext(vf, rd, rn, rm, index);
7960   } else {
7961     VIXL_UNIMPLEMENTED();
7962   }
7963 }
7964 
7965 
7966 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
7967                                                AddrMode addr_mode) {
7968   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
7969   VectorFormat vf = nfd.GetVectorFormat();
7970 
7971   uint64_t addr_base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7972   int reg_size = RegisterSizeInBytesFromFormat(vf);
7973 
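  // Up to four consecutive registers take part; the register numbers wrap
  // from V31 back to V0.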
7974   int reg[4];
7975   uint64_t addr[4];
7976   for (int i = 0; i < 4; i++) {
7977     reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters;
7978     addr[i] = addr_base + (i * reg_size);
7979   }
7980   int struct_parts = 1;
7981   int reg_count = 1;
7982   bool log_read = true;
7983 
7984   // Bit 23 determines whether this is an offset or post-index addressing mode.
7985   // In offset mode, bits 20 to 16 should be zero; these bits encode the
7986   // register or immediate in post-index mode.
7987   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
7988     VIXL_UNREACHABLE();
7989   }
7990 
7991   // We use the PostIndex mask here, as it works in this case for both Offset
7992   // and PostIndex addressing.
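  // The LD1/ST1 cases below fall through so that the four-, three- and
  // two-register forms share the single-register handling, incrementing
  // reg_count along the way.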
7993   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
7994     case NEON_LD1_4v:
7995     case NEON_LD1_4v_post:
7996       ld1(vf, ReadVRegister(reg[3]), addr[3]);
7997       reg_count++;
7998       VIXL_FALLTHROUGH();
7999     case NEON_LD1_3v:
8000     case NEON_LD1_3v_post:
8001       ld1(vf, ReadVRegister(reg[2]), addr[2]);
8002       reg_count++;
8003       VIXL_FALLTHROUGH();
8004     case NEON_LD1_2v:
8005     case NEON_LD1_2v_post:
8006       ld1(vf, ReadVRegister(reg[1]), addr[1]);
8007       reg_count++;
8008       VIXL_FALLTHROUGH();
8009     case NEON_LD1_1v:
8010     case NEON_LD1_1v_post:
8011       ld1(vf, ReadVRegister(reg[0]), addr[0]);
8012       break;
8013     case NEON_ST1_4v:
8014     case NEON_ST1_4v_post:
8015       st1(vf, ReadVRegister(reg[3]), addr[3]);
8016       reg_count++;
8017       VIXL_FALLTHROUGH();
8018     case NEON_ST1_3v:
8019     case NEON_ST1_3v_post:
8020       st1(vf, ReadVRegister(reg[2]), addr[2]);
8021       reg_count++;
8022       VIXL_FALLTHROUGH();
8023     case NEON_ST1_2v:
8024     case NEON_ST1_2v_post:
8025       st1(vf, ReadVRegister(reg[1]), addr[1]);
8026       reg_count++;
8027       VIXL_FALLTHROUGH();
8028     case NEON_ST1_1v:
8029     case NEON_ST1_1v_post:
8030       st1(vf, ReadVRegister(reg[0]), addr[0]);
8031       log_read = false;
8032       break;
8033     case NEON_LD2_post:
8034     case NEON_LD2:
8035       ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8036       struct_parts = 2;
8037       reg_count = 2;
8038       break;
8039     case NEON_ST2:
8040     case NEON_ST2_post:
8041       st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]);
8042       struct_parts = 2;
8043       reg_count = 2;
8044       log_read = false;
8045       break;
8046     case NEON_LD3_post:
8047     case NEON_LD3:
8048       ld3(vf,
8049           ReadVRegister(reg[0]),
8050           ReadVRegister(reg[1]),
8051           ReadVRegister(reg[2]),
8052           addr[0]);
8053       struct_parts = 3;
8054       reg_count = 3;
8055       break;
8056     case NEON_ST3:
8057     case NEON_ST3_post:
8058       st3(vf,
8059           ReadVRegister(reg[0]),
8060           ReadVRegister(reg[1]),
8061           ReadVRegister(reg[2]),
8062           addr[0]);
8063       struct_parts = 3;
8064       reg_count = 3;
8065       log_read = false;
8066       break;
8067     case NEON_ST4:
8068     case NEON_ST4_post:
8069       st4(vf,
8070           ReadVRegister(reg[0]),
8071           ReadVRegister(reg[1]),
8072           ReadVRegister(reg[2]),
8073           ReadVRegister(reg[3]),
8074           addr[0]);
8075       struct_parts = 4;
8076       reg_count = 4;
8077       log_read = false;
8078       break;
8079     case NEON_LD4_post:
8080     case NEON_LD4:
8081       ld4(vf,
8082           ReadVRegister(reg[0]),
8083           ReadVRegister(reg[1]),
8084           ReadVRegister(reg[2]),
8085           ReadVRegister(reg[3]),
8086           addr[0]);
8087       struct_parts = 4;
8088       reg_count = 4;
8089       break;
8090     default:
8091       VIXL_UNIMPLEMENTED();
8092   }
8093 
8094   bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites();
8095   if (do_trace) {
8096     PrintRegisterFormat print_format =
8097         GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8098     const char* op;
8099     if (log_read) {
8100       op = "<-";
8101     } else {
8102       op = "->";
8103       // Stores don't represent a change to the source register's value, so only
8104       // print the relevant part of the value.
8105       print_format = GetPrintRegPartial(print_format);
8106     }
8107 
8108     VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1));
8109     for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) {
8110       uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf));
8111       PrintVStructAccess(reg[s], struct_parts, print_format, op, address);
8112     }
8113   }
8114 
8115   if (addr_mode == PostIndex) {
8116     int rm = instr->GetRm();
8117     // The immediate post-index addressing mode is indicated by rm == 31.
8118     // The immediate is implied by the number of vector registers used.
8119     addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
8120                             : ReadXRegister(rm);
8121     WriteXRegister(instr->GetRn(), addr_base);
8122   } else {
8123     VIXL_ASSERT(addr_mode == Offset);
8124   }
8125 }
8126 
8127 
8128 void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
8129   NEONLoadStoreMultiStructHelper(instr, Offset);
8130 }
8131 
8132 
8133 void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
8134     const Instruction* instr) {
8135   NEONLoadStoreMultiStructHelper(instr, PostIndex);
8136 }
8137 
8138 
8139 void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
8140                                                 AddrMode addr_mode) {
8141   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
8142   int rt = instr->GetRt();
8143 
8144   // Bit 23 determines whether this is an offset or post-index addressing mode.
8145   // In offset mode, bits 20 to 16 should be zero; these bits encode the
8146   // register or immediate in post-index mode.
8147   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
8148     VIXL_UNREACHABLE();
8149   }
8150 
8151   // We use the PostIndex mask here, as it works in this case for both Offset
8152   // and PostIndex addressing.
8153   bool do_load = false;
8154 
8155   bool replicating = false;
8156 
8157   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
8158   VectorFormat vf_t = nfd.GetVectorFormat();
8159 
8160   VectorFormat vf = kFormat16B;
8161   switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
8162     case NEON_LD1_b:
8163     case NEON_LD1_b_post:
8164     case NEON_LD2_b:
8165     case NEON_LD2_b_post:
8166     case NEON_LD3_b:
8167     case NEON_LD3_b_post:
8168     case NEON_LD4_b:
8169     case NEON_LD4_b_post:
8170       do_load = true;
8171       VIXL_FALLTHROUGH();
8172     case NEON_ST1_b:
8173     case NEON_ST1_b_post:
8174     case NEON_ST2_b:
8175     case NEON_ST2_b_post:
8176     case NEON_ST3_b:
8177     case NEON_ST3_b_post:
8178     case NEON_ST4_b:
8179     case NEON_ST4_b_post:
8180       break;
8181 
8182     case NEON_LD1_h:
8183     case NEON_LD1_h_post:
8184     case NEON_LD2_h:
8185     case NEON_LD2_h_post:
8186     case NEON_LD3_h:
8187     case NEON_LD3_h_post:
8188     case NEON_LD4_h:
8189     case NEON_LD4_h_post:
8190       do_load = true;
8191       VIXL_FALLTHROUGH();
8192     case NEON_ST1_h:
8193     case NEON_ST1_h_post:
8194     case NEON_ST2_h:
8195     case NEON_ST2_h_post:
8196     case NEON_ST3_h:
8197     case NEON_ST3_h_post:
8198     case NEON_ST4_h:
8199     case NEON_ST4_h_post:
8200       vf = kFormat8H;
8201       break;
8202     case NEON_LD1_s:
8203     case NEON_LD1_s_post:
8204     case NEON_LD2_s:
8205     case NEON_LD2_s_post:
8206     case NEON_LD3_s:
8207     case NEON_LD3_s_post:
8208     case NEON_LD4_s:
8209     case NEON_LD4_s_post:
8210       do_load = true;
8211       VIXL_FALLTHROUGH();
8212     case NEON_ST1_s:
8213     case NEON_ST1_s_post:
8214     case NEON_ST2_s:
8215     case NEON_ST2_s_post:
8216     case NEON_ST3_s:
8217     case NEON_ST3_s_post:
8218     case NEON_ST4_s:
8219     case NEON_ST4_s_post: {
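      // The D variants of these forms differ from the S variants only in the
      // NEONLSSize bit, so they take the same path here; the static asserts
      // check that encoding relationship before vf is chosen from the size
      // bit.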
8220       VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
8221       VIXL_STATIC_ASSERT((NEON_LD1_s_post | (1 << NEONLSSize_offset)) ==
8222                          NEON_LD1_d_post);
8223       VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
8224       VIXL_STATIC_ASSERT((NEON_ST1_s_post | (1 << NEONLSSize_offset)) ==
8225                          NEON_ST1_d_post);
8226       vf = ((instr->GetNEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
8227       break;
8228     }
8229 
8230     case NEON_LD1R:
8231     case NEON_LD1R_post:
8232     case NEON_LD2R:
8233     case NEON_LD2R_post:
8234     case NEON_LD3R:
8235     case NEON_LD3R_post:
8236     case NEON_LD4R:
8237     case NEON_LD4R_post:
8238       vf = vf_t;
8239       do_load = true;
8240       replicating = true;
8241       break;
8242 
8243     default:
8244       VIXL_UNIMPLEMENTED();
8245   }
8246 
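  // The lane index is encoded alongside the lane size; index_shift is the
  // log2 of the lane size in bytes.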
8247   int index_shift = LaneSizeInBytesLog2FromFormat(vf);
8248   int lane = instr->GetNEONLSIndex(index_shift);
8249   int reg_count = 0;
8250   int rt2 = (rt + 1) % kNumberOfVRegisters;
8251   int rt3 = (rt2 + 1) % kNumberOfVRegisters;
8252   int rt4 = (rt3 + 1) % kNumberOfVRegisters;
8253   switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
8254     case NEONLoadStoreSingle1:
8255       reg_count = 1;
8256       if (replicating) {
8257         VIXL_ASSERT(do_load);
8258         ld1r(vf, ReadVRegister(rt), addr);
8259       } else if (do_load) {
8260         ld1(vf, ReadVRegister(rt), lane, addr);
8261       } else {
8262         st1(vf, ReadVRegister(rt), lane, addr);
8263       }
8264       break;
8265     case NEONLoadStoreSingle2:
8266       reg_count = 2;
8267       if (replicating) {
8268         VIXL_ASSERT(do_load);
8269         ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr);
8270       } else if (do_load) {
8271         ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8272       } else {
8273         st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr);
8274       }
8275       break;
8276     case NEONLoadStoreSingle3:
8277       reg_count = 3;
8278       if (replicating) {
8279         VIXL_ASSERT(do_load);
8280         ld3r(vf,
8281              ReadVRegister(rt),
8282              ReadVRegister(rt2),
8283              ReadVRegister(rt3),
8284              addr);
8285       } else if (do_load) {
8286         ld3(vf,
8287             ReadVRegister(rt),
8288             ReadVRegister(rt2),
8289             ReadVRegister(rt3),
8290             lane,
8291             addr);
8292       } else {
8293         st3(vf,
8294             ReadVRegister(rt),
8295             ReadVRegister(rt2),
8296             ReadVRegister(rt3),
8297             lane,
8298             addr);
8299       }
8300       break;
8301     case NEONLoadStoreSingle4:
8302       reg_count = 4;
8303       if (replicating) {
8304         VIXL_ASSERT(do_load);
8305         ld4r(vf,
8306              ReadVRegister(rt),
8307              ReadVRegister(rt2),
8308              ReadVRegister(rt3),
8309              ReadVRegister(rt4),
8310              addr);
8311       } else if (do_load) {
8312         ld4(vf,
8313             ReadVRegister(rt),
8314             ReadVRegister(rt2),
8315             ReadVRegister(rt3),
8316             ReadVRegister(rt4),
8317             lane,
8318             addr);
8319       } else {
8320         st4(vf,
8321             ReadVRegister(rt),
8322             ReadVRegister(rt2),
8323             ReadVRegister(rt3),
8324             ReadVRegister(rt4),
8325             lane,
8326             addr);
8327       }
8328       break;
8329     default:
8330       VIXL_UNIMPLEMENTED();
8331   }
8332 
8333   // Trace registers and/or memory writes.
8334   PrintRegisterFormat print_format =
8335       GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
8336   if (do_load) {
8337     if (ShouldTraceVRegs()) {
8338       if (replicating) {
8339         PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr);
8340       } else {
8341         PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr);
8342       }
8343     }
8344   } else {
8345     if (ShouldTraceWrites()) {
8346       // Stores don't represent a change to the source register's value, so only
8347       // print the relevant part of the value.
8348       print_format = GetPrintRegPartial(print_format);
8349       PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr);
8350     }
8351   }
8352 
8353   if (addr_mode == PostIndex) {
8354     int rm = instr->GetRm();
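    // As in the multi-structure helper, rm == 31 selects the immediate
    // post-index form; the implied offset is the total number of bytes
    // accessed (reg_count * lane_size).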
8355     int lane_size = LaneSizeInBytesFromFormat(vf);
8356     WriteXRegister(instr->GetRn(),
8357                    addr + ((rm == 31) ? (reg_count * lane_size)
8358                                       : ReadXRegister(rm)));
8359   }
8360 }
8361 
8362 
8363 void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
8364   NEONLoadStoreSingleStructHelper(instr, Offset);
8365 }
8366 
8367 
8368 void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
8369     const Instruction* instr) {
8370   NEONLoadStoreSingleStructHelper(instr, PostIndex);
8371 }
8372 
8373 
8374 void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
8375   SimVRegister& rd = ReadVRegister(instr->GetRd());
8376   int cmode = instr->GetNEONCmode();
8377   int cmode_3_1 = (cmode >> 1) & 7;
8378   int cmode_3 = (cmode >> 3) & 1;
8379   int cmode_2 = (cmode >> 2) & 1;
8380   int cmode_1 = (cmode >> 1) & 1;
8381   int cmode_0 = cmode & 1;
8382   int half_enc = instr->ExtractBit(11);
8383   int q = instr->GetNEONQ();
8384   int op_bit = instr->GetNEONModImmOp();
8385   uint64_t imm8 = instr->GetImmNEONabcdefgh();
8386   // Find the format and immediate value
8387   uint64_t imm = 0;
8388   VectorFormat vform = kFormatUndefined;
8389   switch (cmode_3_1) {
8390     case 0x0:
8391     case 0x1:
8392     case 0x2:
8393     case 0x3:
8394       vform = (q == 1) ? kFormat4S : kFormat2S;
8395       imm = imm8 << (8 * cmode_3_1);
8396       break;
8397     case 0x4:
8398     case 0x5:
8399       vform = (q == 1) ? kFormat8H : kFormat4H;
8400       imm = imm8 << (8 * cmode_1);
8401       break;
8402     case 0x6:
8403       vform = (q == 1) ? kFormat4S : kFormat2S;
8404       if (cmode_0 == 0) {
8405         imm = imm8 << 8 | 0x000000ff;
8406       } else {
8407         imm = imm8 << 16 | 0x0000ffff;
8408       }
8409       break;
8410     case 0x7:
8411       if (cmode_0 == 0 && op_bit == 0) {
8412         vform = q ? kFormat16B : kFormat8B;
8413         imm = imm8;
8414       } else if (cmode_0 == 0 && op_bit == 1) {
8415         vform = q ? kFormat2D : kFormat1D;
8416         imm = 0;
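        // Each set bit of imm8 selects a whole byte of the result; for
        // example, imm8 = 0b10100101 expands to 0xff00ff0000ff00ff.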
8417         for (int i = 0; i < 8; ++i) {
8418           if (imm8 & (1 << i)) {
8419             imm |= (UINT64_C(0xff) << (8 * i));
8420           }
8421         }
8422       } else {  // cmode_0 == 1, cmode == 0xf.
8423         if (half_enc == 1) {
8424           vform = q ? kFormat8H : kFormat4H;
8425           imm = Float16ToRawbits(instr->GetImmNEONFP16());
8426         } else if (op_bit == 0) {
8427           vform = q ? kFormat4S : kFormat2S;
8428           imm = FloatToRawbits(instr->GetImmNEONFP32());
8429         } else if (q == 1) {
8430           vform = kFormat2D;
8431           imm = DoubleToRawbits(instr->GetImmNEONFP64());
8432         } else {
8433           VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
8434           VisitUnallocated(instr);
8435         }
8436       }
8437       break;
8438     default:
8439       VIXL_UNREACHABLE();
8440       break;
8441   }
8442 
8443   // Find the operation
8444   NEONModifiedImmediateOp op;
8445   if (cmode_3 == 0) {
8446     if (cmode_0 == 0) {
8447       op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8448     } else {  // cmode<0> == '1'
8449       op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8450     }
8451   } else {  // cmode<3> == '1'
8452     if (cmode_2 == 0) {
8453       if (cmode_0 == 0) {
8454         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8455       } else {  // cmode<0> == '1'
8456         op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
8457       }
8458     } else {  // cmode<2> == '1'
8459       if (cmode_1 == 0) {
8460         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
8461       } else {  // cmode<1> == '1'
8462         if (cmode_0 == 0) {
8463           op = NEONModifiedImmediate_MOVI;
8464         } else {  // cmode<0> == '1'
8465           op = NEONModifiedImmediate_MOVI;
8466         }
8467       }
8468     }
8469   }
8470 
8471   // Call the logic function
8472   if (op == NEONModifiedImmediate_ORR) {
8473     orr(vform, rd, rd, imm);
8474   } else if (op == NEONModifiedImmediate_BIC) {
8475     bic(vform, rd, rd, imm);
8476   } else if (op == NEONModifiedImmediate_MOVI) {
8477     movi(vform, rd, imm);
8478   } else if (op == NEONModifiedImmediate_MVNI) {
8479     mvni(vform, rd, imm);
8480   } else {
8481     VisitUnimplemented(instr);
8482   }
8483 }
8484 
8485 
8486 void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
8487   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8488   VectorFormat vf = nfd.GetVectorFormat();
8489 
8490   SimVRegister& rd = ReadVRegister(instr->GetRd());
8491   SimVRegister& rn = ReadVRegister(instr->GetRn());
8492 
8493   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
8494     // These instructions all use a two-bit size field, except NOT and RBIT,
8495     // which use the field to encode the operation.
8496     switch (instr->Mask(NEONScalar2RegMiscMask)) {
8497       case NEON_CMEQ_zero_scalar:
8498         cmp(vf, rd, rn, 0, eq);
8499         break;
8500       case NEON_CMGE_zero_scalar:
8501         cmp(vf, rd, rn, 0, ge);
8502         break;
8503       case NEON_CMGT_zero_scalar:
8504         cmp(vf, rd, rn, 0, gt);
8505         break;
8506       case NEON_CMLT_zero_scalar:
8507         cmp(vf, rd, rn, 0, lt);
8508         break;
8509       case NEON_CMLE_zero_scalar:
8510         cmp(vf, rd, rn, 0, le);
8511         break;
8512       case NEON_ABS_scalar:
8513         abs(vf, rd, rn);
8514         break;
8515       case NEON_SQABS_scalar:
8516         abs(vf, rd, rn).SignedSaturate(vf);
8517         break;
8518       case NEON_NEG_scalar:
8519         neg(vf, rd, rn);
8520         break;
8521       case NEON_SQNEG_scalar:
8522         neg(vf, rd, rn).SignedSaturate(vf);
8523         break;
8524       case NEON_SUQADD_scalar:
8525         suqadd(vf, rd, rd, rn);
8526         break;
8527       case NEON_USQADD_scalar:
8528         usqadd(vf, rd, rd, rn);
8529         break;
8530       default:
8531         VIXL_UNIMPLEMENTED();
8532         break;
8533     }
8534   } else {
8535     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8536     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8537 
8538     // These instructions all use a one-bit size field, except SQXTUN, SQXTN
8539     // and UQXTN, which use a two-bit size field.
8540     switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
8541       case NEON_FRECPE_scalar:
8542         frecpe(fpf, rd, rn, fpcr_rounding);
8543         break;
8544       case NEON_FRECPX_scalar:
8545         frecpx(fpf, rd, rn);
8546         break;
8547       case NEON_FRSQRTE_scalar:
8548         frsqrte(fpf, rd, rn);
8549         break;
8550       case NEON_FCMGT_zero_scalar:
8551         fcmp_zero(fpf, rd, rn, gt);
8552         break;
8553       case NEON_FCMGE_zero_scalar:
8554         fcmp_zero(fpf, rd, rn, ge);
8555         break;
8556       case NEON_FCMEQ_zero_scalar:
8557         fcmp_zero(fpf, rd, rn, eq);
8558         break;
8559       case NEON_FCMLE_zero_scalar:
8560         fcmp_zero(fpf, rd, rn, le);
8561         break;
8562       case NEON_FCMLT_zero_scalar:
8563         fcmp_zero(fpf, rd, rn, lt);
8564         break;
8565       case NEON_SCVTF_scalar:
8566         scvtf(fpf, rd, rn, 0, fpcr_rounding);
8567         break;
8568       case NEON_UCVTF_scalar:
8569         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8570         break;
8571       case NEON_FCVTNS_scalar:
8572         fcvts(fpf, rd, rn, FPTieEven);
8573         break;
8574       case NEON_FCVTNU_scalar:
8575         fcvtu(fpf, rd, rn, FPTieEven);
8576         break;
8577       case NEON_FCVTPS_scalar:
8578         fcvts(fpf, rd, rn, FPPositiveInfinity);
8579         break;
8580       case NEON_FCVTPU_scalar:
8581         fcvtu(fpf, rd, rn, FPPositiveInfinity);
8582         break;
8583       case NEON_FCVTMS_scalar:
8584         fcvts(fpf, rd, rn, FPNegativeInfinity);
8585         break;
8586       case NEON_FCVTMU_scalar:
8587         fcvtu(fpf, rd, rn, FPNegativeInfinity);
8588         break;
8589       case NEON_FCVTZS_scalar:
8590         fcvts(fpf, rd, rn, FPZero);
8591         break;
8592       case NEON_FCVTZU_scalar:
8593         fcvtu(fpf, rd, rn, FPZero);
8594         break;
8595       case NEON_FCVTAS_scalar:
8596         fcvts(fpf, rd, rn, FPTieAway);
8597         break;
8598       case NEON_FCVTAU_scalar:
8599         fcvtu(fpf, rd, rn, FPTieAway);
8600         break;
8601       case NEON_FCVTXN_scalar:
8602         // Unlike all of the other FP instructions above, fcvtxn encodes dest
8603         // size S as size<0>=1. There's only one case, so we ignore the form.
8604         VIXL_ASSERT(instr->ExtractBit(22) == 1);
8605         fcvtxn(kFormatS, rd, rn);
8606         break;
8607       default:
8608         switch (instr->Mask(NEONScalar2RegMiscMask)) {
8609           case NEON_SQXTN_scalar:
8610             sqxtn(vf, rd, rn);
8611             break;
8612           case NEON_UQXTN_scalar:
8613             uqxtn(vf, rd, rn);
8614             break;
8615           case NEON_SQXTUN_scalar:
8616             sqxtun(vf, rd, rn);
8617             break;
8618           default:
8619             VIXL_UNIMPLEMENTED();
8620         }
8621     }
8622   }
8623 }
8624 
8625 
8626 void Simulator::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
8627   VectorFormat fpf = kFormatH;
8628   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
8629 
8630   SimVRegister& rd = ReadVRegister(instr->GetRd());
8631   SimVRegister& rn = ReadVRegister(instr->GetRn());
8632 
8633   switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
8634     case NEON_FRECPE_H_scalar:
8635       frecpe(fpf, rd, rn, fpcr_rounding);
8636       break;
8637     case NEON_FRECPX_H_scalar:
8638       frecpx(fpf, rd, rn);
8639       break;
8640     case NEON_FRSQRTE_H_scalar:
8641       frsqrte(fpf, rd, rn);
8642       break;
8643     case NEON_FCMGT_H_zero_scalar:
8644       fcmp_zero(fpf, rd, rn, gt);
8645       break;
8646     case NEON_FCMGE_H_zero_scalar:
8647       fcmp_zero(fpf, rd, rn, ge);
8648       break;
8649     case NEON_FCMEQ_H_zero_scalar:
8650       fcmp_zero(fpf, rd, rn, eq);
8651       break;
8652     case NEON_FCMLE_H_zero_scalar:
8653       fcmp_zero(fpf, rd, rn, le);
8654       break;
8655     case NEON_FCMLT_H_zero_scalar:
8656       fcmp_zero(fpf, rd, rn, lt);
8657       break;
8658     case NEON_SCVTF_H_scalar:
8659       scvtf(fpf, rd, rn, 0, fpcr_rounding);
8660       break;
8661     case NEON_UCVTF_H_scalar:
8662       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
8663       break;
8664     case NEON_FCVTNS_H_scalar:
8665       fcvts(fpf, rd, rn, FPTieEven);
8666       break;
8667     case NEON_FCVTNU_H_scalar:
8668       fcvtu(fpf, rd, rn, FPTieEven);
8669       break;
8670     case NEON_FCVTPS_H_scalar:
8671       fcvts(fpf, rd, rn, FPPositiveInfinity);
8672       break;
8673     case NEON_FCVTPU_H_scalar:
8674       fcvtu(fpf, rd, rn, FPPositiveInfinity);
8675       break;
8676     case NEON_FCVTMS_H_scalar:
8677       fcvts(fpf, rd, rn, FPNegativeInfinity);
8678       break;
8679     case NEON_FCVTMU_H_scalar:
8680       fcvtu(fpf, rd, rn, FPNegativeInfinity);
8681       break;
8682     case NEON_FCVTZS_H_scalar:
8683       fcvts(fpf, rd, rn, FPZero);
8684       break;
8685     case NEON_FCVTZU_H_scalar:
8686       fcvtu(fpf, rd, rn, FPZero);
8687       break;
8688     case NEON_FCVTAS_H_scalar:
8689       fcvts(fpf, rd, rn, FPTieAway);
8690       break;
8691     case NEON_FCVTAU_H_scalar:
8692       fcvtu(fpf, rd, rn, FPTieAway);
8693       break;
8694   }
8695 }
8696 
8697 
8698 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
8699   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
8700   VectorFormat vf = nfd.GetVectorFormat();
8701 
8702   SimVRegister& rd = ReadVRegister(instr->GetRd());
8703   SimVRegister& rn = ReadVRegister(instr->GetRn());
8704   SimVRegister& rm = ReadVRegister(instr->GetRm());
8705   switch (instr->Mask(NEONScalar3DiffMask)) {
8706     case NEON_SQDMLAL_scalar:
8707       sqdmlal(vf, rd, rn, rm);
8708       break;
8709     case NEON_SQDMLSL_scalar:
8710       sqdmlsl(vf, rd, rn, rm);
8711       break;
8712     case NEON_SQDMULL_scalar:
8713       sqdmull(vf, rd, rn, rm);
8714       break;
8715     default:
8716       VIXL_UNIMPLEMENTED();
8717   }
8718 }
8719 
8720 
8721 void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
8722   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8723   VectorFormat vf = nfd.GetVectorFormat();
8724 
8725   SimVRegister& rd = ReadVRegister(instr->GetRd());
8726   SimVRegister& rn = ReadVRegister(instr->GetRn());
8727   SimVRegister& rm = ReadVRegister(instr->GetRm());
8728 
8729   if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
8730     vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
8731     switch (instr->Mask(NEONScalar3SameFPMask)) {
8732       case NEON_FMULX_scalar:
8733         fmulx(vf, rd, rn, rm);
8734         break;
8735       case NEON_FACGE_scalar:
8736         fabscmp(vf, rd, rn, rm, ge);
8737         break;
8738       case NEON_FACGT_scalar:
8739         fabscmp(vf, rd, rn, rm, gt);
8740         break;
8741       case NEON_FCMEQ_scalar:
8742         fcmp(vf, rd, rn, rm, eq);
8743         break;
8744       case NEON_FCMGE_scalar:
8745         fcmp(vf, rd, rn, rm, ge);
8746         break;
8747       case NEON_FCMGT_scalar:
8748         fcmp(vf, rd, rn, rm, gt);
8749         break;
8750       case NEON_FRECPS_scalar:
8751         frecps(vf, rd, rn, rm);
8752         break;
8753       case NEON_FRSQRTS_scalar:
8754         frsqrts(vf, rd, rn, rm);
8755         break;
8756       case NEON_FABD_scalar:
8757         fabd(vf, rd, rn, rm);
8758         break;
8759       default:
8760         VIXL_UNIMPLEMENTED();
8761     }
8762   } else {
8763     switch (instr->Mask(NEONScalar3SameMask)) {
8764       case NEON_ADD_scalar:
8765         add(vf, rd, rn, rm);
8766         break;
8767       case NEON_SUB_scalar:
8768         sub(vf, rd, rn, rm);
8769         break;
8770       case NEON_CMEQ_scalar:
8771         cmp(vf, rd, rn, rm, eq);
8772         break;
8773       case NEON_CMGE_scalar:
8774         cmp(vf, rd, rn, rm, ge);
8775         break;
8776       case NEON_CMGT_scalar:
8777         cmp(vf, rd, rn, rm, gt);
8778         break;
8779       case NEON_CMHI_scalar:
8780         cmp(vf, rd, rn, rm, hi);
8781         break;
8782       case NEON_CMHS_scalar:
8783         cmp(vf, rd, rn, rm, hs);
8784         break;
8785       case NEON_CMTST_scalar:
8786         cmptst(vf, rd, rn, rm);
8787         break;
8788       case NEON_USHL_scalar:
8789         ushl(vf, rd, rn, rm);
8790         break;
8791       case NEON_SSHL_scalar:
8792         sshl(vf, rd, rn, rm);
8793         break;
8794       case NEON_SQDMULH_scalar:
8795         sqdmulh(vf, rd, rn, rm);
8796         break;
8797       case NEON_SQRDMULH_scalar:
8798         sqrdmulh(vf, rd, rn, rm);
8799         break;
8800       case NEON_UQADD_scalar:
8801         add(vf, rd, rn, rm).UnsignedSaturate(vf);
8802         break;
8803       case NEON_SQADD_scalar:
8804         add(vf, rd, rn, rm).SignedSaturate(vf);
8805         break;
8806       case NEON_UQSUB_scalar:
8807         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
8808         break;
8809       case NEON_SQSUB_scalar:
8810         sub(vf, rd, rn, rm).SignedSaturate(vf);
8811         break;
8812       case NEON_UQSHL_scalar:
8813         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
8814         break;
8815       case NEON_SQSHL_scalar:
8816         sshl(vf, rd, rn, rm).SignedSaturate(vf);
8817         break;
8818       case NEON_URSHL_scalar:
8819         ushl(vf, rd, rn, rm).Round(vf);
8820         break;
8821       case NEON_SRSHL_scalar:
8822         sshl(vf, rd, rn, rm).Round(vf);
8823         break;
8824       case NEON_UQRSHL_scalar:
8825         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
8826         break;
8827       case NEON_SQRSHL_scalar:
8828         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
8829         break;
8830       default:
8831         VIXL_UNIMPLEMENTED();
8832     }
8833   }
8834 }
8835 
8836 void Simulator::VisitNEONScalar3SameFP16(const Instruction* instr) {
8837   SimVRegister& rd = ReadVRegister(instr->GetRd());
8838   SimVRegister& rn = ReadVRegister(instr->GetRn());
8839   SimVRegister& rm = ReadVRegister(instr->GetRm());
8840 
8841   switch (instr->Mask(NEONScalar3SameFP16Mask)) {
8842     case NEON_FABD_H_scalar:
8843       fabd(kFormatH, rd, rn, rm);
8844       break;
8845     case NEON_FMULX_H_scalar:
8846       fmulx(kFormatH, rd, rn, rm);
8847       break;
8848     case NEON_FCMEQ_H_scalar:
8849       fcmp(kFormatH, rd, rn, rm, eq);
8850       break;
8851     case NEON_FCMGE_H_scalar:
8852       fcmp(kFormatH, rd, rn, rm, ge);
8853       break;
8854     case NEON_FCMGT_H_scalar:
8855       fcmp(kFormatH, rd, rn, rm, gt);
8856       break;
8857     case NEON_FACGE_H_scalar:
8858       fabscmp(kFormatH, rd, rn, rm, ge);
8859       break;
8860     case NEON_FACGT_H_scalar:
8861       fabscmp(kFormatH, rd, rn, rm, gt);
8862       break;
8863     case NEON_FRECPS_H_scalar:
8864       frecps(kFormatH, rd, rn, rm);
8865       break;
8866     case NEON_FRSQRTS_H_scalar:
8867       frsqrts(kFormatH, rd, rn, rm);
8868       break;
8869     default:
8870       VIXL_UNREACHABLE();
8871   }
8872 }
8873 
8874 
8875 void Simulator::VisitNEONScalar3SameExtra(const Instruction* instr) {
8876   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
8877   VectorFormat vf = nfd.GetVectorFormat();
8878 
8879   SimVRegister& rd = ReadVRegister(instr->GetRd());
8880   SimVRegister& rn = ReadVRegister(instr->GetRn());
8881   SimVRegister& rm = ReadVRegister(instr->GetRm());
8882 
8883   switch (instr->Mask(NEONScalar3SameExtraMask)) {
8884     case NEON_SQRDMLAH_scalar:
8885       sqrdmlah(vf, rd, rn, rm);
8886       break;
8887     case NEON_SQRDMLSH_scalar:
8888       sqrdmlsh(vf, rd, rn, rm);
8889       break;
8890     default:
8891       VIXL_UNIMPLEMENTED();
8892   }
8893 }
8894 
8895 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
8896   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
8897   VectorFormat vf = nfd.GetVectorFormat();
8898   VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
8899 
8900   SimVRegister& rd = ReadVRegister(instr->GetRd());
8901   SimVRegister& rn = ReadVRegister(instr->GetRn());
8902   ByElementOp Op = NULL;
8903 
8904   int rm_reg = instr->GetRm();
8905   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
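  // As with the vector by-element forms, H-sized elements restrict Rm to
  // V0-V15 and use the M bit as an extra index bit.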
8906   if (instr->GetNEONSize() == 1) {
8907     rm_reg &= 0xf;
8908     index = (index << 1) | instr->GetNEONM();
8909   }
8910 
8911   switch (instr->Mask(NEONScalarByIndexedElementMask)) {
8912     case NEON_SQDMULL_byelement_scalar:
8913       Op = &Simulator::sqdmull;
8914       break;
8915     case NEON_SQDMLAL_byelement_scalar:
8916       Op = &Simulator::sqdmlal;
8917       break;
8918     case NEON_SQDMLSL_byelement_scalar:
8919       Op = &Simulator::sqdmlsl;
8920       break;
8921     case NEON_SQDMULH_byelement_scalar:
8922       Op = &Simulator::sqdmulh;
8923       vf = vf_r;
8924       break;
8925     case NEON_SQRDMULH_byelement_scalar:
8926       Op = &Simulator::sqrdmulh;
8927       vf = vf_r;
8928       break;
8929     case NEON_SQRDMLAH_byelement_scalar:
8930       Op = &Simulator::sqrdmlah;
8931       vf = vf_r;
8932       break;
8933     case NEON_SQRDMLSH_byelement_scalar:
8934       Op = &Simulator::sqrdmlsh;
8935       vf = vf_r;
8936       break;
8937     default:
8938       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
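      // The FP by-element forms re-derive the index: H elements use H:L:M
      // (and restrict Rm to V0-V15), S elements use H:L, and D elements use
      // H only.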
8939       index = instr->GetNEONH();
8940       if (instr->GetFPType() == 0) {
8941         index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
8942         rm_reg &= 0xf;
8943         vf = kFormatH;
8944       } else if ((instr->GetFPType() & 1) == 0) {
8945         index = (index << 1) | instr->GetNEONL();
8946       }
8947       switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
8948         case NEON_FMUL_H_byelement_scalar:
8949         case NEON_FMUL_byelement_scalar:
8950           Op = &Simulator::fmul;
8951           break;
8952         case NEON_FMLA_H_byelement_scalar:
8953         case NEON_FMLA_byelement_scalar:
8954           Op = &Simulator::fmla;
8955           break;
8956         case NEON_FMLS_H_byelement_scalar:
8957         case NEON_FMLS_byelement_scalar:
8958           Op = &Simulator::fmls;
8959           break;
8960         case NEON_FMULX_H_byelement_scalar:
8961         case NEON_FMULX_byelement_scalar:
8962           Op = &Simulator::fmulx;
8963           break;
8964         default:
8965           VIXL_UNIMPLEMENTED();
8966       }
8967   }
8968 
8969   (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
8970 }
8971 
8972 
8973 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
8974   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
8975   VectorFormat vf = nfd.GetVectorFormat();
8976 
8977   SimVRegister& rd = ReadVRegister(instr->GetRd());
8978   SimVRegister& rn = ReadVRegister(instr->GetRn());
8979 
8980   if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
8981     int imm5 = instr->GetImmNEON5();
8982     int tz = CountTrailingZeros(imm5, 32);
8983     int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5);
8984     dup_element(vf, rd, rn, rn_index);
8985   } else {
8986     VIXL_UNIMPLEMENTED();
8987   }
8988 }
8989 
8990 
8991 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
8992   NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarPairwiseFormatMap());
8993   VectorFormat vf = nfd.GetVectorFormat();
8994 
8995   SimVRegister& rd = ReadVRegister(instr->GetRd());
8996   SimVRegister& rn = ReadVRegister(instr->GetRn());
8997   switch (instr->Mask(NEONScalarPairwiseMask)) {
8998     case NEON_ADDP_scalar: {
8999       // All pairwise operations except ADDP use bit U to differentiate FP16
9000       // from FP32/FP64 variations.
9001       NEONFormatDecoder nfd_addp(instr, NEONFormatDecoder::FPScalarFormatMap());
9002       addp(nfd_addp.GetVectorFormat(), rd, rn);
9003       break;
9004     }
9005     case NEON_FADDP_h_scalar:
9006     case NEON_FADDP_scalar:
9007       faddp(vf, rd, rn);
9008       break;
9009     case NEON_FMAXP_h_scalar:
9010     case NEON_FMAXP_scalar:
9011       fmaxp(vf, rd, rn);
9012       break;
9013     case NEON_FMAXNMP_h_scalar:
9014     case NEON_FMAXNMP_scalar:
9015       fmaxnmp(vf, rd, rn);
9016       break;
9017     case NEON_FMINP_h_scalar:
9018     case NEON_FMINP_scalar:
9019       fminp(vf, rd, rn);
9020       break;
9021     case NEON_FMINNMP_h_scalar:
9022     case NEON_FMINNMP_scalar:
9023       fminnmp(vf, rd, rn);
9024       break;
9025     default:
9026       VIXL_UNIMPLEMENTED();
9027   }
9028 }
9029 
9030 
9031 void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
9032   SimVRegister& rd = ReadVRegister(instr->GetRd());
9033   SimVRegister& rn = ReadVRegister(instr->GetRn());
9034   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9035 
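  // immh<3:0>: 0001 selects B, 001x selects H, 01xx selects S and 1xxx
  // selects D; immh == 0000 is undefined here.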
9036   static const NEONFormatMap map = {{22, 21, 20, 19},
9037                                     {NF_UNDEF,
9038                                      NF_B,
9039                                      NF_H,
9040                                      NF_H,
9041                                      NF_S,
9042                                      NF_S,
9043                                      NF_S,
9044                                      NF_S,
9045                                      NF_D,
9046                                      NF_D,
9047                                      NF_D,
9048                                      NF_D,
9049                                      NF_D,
9050                                      NF_D,
9051                                      NF_D,
9052                                      NF_D}};
9053   NEONFormatDecoder nfd(instr, &map);
9054   VectorFormat vf = nfd.GetVectorFormat();
9055 
9056   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9057   int immh_immb = instr->GetImmNEONImmhImmb();
9058   int right_shift = (16 << highest_set_bit) - immh_immb;
9059   int left_shift = immh_immb - (8 << highest_set_bit);
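  // Right shifts are encoded as (2 * element_size) - shift and left shifts as
  // element_size + shift, with element_size in bits. For example, a B-sized
  // right shift by 3 has immh:immb = 16 - 3 = 0b0001101.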
9060   switch (instr->Mask(NEONScalarShiftImmediateMask)) {
9061     case NEON_SHL_scalar:
9062       shl(vf, rd, rn, left_shift);
9063       break;
9064     case NEON_SLI_scalar:
9065       sli(vf, rd, rn, left_shift);
9066       break;
9067     case NEON_SQSHL_imm_scalar:
9068       sqshl(vf, rd, rn, left_shift);
9069       break;
9070     case NEON_UQSHL_imm_scalar:
9071       uqshl(vf, rd, rn, left_shift);
9072       break;
9073     case NEON_SQSHLU_scalar:
9074       sqshlu(vf, rd, rn, left_shift);
9075       break;
9076     case NEON_SRI_scalar:
9077       sri(vf, rd, rn, right_shift);
9078       break;
9079     case NEON_SSHR_scalar:
9080       sshr(vf, rd, rn, right_shift);
9081       break;
9082     case NEON_USHR_scalar:
9083       ushr(vf, rd, rn, right_shift);
9084       break;
9085     case NEON_SRSHR_scalar:
9086       sshr(vf, rd, rn, right_shift).Round(vf);
9087       break;
9088     case NEON_URSHR_scalar:
9089       ushr(vf, rd, rn, right_shift).Round(vf);
9090       break;
9091     case NEON_SSRA_scalar:
9092       ssra(vf, rd, rn, right_shift);
9093       break;
9094     case NEON_USRA_scalar:
9095       usra(vf, rd, rn, right_shift);
9096       break;
9097     case NEON_SRSRA_scalar:
9098       srsra(vf, rd, rn, right_shift);
9099       break;
9100     case NEON_URSRA_scalar:
9101       ursra(vf, rd, rn, right_shift);
9102       break;
9103     case NEON_UQSHRN_scalar:
9104       uqshrn(vf, rd, rn, right_shift);
9105       break;
9106     case NEON_UQRSHRN_scalar:
9107       uqrshrn(vf, rd, rn, right_shift);
9108       break;
9109     case NEON_SQSHRN_scalar:
9110       sqshrn(vf, rd, rn, right_shift);
9111       break;
9112     case NEON_SQRSHRN_scalar:
9113       sqrshrn(vf, rd, rn, right_shift);
9114       break;
9115     case NEON_SQSHRUN_scalar:
9116       sqshrun(vf, rd, rn, right_shift);
9117       break;
9118     case NEON_SQRSHRUN_scalar:
9119       sqrshrun(vf, rd, rn, right_shift);
9120       break;
9121     case NEON_FCVTZS_imm_scalar:
9122       fcvts(vf, rd, rn, FPZero, right_shift);
9123       break;
9124     case NEON_FCVTZU_imm_scalar:
9125       fcvtu(vf, rd, rn, FPZero, right_shift);
9126       break;
9127     case NEON_SCVTF_imm_scalar:
9128       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9129       break;
9130     case NEON_UCVTF_imm_scalar:
9131       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9132       break;
9133     default:
9134       VIXL_UNIMPLEMENTED();
9135   }
9136 }
9137 
9138 
9139 void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
9140   SimVRegister& rd = ReadVRegister(instr->GetRd());
9141   SimVRegister& rn = ReadVRegister(instr->GetRn());
9142   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
9143 
9144   // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
9145   // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
9146   static const NEONFormatMap map = {{22, 21, 20, 19, 30},
9147                                     {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B,
9148                                      NF_4H,    NF_8H,    NF_4H,    NF_8H,
9149                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9150                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
9151                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9152                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9153                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
9154                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D}};
9155   NEONFormatDecoder nfd(instr, &map);
9156   VectorFormat vf = nfd.GetVectorFormat();
9157 
9158   // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
9159   static const NEONFormatMap map_l =
9160       {{22, 21, 20, 19},
9161        {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
9162   VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
9163 
9164   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
9165   int immh_immb = instr->GetImmNEONImmhImmb();
9166   int right_shift = (16 << highest_set_bit) - immh_immb;
9167   int left_shift = immh_immb - (8 << highest_set_bit);
9168 
9169   switch (instr->Mask(NEONShiftImmediateMask)) {
9170     case NEON_SHL:
9171       shl(vf, rd, rn, left_shift);
9172       break;
9173     case NEON_SLI:
9174       sli(vf, rd, rn, left_shift);
9175       break;
9176     case NEON_SQSHLU:
9177       sqshlu(vf, rd, rn, left_shift);
9178       break;
9179     case NEON_SRI:
9180       sri(vf, rd, rn, right_shift);
9181       break;
9182     case NEON_SSHR:
9183       sshr(vf, rd, rn, right_shift);
9184       break;
9185     case NEON_USHR:
9186       ushr(vf, rd, rn, right_shift);
9187       break;
9188     case NEON_SRSHR:
9189       sshr(vf, rd, rn, right_shift).Round(vf);
9190       break;
9191     case NEON_URSHR:
9192       ushr(vf, rd, rn, right_shift).Round(vf);
9193       break;
9194     case NEON_SSRA:
9195       ssra(vf, rd, rn, right_shift);
9196       break;
9197     case NEON_USRA:
9198       usra(vf, rd, rn, right_shift);
9199       break;
9200     case NEON_SRSRA:
9201       srsra(vf, rd, rn, right_shift);
9202       break;
9203     case NEON_URSRA:
9204       ursra(vf, rd, rn, right_shift);
9205       break;
9206     case NEON_SQSHL_imm:
9207       sqshl(vf, rd, rn, left_shift);
9208       break;
9209     case NEON_UQSHL_imm:
9210       uqshl(vf, rd, rn, left_shift);
9211       break;
9212     case NEON_SCVTF_imm:
9213       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
9214       break;
9215     case NEON_UCVTF_imm:
9216       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
9217       break;
9218     case NEON_FCVTZS_imm:
9219       fcvts(vf, rd, rn, FPZero, right_shift);
9220       break;
9221     case NEON_FCVTZU_imm:
9222       fcvtu(vf, rd, rn, FPZero, right_shift);
9223       break;
9224     case NEON_SSHLL:
9225       vf = vf_l;
9226       if (instr->Mask(NEON_Q)) {
9227         sshll2(vf, rd, rn, left_shift);
9228       } else {
9229         sshll(vf, rd, rn, left_shift);
9230       }
9231       break;
9232     case NEON_USHLL:
9233       vf = vf_l;
9234       if (instr->Mask(NEON_Q)) {
9235         ushll2(vf, rd, rn, left_shift);
9236       } else {
9237         ushll(vf, rd, rn, left_shift);
9238       }
9239       break;
9240     case NEON_SHRN:
9241       if (instr->Mask(NEON_Q)) {
9242         shrn2(vf, rd, rn, right_shift);
9243       } else {
9244         shrn(vf, rd, rn, right_shift);
9245       }
9246       break;
9247     case NEON_RSHRN:
9248       if (instr->Mask(NEON_Q)) {
9249         rshrn2(vf, rd, rn, right_shift);
9250       } else {
9251         rshrn(vf, rd, rn, right_shift);
9252       }
9253       break;
9254     case NEON_UQSHRN:
9255       if (instr->Mask(NEON_Q)) {
9256         uqshrn2(vf, rd, rn, right_shift);
9257       } else {
9258         uqshrn(vf, rd, rn, right_shift);
9259       }
9260       break;
9261     case NEON_UQRSHRN:
9262       if (instr->Mask(NEON_Q)) {
9263         uqrshrn2(vf, rd, rn, right_shift);
9264       } else {
9265         uqrshrn(vf, rd, rn, right_shift);
9266       }
9267       break;
9268     case NEON_SQSHRN:
9269       if (instr->Mask(NEON_Q)) {
9270         sqshrn2(vf, rd, rn, right_shift);
9271       } else {
9272         sqshrn(vf, rd, rn, right_shift);
9273       }
9274       break;
9275     case NEON_SQRSHRN:
9276       if (instr->Mask(NEON_Q)) {
9277         sqrshrn2(vf, rd, rn, right_shift);
9278       } else {
9279         sqrshrn(vf, rd, rn, right_shift);
9280       }
9281       break;
9282     case NEON_SQSHRUN:
9283       if (instr->Mask(NEON_Q)) {
9284         sqshrun2(vf, rd, rn, right_shift);
9285       } else {
9286         sqshrun(vf, rd, rn, right_shift);
9287       }
9288       break;
9289     case NEON_SQRSHRUN:
9290       if (instr->Mask(NEON_Q)) {
9291         sqrshrun2(vf, rd, rn, right_shift);
9292       } else {
9293         sqrshrun(vf, rd, rn, right_shift);
9294       }
9295       break;
9296     default:
9297       VIXL_UNIMPLEMENTED();
9298   }
9299 }
9300 
9301 
9302 void Simulator::VisitNEONTable(const Instruction* instr) {
9303   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
9304   VectorFormat vf = nfd.GetVectorFormat();
9305 
9306   SimVRegister& rd = ReadVRegister(instr->GetRd());
9307   SimVRegister& rn = ReadVRegister(instr->GetRn());
9308   SimVRegister& rn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfVRegisters);
9309   SimVRegister& rn3 = ReadVRegister((instr->GetRn() + 2) % kNumberOfVRegisters);
9310   SimVRegister& rn4 = ReadVRegister((instr->GetRn() + 3) % kNumberOfVRegisters);
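  // TBL and TBX use a list of one to four consecutive table registers
  // starting at Vn; the list wraps around from V31 to V0, hence the modulo
  // arithmetic above.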
9311   SimVRegister& rm = ReadVRegister(instr->GetRm());
9312 
9313   switch (instr->Mask(NEONTableMask)) {
9314     case NEON_TBL_1v:
9315       tbl(vf, rd, rn, rm);
9316       break;
9317     case NEON_TBL_2v:
9318       tbl(vf, rd, rn, rn2, rm);
9319       break;
9320     case NEON_TBL_3v:
9321       tbl(vf, rd, rn, rn2, rn3, rm);
9322       break;
9323     case NEON_TBL_4v:
9324       tbl(vf, rd, rn, rn2, rn3, rn4, rm);
9325       break;
9326     case NEON_TBX_1v:
9327       tbx(vf, rd, rn, rm);
9328       break;
9329     case NEON_TBX_2v:
9330       tbx(vf, rd, rn, rn2, rm);
9331       break;
9332     case NEON_TBX_3v:
9333       tbx(vf, rd, rn, rn2, rn3, rm);
9334       break;
9335     case NEON_TBX_4v:
9336       tbx(vf, rd, rn, rn2, rn3, rn4, rm);
9337       break;
9338     default:
9339       VIXL_UNIMPLEMENTED();
9340   }
9341 }
9342 
9343 
9344 void Simulator::VisitNEONPerm(const Instruction* instr) {
9345   NEONFormatDecoder nfd(instr);
9346   VectorFormat vf = nfd.GetVectorFormat();
9347 
9348   SimVRegister& rd = ReadVRegister(instr->GetRd());
9349   SimVRegister& rn = ReadVRegister(instr->GetRn());
9350   SimVRegister& rm = ReadVRegister(instr->GetRm());
9351 
9352   switch (instr->Mask(NEONPermMask)) {
9353     case NEON_TRN1:
9354       trn1(vf, rd, rn, rm);
9355       break;
9356     case NEON_TRN2:
9357       trn2(vf, rd, rn, rm);
9358       break;
9359     case NEON_UZP1:
9360       uzp1(vf, rd, rn, rm);
9361       break;
9362     case NEON_UZP2:
9363       uzp2(vf, rd, rn, rm);
9364       break;
9365     case NEON_ZIP1:
9366       zip1(vf, rd, rn, rm);
9367       break;
9368     case NEON_ZIP2:
9369       zip2(vf, rd, rn, rm);
9370       break;
9371     default:
9372       VIXL_UNIMPLEMENTED();
9373   }
9374 }
9375 
9376 void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
9377   SimVRegister& zd = ReadVRegister(instr->GetRd());
9378   SimVRegister& zn = ReadVRegister(instr->GetRn());
9379   SimVRegister& zm = ReadVRegister(instr->GetRm());
9380   SimVRegister temp;
9381 
9382   VectorFormat vform = kFormatVnD;
9383   mov(vform, temp, zm);
9384 
9385   switch (instr->Mask(SVEAddressGenerationMask)) {
9386     case ADR_z_az_d_s32_scaled:
9387       sxt(vform, temp, temp, kSRegSize);
9388       break;
9389     case ADR_z_az_d_u32_scaled:
9390       uxt(vform, temp, temp, kSRegSize);
9391       break;
9392     case ADR_z_az_s_same_scaled:
9393       vform = kFormatVnS;
9394       break;
9395     case ADR_z_az_d_same_scaled:
9396       // Nothing to do.
9397       break;
9398     default:
9399       VIXL_UNIMPLEMENTED();
9400       break;
9401   }
9402 
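  // The result is zd = zn + (offset << shift), where the offset lanes come
  // from zm (sign- or zero-extended from 32 bits where required, as handled
  // above) and the shift amount is the two-bit field extracted below.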
9403   int shift_amount = instr->ExtractBits(11, 10);
9404   shl(vform, temp, temp, shift_amount);
9405   add(vform, zd, zn, temp);
9406 }
9407 
9408 void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
9409     const Instruction* instr) {
9410   Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
9411   switch (op) {
9412     case AND_z_zi:
9413     case EOR_z_zi:
9414     case ORR_z_zi: {
9415       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9416       uint64_t imm = instr->GetSVEImmLogical();
9417       // A valid logical immediate always has at least one bit set.

9418       VIXL_ASSERT(imm != 0);
9419       SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
9420                               op),
9421                           SVEFormatFromLaneSizeInBytesLog2(lane_size),
9422                           ReadVRegister(instr->GetRd()),
9423                           imm);
9424       break;
9425     }
9426     default:
9427       VIXL_UNIMPLEMENTED();
9428       break;
9429   }
9430 }
9431 
9432 void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
9433   switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
9434     case DUPM_z_i: {
9435       /* DUPM uses the same lane size and immediate encoding as bitwise logical
9436        * immediate instructions. */
9437       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
9438       uint64_t imm = instr->GetSVEImmLogical();
9439       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9440       dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
9441       break;
9442     }
9443     default:
9444       VIXL_UNIMPLEMENTED();
9445       break;
9446   }
9447 }
9448 
9449 void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
9450   SimVRegister& zd = ReadVRegister(instr->GetRd());
9451   SimVRegister& zn = ReadVRegister(instr->GetRn());
9452   SimVRegister& zm = ReadVRegister(instr->GetRm());
9453   Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
9454 
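  // LogicalOpMask is not a valid LogicalOp; it is used here only as an
  // "unset" marker that the switch below must overwrite.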
9455   LogicalOp logical_op = LogicalOpMask;
9456   switch (op) {
9457     case AND_z_zz:
9458       logical_op = AND;
9459       break;
9460     case BIC_z_zz:
9461       logical_op = BIC;
9462       break;
9463     case EOR_z_zz:
9464       logical_op = EOR;
9465       break;
9466     case ORR_z_zz:
9467       logical_op = ORR;
9468       break;
9469     default:
9470       VIXL_UNIMPLEMENTED();
9471       break;
9472   }
9473   // Lane size of registers is irrelevant to the bitwise operations, so perform
9474   // the operation on D-sized lanes.
9475   SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
9476 }
9477 
9478 void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
9479   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9480   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9481 
9482   SimVRegister scratch;
9483   SimVRegister result;
9484 
9485   bool for_division = false;
9486   Shift shift_op = NO_SHIFT;
9487   switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
9488     case ASRD_z_p_zi:
9489       shift_op = ASR;
9490       for_division = true;
9491       break;
9492     case ASR_z_p_zi:
9493       shift_op = ASR;
9494       break;
9495     case LSL_z_p_zi:
9496       shift_op = LSL;
9497       break;
9498     case LSR_z_p_zi:
9499       shift_op = LSR;
9500       break;
9501     default:
9502       VIXL_UNIMPLEMENTED();
9503       break;
9504   }
9505 
9506   std::pair<int, int> shift_and_lane_size =
9507       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
9508   unsigned lane_size = shift_and_lane_size.second;
9509   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9510   int shift_dist = shift_and_lane_size.first;
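  // Note: assuming GetSVEImmShiftAndLaneSizeLog2() returns the distance using
  // the right-shift encoding (2 * esize - imm), the LSL distance (imm - esize)
  // is recovered below as esize - shift_dist.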
9511 
9512   if ((shift_op == ASR) && for_division) {
9513     asrd(vform, result, zdn, shift_dist);
9514   } else {
9515     if (shift_op == LSL) {
9516       // Shift distance is computed differently for LSL. Convert the result.
9517       shift_dist = (8 << lane_size) - shift_dist;
9518     }
9519     dup_immediate(vform, scratch, shift_dist);
9520     SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false);
9521   }
9522   mov_merging(vform, zdn, pg, result);
9523 }
9524 
9525 void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
9526     const Instruction* instr) {
9527   VectorFormat vform = instr->GetSVEVectorFormat();
9528   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9529   SimVRegister& zm = ReadVRegister(instr->GetRn());
9530   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9531   SimVRegister result;
9532 
9533   // SVE uses the whole (saturated) lane for the shift amount.
9534   bool shift_in_ls_byte = false;
9535 
9536   switch (form_hash_) {
9537     case Hash("asrr_z_p_zz"):
9538       sshr(vform, result, zm, zdn);
9539       break;
9540     case Hash("asr_z_p_zz"):
9541       sshr(vform, result, zdn, zm);
9542       break;
9543     case Hash("lslr_z_p_zz"):
9544       sshl(vform, result, zm, zdn, shift_in_ls_byte);
9545       break;
9546     case Hash("lsl_z_p_zz"):
9547       sshl(vform, result, zdn, zm, shift_in_ls_byte);
9548       break;
9549     case Hash("lsrr_z_p_zz"):
9550       ushr(vform, result, zm, zdn);
9551       break;
9552     case Hash("lsr_z_p_zz"):
9553       ushr(vform, result, zdn, zm);
9554       break;
9555     case Hash("sqrshl_z_p_zz"):
9556       sshl(vform, result, zdn, zm, shift_in_ls_byte)
9557           .Round(vform)
9558           .SignedSaturate(vform);
9559       break;
9560     case Hash("sqrshlr_z_p_zz"):
9561       sshl(vform, result, zm, zdn, shift_in_ls_byte)
9562           .Round(vform)
9563           .SignedSaturate(vform);
9564       break;
9565     case Hash("sqshl_z_p_zz"):
9566       sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform);
9567       break;
9568     case Hash("sqshlr_z_p_zz"):
9569       sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform);
9570       break;
9571     case Hash("srshl_z_p_zz"):
9572       sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9573       break;
9574     case Hash("srshlr_z_p_zz"):
9575       sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9576       break;
9577     case Hash("uqrshl_z_p_zz"):
9578       ushl(vform, result, zdn, zm, shift_in_ls_byte)
9579           .Round(vform)
9580           .UnsignedSaturate(vform);
9581       break;
9582     case Hash("uqrshlr_z_p_zz"):
9583       ushl(vform, result, zm, zdn, shift_in_ls_byte)
9584           .Round(vform)
9585           .UnsignedSaturate(vform);
9586       break;
9587     case Hash("uqshl_z_p_zz"):
9588       ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform);
9589       break;
9590     case Hash("uqshlr_z_p_zz"):
9591       ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform);
9592       break;
9593     case Hash("urshl_z_p_zz"):
9594       ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
9595       break;
9596     case Hash("urshlr_z_p_zz"):
9597       ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
9598       break;
9599     default:
9600       VIXL_UNIMPLEMENTED();
9601       break;
9602   }
9603   mov_merging(vform, zdn, pg, result);
9604 }
9605 
9606 void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated(
9607     const Instruction* instr) {
9608   VectorFormat vform = instr->GetSVEVectorFormat();
9609   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9610   SimVRegister& zm = ReadVRegister(instr->GetRn());
9611   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9612 
9613   SimVRegister result;
9614   Shift shift_op = ASR;
9615 
9616   switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
9617     case ASR_z_p_zw:
9618       break;
9619     case LSL_z_p_zw:
9620       shift_op = LSL;
9621       break;
9622     case LSR_z_p_zw:
9623       shift_op = LSR;
9624       break;
9625     default:
9626       VIXL_UNIMPLEMENTED();
9627       break;
9628   }
9629   SVEBitwiseShiftHelper(shift_op,
9630                         vform,
9631                         result,
9632                         zdn,
9633                         zm,
9634                         /* is_wide_elements = */ true);
9635   mov_merging(vform, zdn, pg, result);
9636 }
9637 
9638 void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
9639   SimVRegister& zd = ReadVRegister(instr->GetRd());
9640   SimVRegister& zn = ReadVRegister(instr->GetRn());
9641 
9642   Shift shift_op = NO_SHIFT;
9643   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9644     case ASR_z_zi:
9645     case ASR_z_zw:
9646       shift_op = ASR;
9647       break;
9648     case LSL_z_zi:
9649     case LSL_z_zw:
9650       shift_op = LSL;
9651       break;
9652     case LSR_z_zi:
9653     case LSR_z_zw:
9654       shift_op = LSR;
9655       break;
9656     default:
9657       VIXL_UNIMPLEMENTED();
9658       break;
9659   }
9660 
9661   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
9662     case ASR_z_zi:
9663     case LSL_z_zi:
9664     case LSR_z_zi: {
9665       SimVRegister scratch;
9666       std::pair<int, int> shift_and_lane_size =
9667           instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
9668       unsigned lane_size = shift_and_lane_size.second;
9669       VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
9670       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
9671       int shift_dist = shift_and_lane_size.first;
9672       if (shift_op == LSL) {
9673         // Shift distance is computed differently for LSL. Convert the result.
9674         shift_dist = (8 << lane_size) - shift_dist;
9675       }
9676       dup_immediate(vform, scratch, shift_dist);
9677       SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false);
9678       break;
9679     }
9680     case ASR_z_zw:
9681     case LSL_z_zw:
9682     case LSR_z_zw:
9683       SVEBitwiseShiftHelper(shift_op,
9684                             instr->GetSVEVectorFormat(),
9685                             zd,
9686                             zn,
9687                             ReadVRegister(instr->GetRm()),
9688                             true);
9689       break;
9690     default:
9691       VIXL_UNIMPLEMENTED();
9692       break;
9693   }
9694 }
9695 
9696 void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) {
9697   // Although the instructions have a separate encoding class, the lane size is
9698   // encoded in the same way as most other SVE instructions.
9699   VectorFormat vform = instr->GetSVEVectorFormat();
9700 
9701   int pattern = instr->GetImmSVEPredicateConstraint();
9702   int count = GetPredicateConstraintLaneCount(vform, pattern);
9703   int multiplier = instr->ExtractBits(19, 16) + 1;
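  // For example, "incd x0, all, mul #4" adds four times the number of D-sized
  // lanes in a vector to x0; the DEC forms subtract instead.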
9704 
9705   switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
9706     case DECB_r_rs:
9707     case DECD_r_rs:
9708     case DECH_r_rs:
9709     case DECW_r_rs:
9710       count = -count;
9711       break;
9712     case INCB_r_rs:
9713     case INCD_r_rs:
9714     case INCH_r_rs:
9715     case INCW_r_rs:
9716       // Nothing to do.
9717       break;
9718     default:
9719       VIXL_UNIMPLEMENTED();
9720       return;
9721   }
9722 
9723   WriteXRegister(instr->GetRd(),
9724                  IncDecN(ReadXRegister(instr->GetRd()),
9725                          count * multiplier,
9726                          kXRegSize));
9727 }
9728 
9729 void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) {
9730   VectorFormat vform = instr->GetSVEVectorFormat();
9731   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9732     VIXL_UNIMPLEMENTED();
9733   }
9734 
9735   int pattern = instr->GetImmSVEPredicateConstraint();
9736   int count = GetPredicateConstraintLaneCount(vform, pattern);
9737   int multiplier = instr->ExtractBits(19, 16) + 1;
9738 
9739   switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
9740     case DECD_z_zs:
9741     case DECH_z_zs:
9742     case DECW_z_zs:
9743       count = -count;
9744       break;
9745     case INCD_z_zs:
9746     case INCH_z_zs:
9747     case INCW_z_zs:
9748       // Nothing to do.
9749       break;
9750     default:
9751       VIXL_UNIMPLEMENTED();
9752       break;
9753   }
9754 
9755   SimVRegister& zd = ReadVRegister(instr->GetRd());
9756   SimVRegister scratch;
9757   dup_immediate(vform,
9758                 scratch,
9759                 IncDecN(0,
9760                         count * multiplier,
9761                         LaneSizeInBitsFromFormat(vform)));
9762   add(vform, zd, zd, scratch);
9763 }
9764 
9765 void Simulator::VisitSVESaturatingIncDecRegisterByElementCount(
9766     const Instruction* instr) {
9767   // Although the instructions have a separate encoding class, the lane size is
9768   // encoded in the same way as most other SVE instructions.
9769   VectorFormat vform = instr->GetSVEVectorFormat();
9770 
9771   int pattern = instr->GetImmSVEPredicateConstraint();
9772   int count = GetPredicateConstraintLaneCount(vform, pattern);
9773   int multiplier = instr->ExtractBits(19, 16) + 1;
9774 
9775   unsigned width = kXRegSize;
9776   bool is_signed = false;
9777 
9778   switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
9779     case SQDECB_r_rs_sx:
9780     case SQDECD_r_rs_sx:
9781     case SQDECH_r_rs_sx:
9782     case SQDECW_r_rs_sx:
9783       width = kWRegSize;
9784       VIXL_FALLTHROUGH();
9785     case SQDECB_r_rs_x:
9786     case SQDECD_r_rs_x:
9787     case SQDECH_r_rs_x:
9788     case SQDECW_r_rs_x:
9789       is_signed = true;
9790       count = -count;
9791       break;
9792     case SQINCB_r_rs_sx:
9793     case SQINCD_r_rs_sx:
9794     case SQINCH_r_rs_sx:
9795     case SQINCW_r_rs_sx:
9796       width = kWRegSize;
9797       VIXL_FALLTHROUGH();
9798     case SQINCB_r_rs_x:
9799     case SQINCD_r_rs_x:
9800     case SQINCH_r_rs_x:
9801     case SQINCW_r_rs_x:
9802       is_signed = true;
9803       break;
9804     case UQDECB_r_rs_uw:
9805     case UQDECD_r_rs_uw:
9806     case UQDECH_r_rs_uw:
9807     case UQDECW_r_rs_uw:
9808       width = kWRegSize;
9809       VIXL_FALLTHROUGH();
9810     case UQDECB_r_rs_x:
9811     case UQDECD_r_rs_x:
9812     case UQDECH_r_rs_x:
9813     case UQDECW_r_rs_x:
9814       count = -count;
9815       break;
9816     case UQINCB_r_rs_uw:
9817     case UQINCD_r_rs_uw:
9818     case UQINCH_r_rs_uw:
9819     case UQINCW_r_rs_uw:
9820       width = kWRegSize;
9821       VIXL_FALLTHROUGH();
9822     case UQINCB_r_rs_x:
9823     case UQINCD_r_rs_x:
9824     case UQINCH_r_rs_x:
9825     case UQINCW_r_rs_x:
9826       // Nothing to do.
9827       break;
9828     default:
9829       VIXL_UNIMPLEMENTED();
9830       break;
9831   }
9832 
9833   WriteXRegister(instr->GetRd(),
9834                  IncDecN(ReadXRegister(instr->GetRd()),
9835                          count * multiplier,
9836                          width,
9837                          true,
9838                          is_signed));
9839 }
9840 
9841 void Simulator::VisitSVESaturatingIncDecVectorByElementCount(
9842     const Instruction* instr) {
9843   VectorFormat vform = instr->GetSVEVectorFormat();
9844   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9845     VIXL_UNIMPLEMENTED();
9846   }
9847 
9848   int pattern = instr->GetImmSVEPredicateConstraint();
9849   int count = GetPredicateConstraintLaneCount(vform, pattern);
9850   int multiplier = instr->ExtractBits(19, 16) + 1;
9851 
9852   SimVRegister& zd = ReadVRegister(instr->GetRd());
9853   SimVRegister scratch;
9854   dup_immediate(vform,
9855                 scratch,
9856                 IncDecN(0,
9857                         count * multiplier,
9858                         LaneSizeInBitsFromFormat(vform)));
9859 
9860   switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
9861     case SQDECD_z_zs:
9862     case SQDECH_z_zs:
9863     case SQDECW_z_zs:
9864       sub(vform, zd, zd, scratch).SignedSaturate(vform);
9865       break;
9866     case SQINCD_z_zs:
9867     case SQINCH_z_zs:
9868     case SQINCW_z_zs:
9869       add(vform, zd, zd, scratch).SignedSaturate(vform);
9870       break;
9871     case UQDECD_z_zs:
9872     case UQDECH_z_zs:
9873     case UQDECW_z_zs:
9874       sub(vform, zd, zd, scratch).UnsignedSaturate(vform);
9875       break;
9876     case UQINCD_z_zs:
9877     case UQINCH_z_zs:
9878     case UQINCW_z_zs:
9879       add(vform, zd, zd, scratch).UnsignedSaturate(vform);
9880       break;
9881     default:
9882       VIXL_UNIMPLEMENTED();
9883       break;
9884   }
9885 }
9886 
9887 void Simulator::VisitSVEElementCount(const Instruction* instr) {
9888   switch (instr->Mask(SVEElementCountMask)) {
9889     case CNTB_r_s:
9890     case CNTD_r_s:
9891     case CNTH_r_s:
9892     case CNTW_r_s:
9893       // All handled below.
9894       break;
9895     default:
9896       VIXL_UNIMPLEMENTED();
9897       break;
9898   }
9899 
9900   // Although these instructions have their own encoding class, the lane size
9901   // is encoded in the same way as most other SVE instructions.
9902   VectorFormat vform = instr->GetSVEVectorFormat();
9903 
9904   int pattern = instr->GetImmSVEPredicateConstraint();
9905   int count = GetPredicateConstraintLaneCount(vform, pattern);
9906   int multiplier = instr->ExtractBits(19, 16) + 1;
9907   WriteXRegister(instr->GetRd(), count * multiplier);
9908 }
9909 
9910 void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
9911   VectorFormat vform = instr->GetSVEVectorFormat();
9912   SimVRegister& vdn = ReadVRegister(instr->GetRd());
9913   SimVRegister& zm = ReadVRegister(instr->GetRn());
9914   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9915 
9916   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
9917 
9918   switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
9919     case FADDA_v_p_z:
9920       fadda(vform, vdn, pg, zm);
9921       break;
9922     default:
9923       VIXL_UNIMPLEMENTED();
9924       break;
9925   }
9926 }
9927 
9928 void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
9929   VectorFormat vform = instr->GetSVEVectorFormat();
9930   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9931   SimVRegister& zm = ReadVRegister(instr->GetRn());
9932   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9933 
9934   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
9935 
9936   SimVRegister result;
9937   switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
9938     case FABD_z_p_zz:
9939       fabd(vform, result, zdn, zm);
9940       break;
9941     case FADD_z_p_zz:
9942       fadd(vform, result, zdn, zm);
9943       break;
9944     case FDIVR_z_p_zz:
9945       fdiv(vform, result, zm, zdn);
9946       break;
9947     case FDIV_z_p_zz:
9948       fdiv(vform, result, zdn, zm);
9949       break;
9950     case FMAXNM_z_p_zz:
9951       fmaxnm(vform, result, zdn, zm);
9952       break;
9953     case FMAX_z_p_zz:
9954       fmax(vform, result, zdn, zm);
9955       break;
9956     case FMINNM_z_p_zz:
9957       fminnm(vform, result, zdn, zm);
9958       break;
9959     case FMIN_z_p_zz:
9960       fmin(vform, result, zdn, zm);
9961       break;
9962     case FMULX_z_p_zz:
9963       fmulx(vform, result, zdn, zm);
9964       break;
9965     case FMUL_z_p_zz:
9966       fmul(vform, result, zdn, zm);
9967       break;
9968     case FSCALE_z_p_zz:
9969       fscale(vform, result, zdn, zm);
9970       break;
9971     case FSUBR_z_p_zz:
9972       fsub(vform, result, zm, zdn);
9973       break;
9974     case FSUB_z_p_zz:
9975       fsub(vform, result, zdn, zm);
9976       break;
9977     default:
9978       VIXL_UNIMPLEMENTED();
9979       break;
9980   }
9981   mov_merging(vform, zdn, pg, result);
9982 }
9983 
9984 void Simulator::VisitSVEFPArithmeticWithImm_Predicated(
9985     const Instruction* instr) {
9986   VectorFormat vform = instr->GetSVEVectorFormat();
9987   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
9988     VIXL_UNIMPLEMENTED();
9989   }
9990 
9991   SimVRegister& zdn = ReadVRegister(instr->GetRd());
9992   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
9993   SimVRegister result;
9994 
9995   int i1 = instr->ExtractBit(5);
9996   SimVRegister add_sub_imm, min_max_imm, mul_imm;
9997   uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5);
9998   uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0);
9999   uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0);
10000   dup_immediate(vform, add_sub_imm, i1 ? one : half);
10001   dup_immediate(vform, min_max_imm, i1 ? one : 0);
10002   dup_immediate(vform, mul_imm, i1 ? two : half);
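  // Bit 5 (i1) selects between the two immediates defined for each operation:
  // FADD/FSUB/FSUBR use {0.5, 1.0}, the FMAX/FMIN variants use {0.0, 1.0} and
  // FMUL uses {0.5, 2.0}.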
10003 
10004   switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
10005     case FADD_z_p_zs:
10006       fadd(vform, result, zdn, add_sub_imm);
10007       break;
10008     case FMAXNM_z_p_zs:
10009       fmaxnm(vform, result, zdn, min_max_imm);
10010       break;
10011     case FMAX_z_p_zs:
10012       fmax(vform, result, zdn, min_max_imm);
10013       break;
10014     case FMINNM_z_p_zs:
10015       fminnm(vform, result, zdn, min_max_imm);
10016       break;
10017     case FMIN_z_p_zs:
10018       fmin(vform, result, zdn, min_max_imm);
10019       break;
10020     case FMUL_z_p_zs:
10021       fmul(vform, result, zdn, mul_imm);
10022       break;
10023     case FSUBR_z_p_zs:
10024       fsub(vform, result, add_sub_imm, zdn);
10025       break;
10026     case FSUB_z_p_zs:
10027       fsub(vform, result, zdn, add_sub_imm);
10028       break;
10029     default:
10030       VIXL_UNIMPLEMENTED();
10031       break;
10032   }
10033   mov_merging(vform, zdn, pg, result);
10034 }
10035 
10036 void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
10037   VectorFormat vform = instr->GetSVEVectorFormat();
10038   SimVRegister& zd = ReadVRegister(instr->GetRd());
10039   SimVRegister& zm = ReadVRegister(instr->GetRn());
10040 
10041   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10042 
10043   switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
10044     case FTMAD_z_zzi:
10045       ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
10046       break;
10047     default:
10048       VIXL_UNIMPLEMENTED();
10049       break;
10050   }
10051 }
10052 
10053 void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
10054   VectorFormat vform = instr->GetSVEVectorFormat();
10055   SimVRegister& zd = ReadVRegister(instr->GetRd());
10056   SimVRegister& zn = ReadVRegister(instr->GetRn());
10057   SimVRegister& zm = ReadVRegister(instr->GetRm());
10058 
10059   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10060 
10061   switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
10062     case FADD_z_zz:
10063       fadd(vform, zd, zn, zm);
10064       break;
10065     case FMUL_z_zz:
10066       fmul(vform, zd, zn, zm);
10067       break;
10068     case FRECPS_z_zz:
10069       frecps(vform, zd, zn, zm);
10070       break;
10071     case FRSQRTS_z_zz:
10072       frsqrts(vform, zd, zn, zm);
10073       break;
10074     case FSUB_z_zz:
10075       fsub(vform, zd, zn, zm);
10076       break;
10077     case FTSMUL_z_zz:
10078       ftsmul(vform, zd, zn, zm);
10079       break;
10080     default:
10081       VIXL_UNIMPLEMENTED();
10082       break;
10083   }
10084 }
10085 
10086 void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
10087   SimPRegister& pd = ReadPRegister(instr->GetPd());
10088   SimVRegister& zn = ReadVRegister(instr->GetRn());
10089   SimVRegister& zm = ReadVRegister(instr->GetRm());
10090   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10091   VectorFormat vform = instr->GetSVEVectorFormat();
10092   SimVRegister result;
10093 
10094   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10095 
10096   switch (instr->Mask(SVEFPCompareVectorsMask)) {
10097     case FACGE_p_p_zz:
10098       fabscmp(vform, result, zn, zm, ge);
10099       break;
10100     case FACGT_p_p_zz:
10101       fabscmp(vform, result, zn, zm, gt);
10102       break;
10103     case FCMEQ_p_p_zz:
10104       fcmp(vform, result, zn, zm, eq);
10105       break;
10106     case FCMGE_p_p_zz:
10107       fcmp(vform, result, zn, zm, ge);
10108       break;
10109     case FCMGT_p_p_zz:
10110       fcmp(vform, result, zn, zm, gt);
10111       break;
10112     case FCMNE_p_p_zz:
10113       fcmp(vform, result, zn, zm, ne);
10114       break;
10115     case FCMUO_p_p_zz:
10116       fcmp(vform, result, zn, zm, uo);
10117       break;
10118     default:
10119       VIXL_UNIMPLEMENTED();
10120       break;
10121   }
10122 
10123   ExtractFromSimVRegister(vform, pd, result);
10124   mov_zeroing(pd, pg, pd);
10125 }
10126 
10127 void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
10128   SimPRegister& pd = ReadPRegister(instr->GetPd());
10129   SimVRegister& zn = ReadVRegister(instr->GetRn());
10130   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10131   VectorFormat vform = instr->GetSVEVectorFormat();
10132 
10133   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10134 
10135   SimVRegister result;
10136   SimVRegister zeros;
10137   dup_immediate(kFormatVnD, zeros, 0);
10138 
10139   switch (instr->Mask(SVEFPCompareWithZeroMask)) {
10140     case FCMEQ_p_p_z0:
10141       fcmp(vform, result, zn, zeros, eq);
10142       break;
10143     case FCMGE_p_p_z0:
10144       fcmp(vform, result, zn, zeros, ge);
10145       break;
10146     case FCMGT_p_p_z0:
10147       fcmp(vform, result, zn, zeros, gt);
10148       break;
10149     case FCMLE_p_p_z0:
10150       fcmp(vform, result, zn, zeros, le);
10151       break;
10152     case FCMLT_p_p_z0:
10153       fcmp(vform, result, zn, zeros, lt);
10154       break;
10155     case FCMNE_p_p_z0:
10156       fcmp(vform, result, zn, zeros, ne);
10157       break;
10158     default:
10159       VIXL_UNIMPLEMENTED();
10160       break;
10161   }
10162 
10163   ExtractFromSimVRegister(vform, pd, result);
10164   mov_zeroing(pd, pg, pd);
10165 }
10166 
10167 void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) {
10168   VectorFormat vform = instr->GetSVEVectorFormat();
10169 
10170   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10171     VIXL_UNIMPLEMENTED();
10172   }
10173 
10174   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10175   SimVRegister& zm = ReadVRegister(instr->GetRn());
10176   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10177   int rot = instr->ExtractBit(16);
10178 
10179   SimVRegister result;
10180 
10181   switch (instr->Mask(SVEFPComplexAdditionMask)) {
10182     case FCADD_z_p_zz:
10183       fcadd(vform, result, zdn, zm, rot);
10184       break;
10185     default:
10186       VIXL_UNIMPLEMENTED();
10187       break;
10188   }
10189   mov_merging(vform, zdn, pg, result);
10190 }
10191 
10192 void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) {
10193   VectorFormat vform = instr->GetSVEVectorFormat();
10194 
10195   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
10196     VIXL_UNIMPLEMENTED();
10197   }
10198 
10199   SimVRegister& zda = ReadVRegister(instr->GetRd());
10200   SimVRegister& zn = ReadVRegister(instr->GetRn());
10201   SimVRegister& zm = ReadVRegister(instr->GetRm());
10202   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10203   int rot = instr->ExtractBits(14, 13);
10204 
10205   SimVRegister result;
10206 
10207   switch (instr->Mask(SVEFPComplexMulAddMask)) {
10208     case FCMLA_z_p_zzz:
10209       fcmla(vform, result, zn, zm, zda, rot);
10210       break;
10211     default:
10212       VIXL_UNIMPLEMENTED();
10213       break;
10214   }
10215   mov_merging(vform, zda, pg, result);
10216 }
10217 
10218 void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) {
10219   SimVRegister& zda = ReadVRegister(instr->GetRd());
10220   SimVRegister& zn = ReadVRegister(instr->GetRn());
10221   int rot = instr->ExtractBits(11, 10);
10222   unsigned zm_code = instr->GetRm();
10223   int index = -1;
10224   VectorFormat vform, vform_dup;
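  // The Zm field overlaps the index in the indexed form: half-precision lanes
  // use a three-bit Zm with a two-bit index, single-precision lanes use a
  // four-bit Zm with a one-bit index. The indexed complex pair is duplicated
  // across each 128-bit segment before the multiply-add.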
10225 
10226   switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
10227     case FCMLA_z_zzzi_h:
10228       vform = kFormatVnH;
10229       vform_dup = kFormatVnS;
10230       index = zm_code >> 3;
10231       zm_code &= 0x7;
10232       break;
10233     case FCMLA_z_zzzi_s:
10234       vform = kFormatVnS;
10235       vform_dup = kFormatVnD;
10236       index = zm_code >> 4;
10237       zm_code &= 0xf;
10238       break;
10239     default:
10240       VIXL_UNIMPLEMENTED();
10241       break;
10242   }
10243 
10244   if (index >= 0) {
10245     SimVRegister temp;
10246     dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index);
10247     fcmla(vform, zda, zn, temp, zda, rot);
10248   }
10249 }
10250 
10251 typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform,
10252                                                   LogicVRegister dst,
10253                                                   const LogicVRegister& src);
10254 
10255 void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
10256   VectorFormat vform = instr->GetSVEVectorFormat();
10257   SimVRegister& vd = ReadVRegister(instr->GetRd());
10258   SimVRegister& zn = ReadVRegister(instr->GetRn());
10259   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10260   int lane_size = LaneSizeInBitsFromFormat(vform);
10261 
10262   uint64_t inactive_value = 0;
10263   FastReduceFn fn = nullptr;
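  // Inactive lanes are replaced with the identity value of the reduction so
  // that they cannot affect the result: 0.0 for FADDV, -infinity for FMAXV,
  // +infinity for FMINV and the default NaN for the FMAXNMV/FMINNMV forms.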
10264 
10265   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10266 
10267   switch (instr->Mask(SVEFPFastReductionMask)) {
10268     case FADDV_v_p_z:
10269       fn = &Simulator::faddv;
10270       break;
10271     case FMAXNMV_v_p_z:
10272       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10273       fn = &Simulator::fmaxnmv;
10274       break;
10275     case FMAXV_v_p_z:
10276       inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
10277       fn = &Simulator::fmaxv;
10278       break;
10279     case FMINNMV_v_p_z:
10280       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
10281       fn = &Simulator::fminnmv;
10282       break;
10283     case FMINV_v_p_z:
10284       inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
10285       fn = &Simulator::fminv;
10286       break;
10287     default:
10288       VIXL_UNIMPLEMENTED();
10289       break;
10290   }
10291 
10292   SimVRegister scratch;
10293   dup_immediate(vform, scratch, inactive_value);
10294   mov_merging(vform, scratch, pg, zn);
10295   if (fn != nullptr) (this->*fn)(vform, vd, scratch);
10296 }
10297 
10298 void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
10299   VectorFormat vform = kFormatUndefined;
10300 
10301   switch (instr->Mask(SVEFPMulIndexMask)) {
10302     case FMUL_z_zzi_d:
10303       vform = kFormatVnD;
10304       break;
10305     case FMUL_z_zzi_h_i3h:
10306     case FMUL_z_zzi_h:
10307       vform = kFormatVnH;
10308       break;
10309     case FMUL_z_zzi_s:
10310       vform = kFormatVnS;
10311       break;
10312     default:
10313       VIXL_UNIMPLEMENTED();
10314       break;
10315   }
10316 
10317   SimVRegister& zd = ReadVRegister(instr->GetRd());
10318   SimVRegister& zn = ReadVRegister(instr->GetRn());
10319   SimVRegister temp;
10320 
10321   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10322   fmul(vform, zd, zn, temp);
10323 }
10324 
10325 void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
10326   VectorFormat vform = instr->GetSVEVectorFormat();
10327   SimVRegister& zd = ReadVRegister(instr->GetRd());
10328   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10329   SimVRegister result;
10330 
10331   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10332 
10333   if (instr->ExtractBit(15) == 0) {
10334     // Floating-point multiply-accumulate writing addend.
10335     SimVRegister& zm = ReadVRegister(instr->GetRm());
10336     SimVRegister& zn = ReadVRegister(instr->GetRn());
10337 
10338     switch (instr->Mask(SVEFPMulAddMask)) {
10339       // zda = zda + zn * zm
10340       case FMLA_z_p_zzz:
10341         fmla(vform, result, zd, zn, zm);
10342         break;
10343       // zda = -zda + -zn * zm
10344       case FNMLA_z_p_zzz:
10345         fneg(vform, result, zd);
10346         fmls(vform, result, result, zn, zm);
10347         break;
10348       // zda = zda + -zn * zm
10349       case FMLS_z_p_zzz:
10350         fmls(vform, result, zd, zn, zm);
10351         break;
10352       // zda = -zda + zn * zm
10353       case FNMLS_z_p_zzz:
10354         fneg(vform, result, zd);
10355         fmla(vform, result, result, zn, zm);
10356         break;
10357       default:
10358         VIXL_UNIMPLEMENTED();
10359         break;
10360     }
10361   } else {
10362     // Floating-point multiply-accumulate writing multiplicand.
10363     SimVRegister& za = ReadVRegister(instr->GetRm());
10364     SimVRegister& zm = ReadVRegister(instr->GetRn());
10365 
10366     switch (instr->Mask(SVEFPMulAddMask)) {
10367       // zdn = za + zdn * zm
10368       case FMAD_z_p_zzz:
10369         fmla(vform, result, za, zd, zm);
10370         break;
10371       // zdn = -za + -zdn * zm
10372       case FNMAD_z_p_zzz:
10373         fneg(vform, result, za);
10374         fmls(vform, result, result, zd, zm);
10375         break;
10376       // zdn = za + -zdn * zm
10377       case FMSB_z_p_zzz:
10378         fmls(vform, result, za, zd, zm);
10379         break;
10380       // zdn = -za + zdn * zm
10381       case FNMSB_z_p_zzz:
10382         fneg(vform, result, za);
10383         fmla(vform, result, result, zd, zm);
10384         break;
10385       default:
10386         VIXL_UNIMPLEMENTED();
10387         break;
10388     }
10389   }
10390 
10391   mov_merging(vform, zd, pg, result);
10392 }
10393 
10394 void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
10395   VectorFormat vform = kFormatUndefined;
10396 
10397   switch (instr->Mask(SVEFPMulAddIndexMask)) {
10398     case FMLA_z_zzzi_d:
10399     case FMLS_z_zzzi_d:
10400       vform = kFormatVnD;
10401       break;
10402     case FMLA_z_zzzi_s:
10403     case FMLS_z_zzzi_s:
10404       vform = kFormatVnS;
10405       break;
10406     case FMLA_z_zzzi_h:
10407     case FMLS_z_zzzi_h:
10408     case FMLA_z_zzzi_h_i3h:
10409     case FMLS_z_zzzi_h_i3h:
10410       vform = kFormatVnH;
10411       break;
10412     default:
10413       VIXL_UNIMPLEMENTED();
10414       break;
10415   }
10416 
10417   SimVRegister& zd = ReadVRegister(instr->GetRd());
10418   SimVRegister& zn = ReadVRegister(instr->GetRn());
10419   SimVRegister temp;
10420 
10421   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
10422   if (instr->ExtractBit(10) == 1) {
10423     fmls(vform, zd, zd, zn, temp);
10424   } else {
10425     fmla(vform, zd, zd, zn, temp);
10426   }
10427 }
10428 
10429 void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) {
10430   SimVRegister& zd = ReadVRegister(instr->GetRd());
10431   SimVRegister& zn = ReadVRegister(instr->GetRn());
10432   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10433   int dst_data_size;
10434   int src_data_size;
10435 
10436   switch (instr->Mask(SVEFPConvertToIntMask)) {
10437     case FCVTZS_z_p_z_d2w:
10438     case FCVTZU_z_p_z_d2w:
10439       dst_data_size = kSRegSize;
10440       src_data_size = kDRegSize;
10441       break;
10442     case FCVTZS_z_p_z_d2x:
10443     case FCVTZU_z_p_z_d2x:
10444       dst_data_size = kDRegSize;
10445       src_data_size = kDRegSize;
10446       break;
10447     case FCVTZS_z_p_z_fp162h:
10448     case FCVTZU_z_p_z_fp162h:
10449       dst_data_size = kHRegSize;
10450       src_data_size = kHRegSize;
10451       break;
10452     case FCVTZS_z_p_z_fp162w:
10453     case FCVTZU_z_p_z_fp162w:
10454       dst_data_size = kSRegSize;
10455       src_data_size = kHRegSize;
10456       break;
10457     case FCVTZS_z_p_z_fp162x:
10458     case FCVTZU_z_p_z_fp162x:
10459       dst_data_size = kDRegSize;
10460       src_data_size = kHRegSize;
10461       break;
10462     case FCVTZS_z_p_z_s2w:
10463     case FCVTZU_z_p_z_s2w:
10464       dst_data_size = kSRegSize;
10465       src_data_size = kSRegSize;
10466       break;
10467     case FCVTZS_z_p_z_s2x:
10468     case FCVTZU_z_p_z_s2x:
10469       dst_data_size = kDRegSize;
10470       src_data_size = kSRegSize;
10471       break;
10472     default:
10473       VIXL_UNIMPLEMENTED();
10474       dst_data_size = 0;
10475       src_data_size = 0;
10476       break;
10477   }
10478 
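  // The operation is governed by the wider of the source and destination lane
  // sizes, so build the vector format from the larger of the two.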
10479   VectorFormat vform =
10480       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10481 
10482   if (instr->ExtractBit(16) == 0) {
10483     fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10484   } else {
10485     fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
10486   }
10487 }
10488 
10489 void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
10490   SimVRegister& zd = ReadVRegister(instr->GetRd());
10491   SimVRegister& zn = ReadVRegister(instr->GetRn());
10492   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10493   VectorFormat dst_data_size = kFormatUndefined;
10494   VectorFormat src_data_size = kFormatUndefined;
10495 
10496   switch (instr->Mask(SVEFPConvertPrecisionMask)) {
10497     case FCVT_z_p_z_d2h:
10498       dst_data_size = kFormatVnH;
10499       src_data_size = kFormatVnD;
10500       break;
10501     case FCVT_z_p_z_d2s:
10502       dst_data_size = kFormatVnS;
10503       src_data_size = kFormatVnD;
10504       break;
10505     case FCVT_z_p_z_h2d:
10506       dst_data_size = kFormatVnD;
10507       src_data_size = kFormatVnH;
10508       break;
10509     case FCVT_z_p_z_h2s:
10510       dst_data_size = kFormatVnS;
10511       src_data_size = kFormatVnH;
10512       break;
10513     case FCVT_z_p_z_s2d:
10514       dst_data_size = kFormatVnD;
10515       src_data_size = kFormatVnS;
10516       break;
10517     case FCVT_z_p_z_s2h:
10518       dst_data_size = kFormatVnH;
10519       src_data_size = kFormatVnS;
10520       break;
10521     default:
10522       VIXL_UNIMPLEMENTED();
10523       break;
10524   }
10525 
10526   fcvt(dst_data_size, src_data_size, zd, pg, zn);
10527 }
10528 
10529 void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
10530   SimVRegister& zd = ReadVRegister(instr->GetRd());
10531   SimVRegister& zn = ReadVRegister(instr->GetRn());
10532   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10533   VectorFormat vform = instr->GetSVEVectorFormat();
10534   SimVRegister result;
10535 
10536   switch (instr->Mask(SVEFPUnaryOpMask)) {
10537     case FRECPX_z_p_z:
10538       frecpx(vform, result, zn);
10539       break;
10540     case FSQRT_z_p_z:
10541       fsqrt(vform, result, zn);
10542       break;
10543     default:
10544       VIXL_UNIMPLEMENTED();
10545       break;
10546   }
10547   mov_merging(vform, zd, pg, result);
10548 }
10549 
10550 void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
10551   SimVRegister& zd = ReadVRegister(instr->GetRd());
10552   SimVRegister& zn = ReadVRegister(instr->GetRn());
10553   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10554   VectorFormat vform = instr->GetSVEVectorFormat();
10555   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10556   bool exact_exception = false;
10557 
10558   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10559 
10560   switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
10561     case FRINTA_z_p_z:
10562       fpcr_rounding = FPTieAway;
10563       break;
10564     case FRINTI_z_p_z:
10565       break;  // Use FPCR rounding mode.
10566     case FRINTM_z_p_z:
10567       fpcr_rounding = FPNegativeInfinity;
10568       break;
10569     case FRINTN_z_p_z:
10570       fpcr_rounding = FPTieEven;
10571       break;
10572     case FRINTP_z_p_z:
10573       fpcr_rounding = FPPositiveInfinity;
10574       break;
10575     case FRINTX_z_p_z:
10576       exact_exception = true;
10577       break;
10578     case FRINTZ_z_p_z:
10579       fpcr_rounding = FPZero;
10580       break;
10581     default:
10582       VIXL_UNIMPLEMENTED();
10583       break;
10584   }
10585 
10586   SimVRegister result;
10587   frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger);
10588   mov_merging(vform, zd, pg, result);
10589 }
10590 
10591 void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) {
10592   SimVRegister& zd = ReadVRegister(instr->GetRd());
10593   SimVRegister& zn = ReadVRegister(instr->GetRn());
10594   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10595   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10596   int dst_data_size;
10597   int src_data_size;
10598 
10599   switch (instr->Mask(SVEIntConvertToFPMask)) {
10600     case SCVTF_z_p_z_h2fp16:
10601     case UCVTF_z_p_z_h2fp16:
10602       dst_data_size = kHRegSize;
10603       src_data_size = kHRegSize;
10604       break;
10605     case SCVTF_z_p_z_w2d:
10606     case UCVTF_z_p_z_w2d:
10607       dst_data_size = kDRegSize;
10608       src_data_size = kSRegSize;
10609       break;
10610     case SCVTF_z_p_z_w2fp16:
10611     case UCVTF_z_p_z_w2fp16:
10612       dst_data_size = kHRegSize;
10613       src_data_size = kSRegSize;
10614       break;
10615     case SCVTF_z_p_z_w2s:
10616     case UCVTF_z_p_z_w2s:
10617       dst_data_size = kSRegSize;
10618       src_data_size = kSRegSize;
10619       break;
10620     case SCVTF_z_p_z_x2d:
10621     case UCVTF_z_p_z_x2d:
10622       dst_data_size = kDRegSize;
10623       src_data_size = kDRegSize;
10624       break;
10625     case SCVTF_z_p_z_x2fp16:
10626     case UCVTF_z_p_z_x2fp16:
10627       dst_data_size = kHRegSize;
10628       src_data_size = kDRegSize;
10629       break;
10630     case SCVTF_z_p_z_x2s:
10631     case UCVTF_z_p_z_x2s:
10632       dst_data_size = kSRegSize;
10633       src_data_size = kDRegSize;
10634       break;
10635     default:
10636       VIXL_UNIMPLEMENTED();
10637       dst_data_size = 0;
10638       src_data_size = 0;
10639       break;
10640   }
10641 
10642   VectorFormat vform =
10643       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
10644 
10645   if (instr->ExtractBit(16) == 0) {
10646     scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10647   } else {
10648     ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
10649   }
10650 }
10651 
10652 void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
10653   VectorFormat vform = instr->GetSVEVectorFormat();
10654   SimVRegister& zd = ReadVRegister(instr->GetRd());
10655   SimVRegister& zn = ReadVRegister(instr->GetRn());
10656   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
10657 
10658   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
10659 
10660   switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
10661     case FRECPE_z_z:
10662       frecpe(vform, zd, zn, fpcr_rounding);
10663       break;
10664     case FRSQRTE_z_z:
10665       frsqrte(vform, zd, zn);
10666       break;
10667     default:
10668       VIXL_UNIMPLEMENTED();
10669       break;
10670   }
10671 }
10672 
10673 void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) {
10674   VectorFormat vform = instr->GetSVEVectorFormat();
10675   SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5));
10676 
10677   int count = CountActiveLanes(vform, pg);
10678 
10679   if (instr->ExtractBit(11) == 0) {
10680     SimVRegister& zdn = ReadVRegister(instr->GetRd());
10681     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10682       case DECP_z_p_z:
10683         sub_uint(vform, zdn, zdn, count);
10684         break;
10685       case INCP_z_p_z:
10686         add_uint(vform, zdn, zdn, count);
10687         break;
10688       case SQDECP_z_p_z:
10689         sub_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10690         break;
10691       case SQINCP_z_p_z:
10692         add_uint(vform, zdn, zdn, count).SignedSaturate(vform);
10693         break;
10694       case UQDECP_z_p_z:
10695         sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10696         break;
10697       case UQINCP_z_p_z:
10698         add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
10699         break;
10700       default:
10701         VIXL_UNIMPLEMENTED();
10702         break;
10703     }
10704   } else {
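    // Scalar forms. Bit 18 is clear for the saturating variants; bit 17 (or
    // bit 16 for the non-saturating variants) selects decrement; bit 16 is
    // clear for the signed saturating variants; and, for saturating variants,
    // bit 10 selects 64-bit rather than 32-bit saturation.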
10705     bool is_saturating = (instr->ExtractBit(18) == 0);
10706     bool decrement =
10707         is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16);
10708     bool is_signed = (instr->ExtractBit(16) == 0);
10709     bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true;
10710     unsigned width = sf ? kXRegSize : kWRegSize;
10711 
10712     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
10713       case DECP_r_p_r:
10714       case INCP_r_p_r:
10715       case SQDECP_r_p_r_sx:
10716       case SQDECP_r_p_r_x:
10717       case SQINCP_r_p_r_sx:
10718       case SQINCP_r_p_r_x:
10719       case UQDECP_r_p_r_uw:
10720       case UQDECP_r_p_r_x:
10721       case UQINCP_r_p_r_uw:
10722       case UQINCP_r_p_r_x:
10723         WriteXRegister(instr->GetRd(),
10724                        IncDecN(ReadXRegister(instr->GetRd()),
10725                                decrement ? -count : count,
10726                                width,
10727                                is_saturating,
10728                                is_signed));
10729         break;
10730       default:
10731         VIXL_UNIMPLEMENTED();
10732         break;
10733     }
10734   }
10735 }
10736 
10737 uint64_t Simulator::IncDecN(uint64_t acc,
10738                             int64_t delta,
10739                             unsigned n,
10740                             bool is_saturating,
10741                             bool is_signed) {
10742   VIXL_ASSERT(n <= 64);
10743   VIXL_ASSERT(IsIntN(n, delta));
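  // For example, IncDecN(0x7ffffffe, 5, 32, true, true) overflows a signed
  // 32-bit accumulator, so it saturates to INT32_MAX and returns
  // 0x000000007fffffff.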
10744 
10745   uint64_t sign_mask = UINT64_C(1) << (n - 1);
10746   uint64_t mask = GetUintMask(n);
10747 
10748   acc &= mask;  // Ignore initial accumulator high bits.
10749   uint64_t result = (acc + delta) & mask;
10750 
10751   bool result_negative = ((result & sign_mask) != 0);
10752 
10753   if (is_saturating) {
10754     if (is_signed) {
10755       bool acc_negative = ((acc & sign_mask) != 0);
10756       bool delta_negative = delta < 0;
10757 
10758       // If the signs of the operands are the same, but different from the
10759       // result, there was an overflow.
10760       if ((acc_negative == delta_negative) &&
10761           (acc_negative != result_negative)) {
10762         if (result_negative) {
10763           // Saturate to [..., INT<n>_MAX].
10764           result_negative = false;
10765           result = mask & ~sign_mask;  // E.g. 0x000000007fffffff
10766         } else {
10767           // Saturate to [INT<n>_MIN, ...].
10768           result_negative = true;
10769           result = ~mask | sign_mask;  // E.g. 0xffffffff80000000
10770         }
10771       }
10772     } else {
10773       if ((delta < 0) && (result > acc)) {
10774         // Saturate to [0, ...].
10775         result = 0;
10776       } else if ((delta > 0) && (result < acc)) {
10777         // Saturate to [..., UINT<n>_MAX].
10778         result = mask;
10779       }
10780     }
10781   }
10782 
10783   // Sign-extend if necessary.
10784   if (result_negative && is_signed) result |= ~mask;
10785 
10786   return result;
10787 }
10788 
10789 void Simulator::VisitSVEIndexGeneration(const Instruction* instr) {
10790   VectorFormat vform = instr->GetSVEVectorFormat();
10791   SimVRegister& zd = ReadVRegister(instr->GetRd());
10792   switch (instr->Mask(SVEIndexGenerationMask)) {
10793     case INDEX_z_ii:
10794     case INDEX_z_ir:
10795     case INDEX_z_ri:
10796     case INDEX_z_rr: {
10797       uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn())
10798                                              : instr->ExtractSignedBits(9, 5);
10799       uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm())
10800                                             : instr->ExtractSignedBits(20, 16);
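      // Lane i of zd receives start + i * step; for example,
      // "index z0.s, #1, #2" produces 1, 3, 5, 7, ...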
10801       index(vform, zd, start, step);
10802       break;
10803     }
10804     default:
10805       VIXL_UNIMPLEMENTED();
10806       break;
10807   }
10808 }
10809 
10810 void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) {
10811   VectorFormat vform = instr->GetSVEVectorFormat();
10812   SimVRegister& zd = ReadVRegister(instr->GetRd());
10813   SimVRegister& zn = ReadVRegister(instr->GetRn());
10814   SimVRegister& zm = ReadVRegister(instr->GetRm());
10815   switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
10816     case ADD_z_zz:
10817       add(vform, zd, zn, zm);
10818       break;
10819     case SQADD_z_zz:
10820       add(vform, zd, zn, zm).SignedSaturate(vform);
10821       break;
10822     case SQSUB_z_zz:
10823       sub(vform, zd, zn, zm).SignedSaturate(vform);
10824       break;
10825     case SUB_z_zz:
10826       sub(vform, zd, zn, zm);
10827       break;
10828     case UQADD_z_zz:
10829       add(vform, zd, zn, zm).UnsignedSaturate(vform);
10830       break;
10831     case UQSUB_z_zz:
10832       sub(vform, zd, zn, zm).UnsignedSaturate(vform);
10833       break;
10834     default:
10835       VIXL_UNIMPLEMENTED();
10836       break;
10837   }
10838 }
10839 
10840 void Simulator::VisitSVEIntAddSubtractVectors_Predicated(
10841     const Instruction* instr) {
10842   VectorFormat vform = instr->GetSVEVectorFormat();
10843   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10844   SimVRegister& zm = ReadVRegister(instr->GetRn());
10845   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10846   SimVRegister result;
10847 
10848   switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
10849     case ADD_z_p_zz:
10850       add(vform, result, zdn, zm);
10851       break;
10852     case SUBR_z_p_zz:
10853       sub(vform, result, zm, zdn);
10854       break;
10855     case SUB_z_p_zz:
10856       sub(vform, result, zdn, zm);
10857       break;
10858     default:
10859       VIXL_UNIMPLEMENTED();
10860       break;
10861   }
10862   mov_merging(vform, zdn, pg, result);
10863 }
10864 
10865 void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) {
10866   VectorFormat vform = instr->GetSVEVectorFormat();
10867   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10868   SimVRegister& zm = ReadVRegister(instr->GetRn());
10869   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10870   SimVRegister result;
10871 
10872   switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
10873     case AND_z_p_zz:
10874       SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm);
10875       break;
10876     case BIC_z_p_zz:
10877       SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm);
10878       break;
10879     case EOR_z_p_zz:
10880       SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm);
10881       break;
10882     case ORR_z_p_zz:
10883       SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm);
10884       break;
10885     default:
10886       VIXL_UNIMPLEMENTED();
10887       break;
10888   }
10889   mov_merging(vform, zdn, pg, result);
10890 }
10891 
10892 void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) {
10893   VectorFormat vform = instr->GetSVEVectorFormat();
10894   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10895   SimVRegister& zm = ReadVRegister(instr->GetRn());
10896   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10897   SimVRegister result;
10898 
10899   switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
10900     case MUL_z_p_zz:
10901       mul(vform, result, zdn, zm);
10902       break;
10903     case SMULH_z_p_zz:
10904       smulh(vform, result, zdn, zm);
10905       break;
10906     case UMULH_z_p_zz:
10907       umulh(vform, result, zdn, zm);
10908       break;
10909     default:
10910       VIXL_UNIMPLEMENTED();
10911       break;
10912   }
10913   mov_merging(vform, zdn, pg, result);
10914 }
10915 
10916 void Simulator::VisitSVEIntMinMaxDifference_Predicated(
10917     const Instruction* instr) {
10918   VectorFormat vform = instr->GetSVEVectorFormat();
10919   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10920   SimVRegister& zm = ReadVRegister(instr->GetRn());
10921   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10922   SimVRegister result;
10923 
10924   switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
10925     case SABD_z_p_zz:
10926       absdiff(vform, result, zdn, zm, true);
10927       break;
10928     case SMAX_z_p_zz:
10929       smax(vform, result, zdn, zm);
10930       break;
10931     case SMIN_z_p_zz:
10932       smin(vform, result, zdn, zm);
10933       break;
10934     case UABD_z_p_zz:
10935       absdiff(vform, result, zdn, zm, false);
10936       break;
10937     case UMAX_z_p_zz:
10938       umax(vform, result, zdn, zm);
10939       break;
10940     case UMIN_z_p_zz:
10941       umin(vform, result, zdn, zm);
10942       break;
10943     default:
10944       VIXL_UNIMPLEMENTED();
10945       break;
10946   }
10947   mov_merging(vform, zdn, pg, result);
10948 }
10949 
10950 void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) {
10951   VectorFormat vform = instr->GetSVEVectorFormat();
10952   SimVRegister& zd = ReadVRegister(instr->GetRd());
10953   SimVRegister scratch;
10954 
10955   switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
10956     case MUL_z_zi:
10957       dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned());
10958       mul(vform, zd, zd, scratch);
10959       break;
10960     default:
10961       VIXL_UNIMPLEMENTED();
10962       break;
10963   }
10964 }
10965 
10966 void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) {
10967   VectorFormat vform = instr->GetSVEVectorFormat();
10968   SimVRegister& zdn = ReadVRegister(instr->GetRd());
10969   SimVRegister& zm = ReadVRegister(instr->GetRn());
10970   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
10971   SimVRegister result;
10972 
10973   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
10974 
10975   switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
10976     case SDIVR_z_p_zz:
10977       sdiv(vform, result, zm, zdn);
10978       break;
10979     case SDIV_z_p_zz:
10980       sdiv(vform, result, zdn, zm);
10981       break;
10982     case UDIVR_z_p_zz:
10983       udiv(vform, result, zm, zdn);
10984       break;
10985     case UDIV_z_p_zz:
10986       udiv(vform, result, zdn, zm);
10987       break;
10988     default:
10989       VIXL_UNIMPLEMENTED();
10990       break;
10991   }
10992   mov_merging(vform, zdn, pg, result);
10993 }
10994 
10995 void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) {
10996   VectorFormat vform = instr->GetSVEVectorFormat();
10997   SimVRegister& zd = ReadVRegister(instr->GetRd());
10998   SimVRegister scratch;
10999 
11000   uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned();
11001   int64_t signed_imm = instr->GetImmSVEIntWideSigned();
11002 
11003   switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
11004     case SMAX_z_zi:
11005       dup_immediate(vform, scratch, signed_imm);
11006       smax(vform, zd, zd, scratch);
11007       break;
11008     case SMIN_z_zi:
11009       dup_immediate(vform, scratch, signed_imm);
11010       smin(vform, zd, zd, scratch);
11011       break;
11012     case UMAX_z_zi:
11013       dup_immediate(vform, scratch, unsigned_imm);
11014       umax(vform, zd, zd, scratch);
11015       break;
11016     case UMIN_z_zi:
11017       dup_immediate(vform, scratch, unsigned_imm);
11018       umin(vform, zd, zd, scratch);
11019       break;
11020     default:
11021       VIXL_UNIMPLEMENTED();
11022       break;
11023   }
11024 }
11025 
11026 void Simulator::VisitSVEIntCompareScalarCountAndLimit(
11027     const Instruction* instr) {
11028   unsigned rn_code = instr->GetRn();
11029   unsigned rm_code = instr->GetRm();
11030   SimPRegister& pd = ReadPRegister(instr->GetPd());
11031   VectorFormat vform = instr->GetSVEVectorFormat();
11032 
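  // The WHILE group builds a predicate by comparing a counter in Rn against
  // the limit in Rm: the counter increments for the LE/LO/LS/LT forms and
  // decrements for the GE/GT/HI/HS forms, and lanes stay active while the
  // comparison holds.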
11033   bool is_64_bit = instr->ExtractBit(12) == 1;
11034   int rsize = is_64_bit ? kXRegSize : kWRegSize;
11035   uint64_t mask = is_64_bit ? kXRegMask : kWRegMask;
11036 
11037   uint64_t usrc1 = ReadXRegister(rn_code);
11038   int64_t ssrc2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11039   uint64_t usrc2 = ssrc2 & mask;
11040 
11041   bool reverse = (form_hash_ == Hash("whilege_p_p_rr")) ||
11042                  (form_hash_ == Hash("whilegt_p_p_rr")) ||
11043                  (form_hash_ == Hash("whilehi_p_p_rr")) ||
11044                  (form_hash_ == Hash("whilehs_p_p_rr"));
11045 
11046   int lane_count = LaneCountFromFormat(vform);
11047   bool last = true;
11048   for (int i = 0; i < lane_count; i++) {
11049     usrc1 &= mask;
11050     int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1);
11051 
11052     bool cond = false;
11053     switch (form_hash_) {
11054       case Hash("whilele_p_p_rr"):
11055         cond = ssrc1 <= ssrc2;
11056         break;
11057       case Hash("whilelo_p_p_rr"):
11058         cond = usrc1 < usrc2;
11059         break;
11060       case Hash("whilels_p_p_rr"):
11061         cond = usrc1 <= usrc2;
11062         break;
11063       case Hash("whilelt_p_p_rr"):
11064         cond = ssrc1 < ssrc2;
11065         break;
11066       case Hash("whilege_p_p_rr"):
11067         cond = ssrc1 >= ssrc2;
11068         break;
11069       case Hash("whilegt_p_p_rr"):
11070         cond = ssrc1 > ssrc2;
11071         break;
11072       case Hash("whilehi_p_p_rr"):
11073         cond = usrc1 > usrc2;
11074         break;
11075       case Hash("whilehs_p_p_rr"):
11076         cond = usrc1 >= usrc2;
11077         break;
11078       default:
11079         VIXL_UNIMPLEMENTED();
11080         break;
11081     }
11082     last = last && cond;
11083     LogicPRegister dst(pd);
11084     int lane = reverse ? ((lane_count - 1) - i) : i;
11085     dst.SetActive(vform, lane, last);
11086     usrc1 += reverse ? -1 : 1;
11087   }
11088 
11089   PredTest(vform, GetPTrue(), pd);
11090   LogSystemRegister(NZCV);
11091 }
11092 
11093 void Simulator::VisitSVEConditionallyTerminateScalars(
11094     const Instruction* instr) {
11095   unsigned rn_code = instr->GetRn();
11096   unsigned rm_code = instr->GetRm();
11097   bool is_64_bit = instr->ExtractBit(22) == 1;
11098   uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
11099   uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
11100   bool term = false;
11101   switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
11102     case CTERMEQ_rr:
11103       term = src1 == src2;
11104       break;
11105     case CTERMNE_rr:
11106       term = src1 != src2;
11107       break;
11108     default:
11109       VIXL_UNIMPLEMENTED();
11110       break;
11111   }
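  // CTERM only updates N and V: N flags termination; otherwise V is set to
  // the inverse of C, so a following conditional branch can test the
  // combined loop-termination state.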
11112   ReadNzcv().SetN(term ? 1 : 0);
11113   ReadNzcv().SetV(term ? 0 : !ReadC());
11114   LogSystemRegister(NZCV);
11115 }
11116 
11117 void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
11118   bool commute_inputs = false;
11119   Condition cond = al;
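  // CMPLE/CMPLT are implemented as GE/GT with the operands swapped.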
11120   switch (instr->Mask(SVEIntCompareSignedImmMask)) {
11121     case CMPEQ_p_p_zi:
11122       cond = eq;
11123       break;
11124     case CMPGE_p_p_zi:
11125       cond = ge;
11126       break;
11127     case CMPGT_p_p_zi:
11128       cond = gt;
11129       break;
11130     case CMPLE_p_p_zi:
11131       cond = ge;
11132       commute_inputs = true;
11133       break;
11134     case CMPLT_p_p_zi:
11135       cond = gt;
11136       commute_inputs = true;
11137       break;
11138     case CMPNE_p_p_zi:
11139       cond = ne;
11140       break;
11141     default:
11142       VIXL_UNIMPLEMENTED();
11143       break;
11144   }
11145 
11146   VectorFormat vform = instr->GetSVEVectorFormat();
11147   SimVRegister src2;
11148   dup_immediate(vform,
11149                 src2,
11150                 ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16)));
11151   SVEIntCompareVectorsHelper(cond,
11152                              vform,
11153                              ReadPRegister(instr->GetPd()),
11154                              ReadPRegister(instr->GetPgLow8()),
11155                              commute_inputs ? src2
11156                                             : ReadVRegister(instr->GetRn()),
11157                              commute_inputs ? ReadVRegister(instr->GetRn())
11158                                             : src2);
11159 }
11160 
11161 void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
11162   bool commute_inputs = false;
11163   Condition cond = al;
11164   switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
11165     case CMPHI_p_p_zi:
11166       cond = hi;
11167       break;
11168     case CMPHS_p_p_zi:
11169       cond = hs;
11170       break;
11171     case CMPLO_p_p_zi:
11172       cond = hi;
11173       commute_inputs = true;
11174       break;
11175     case CMPLS_p_p_zi:
11176       cond = hs;
11177       commute_inputs = true;
11178       break;
11179     default:
11180       VIXL_UNIMPLEMENTED();
11181       break;
11182   }
11183 
11184   VectorFormat vform = instr->GetSVEVectorFormat();
11185   SimVRegister src2;
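  // The unsigned compare immediate is a 7-bit value in bits <20:14>.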
11186   dup_immediate(vform, src2, instr->ExtractBits(20, 14));
11187   SVEIntCompareVectorsHelper(cond,
11188                              vform,
11189                              ReadPRegister(instr->GetPd()),
11190                              ReadPRegister(instr->GetPgLow8()),
11191                              commute_inputs ? src2
11192                                             : ReadVRegister(instr->GetRn()),
11193                              commute_inputs ? ReadVRegister(instr->GetRn())
11194                                             : src2);
11195 }
11196 
11197 void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) {
11198   Instr op = instr->Mask(SVEIntCompareVectorsMask);
11199   bool is_wide_elements = false;
11200   switch (op) {
11201     case CMPEQ_p_p_zw:
11202     case CMPGE_p_p_zw:
11203     case CMPGT_p_p_zw:
11204     case CMPHI_p_p_zw:
11205     case CMPHS_p_p_zw:
11206     case CMPLE_p_p_zw:
11207     case CMPLO_p_p_zw:
11208     case CMPLS_p_p_zw:
11209     case CMPLT_p_p_zw:
11210     case CMPNE_p_p_zw:
11211       is_wide_elements = true;
11212       break;
11213   }
11214 
11215   Condition cond;
11216   switch (op) {
11217     case CMPEQ_p_p_zw:
11218     case CMPEQ_p_p_zz:
11219       cond = eq;
11220       break;
11221     case CMPGE_p_p_zw:
11222     case CMPGE_p_p_zz:
11223       cond = ge;
11224       break;
11225     case CMPGT_p_p_zw:
11226     case CMPGT_p_p_zz:
11227       cond = gt;
11228       break;
11229     case CMPHI_p_p_zw:
11230     case CMPHI_p_p_zz:
11231       cond = hi;
11232       break;
11233     case CMPHS_p_p_zw:
11234     case CMPHS_p_p_zz:
11235       cond = hs;
11236       break;
11237     case CMPNE_p_p_zw:
11238     case CMPNE_p_p_zz:
11239       cond = ne;
11240       break;
11241     case CMPLE_p_p_zw:
11242       cond = le;
11243       break;
11244     case CMPLO_p_p_zw:
11245       cond = lo;
11246       break;
11247     case CMPLS_p_p_zw:
11248       cond = ls;
11249       break;
11250     case CMPLT_p_p_zw:
11251       cond = lt;
11252       break;
11253     default:
11254       VIXL_UNIMPLEMENTED();
11255       cond = al;
11256       break;
11257   }
11258 
11259   SVEIntCompareVectorsHelper(cond,
11260                              instr->GetSVEVectorFormat(),
11261                              ReadPRegister(instr->GetPd()),
11262                              ReadPRegister(instr->GetPgLow8()),
11263                              ReadVRegister(instr->GetRn()),
11264                              ReadVRegister(instr->GetRm()),
11265                              is_wide_elements);
11266 }
11267 
11268 void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) {
11269   VectorFormat vform = instr->GetSVEVectorFormat();
11270   SimVRegister& zd = ReadVRegister(instr->GetRd());
11271   SimVRegister& zn = ReadVRegister(instr->GetRn());
11272 
11273   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11274               (vform == kFormatVnD));
11275 
11276   switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
11277     case FEXPA_z_z:
11278       fexpa(vform, zd, zn);
11279       break;
11280     default:
11281       VIXL_UNIMPLEMENTED();
11282       break;
11283   }
11284 }
11285 
11286 void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) {
11287   VectorFormat vform = instr->GetSVEVectorFormat();
11288   SimVRegister& zd = ReadVRegister(instr->GetRd());
11289   SimVRegister& zn = ReadVRegister(instr->GetRn());
11290   SimVRegister& zm = ReadVRegister(instr->GetRm());
11291 
11292   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
11293               (vform == kFormatVnD));
11294 
11295   switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
11296     case FTSSEL_z_zz:
11297       ftssel(vform, zd, zn, zm);
11298       break;
11299     default:
11300       VIXL_UNIMPLEMENTED();
11301       break;
11302   }
11303 }
11304 
11305 void Simulator::VisitSVEConstructivePrefix_Unpredicated(
11306     const Instruction* instr) {
11307   SimVRegister& zd = ReadVRegister(instr->GetRd());
11308   SimVRegister& zn = ReadVRegister(instr->GetRn());
11309 
11310   switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
11311     case MOVPRFX_z_z:
11312       mov(kFormatVnD, zd, zn);  // The lane size is arbitrary.
11313       break;
11314     default:
11315       VIXL_UNIMPLEMENTED();
11316       break;
11317   }
11318 }
11319 
11320 void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) {
11321   VectorFormat vform = instr->GetSVEVectorFormat();
11322 
11323   SimVRegister& zd = ReadVRegister(instr->GetRd());
11324   SimVRegister& zm = ReadVRegister(instr->GetRm());
11325 
11326   SimVRegister result;
11327   switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
11328     case MLA_z_p_zzz:
11329       mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11330       break;
11331     case MLS_z_p_zzz:
11332       mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
11333       break;
11334     case MAD_z_p_zzz:
11335       // 'za' is encoded in 'Rn'.
11336       mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11337       break;
11338     case MSB_z_p_zzz: {
11339       // 'za' is encoded in 'Rn'.
11340       mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
11341       break;
11342     }
11343     default:
11344       VIXL_UNIMPLEMENTED();
11345       break;
11346   }
11347   mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result);
11348 }
11349 
11350 void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
11351   VectorFormat vform = instr->GetSVEVectorFormat();
11352   SimVRegister& zda = ReadVRegister(instr->GetRd());
11353   SimVRegister& zn = ReadVRegister(instr->GetRn());
11354   SimVRegister& zm = ReadVRegister(instr->GetRm());
11355 
11356   switch (form_hash_) {
11357     case Hash("sdot_z_zzz"):
11358       sdot(vform, zda, zn, zm);
11359       break;
11360     case Hash("udot_z_zzz"):
11361       udot(vform, zda, zn, zm);
11362       break;
11363     case Hash("usdot_z_zzz_s"):
11364       usdot(vform, zda, zn, zm);
11365       break;
11366     default:
11367       VIXL_UNIMPLEMENTED();
11368       break;
11369   }
11370 }
11371 
11372 void Simulator::VisitSVEMovprfx(const Instruction* instr) {
11373   VectorFormat vform = instr->GetSVEVectorFormat();
11374   SimVRegister& zn = ReadVRegister(instr->GetRn());
11375   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11376   SimVRegister& zd = ReadVRegister(instr->GetRd());
11377 
11378   switch (instr->Mask(SVEMovprfxMask)) {
11379     case MOVPRFX_z_p_z:
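      // Bit 16 ('M') selects merging (1) or zeroing (0) predication.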
11380       if (instr->ExtractBit(16)) {
11381         mov_merging(vform, zd, pg, zn);
11382       } else {
11383         mov_zeroing(vform, zd, pg, zn);
11384       }
11385       break;
11386     default:
11387       VIXL_UNIMPLEMENTED();
11388       break;
11389   }
11390 }
11391 
11392 void Simulator::VisitSVEIntReduction(const Instruction* instr) {
11393   VectorFormat vform = instr->GetSVEVectorFormat();
11394   SimVRegister& vd = ReadVRegister(instr->GetRd());
11395   SimVRegister& zn = ReadVRegister(instr->GetRn());
11396   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11397 
11398   if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
11399     switch (instr->Mask(SVEIntReductionLogicalMask)) {
11400       case ANDV_r_p_z:
11401         andv(vform, vd, pg, zn);
11402         break;
11403       case EORV_r_p_z:
11404         eorv(vform, vd, pg, zn);
11405         break;
11406       case ORV_r_p_z:
11407         orv(vform, vd, pg, zn);
11408         break;
11409       default:
11410         VIXL_UNIMPLEMENTED();
11411         break;
11412     }
11413   } else {
11414     switch (instr->Mask(SVEIntReductionMask)) {
11415       case SADDV_r_p_z:
11416         saddv(vform, vd, pg, zn);
11417         break;
11418       case SMAXV_r_p_z:
11419         smaxv(vform, vd, pg, zn);
11420         break;
11421       case SMINV_r_p_z:
11422         sminv(vform, vd, pg, zn);
11423         break;
11424       case UADDV_r_p_z:
11425         uaddv(vform, vd, pg, zn);
11426         break;
11427       case UMAXV_r_p_z:
11428         umaxv(vform, vd, pg, zn);
11429         break;
11430       case UMINV_r_p_z:
11431         uminv(vform, vd, pg, zn);
11432         break;
11433       default:
11434         VIXL_UNIMPLEMENTED();
11435         break;
11436     }
11437   }
11438 }
11439 
11440 void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) {
11441   VectorFormat vform = instr->GetSVEVectorFormat();
11442   SimVRegister& zn = ReadVRegister(instr->GetRn());
11443 
11444   SimVRegister result;
11445   switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
11446     case ABS_z_p_z:
11447       abs(vform, result, zn);
11448       break;
11449     case CLS_z_p_z:
11450       cls(vform, result, zn);
11451       break;
11452     case CLZ_z_p_z:
11453       clz(vform, result, zn);
11454       break;
11455     case CNOT_z_p_z:
11456       cnot(vform, result, zn);
11457       break;
11458     case CNT_z_p_z:
11459       cnt(vform, result, zn);
11460       break;
11461     case FABS_z_p_z:
11462       fabs_(vform, result, zn);
11463       break;
11464     case FNEG_z_p_z:
11465       fneg(vform, result, zn);
11466       break;
11467     case NEG_z_p_z:
11468       neg(vform, result, zn);
11469       break;
11470     case NOT_z_p_z:
11471       not_(vform, result, zn);
11472       break;
11473     case SXTB_z_p_z:
11474     case SXTH_z_p_z:
11475     case SXTW_z_p_z:
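      // Bits <18:17> select the source width: 8 << n bits (B, H or W).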
11476       sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11477       break;
11478     case UXTB_z_p_z:
11479     case UXTH_z_p_z:
11480     case UXTW_z_p_z:
11481       uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
11482       break;
11483     default:
11484       VIXL_UNIMPLEMENTED();
11485       break;
11486   }
11487 
11488   SimVRegister& zd = ReadVRegister(instr->GetRd());
11489   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
11490   mov_merging(vform, zd, pg, result);
11491 }
11492 
11493 void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
11494   // There is only one instruction in this group.
11495   VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i);
11496 
11497   VectorFormat vform = instr->GetSVEVectorFormat();
11498   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
11499   SimVRegister& zd = ReadVRegister(instr->GetRd());
11500 
11501   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
11502 
11503   SimVRegister result;
11504   switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
11505     case FCPY_z_p_i: {
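      // The 8-bit immediate is a standard AArch64 FP modified immediate;
      // expand it to double precision, then narrow the raw bits to the lane
      // size.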
11506       int imm8 = instr->ExtractBits(12, 5);
11507       uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform),
11508                                            Instruction::Imm8ToFP64(imm8));
11509       dup_immediate(vform, result, value);
11510       break;
11511     }
11512     default:
11513       VIXL_UNIMPLEMENTED();
11514       break;
11515   }
11516   mov_merging(vform, zd, pg, result);
11517 }
11518 
11519 void Simulator::VisitSVEIntAddSubtractImm_Unpredicated(
11520     const Instruction* instr) {
11521   VectorFormat vform = instr->GetSVEVectorFormat();
11522   SimVRegister& zd = ReadVRegister(instr->GetRd());
11523   SimVRegister scratch;
11524 
11525   uint64_t imm = instr->GetImmSVEIntWideUnsigned();
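  // The 'sh' bit (13) optionally shifts the 8-bit immediate left by eight.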
11526   imm <<= instr->ExtractBit(13) * 8;
11527 
11528   switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
11529     case ADD_z_zi:
11530       add_uint(vform, zd, zd, imm);
11531       break;
11532     case SQADD_z_zi:
11533       add_uint(vform, zd, zd, imm).SignedSaturate(vform);
11534       break;
11535     case SQSUB_z_zi:
11536       sub_uint(vform, zd, zd, imm).SignedSaturate(vform);
11537       break;
11538     case SUBR_z_zi:
11539       dup_immediate(vform, scratch, imm);
11540       sub(vform, zd, scratch, zd);
11541       break;
11542     case SUB_z_zi:
11543       sub_uint(vform, zd, zd, imm);
11544       break;
11545     case UQADD_z_zi:
11546       add_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11547       break;
11548     case UQSUB_z_zi:
11549       sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
11550       break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
11553   }
11554 }
11555 
11556 void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) {
11557   SimVRegister& zd = ReadVRegister(instr->GetRd());
11558 
11559   VectorFormat format = instr->GetSVEVectorFormat();
11560   int64_t imm = instr->GetImmSVEIntWideSigned();
11561   int shift = instr->ExtractBit(13) * 8;
11562   imm *= 1 << shift;
11563 
11564   switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
11565     case DUP_z_i:
11566       // The encoding of byte-sized lanes with lsl #8 is undefined.
11567       if ((format == kFormatVnB) && (shift == 8)) {
11568         VIXL_UNIMPLEMENTED();
11569       } else {
11570         dup_immediate(format, zd, imm);
11571       }
11572       break;
11573     default:
11574       VIXL_UNIMPLEMENTED();
11575       break;
11576   }
11577 }
11578 
11579 void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) {
11580   VectorFormat vform = instr->GetSVEVectorFormat();
11581   SimVRegister& zd = ReadVRegister(instr->GetRd());
11582 
11583   switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
11584     case FDUP_z_i:
11585       switch (vform) {
11586         case kFormatVnH:
11587           dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16()));
11588           break;
11589         case kFormatVnS:
11590           dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32()));
11591           break;
11592         case kFormatVnD:
11593           dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64()));
11594           break;
11595         default:
11596           VIXL_UNIMPLEMENTED();
11597       }
11598       break;
11599     default:
11600       VIXL_UNIMPLEMENTED();
11601       break;
11602   }
11603 }
11604 
11605 void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
11606     const Instruction* instr) {
11607   switch (instr->Mask(
11608       SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
11609     case LD1H_z_p_bz_s_x32_scaled:
11610     case LD1SH_z_p_bz_s_x32_scaled:
11611     case LDFF1H_z_p_bz_s_x32_scaled:
11612     case LDFF1SH_z_p_bz_s_x32_scaled:
11613       break;
11614     default:
11615       VIXL_UNIMPLEMENTED();
11616       break;
11617   }
11618 
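  // Bit 22 selects sign- (SXTW) or zero-extension (UXTW) of the 32-bit
  // vector offsets.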
11619   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11620   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11621 }
11622 
11623 void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
11624     const Instruction* instr) {
11625   switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
11626     case LD1B_z_p_bz_s_x32_unscaled:
11627     case LD1H_z_p_bz_s_x32_unscaled:
11628     case LD1SB_z_p_bz_s_x32_unscaled:
11629     case LD1SH_z_p_bz_s_x32_unscaled:
11630     case LD1W_z_p_bz_s_x32_unscaled:
11631     case LDFF1B_z_p_bz_s_x32_unscaled:
11632     case LDFF1H_z_p_bz_s_x32_unscaled:
11633     case LDFF1SB_z_p_bz_s_x32_unscaled:
11634     case LDFF1SH_z_p_bz_s_x32_unscaled:
11635     case LDFF1W_z_p_bz_s_x32_unscaled:
11636       break;
11637     default:
11638       VIXL_UNIMPLEMENTED();
11639       break;
11640   }
11641 
11642   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11643   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11644 }
11645 
11646 void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm(
11647     const Instruction* instr) {
11648   switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
11649     case LD1B_z_p_ai_s:
11650       VIXL_UNIMPLEMENTED();
11651       break;
11652     case LD1H_z_p_ai_s:
11653       VIXL_UNIMPLEMENTED();
11654       break;
11655     case LD1SB_z_p_ai_s:
11656       VIXL_UNIMPLEMENTED();
11657       break;
11658     case LD1SH_z_p_ai_s:
11659       VIXL_UNIMPLEMENTED();
11660       break;
11661     case LD1W_z_p_ai_s:
11662       VIXL_UNIMPLEMENTED();
11663       break;
11664     case LDFF1B_z_p_ai_s:
11665       VIXL_UNIMPLEMENTED();
11666       break;
11667     case LDFF1H_z_p_ai_s:
11668       VIXL_UNIMPLEMENTED();
11669       break;
11670     case LDFF1SB_z_p_ai_s:
11671       VIXL_UNIMPLEMENTED();
11672       break;
11673     case LDFF1SH_z_p_ai_s:
11674       VIXL_UNIMPLEMENTED();
11675       break;
11676     case LDFF1W_z_p_ai_s:
11677       VIXL_UNIMPLEMENTED();
11678       break;
11679     default:
11680       VIXL_UNIMPLEMENTED();
11681       break;
11682   }
11683 }
11684 
11685 void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
11686     const Instruction* instr) {
11687   switch (
11688       instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
11689     case LD1W_z_p_bz_s_x32_scaled:
11690     case LDFF1W_z_p_bz_s_x32_scaled:
11691       break;
11692     default:
11693       VIXL_UNIMPLEMENTED();
11694       break;
11695   }
11696 
11697   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11698   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
11699 }
11700 
11701 void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
11702     const Instruction* instr) {
11703   switch (
11704       instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
11705     // Ignore prefetch hint instructions.
11706     case PRFB_i_p_bz_s_x32_scaled:
11707     case PRFD_i_p_bz_s_x32_scaled:
11708     case PRFH_i_p_bz_s_x32_scaled:
11709     case PRFW_i_p_bz_s_x32_scaled:
11710       break;
11711     default:
11712       VIXL_UNIMPLEMENTED();
11713       break;
11714   }
11715 }
11716 
11717 void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm(
11718     const Instruction* instr) {
11719   switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
11720     // Ignore prefetch hint instructions.
11721     case PRFB_i_p_ai_s:
11722     case PRFD_i_p_ai_s:
11723     case PRFH_i_p_ai_s:
11724     case PRFW_i_p_ai_s:
11725       break;
11726     default:
11727       VIXL_UNIMPLEMENTED();
11728       break;
11729   }
11730 }
11731 
11732 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm(
11733     const Instruction* instr) {
11734   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
11735     // Ignore prefetch hint instructions.
11736     case PRFB_i_p_bi_s:
11737     case PRFD_i_p_bi_s:
11738     case PRFH_i_p_bi_s:
11739     case PRFW_i_p_bi_s:
11740       break;
11741     default:
11742       VIXL_UNIMPLEMENTED();
11743       break;
11744   }
11745 }
11746 
11747 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar(
11748     const Instruction* instr) {
11749   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
11750     // Ignore prefetch hint instructions.
11751     case PRFB_i_p_br_s:
11752     case PRFD_i_p_br_s:
11753     case PRFH_i_p_br_s:
11754     case PRFW_i_p_br_s:
11755       if (instr->GetRm() == kZeroRegCode) {
11756         VIXL_UNIMPLEMENTED();
11757       }
11758       break;
11759     default:
11760       VIXL_UNIMPLEMENTED();
11761       break;
11762   }
11763 }
11764 
11765 void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) {
11766   bool is_signed;
11767   switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
11768     case LD1RB_z_p_bi_u8:
11769     case LD1RB_z_p_bi_u16:
11770     case LD1RB_z_p_bi_u32:
11771     case LD1RB_z_p_bi_u64:
11772     case LD1RH_z_p_bi_u16:
11773     case LD1RH_z_p_bi_u32:
11774     case LD1RH_z_p_bi_u64:
11775     case LD1RW_z_p_bi_u32:
11776     case LD1RW_z_p_bi_u64:
11777     case LD1RD_z_p_bi_u64:
11778       is_signed = false;
11779       break;
11780     case LD1RSB_z_p_bi_s16:
11781     case LD1RSB_z_p_bi_s32:
11782     case LD1RSB_z_p_bi_s64:
11783     case LD1RSH_z_p_bi_s32:
11784     case LD1RSH_z_p_bi_s64:
11785     case LD1RSW_z_p_bi_s64:
11786       is_signed = true;
11787       break;
11788     default:
11789       // This encoding group is complete, so no other values should be possible.
11790       VIXL_UNREACHABLE();
11791       is_signed = false;
11792       break;
11793   }
11794 
11795   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
11796   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13);
11797   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
11798   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
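  // The unsigned 6-bit immediate offset is scaled by the memory access size.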
11799   uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
11800   uint64_t base = ReadXRegister(instr->GetRn()) + offset;
11801   VectorFormat unpack_vform =
11802       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
11803   SimVRegister temp;
11804   ld1r(vform, unpack_vform, temp, base, is_signed);
11805   mov_zeroing(vform,
11806               ReadVRegister(instr->GetRt()),
11807               ReadPRegister(instr->GetPgLow8()),
11808               temp);
11809 }
11810 
11811 void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) {
11812   switch (instr->Mask(SVELoadPredicateRegisterMask)) {
11813     case LDR_p_bi: {
11814       SimPRegister& pt = ReadPRegister(instr->GetPt());
11815       int pl = GetPredicateLengthInBytes();
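      // The signed 9-bit offset is split across bits <21:16> and <12:10>,
      // and is scaled by the predicate length (PL).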
11816       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
11817       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
11818       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
11819       for (int i = 0; i < pl; i++) {
11820         pt.Insert(i, MemRead<uint8_t>(address + i));
11821       }
11822       LogPRead(instr->GetPt(), address);
11823       break;
11824     }
11825     default:
11826       VIXL_UNIMPLEMENTED();
11827       break;
11828   }
11829 }
11830 
11831 void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
11832   switch (instr->Mask(SVELoadVectorRegisterMask)) {
11833     case LDR_z_bi: {
11834       SimVRegister& zt = ReadVRegister(instr->GetRt());
11835       int vl = GetVectorLengthInBytes();
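      // As for LDR (predicate), the split 9-bit offset is scaled, here by
      // the vector length (VL).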
11836       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
11837       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
11838       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
11839       for (int i = 0; i < vl; i++) {
11840         zt.Insert(i, MemRead<uint8_t>(address + i));
11841       }
11842       LogZRead(instr->GetRt(), address);
11843       break;
11844     }
11845     default:
11846       VIXL_UNIMPLEMENTED();
11847       break;
11848   }
11849 }
11850 
11851 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
11852     const Instruction* instr) {
11853   switch (instr->Mask(
11854       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
11855     case LD1D_z_p_bz_d_x32_scaled:
11856     case LD1H_z_p_bz_d_x32_scaled:
11857     case LD1SH_z_p_bz_d_x32_scaled:
11858     case LD1SW_z_p_bz_d_x32_scaled:
11859     case LD1W_z_p_bz_d_x32_scaled:
11860     case LDFF1H_z_p_bz_d_x32_scaled:
11861     case LDFF1W_z_p_bz_d_x32_scaled:
11862     case LDFF1D_z_p_bz_d_x32_scaled:
11863     case LDFF1SH_z_p_bz_d_x32_scaled:
11864     case LDFF1SW_z_p_bz_d_x32_scaled:
11865       break;
11866     default:
11867       VIXL_UNIMPLEMENTED();
11868       break;
11869   }
11870 
11871   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11872   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
11873 }
11874 
11875 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
11876     const Instruction* instr) {
11877   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
11878     case LD1D_z_p_bz_d_64_scaled:
11879     case LD1H_z_p_bz_d_64_scaled:
11880     case LD1SH_z_p_bz_d_64_scaled:
11881     case LD1SW_z_p_bz_d_64_scaled:
11882     case LD1W_z_p_bz_d_64_scaled:
11883     case LDFF1H_z_p_bz_d_64_scaled:
11884     case LDFF1W_z_p_bz_d_64_scaled:
11885     case LDFF1D_z_p_bz_d_64_scaled:
11886     case LDFF1SH_z_p_bz_d_64_scaled:
11887     case LDFF1SW_z_p_bz_d_64_scaled:
11888       break;
11889     default:
11890       VIXL_UNIMPLEMENTED();
11891       break;
11892   }
11893 
11894   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL);
11895 }
11896 
11897 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
11898     const Instruction* instr) {
11899   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
11900     case LD1B_z_p_bz_d_64_unscaled:
11901     case LD1D_z_p_bz_d_64_unscaled:
11902     case LD1H_z_p_bz_d_64_unscaled:
11903     case LD1SB_z_p_bz_d_64_unscaled:
11904     case LD1SH_z_p_bz_d_64_unscaled:
11905     case LD1SW_z_p_bz_d_64_unscaled:
11906     case LD1W_z_p_bz_d_64_unscaled:
11907     case LDFF1B_z_p_bz_d_64_unscaled:
11908     case LDFF1D_z_p_bz_d_64_unscaled:
11909     case LDFF1H_z_p_bz_d_64_unscaled:
11910     case LDFF1SB_z_p_bz_d_64_unscaled:
11911     case LDFF1SH_z_p_bz_d_64_unscaled:
11912     case LDFF1SW_z_p_bz_d_64_unscaled:
11913     case LDFF1W_z_p_bz_d_64_unscaled:
11914       break;
11915     default:
11916       VIXL_UNIMPLEMENTED();
11917       break;
11918   }
11919 
11920   SVEGatherLoadScalarPlusVectorHelper(instr,
11921                                       kFormatVnD,
11922                                       NO_SVE_OFFSET_MODIFIER);
11923 }
11924 
11925 void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
11926     const Instruction* instr) {
11927   switch (instr->Mask(
11928       SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
11929     case LD1B_z_p_bz_d_x32_unscaled:
11930     case LD1D_z_p_bz_d_x32_unscaled:
11931     case LD1H_z_p_bz_d_x32_unscaled:
11932     case LD1SB_z_p_bz_d_x32_unscaled:
11933     case LD1SH_z_p_bz_d_x32_unscaled:
11934     case LD1SW_z_p_bz_d_x32_unscaled:
11935     case LD1W_z_p_bz_d_x32_unscaled:
11936     case LDFF1B_z_p_bz_d_x32_unscaled:
11937     case LDFF1H_z_p_bz_d_x32_unscaled:
11938     case LDFF1W_z_p_bz_d_x32_unscaled:
11939     case LDFF1D_z_p_bz_d_x32_unscaled:
11940     case LDFF1SB_z_p_bz_d_x32_unscaled:
11941     case LDFF1SH_z_p_bz_d_x32_unscaled:
11942     case LDFF1SW_z_p_bz_d_x32_unscaled:
11943       break;
11944     default:
11945       VIXL_UNIMPLEMENTED();
11946       break;
11947   }
11948 
11949   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
11950   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
11951 }
11952 
11953 void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm(
11954     const Instruction* instr) {
11955   switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
11956     case LD1B_z_p_ai_d:
11957     case LD1D_z_p_ai_d:
11958     case LD1H_z_p_ai_d:
11959     case LD1SB_z_p_ai_d:
11960     case LD1SH_z_p_ai_d:
11961     case LD1SW_z_p_ai_d:
11962     case LD1W_z_p_ai_d:
11963     case LDFF1B_z_p_ai_d:
11964     case LDFF1D_z_p_ai_d:
11965     case LDFF1H_z_p_ai_d:
11966     case LDFF1SB_z_p_ai_d:
11967     case LDFF1SH_z_p_ai_d:
11968     case LDFF1SW_z_p_ai_d:
11969     case LDFF1W_z_p_ai_d:
11970       break;
11971     default:
11972       VIXL_UNIMPLEMENTED();
11973       break;
11974   }
11975   bool is_signed = instr->ExtractBit(14) == 0;
11976   bool is_ff = instr->ExtractBit(13) == 1;
11977   // Note that these instructions don't use the Dtype encoding.
11978   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
11979   uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2;
11980   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
11981   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
11982   if (is_ff) {
11983     VIXL_UNIMPLEMENTED();
11984   } else {
11985     SVEStructuredLoadHelper(kFormatVnD,
11986                             ReadPRegister(instr->GetPgLow8()),
11987                             instr->GetRt(),
11988                             addr,
11989                             is_signed);
11990   }
11991 }
11992 
11993 void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
11994     const Instruction* instr) {
11995   switch (
11996       instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
11997     // Ignore prefetch hint instructions.
11998     case PRFB_i_p_bz_d_64_scaled:
11999     case PRFD_i_p_bz_d_64_scaled:
12000     case PRFH_i_p_bz_d_64_scaled:
12001     case PRFW_i_p_bz_d_64_scaled:
12002       break;
12003     default:
12004       VIXL_UNIMPLEMENTED();
12005       break;
12006   }
12007 }
12008 
12009 void Simulator::
12010     VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
12011         const Instruction* instr) {
12012   switch (instr->Mask(
12013       SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12014     // Ignore prefetch hint instructions.
12015     case PRFB_i_p_bz_d_x32_scaled:
12016     case PRFD_i_p_bz_d_x32_scaled:
12017     case PRFH_i_p_bz_d_x32_scaled:
12018     case PRFW_i_p_bz_d_x32_scaled:
12019       break;
12020     default:
12021       VIXL_UNIMPLEMENTED();
12022       break;
12023   }
12024 }
12025 
12026 void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm(
12027     const Instruction* instr) {
12028   switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
12029     // Ignore prefetch hint instructions.
12030     case PRFB_i_p_ai_d:
12031     case PRFD_i_p_ai_d:
12032     case PRFH_i_p_ai_d:
12033     case PRFW_i_p_ai_d:
12034       break;
12035     default:
12036       VIXL_UNIMPLEMENTED();
12037       break;
12038   }
12039 }
12040 
12041 void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
12042     const Instruction* instr) {
12043   bool is_signed;
12044   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
12045     case LDFF1B_z_p_br_u8:
12046     case LDFF1B_z_p_br_u16:
12047     case LDFF1B_z_p_br_u32:
12048     case LDFF1B_z_p_br_u64:
12049     case LDFF1H_z_p_br_u16:
12050     case LDFF1H_z_p_br_u32:
12051     case LDFF1H_z_p_br_u64:
12052     case LDFF1W_z_p_br_u32:
12053     case LDFF1W_z_p_br_u64:
12054     case LDFF1D_z_p_br_u64:
12055       is_signed = false;
12056       break;
12057     case LDFF1SB_z_p_br_s16:
12058     case LDFF1SB_z_p_br_s32:
12059     case LDFF1SB_z_p_br_s64:
12060     case LDFF1SH_z_p_br_s32:
12061     case LDFF1SH_z_p_br_s64:
12062     case LDFF1SW_z_p_br_s64:
12063       is_signed = true;
12064       break;
12065     default:
12066       // This encoding group is complete, so no other values should be possible.
12067       VIXL_UNREACHABLE();
12068       is_signed = false;
12069       break;
12070   }
12071 
12072   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12073   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12074   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12075   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12076   uint64_t offset = ReadXRegister(instr->GetRm());
12077   offset <<= msize_in_bytes_log2;
12078   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12079   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
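  // First-fault semantics: only the lowest active element is required to
  // fault; for later elements the FFR can be updated instead of faulting.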
12080   SVEFaultTolerantLoadHelper(vform,
12081                              ReadPRegister(instr->GetPgLow8()),
12082                              instr->GetRt(),
12083                              addr,
12084                              kSVEFirstFaultLoad,
12085                              is_signed);
12086 }
12087 
12088 void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
12089     const Instruction* instr) {
12090   bool is_signed = false;
12091   switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
12092     case LDNF1B_z_p_bi_u16:
12093     case LDNF1B_z_p_bi_u32:
12094     case LDNF1B_z_p_bi_u64:
12095     case LDNF1B_z_p_bi_u8:
12096     case LDNF1D_z_p_bi_u64:
12097     case LDNF1H_z_p_bi_u16:
12098     case LDNF1H_z_p_bi_u32:
12099     case LDNF1H_z_p_bi_u64:
12100     case LDNF1W_z_p_bi_u32:
12101     case LDNF1W_z_p_bi_u64:
12102       break;
12103     case LDNF1SB_z_p_bi_s16:
12104     case LDNF1SB_z_p_bi_s32:
12105     case LDNF1SB_z_p_bi_s64:
12106     case LDNF1SH_z_p_bi_s32:
12107     case LDNF1SH_z_p_bi_s64:
12108     case LDNF1SW_z_p_bi_s64:
12109       is_signed = true;
12110       break;
12111     default:
12112       VIXL_UNIMPLEMENTED();
12113       break;
12114   }
12115   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
12116   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
12117   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
12118   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12119   int vl = GetVectorLengthInBytes();
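  // The signed 4-bit immediate counts whole transfers, each covering
  // (VL / esize) * msize bytes.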
12120   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12121   uint64_t offset =
12122       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12123   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12124   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12125   SVEFaultTolerantLoadHelper(vform,
12126                              ReadPRegister(instr->GetPgLow8()),
12127                              instr->GetRt(),
12128                              addr,
12129                              kSVENonFaultLoad,
12130                              is_signed);
12131 }
12132 
12133 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
12134     const Instruction* instr) {
12135   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12136   VectorFormat vform = kFormatUndefined;
12137 
12138   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
12139     case LDNT1B_z_p_bi_contiguous:
12140       vform = kFormatVnB;
12141       break;
12142     case LDNT1D_z_p_bi_contiguous:
12143       vform = kFormatVnD;
12144       break;
12145     case LDNT1H_z_p_bi_contiguous:
12146       vform = kFormatVnH;
12147       break;
12148     case LDNT1W_z_p_bi_contiguous:
12149       vform = kFormatVnS;
12150       break;
12151     default:
12152       VIXL_UNIMPLEMENTED();
12153       break;
12154   }
12155   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12156   int vl = GetVectorLengthInBytes();
12157   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12158   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12159   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12160   SVEStructuredLoadHelper(vform,
12161                           pg,
12162                           instr->GetRt(),
12163                           addr,
12164                           /* is_signed = */ false);
12165 }
12166 
12167 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
12168     const Instruction* instr) {
12169   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12170   VectorFormat vform = kFormatUndefined;
12171 
12172   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
12173     case LDNT1B_z_p_br_contiguous:
12174       vform = kFormatVnB;
12175       break;
12176     case LDNT1D_z_p_br_contiguous:
12177       vform = kFormatVnD;
12178       break;
12179     case LDNT1H_z_p_br_contiguous:
12180       vform = kFormatVnH;
12181       break;
12182     case LDNT1W_z_p_br_contiguous:
12183       vform = kFormatVnS;
12184       break;
12185     default:
12186       VIXL_UNIMPLEMENTED();
12187       break;
12188   }
12189   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12190   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12191   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12192   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12193   SVEStructuredLoadHelper(vform,
12194                           pg,
12195                           instr->GetRt(),
12196                           addr,
12197                           /* is_signed = */ false);
12198 }
12199 
12200 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
12201     const Instruction* instr) {
12202   SimVRegister& zt = ReadVRegister(instr->GetRt());
12203   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12204 
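  // LD1RQ* loads and replicates a 16-byte block; the LD1RO* forms load and
  // replicate a 32-byte block.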
12205   uint64_t dwords = 2;
12206   VectorFormat vform_dst = kFormatVnQ;
12207   if ((form_hash_ == Hash("ld1rob_z_p_bi_u8")) ||
12208       (form_hash_ == Hash("ld1roh_z_p_bi_u16")) ||
12209       (form_hash_ == Hash("ld1row_z_p_bi_u32")) ||
12210       (form_hash_ == Hash("ld1rod_z_p_bi_u64"))) {
12211     dwords = 4;
12212     vform_dst = kFormatVnO;
12213   }
12214 
12215   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12216   uint64_t offset =
12217       instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes;
12218   int msz = instr->ExtractBits(24, 23);
12219   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12220 
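  // Load the block one doubleword at a time, zero the inactive elements,
  // then replicate the block to fill the vector.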
12221   for (unsigned i = 0; i < dwords; i++) {
12222     ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes));
12223   }
12224   mov_zeroing(vform, zt, pg, zt);
12225   dup_element(vform_dst, zt, zt, 0);
12226 }
12227 
12228 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
12229     const Instruction* instr) {
12230   SimVRegister& zt = ReadVRegister(instr->GetRt());
12231   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12232 
12233   uint64_t bytes = 16;
12234   VectorFormat vform_dst = kFormatVnQ;
12235   if ((form_hash_ == Hash("ld1rob_z_p_br_contiguous")) ||
12236       (form_hash_ == Hash("ld1roh_z_p_br_contiguous")) ||
12237       (form_hash_ == Hash("ld1row_z_p_br_contiguous")) ||
12238       (form_hash_ == Hash("ld1rod_z_p_br_contiguous"))) {
12239     bytes = 32;
12240     vform_dst = kFormatVnO;
12241   }
12242 
12243   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
12244   uint64_t offset = ReadXRegister(instr->GetRm());
12245   int msz = instr->ExtractBits(24, 23);
12246   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12247   offset <<= msz;
12248   for (unsigned i = 0; i < bytes; i++) {
12249     ld1(kFormatVnB, zt, i, addr + offset + i);
12250   }
12251   mov_zeroing(vform, zt, pg, zt);
12252   dup_element(vform_dst, zt, zt, 0);
12253 }
12254 
12255 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
12256     const Instruction* instr) {
12257   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
12258     case LD2B_z_p_bi_contiguous:
12259     case LD2D_z_p_bi_contiguous:
12260     case LD2H_z_p_bi_contiguous:
12261     case LD2W_z_p_bi_contiguous:
12262     case LD3B_z_p_bi_contiguous:
12263     case LD3D_z_p_bi_contiguous:
12264     case LD3H_z_p_bi_contiguous:
12265     case LD3W_z_p_bi_contiguous:
12266     case LD4B_z_p_bi_contiguous:
12267     case LD4D_z_p_bi_contiguous:
12268     case LD4H_z_p_bi_contiguous:
12269     case LD4W_z_p_bi_contiguous: {
12270       int vl = GetVectorLengthInBytes();
12271       int msz = instr->ExtractBits(24, 23);
12272       int reg_count = instr->ExtractBits(22, 21) + 1;
12273       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12274       LogicSVEAddressVector addr(
12275           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12276       addr.SetMsizeInBytesLog2(msz);
12277       addr.SetRegCount(reg_count);
12278       SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12279                               ReadPRegister(instr->GetPgLow8()),
12280                               instr->GetRt(),
12281                               addr);
12282       break;
12283     }
12284     default:
12285       VIXL_UNIMPLEMENTED();
12286       break;
12287   }
12288 }
12289 
12290 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar(
12291     const Instruction* instr) {
12292   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
12293     case LD2B_z_p_br_contiguous:
12294     case LD2D_z_p_br_contiguous:
12295     case LD2H_z_p_br_contiguous:
12296     case LD2W_z_p_br_contiguous:
12297     case LD3B_z_p_br_contiguous:
12298     case LD3D_z_p_br_contiguous:
12299     case LD3H_z_p_br_contiguous:
12300     case LD3W_z_p_br_contiguous:
12301     case LD4B_z_p_br_contiguous:
12302     case LD4D_z_p_br_contiguous:
12303     case LD4H_z_p_br_contiguous:
12304     case LD4W_z_p_br_contiguous: {
12305       int msz = instr->ExtractBits(24, 23);
12306       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
12307       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12308       LogicSVEAddressVector addr(
12309           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12310       addr.SetMsizeInBytesLog2(msz);
12311       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
12312       SVEStructuredLoadHelper(vform,
12313                               ReadPRegister(instr->GetPgLow8()),
12314                               instr->GetRt(),
12315                               addr,
12316                               false);
12317       break;
12318     }
12319     default:
12320       VIXL_UNIMPLEMENTED();
12321       break;
12322   }
12323 }
12324 
12325 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
12326     const Instruction* instr) {
12327   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
12328     case ST1H_z_p_bz_s_x32_scaled:
12329     case ST1W_z_p_bz_s_x32_scaled: {
12330       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12331       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
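      // The vector offsets are shifted left by the access size for the
      // scaled forms (bit 21 set).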
12332       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12333       uint64_t base = ReadXRegister(instr->GetRn());
12334       SVEOffsetModifier mod =
12335           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12336       LogicSVEAddressVector addr(base,
12337                                  &ReadVRegister(instr->GetRm()),
12338                                  kFormatVnS,
12339                                  mod,
12340                                  scale);
12341       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12342       SVEStructuredStoreHelper(kFormatVnS,
12343                                ReadPRegister(instr->GetPgLow8()),
12344                                instr->GetRt(),
12345                                addr);
12346       break;
12347     }
12348     default:
12349       VIXL_UNIMPLEMENTED();
12350       break;
12351   }
12352 }
12353 
12354 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
12355     const Instruction* instr) {
12356   switch (
12357       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
12358     case ST1B_z_p_bz_s_x32_unscaled:
12359     case ST1H_z_p_bz_s_x32_unscaled:
12360     case ST1W_z_p_bz_s_x32_unscaled: {
12361       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12362       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12363       uint64_t base = ReadXRegister(instr->GetRn());
12364       SVEOffsetModifier mod =
12365           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12366       LogicSVEAddressVector addr(base,
12367                                  &ReadVRegister(instr->GetRm()),
12368                                  kFormatVnS,
12369                                  mod);
12370       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12371       SVEStructuredStoreHelper(kFormatVnS,
12372                                ReadPRegister(instr->GetPgLow8()),
12373                                instr->GetRt(),
12374                                addr);
12375       break;
12376     }
12377     default:
12378       VIXL_UNIMPLEMENTED();
12379       break;
12380   }
12381 }
12382 
12383 void Simulator::VisitSVE32BitScatterStore_VectorPlusImm(
12384     const Instruction* instr) {
12385   int msz = 0;
12386   switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
12387     case ST1B_z_p_ai_s:
12388       msz = 0;
12389       break;
12390     case ST1H_z_p_ai_s:
12391       msz = 1;
12392       break;
12393     case ST1W_z_p_ai_s:
12394       msz = 2;
12395       break;
12396     default:
12397       VIXL_UNIMPLEMENTED();
12398       break;
12399   }
12400   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12401   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS);
12402   addr.SetMsizeInBytesLog2(msz);
12403   SVEStructuredStoreHelper(kFormatVnS,
12404                            ReadPRegister(instr->GetPgLow8()),
12405                            instr->GetRt(),
12406                            addr);
12407 }
12408 
12409 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
12410     const Instruction* instr) {
12411   switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
12412     case ST1D_z_p_bz_d_64_scaled:
12413     case ST1H_z_p_bz_d_64_scaled:
12414     case ST1W_z_p_bz_d_64_scaled: {
12415       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12416       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12417       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12418       uint64_t base = ReadXRegister(instr->GetRn());
12419       LogicSVEAddressVector addr(base,
12420                                  &ReadVRegister(instr->GetRm()),
12421                                  kFormatVnD,
12422                                  SVE_LSL,
12423                                  scale);
12424       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12425       SVEStructuredStoreHelper(kFormatVnD,
12426                                ReadPRegister(instr->GetPgLow8()),
12427                                instr->GetRt(),
12428                                addr);
12429       break;
12430     }
12431     default:
12432       VIXL_UNIMPLEMENTED();
12433       break;
12434   }
12435 }
12436 
12437 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
12438     const Instruction* instr) {
12439   switch (
12440       instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
12441     case ST1B_z_p_bz_d_64_unscaled:
12442     case ST1D_z_p_bz_d_64_unscaled:
12443     case ST1H_z_p_bz_d_64_unscaled:
12444     case ST1W_z_p_bz_d_64_unscaled: {
12445       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12446       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12447       uint64_t base = ReadXRegister(instr->GetRn());
12448       LogicSVEAddressVector addr(base,
12449                                  &ReadVRegister(instr->GetRm()),
12450                                  kFormatVnD,
12451                                  NO_SVE_OFFSET_MODIFIER);
12452       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12453       SVEStructuredStoreHelper(kFormatVnD,
12454                                ReadPRegister(instr->GetPgLow8()),
12455                                instr->GetRt(),
12456                                addr);
12457       break;
12458     }
12459     default:
12460       VIXL_UNIMPLEMENTED();
12461       break;
12462   }
12463 }
12464 
12465 void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
12466     const Instruction* instr) {
12467   switch (instr->Mask(
12468       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
12469     case ST1D_z_p_bz_d_x32_scaled:
12470     case ST1H_z_p_bz_d_x32_scaled:
12471     case ST1W_z_p_bz_d_x32_scaled: {
12472       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12473       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12474       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
12475       uint64_t base = ReadXRegister(instr->GetRn());
12476       SVEOffsetModifier mod =
12477           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12478       LogicSVEAddressVector addr(base,
12479                                  &ReadVRegister(instr->GetRm()),
12480                                  kFormatVnD,
12481                                  mod,
12482                                  scale);
12483       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12484       SVEStructuredStoreHelper(kFormatVnD,
12485                                ReadPRegister(instr->GetPgLow8()),
12486                                instr->GetRt(),
12487                                addr);
12488       break;
12489     }
12490     default:
12491       VIXL_UNIMPLEMENTED();
12492       break;
12493   }
12494 }
12495 
12496 void Simulator::
12497     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
12498         const Instruction* instr) {
12499   switch (instr->Mask(
12500       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
12501     case ST1B_z_p_bz_d_x32_unscaled:
12502     case ST1D_z_p_bz_d_x32_unscaled:
12503     case ST1H_z_p_bz_d_x32_unscaled:
12504     case ST1W_z_p_bz_d_x32_unscaled: {
12505       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12506       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
12507       uint64_t base = ReadXRegister(instr->GetRn());
12508       SVEOffsetModifier mod =
12509           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
12510       LogicSVEAddressVector addr(base,
12511                                  &ReadVRegister(instr->GetRm()),
12512                                  kFormatVnD,
12513                                  mod);
12514       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12515       SVEStructuredStoreHelper(kFormatVnD,
12516                                ReadPRegister(instr->GetPgLow8()),
12517                                instr->GetRt(),
12518                                addr);
12519       break;
12520     }
12521     default:
12522       VIXL_UNIMPLEMENTED();
12523       break;
12524   }
12525 }
12526 
12527 void Simulator::VisitSVE64BitScatterStore_VectorPlusImm(
12528     const Instruction* instr) {
12529   int msz = 0;
12530   switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
12531     case ST1B_z_p_ai_d:
12532       msz = 0;
12533       break;
12534     case ST1D_z_p_ai_d:
12535       msz = 3;
12536       break;
12537     case ST1H_z_p_ai_d:
12538       msz = 1;
12539       break;
12540     case ST1W_z_p_ai_d:
12541       msz = 2;
12542       break;
12543     default:
12544       VIXL_UNIMPLEMENTED();
12545       break;
12546   }
12547   uint64_t imm = instr->ExtractBits(20, 16) << msz;
12548   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
12549   addr.SetMsizeInBytesLog2(msz);
12550   SVEStructuredStoreHelper(kFormatVnD,
12551                            ReadPRegister(instr->GetPgLow8()),
12552                            instr->GetRt(),
12553                            addr);
12554 }
12555 
12556 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
12557     const Instruction* instr) {
12558   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12559   VectorFormat vform = kFormatUndefined;
12560 
12561   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
12562     case STNT1B_z_p_bi_contiguous:
12563       vform = kFormatVnB;
12564       break;
12565     case STNT1D_z_p_bi_contiguous:
12566       vform = kFormatVnD;
12567       break;
12568     case STNT1H_z_p_bi_contiguous:
12569       vform = kFormatVnH;
12570       break;
12571     case STNT1W_z_p_bi_contiguous:
12572       vform = kFormatVnS;
12573       break;
12574     default:
12575       VIXL_UNIMPLEMENTED();
12576       break;
12577   }
12578   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12579   int vl = GetVectorLengthInBytes();
12580   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
12581   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12582   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12583   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12584 }
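
// The immediate for the non-temporal forms is a signed multiple of the whole
// vector length. For example, on an implementation with 256-bit vectors
// (VL = 32 bytes), `stnt1b {z0.b}, p0, [x0, #-2, mul vl]` starts the store at
// x0 - 64.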
12585 
12586 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
12587     const Instruction* instr) {
12588   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12589   VectorFormat vform = kFormatUndefined;
12590 
12591   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
12592     case STNT1B_z_p_br_contiguous:
12593       vform = kFormatVnB;
12594       break;
12595     case STNT1D_z_p_br_contiguous:
12596       vform = kFormatVnD;
12597       break;
12598     case STNT1H_z_p_br_contiguous:
12599       vform = kFormatVnH;
12600       break;
12601     case STNT1W_z_p_br_contiguous:
12602       vform = kFormatVnS;
12603       break;
12604     default:
12605       VIXL_UNIMPLEMENTED();
12606       break;
12607   }
12608   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
12609   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
12610   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12611   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12612   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
12613 }
12614 
12615 void Simulator::VisitSVEContiguousStore_ScalarPlusImm(
12616     const Instruction* instr) {
12617   switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
12618     case ST1B_z_p_bi:
12619     case ST1D_z_p_bi:
12620     case ST1H_z_p_bi:
12621     case ST1W_z_p_bi: {
12622       int vl = GetVectorLengthInBytes();
12623       int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
12624       int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
12625       VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
12626       int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
12627       uint64_t offset =
12628           (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
12629       VectorFormat vform =
12630           SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
12631       LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12632       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
12633       SVEStructuredStoreHelper(vform,
12634                                ReadPRegister(instr->GetPgLow8()),
12635                                instr->GetRt(),
12636                                addr);
12637       break;
12638     }
12639     default:
12640       VIXL_UNIMPLEMENTED();
12641       break;
12642   }
12643 }
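
// When the memory element is narrower than the vector element, each register
// only transfers VL >> vl_divisor_log2 bytes, and the immediate is scaled by
// that amount. For example, assuming VL = 32 bytes,
// `st1b {z0.s}, p0, [x0, #1, mul vl]` has a one-byte msize and a four-byte
// esize, so it writes the low byte of each of the eight .s lanes to
// [x0 + 8, x0 + 15].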
12644 
12645 void Simulator::VisitSVEContiguousStore_ScalarPlusScalar(
12646     const Instruction* instr) {
12647   switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
12648     case ST1B_z_p_br:
12649     case ST1D_z_p_br:
12650     case ST1H_z_p_br:
12651     case ST1W_z_p_br: {
12652       uint64_t offset = ReadXRegister(instr->GetRm());
12653       offset <<= instr->ExtractBits(24, 23);
12654       VectorFormat vform =
12655           SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
12656       LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
12657       addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
12658       SVEStructuredStoreHelper(vform,
12659                                ReadPRegister(instr->GetPgLow8()),
12660                                instr->GetRt(),
12661                                addr);
12662       break;
12663     }
12664     default:
12665       VIXL_UNIMPLEMENTED();
12666       break;
12667   }
12668 }
12669 
12670 void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
12671     const Instruction* instr) {
12672   VectorFormat vform = instr->GetSVEVectorFormat();
12673   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
12674   SimVRegister z_result;
12675 
12676   switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
12677     case CPY_z_p_v:
12678       dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0);
12679       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
12680       break;
12681     default:
12682       VIXL_UNIMPLEMENTED();
12683       break;
12684   }
12685 }
12686 
12687 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm(
12688     const Instruction* instr) {
12689   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
12690     case ST2B_z_p_bi_contiguous:
12691     case ST2D_z_p_bi_contiguous:
12692     case ST2H_z_p_bi_contiguous:
12693     case ST2W_z_p_bi_contiguous:
12694     case ST3B_z_p_bi_contiguous:
12695     case ST3D_z_p_bi_contiguous:
12696     case ST3H_z_p_bi_contiguous:
12697     case ST3W_z_p_bi_contiguous:
12698     case ST4B_z_p_bi_contiguous:
12699     case ST4D_z_p_bi_contiguous:
12700     case ST4H_z_p_bi_contiguous:
12701     case ST4W_z_p_bi_contiguous: {
12702       int vl = GetVectorLengthInBytes();
12703       int msz = instr->ExtractBits(24, 23);
12704       int reg_count = instr->ExtractBits(22, 21) + 1;
12705       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
12706       LogicSVEAddressVector addr(
12707           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12708       addr.SetMsizeInBytesLog2(msz);
12709       addr.SetRegCount(reg_count);
12710       SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
12711                                ReadPRegister(instr->GetPgLow8()),
12712                                instr->GetRt(),
12713                                addr);
12714       break;
12715     }
12716     default:
12717       VIXL_UNIMPLEMENTED();
12718       break;
12719   }
12720 }
12721 
12722 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
12723     const Instruction* instr) {
12724   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
12725     case ST2B_z_p_br_contiguous:
12726     case ST2D_z_p_br_contiguous:
12727     case ST2H_z_p_br_contiguous:
12728     case ST2W_z_p_br_contiguous:
12729     case ST3B_z_p_br_contiguous:
12730     case ST3D_z_p_br_contiguous:
12731     case ST3H_z_p_br_contiguous:
12732     case ST3W_z_p_br_contiguous:
12733     case ST4B_z_p_br_contiguous:
12734     case ST4D_z_p_br_contiguous:
12735     case ST4H_z_p_br_contiguous:
12736     case ST4W_z_p_br_contiguous: {
12737       int msz = instr->ExtractBits(24, 23);
12738       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
12739       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
12740       LogicSVEAddressVector addr(
12741           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
12742       addr.SetMsizeInBytesLog2(msz);
12743       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
12744       SVEStructuredStoreHelper(vform,
12745                                ReadPRegister(instr->GetPgLow8()),
12746                                instr->GetRt(),
12747                                addr);
12748       break;
12749     }
12750     default:
12751       VIXL_UNIMPLEMENTED();
12752       break;
12753   }
12754 }
12755 
12756 void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
12757   switch (instr->Mask(SVEStorePredicateRegisterMask)) {
12758     case STR_p_bi: {
12759       SimPRegister& pt = ReadPRegister(instr->GetPt());
12760       int pl = GetPredicateLengthInBytes();
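      // The nine-bit immediate is split in the encoding: bits <21:16> hold its
      // high six bits and bits <12:10> its low three. The value is then
      // sign-extended and multiplied by the predicate length, so with
      // PL = 4 bytes (a 256-bit vector), `str p0, [x0, #-1, mul vl]` writes
      // to [x0 - 4, x0 - 1].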
12761       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12762       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12763       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl;
12764       for (int i = 0; i < pl; i++) {
12765         MemWrite(address + i, pt.GetLane<uint8_t>(i));
12766       }
12767       LogPWrite(instr->GetPt(), address);
12768       break;
12769     }
12770     default:
12771       VIXL_UNIMPLEMENTED();
12772       break;
12773   }
12774 }
12775 
12776 void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
12777   switch (instr->Mask(SVEStoreVectorRegisterMask)) {
12778     case STR_z_bi: {
12779       SimVRegister& zt = ReadVRegister(instr->GetRt());
12780       int vl = GetVectorLengthInBytes();
12781       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
12782       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
12783       uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl;
12784       for (int i = 0; i < vl; i++) {
12785         MemWrite(address + i, zt.GetLane<uint8_t>(i));
12786       }
12787       LogZWrite(instr->GetRt(), address);
12788       break;
12789     }
12790     default:
12791       VIXL_UNIMPLEMENTED();
12792       break;
12793   }
12794 }
12795 
12796 void Simulator::VisitSVEMulIndex(const Instruction* instr) {
12797   VectorFormat vform = instr->GetSVEVectorFormat();
12798   SimVRegister& zda = ReadVRegister(instr->GetRd());
12799   SimVRegister& zn = ReadVRegister(instr->GetRn());
12800   std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex();
12801   SimVRegister zm = ReadVRegister(zm_and_index.first);
12802   int index = zm_and_index.second;
12803 
12804   SimVRegister temp;
12805   dup_elements_to_segments(vform, temp, zm, index);
12806 
12807   switch (form_hash_) {
12808     case Hash("sdot_z_zzzi_d"):
12809     case Hash("sdot_z_zzzi_s"):
12810       sdot(vform, zda, zn, temp);
12811       break;
12812     case Hash("udot_z_zzzi_d"):
12813     case Hash("udot_z_zzzi_s"):
12814       udot(vform, zda, zn, temp);
12815       break;
12816     case Hash("sudot_z_zzzi_s"):
12817       usdot(vform, zda, temp, zn);
12818       break;
12819     case Hash("usdot_z_zzzi_s"):
12820       usdot(vform, zda, zn, temp);
12821       break;
12822     default:
12823       VIXL_UNIMPLEMENTED();
12824       break;
12825   }
12826 }
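
// The indexed dot products above reuse the vector-by-vector logic:
// dup_elements_to_segments() broadcasts the selected element of zm within
// each 128-bit segment, after which sdot/udot/usdot behave exactly as they do
// for the unindexed forms. SUDOT has no helper of its own; as the case above
// shows, it is usdot with the vector operands swapped.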
12827 
12828 void Simulator::SimulateMatrixMul(const Instruction* instr) {
12829   VectorFormat vform = kFormatVnS;
12830   SimVRegister& dn = ReadVRegister(instr->GetRd());
12831   SimVRegister& n = ReadVRegister(instr->GetRn());
12832   SimVRegister& m = ReadVRegister(instr->GetRm());
12833 
12834   bool n_signed = false;
12835   bool m_signed = false;
12836   switch (form_hash_) {
12837     case Hash("smmla_asimdsame2_g"):
12838       vform = kFormat4S;
12839       VIXL_FALLTHROUGH();
12840     case Hash("smmla_z_zzz"):
12841       n_signed = m_signed = true;
12842       break;
12843     case Hash("ummla_asimdsame2_g"):
12844       vform = kFormat4S;
12845       VIXL_FALLTHROUGH();
12846     case Hash("ummla_z_zzz"):
12847       // Nothing to do.
12848       break;
12849     case Hash("usmmla_asimdsame2_g"):
12850       vform = kFormat4S;
12851       VIXL_FALLTHROUGH();
12852     case Hash("usmmla_z_zzz"):
12853       m_signed = true;
12854       break;
12855     default:
12856       VIXL_UNIMPLEMENTED();
12857       break;
12858   }
12859   matmul(vform, dn, n, m, n_signed, m_signed);
12860 }
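
// For reference, each *MMLA form treats every 128-bit segment of the
// destination as a 2x2 matrix of 32-bit accumulators, and adds to it the
// product of a 2x8 and an 8x2 matrix of byte elements taken from the two
// sources. The n_signed/m_signed flags only select how those bytes are
// extended: SMMLA is signed by signed, UMMLA unsigned by unsigned, and USMMLA
// takes an unsigned first operand and a signed second.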
12861 
12862 void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
12863   VectorFormat vform = instr->GetSVEVectorFormat();
12864   SimVRegister& zdn = ReadVRegister(instr->GetRd());
12865   SimVRegister& zn = ReadVRegister(instr->GetRn());
12866   SimVRegister& zm = ReadVRegister(instr->GetRm());
12867 
12868   switch (form_hash_) {
12869     case Hash("fmmla_z_zzz_s"):
12870     case Hash("fmmla_z_zzz_d"):
12871       fmatmul(vform, zdn, zn, zm);
12872       break;
12873     default:
12874       VIXL_UNIMPLEMENTED();
12875       break;
12876   }
12877 }
12878 
12879 void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) {
12880   SimPRegister& pd = ReadPRegister(instr->GetPd());
12881   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
12882   SimPRegister& pn = ReadPRegister(instr->GetPn());
12883   SimPRegister result;
12884 
12885   switch (instr->Mask(SVEPartitionBreakConditionMask)) {
12886     case BRKAS_p_p_p_z:
12887     case BRKA_p_p_p:
12888       brka(result, pg, pn);
12889       break;
12890     case BRKBS_p_p_p_z:
12891     case BRKB_p_p_p:
12892       brkb(result, pg, pn);
12893       break;
12894     default:
12895       VIXL_UNIMPLEMENTED();
12896       break;
12897   }
12898 
12899   if (instr->ExtractBit(4) == 1) {
12900     mov_merging(pd, pg, result);
12901   } else {
12902     mov_zeroing(pd, pg, result);
12903   }
12904 
12905   // Set the NZCV flags if needed.
12906   if (instr->ExtractBit(22) == 1) {
12907     PredTest(kFormatVnB, pg, pd);
12908   }
12909 }
12910 
12911 void Simulator::VisitSVEPropagateBreakToNextPartition(
12912     const Instruction* instr) {
12913   SimPRegister& pdm = ReadPRegister(instr->GetPd());
12914   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
12915   SimPRegister& pn = ReadPRegister(instr->GetPn());
12916 
12917   switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
12918     case BRKNS_p_p_pp:
12919     case BRKN_p_p_pp:
12920       brkn(pdm, pg, pn);
12921       break;
12922     default:
12923       VIXL_UNIMPLEMENTED();
12924       break;
12925   }
12926 
12927   // Set the NZCV flags if needed.
12928   if (instr->ExtractBit(22) == 1) {
12929     // Note that this ignores `pg`.
12930     PredTest(kFormatVnB, GetPTrue(), pdm);
12931   }
12932 }
12933 
12934 void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) {
12935   SimPRegister& pd = ReadPRegister(instr->GetPd());
12936   SimPRegister& pn = ReadPRegister(instr->GetPn());
12937 
12938   SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
12939   SimVRegister zero;
12940   dup_immediate(kFormatVnB, zero, 0);
12941 
12942   switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
12943     case PUNPKHI_p_p:
12944       zip2(kFormatVnB, temp, temp, zero);
12945       break;
12946     case PUNPKLO_p_p:
12947       zip1(kFormatVnB, temp, temp, zero);
12948       break;
12949     default:
12950       VIXL_UNIMPLEMENTED();
12951       break;
12952   }
12953   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
12954 }
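
// The unpack above works by widening the predicate to one byte per lane,
// interleaving those bytes with zero bytes (zip1 for the low half, zip2 for
// the high half), and narrowing back; this doubles the width of each
// predicate element, exactly as PUNPKLO/PUNPKHI require.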
12955 
12956 void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) {
12957   VectorFormat vform = instr->GetSVEVectorFormat();
12958   SimPRegister& pd = ReadPRegister(instr->GetPd());
12959   SimPRegister& pn = ReadPRegister(instr->GetPn());
12960   SimPRegister& pm = ReadPRegister(instr->GetPm());
12961 
12962   SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn);
12963   SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm);
12964 
12965   switch (instr->Mask(SVEPermutePredicateElementsMask)) {
12966     case TRN1_p_pp:
12967       trn1(vform, temp0, temp0, temp1);
12968       break;
12969     case TRN2_p_pp:
12970       trn2(vform, temp0, temp0, temp1);
12971       break;
12972     case UZP1_p_pp:
12973       uzp1(vform, temp0, temp0, temp1);
12974       break;
12975     case UZP2_p_pp:
12976       uzp2(vform, temp0, temp0, temp1);
12977       break;
12978     case ZIP1_p_pp:
12979       zip1(vform, temp0, temp0, temp1);
12980       break;
12981     case ZIP2_p_pp:
12982       zip2(vform, temp0, temp0, temp1);
12983       break;
12984     default:
12985       VIXL_UNIMPLEMENTED();
12986       break;
12987   }
12988   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0);
12989 }
12990 
12991 void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) {
12992   switch (instr->Mask(SVEReversePredicateElementsMask)) {
12993     case REV_p_p: {
12994       VectorFormat vform = instr->GetSVEVectorFormat();
12995       SimPRegister& pn = ReadPRegister(instr->GetPn());
12996       SimPRegister& pd = ReadPRegister(instr->GetPd());
12997       SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
12998       rev(vform, temp, temp);
12999       Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
13000       break;
13001     }
13002     default:
13003       VIXL_UNIMPLEMENTED();
13004       break;
13005   }
13006 }
13007 
13008 void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
13009   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13010   // Second source register "Zm" is encoded where "Zn" would usually be.
13011   SimVRegister& zm = ReadVRegister(instr->GetRn());
13012 
13013   int index = instr->GetSVEExtractImmediate();
13014   int vl = GetVectorLengthInBytes();
13015   index = (index >= vl) ? 0 : index;
13016 
13017   switch (instr->Mask(SVEPermuteVectorExtractMask)) {
13018     case EXT_z_zi_des:
13019       ext(kFormatVnB, zdn, zdn, zm, index);
13020       break;
13021     default:
13022       VIXL_UNIMPLEMENTED();
13023       break;
13024   }
13025 }
13026 
13027 void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) {
13028   VectorFormat vform = instr->GetSVEVectorFormat();
13029   SimVRegister& zd = ReadVRegister(instr->GetRd());
13030   SimVRegister& zn = ReadVRegister(instr->GetRn());
13031   SimVRegister& zm = ReadVRegister(instr->GetRm());
13032 
13033   switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
13034     case TRN1_z_zz:
13035       trn1(vform, zd, zn, zm);
13036       break;
13037     case TRN2_z_zz:
13038       trn2(vform, zd, zn, zm);
13039       break;
13040     case UZP1_z_zz:
13041       uzp1(vform, zd, zn, zm);
13042       break;
13043     case UZP2_z_zz:
13044       uzp2(vform, zd, zn, zm);
13045       break;
13046     case ZIP1_z_zz:
13047       zip1(vform, zd, zn, zm);
13048       break;
13049     case ZIP2_z_zz:
13050       zip2(vform, zd, zn, zm);
13051       break;
13052     default:
13053       VIXL_UNIMPLEMENTED();
13054       break;
13055   }
13056 }
13057 
13058 void Simulator::VisitSVEConditionallyBroadcastElementToVector(
13059     const Instruction* instr) {
13060   VectorFormat vform = instr->GetSVEVectorFormat();
13061   SimVRegister& zdn = ReadVRegister(instr->GetRd());
13062   SimVRegister& zm = ReadVRegister(instr->GetRn());
13063   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13064 
13065   int active_offset = -1;
13066   switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
13067     case CLASTA_z_p_zz:
13068       active_offset = 1;
13069       break;
13070     case CLASTB_z_p_zz:
13071       active_offset = 0;
13072       break;
13073     default:
13074       VIXL_UNIMPLEMENTED();
13075       break;
13076   }
13077 
13078   if (active_offset >= 0) {
13079     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13080     if (value.first) {
13081       dup_immediate(vform, zdn, value.second);
13082     } else {
13083       // Trigger a line of trace for the operation, even though it doesn't
13084       // change the register value.
13085       mov(vform, zdn, zdn);
13086     }
13087   }
13088 }
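
// clast() looks for the last active lane of zm under pg. CLASTB
// (active_offset == 0) broadcasts that lane's value, while CLASTA
// (active_offset == 1) broadcasts the lane after it, wrapping back to lane 0.
// If no lane is active, the pair's first member is false and the destination
// must be left unchanged, which is why the code above only issues a self-move
// for tracing in that case.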
13089 
13090 void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar(
13091     const Instruction* instr) {
13092   VectorFormat vform = instr->GetSVEVectorFormat();
13093   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13094   SimVRegister& zm = ReadVRegister(instr->GetRn());
13095   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13096 
13097   int active_offset = -1;
13098   switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
13099     case CLASTA_v_p_z:
13100       active_offset = 1;
13101       break;
13102     case CLASTB_v_p_z:
13103       active_offset = 0;
13104       break;
13105     default:
13106       VIXL_UNIMPLEMENTED();
13107       break;
13108   }
13109 
13110   if (active_offset >= 0) {
13111     LogicVRegister dst(vdn);
13112     uint64_t src1_value = dst.Uint(vform, 0);
13113     std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset);
13114     dup_immediate(vform, vdn, 0);
13115     dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value);
13116   }
13117 }
13118 
13119 void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister(
13120     const Instruction* instr) {
13121   VectorFormat vform = instr->GetSVEVectorFormat();
13122   SimVRegister& zm = ReadVRegister(instr->GetRn());
13123   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13124 
13125   int active_offset = -1;
13126   switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
13127     case CLASTA_r_p_z:
13128       active_offset = 1;
13129       break;
13130     case CLASTB_r_p_z:
13131       active_offset = 0;
13132       break;
13133     default:
13134       VIXL_UNIMPLEMENTED();
13135       break;
13136   }
13137 
13138   if (active_offset >= 0) {
13139     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13140     uint64_t masked_src = ReadXRegister(instr->GetRd()) &
13141                           GetUintMask(LaneSizeInBitsFromFormat(vform));
13142     WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src);
13143   }
13144 }
13145 
13146 void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister(
13147     const Instruction* instr) {
13148   VectorFormat vform = instr->GetSVEVectorFormat();
13149   SimVRegister& vdn = ReadVRegister(instr->GetRd());
13150   SimVRegister& zm = ReadVRegister(instr->GetRn());
13151   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13152 
13153   int active_offset = -1;
13154   switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
13155     case LASTA_v_p_z:
13156       active_offset = 1;
13157       break;
13158     case LASTB_v_p_z:
13159       active_offset = 0;
13160       break;
13161     default:
13162       VIXL_UNIMPLEMENTED();
13163       break;
13164   }
13165 
13166   if (active_offset >= 0) {
13167     LogicVRegister dst(vdn);
13168     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13169     dup_immediate(vform, vdn, 0);
13170     dst.SetUint(vform, 0, value.second);
13171   }
13172 }
13173 
13174 void Simulator::VisitSVEExtractElementToGeneralRegister(
13175     const Instruction* instr) {
13176   VectorFormat vform = instr->GetSVEVectorFormat();
13177   SimVRegister& zm = ReadVRegister(instr->GetRn());
13178   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13179 
13180   int active_offset = -1;
13181   switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
13182     case LASTA_r_p_z:
13183       active_offset = 1;
13184       break;
13185     case LASTB_r_p_z:
13186       active_offset = 0;
13187       break;
13188     default:
13189       VIXL_UNIMPLEMENTED();
13190       break;
13191   }
13192 
13193   if (active_offset >= 0) {
13194     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
13195     WriteXRegister(instr->GetRd(), value.second);
13196   }
13197 }
13198 
13199 void Simulator::VisitSVECompressActiveElements(const Instruction* instr) {
13200   VectorFormat vform = instr->GetSVEVectorFormat();
13201   SimVRegister& zd = ReadVRegister(instr->GetRd());
13202   SimVRegister& zn = ReadVRegister(instr->GetRn());
13203   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13204 
13205   switch (instr->Mask(SVECompressActiveElementsMask)) {
13206     case COMPACT_z_p_z:
13207       compact(vform, zd, pg, zn);
13208       break;
13209     default:
13210       VIXL_UNIMPLEMENTED();
13211       break;
13212   }
13213 }
13214 
13215 void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated(
13216     const Instruction* instr) {
13217   VectorFormat vform = instr->GetSVEVectorFormat();
13218   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13219   SimVRegister z_result;
13220 
13221   switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
13222     case CPY_z_p_r:
13223       dup_immediate(vform,
13224                     z_result,
13225                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13226       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
13227       break;
13228     default:
13229       VIXL_UNIMPLEMENTED();
13230       break;
13231   }
13232 }
13233 
13234 void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) {
13235   VectorFormat vform = instr->GetSVEVectorFormat();
13236   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
13237   SimVRegister& zd = ReadVRegister(instr->GetRd());
13238 
13239   SimVRegister result;
13240   switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
13241     case CPY_z_p_i: {
13242       // Use unsigned arithmetic to avoid undefined behaviour during the shift.
13243       uint64_t imm8 = instr->GetImmSVEIntWideSigned();
13244       dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
13245       break;
13246     }
13247     default:
13248       VIXL_UNIMPLEMENTED();
13249       break;
13250   }
13251 
13252   if (instr->ExtractBit(14) != 0) {
13253     mov_merging(vform, zd, pg, result);
13254   } else {
13255     mov_zeroing(vform, zd, pg, result);
13256   }
13257 }
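
// Bit 13 is the shift field and bit 14 the merge/zero qualifier, so for
// example `cpy z0.h, p0/m, #1, lsl #8` writes 0x0100 to the active .h lanes
// and keeps the old value of z0 in the inactive ones, whereas the /z form
// would zero them instead.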
13258 
13259 void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
13260   SimVRegister& zd = ReadVRegister(instr->GetRd());
13261   SimVRegister& zn = ReadVRegister(instr->GetRn());
13262   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13263   SimVRegister result;
13264 
13265   // In NEON, the chunk size in which elements are REVersed is given by the
13266   // instruction mnemonic, and the element size is attached to the register.
13267   // SVE reverses these semantics; the mapping to the logic functions below
13268   // accounts for this.
13269   VectorFormat chunk_form = instr->GetSVEVectorFormat();
13270   VectorFormat element_form = kFormatUndefined;
13271 
13272   switch (instr->Mask(SVEReverseWithinElementsMask)) {
13273     case RBIT_z_p_z:
13274       rbit(chunk_form, result, zn);
13275       break;
13276     case REVB_z_z:
13277       VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
13278                   (chunk_form == kFormatVnD));
13279       element_form = kFormatVnB;
13280       break;
13281     case REVH_z_z:
13282       VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
13283       element_form = kFormatVnH;
13284       break;
13285     case REVW_z_z:
13286       VIXL_ASSERT(chunk_form == kFormatVnD);
13287       element_form = kFormatVnS;
13288       break;
13289     default:
13290       VIXL_UNIMPLEMENTED();
13291       break;
13292   }
13293 
13294   if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
13295     VIXL_ASSERT(element_form != kFormatUndefined);
13296     switch (chunk_form) {
13297       case kFormatVnH:
13298         rev16(element_form, result, zn);
13299         break;
13300       case kFormatVnS:
13301         rev32(element_form, result, zn);
13302         break;
13303       case kFormatVnD:
13304         rev64(element_form, result, zn);
13305         break;
13306       default:
13307         VIXL_UNIMPLEMENTED();
13308     }
13309   }
13310 
13311   mov_merging(chunk_form, zd, pg, result);
13312 }
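
// As a concrete example of the mapping above, `revb z0.h, p0/m, z1.h`
// reverses the bytes within each halfword chunk, which is expressed here as
// rev16 over byte elements; REVH on .s chunks maps to rev32 over halfwords,
// and REVW on .d chunks to rev64 over words, with inactive chunks merged from
// zd.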
13313 
13314 void Simulator::VisitSVEVectorSplice(const Instruction* instr) {
13315   VectorFormat vform = instr->GetSVEVectorFormat();
13316   SimVRegister& zd = ReadVRegister(instr->GetRd());
13317   SimVRegister& zn = ReadVRegister(instr->GetRn());
13318   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13319   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
13320 
13321   switch (form_hash_) {
13322     case Hash("splice_z_p_zz_des"):
13323       splice(vform, zd, pg, zd, zn);
13324       break;
13325     case Hash("splice_z_p_zz_con"):
13326       splice(vform, zd, pg, zn, zn2);
13327       break;
13328     default:
13329       VIXL_UNIMPLEMENTED();
13330       break;
13331   }
13332 }
13333 
13334 void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
13335   SimVRegister& zd = ReadVRegister(instr->GetRd());
13336   switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
13337     case DUP_z_r:
13338       dup_immediate(instr->GetSVEVectorFormat(),
13339                     zd,
13340                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
13341       break;
13342     default:
13343       VIXL_UNIMPLEMENTED();
13344       break;
13345   }
13346 }
13347 
13348 void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
13349   SimVRegister& zd = ReadVRegister(instr->GetRd());
13350   VectorFormat vform = instr->GetSVEVectorFormat();
13351   switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
13352     case INSR_z_v:
13353       insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
13354       break;
13355     default:
13356       VIXL_UNIMPLEMENTED();
13357       break;
13358   }
13359 }
13360 
13361 void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
13362   SimVRegister& zd = ReadVRegister(instr->GetRd());
13363   VectorFormat vform = instr->GetSVEVectorFormat();
13364   switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
13365     case INSR_z_r:
13366       insr(vform, zd, ReadXRegister(instr->GetRn()));
13367       break;
13368     default:
13369       VIXL_UNIMPLEMENTED();
13370       break;
13371   }
13372 }
13373 
13374 void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
13375   SimVRegister& zd = ReadVRegister(instr->GetRd());
13376   switch (instr->Mask(SVEBroadcastIndexElementMask)) {
13377     case DUP_z_zi: {
13378       std::pair<int, int> index_and_lane_size =
13379           instr->GetSVEPermuteIndexAndLaneSizeLog2();
13380       int index = index_and_lane_size.first;
13381       int lane_size_in_bytes_log_2 = index_and_lane_size.second;
13382       VectorFormat vform =
13383           SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2);
13384       if ((index < 0) || (index >= LaneCountFromFormat(vform))) {
13385         // Out of bounds, set the destination register to zero.
13386         dup_immediate(kFormatVnD, zd, 0);
13387       } else {
13388         dup_element(vform, zd, ReadVRegister(instr->GetRn()), index);
13389       }
13390       return;
13391     }
13392     default:
13393       VIXL_UNIMPLEMENTED();
13394       break;
13395   }
13396 }
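
// For DUP (indexed), the immediate encodes both the lane size and the index,
// and GetSVEPermuteIndexAndLaneSizeLog2() separates the two. An index beyond
// the number of lanes available at the current vector length does not trap;
// as above, it simply zeroes the destination.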
13397 
13398 void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) {
13399   SimVRegister& zd = ReadVRegister(instr->GetRd());
13400   VectorFormat vform = instr->GetSVEVectorFormat();
13401   switch (instr->Mask(SVEReverseVectorElementsMask)) {
13402     case REV_z_z:
13403       rev(vform, zd, ReadVRegister(instr->GetRn()));
13404       break;
13405     default:
13406       VIXL_UNIMPLEMENTED();
13407       break;
13408   }
13409 }
13410 
13411 void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
13412   SimVRegister& zd = ReadVRegister(instr->GetRd());
13413   VectorFormat vform = instr->GetSVEVectorFormat();
13414   switch (instr->Mask(SVEUnpackVectorElementsMask)) {
13415     case SUNPKHI_z_z:
13416       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend);
13417       break;
13418     case SUNPKLO_z_z:
13419       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend);
13420       break;
13421     case UUNPKHI_z_z:
13422       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend);
13423       break;
13424     case UUNPKLO_z_z:
13425       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend);
13426       break;
13427     default:
13428       VIXL_UNIMPLEMENTED();
13429       break;
13430   }
13431 }
13432 
13433 void Simulator::VisitSVETableLookup(const Instruction* instr) {
13434   VectorFormat vform = instr->GetSVEVectorFormat();
13435   SimVRegister& zd = ReadVRegister(instr->GetRd());
13436   SimVRegister& zn = ReadVRegister(instr->GetRn());
13437   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
13438   SimVRegister& zm = ReadVRegister(instr->GetRm());
13439 
13440   switch (form_hash_) {
13441     case Hash("tbl_z_zz_1"):
13442       tbl(vform, zd, zn, zm);
13443       break;
13444     case Hash("tbl_z_zz_2"):
13445       tbl(vform, zd, zn, zn2, zm);
13446       break;
13447     case Hash("tbx_z_zz"):
13448       tbx(vform, zd, zn, zm);
13449       break;
13450     default:
13451       VIXL_UNIMPLEMENTED();
13452       break;
13453   }
13454 }
13455 
13456 void Simulator::VisitSVEPredicateCount(const Instruction* instr) {
13457   VectorFormat vform = instr->GetSVEVectorFormat();
13458   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13459   SimPRegister& pn = ReadPRegister(instr->GetPn());
13460 
13461   switch (instr->Mask(SVEPredicateCountMask)) {
13462     case CNTP_r_p_p: {
13463       WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn));
13464       break;
13465     }
13466     default:
13467       VIXL_UNIMPLEMENTED();
13468       break;
13469   }
13470 }
13471 
13472 void Simulator::VisitSVEPredicateLogical(const Instruction* instr) {
13473   Instr op = instr->Mask(SVEPredicateLogicalMask);
13474   SimPRegister& pd = ReadPRegister(instr->GetPd());
13475   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13476   SimPRegister& pn = ReadPRegister(instr->GetPn());
13477   SimPRegister& pm = ReadPRegister(instr->GetPm());
13478   SimPRegister result;
13479   switch (op) {
13480     case ANDS_p_p_pp_z:
13481     case AND_p_p_pp_z:
13482     case BICS_p_p_pp_z:
13483     case BIC_p_p_pp_z:
13484     case EORS_p_p_pp_z:
13485     case EOR_p_p_pp_z:
13486     case NANDS_p_p_pp_z:
13487     case NAND_p_p_pp_z:
13488     case NORS_p_p_pp_z:
13489     case NOR_p_p_pp_z:
13490     case ORNS_p_p_pp_z:
13491     case ORN_p_p_pp_z:
13492     case ORRS_p_p_pp_z:
13493     case ORR_p_p_pp_z:
13494       SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op),
13495                                 result,
13496                                 pn,
13497                                 pm);
13498       break;
13499     case SEL_p_p_pp:
13500       sel(pd, pg, pn, pm);
13501       return;
13502     default:
13503       VIXL_UNIMPLEMENTED();
13504       break;
13505   }
13506 
13507   mov_zeroing(pd, pg, result);
13508   if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) {
13509     PredTest(kFormatVnB, pg, pd);
13510   }
13511 }
13512 
13513 void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) {
13514   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13515   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13516   switch (instr->Mask(SVEPredicateFirstActiveMask)) {
13517     case PFIRST_p_p_p:
13518       pfirst(pdn, pg, pdn);
13519       // TODO: Is this broken when pg == pdn?
13520       PredTest(kFormatVnB, pg, pdn);
13521       break;
13522     default:
13523       VIXL_UNIMPLEMENTED();
13524       break;
13525   }
13526 }
13527 
13528 void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) {
13529   // This group only contains PTRUE{S}, and there are no unallocated encodings.
13530   VIXL_STATIC_ASSERT(
13531       SVEPredicateInitializeMask ==
13532       (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
13533   VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
13534               (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
13535 
13536   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13537   VectorFormat vform = instr->GetSVEVectorFormat();
13538 
13539   ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint());
13540   if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn);
13541 }
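
// ptrue() activates the number of lanes named by the pattern constraint
// (POW2, VL1..VL256, MUL3, MUL4 or ALL), as far as the current vector length
// allows, and bit 16 distinguishes PTRUES, which additionally sets the NZCV
// flags through PredTest().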
13542 
13543 void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) {
13544   // This group only contains PNEXT, and there are no unallocated encodings.
13545   VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
13546   VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
13547 
13548   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
13549   LogicPRegister pdn = ReadPRegister(instr->GetPd());
13550   VectorFormat vform = instr->GetSVEVectorFormat();
13551 
13552   pnext(vform, pdn, pg, pdn);
13553   // TODO: Is this broken when pg == pdn?
13554   PredTest(vform, pg, pdn);
13555 }
13556 
13557 void Simulator::VisitSVEPredicateReadFromFFR_Predicated(
13558     const Instruction* instr) {
13559   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13560   LogicPRegister pg(ReadPRegister(instr->GetPn()));
13561   FlagsUpdate flags = LeaveFlags;
13562   switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
13563     case RDFFR_p_p_f:
13564       // Do nothing.
13565       break;
13566     case RDFFRS_p_p_f:
13567       flags = SetFlags;
13568       break;
13569     default:
13570       VIXL_UNIMPLEMENTED();
13571       break;
13572   }
13573 
13574   LogicPRegister ffr(ReadFFR());
13575   mov_zeroing(pd, pg, ffr);
13576 
13577   if (flags == SetFlags) {
13578     PredTest(kFormatVnB, pg, pd);
13579   }
13580 }
13581 
13582 void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated(
13583     const Instruction* instr) {
13584   LogicPRegister pd(ReadPRegister(instr->GetPd()));
13585   LogicPRegister ffr(ReadFFR());
13586   switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
13587     case RDFFR_p_f:
13588       mov(pd, ffr);
13589       break;
13590     default:
13591       VIXL_UNIMPLEMENTED();
13592       break;
13593   }
13594 }
13595 
13596 void Simulator::VisitSVEPredicateTest(const Instruction* instr) {
13597   switch (instr->Mask(SVEPredicateTestMask)) {
13598     case PTEST_p_p:
13599       PredTest(kFormatVnB,
13600                ReadPRegister(instr->ExtractBits(13, 10)),
13601                ReadPRegister(instr->GetPn()));
13602       break;
13603     default:
13604       VIXL_UNIMPLEMENTED();
13605       break;
13606   }
13607 }
13608 
13609 void Simulator::VisitSVEPredicateZero(const Instruction* instr) {
13610   switch (instr->Mask(SVEPredicateZeroMask)) {
13611     case PFALSE_p:
13612       pfalse(ReadPRegister(instr->GetPd()));
13613       break;
13614     default:
13615       VIXL_UNIMPLEMENTED();
13616       break;
13617   }
13618 }
13619 
13620 void Simulator::VisitSVEPropagateBreak(const Instruction* instr) {
13621   SimPRegister& pd = ReadPRegister(instr->GetPd());
13622   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13623   SimPRegister& pn = ReadPRegister(instr->GetPn());
13624   SimPRegister& pm = ReadPRegister(instr->GetPm());
13625 
13626   bool set_flags = false;
13627   switch (instr->Mask(SVEPropagateBreakMask)) {
13628     case BRKPAS_p_p_pp:
13629       set_flags = true;
13630       VIXL_FALLTHROUGH();
13631     case BRKPA_p_p_pp:
13632       brkpa(pd, pg, pn, pm);
13633       break;
13634     case BRKPBS_p_p_pp:
13635       set_flags = true;
13636       VIXL_FALLTHROUGH();
13637     case BRKPB_p_p_pp:
13638       brkpb(pd, pg, pn, pm);
13639       break;
13640     default:
13641       VIXL_UNIMPLEMENTED();
13642       break;
13643   }
13644 
13645   if (set_flags) {
13646     PredTest(kFormatVnB, pg, pd);
13647   }
13648 }
13649 
13650 void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) {
13651   uint64_t length = 0;
13652   switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
13653     case ADDPL_r_ri:
13654       length = GetPredicateLengthInBytes();
13655       break;
13656     case ADDVL_r_ri:
13657       length = GetVectorLengthInBytes();
13658       break;
13659     default:
13660       VIXL_UNIMPLEMENTED();
13661   }
13662   uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
13663   WriteXRegister(instr->GetRd(),
13664                  base + (length * instr->GetImmSVEVLScale()),
13665                  LogRegWrites,
13666                  Reg31IsStackPointer);
13667 }
13668 
13669 void Simulator::VisitSVEStackFrameSize(const Instruction* instr) {
13670   int64_t scale = instr->GetImmSVEVLScale();
13671 
13672   switch (instr->Mask(SVEStackFrameSizeMask)) {
13673     case RDVL_r_i:
13674       WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale);
13675       break;
13676     default:
13677       VIXL_UNIMPLEMENTED();
13678   }
13679 }
13680 
13681 void Simulator::VisitSVEVectorSelect(const Instruction* instr) {
13682   // The only instruction in this group is `sel`, and there are no unused
13683   // encodings.
13684   VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz);
13685 
13686   VectorFormat vform = instr->GetSVEVectorFormat();
13687   SimVRegister& zd = ReadVRegister(instr->GetRd());
13688   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
13689   SimVRegister& zn = ReadVRegister(instr->GetRn());
13690   SimVRegister& zm = ReadVRegister(instr->GetRm());
13691 
13692   sel(vform, zd, pg, zn, zm);
13693 }
13694 
13695 void Simulator::VisitSVEFFRInitialise(const Instruction* instr) {
13696   switch (instr->Mask(SVEFFRInitialiseMask)) {
13697     case SETFFR_f: {
13698       LogicPRegister ffr(ReadFFR());
13699       ffr.SetAllBits();
13700       break;
13701     }
13702     default:
13703       VIXL_UNIMPLEMENTED();
13704       break;
13705   }
13706 }
13707 
13708 void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) {
13709   switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
13710     case WRFFR_f_p: {
13711       SimPRegister pn(ReadPRegister(instr->GetPn()));
13712       bool last_active = true;
13713       for (unsigned i = 0; i < pn.GetSizeInBits(); i++) {
13714         bool active = pn.GetBit(i);
13715         if (active && !last_active) {
13716           // `pn` is non-monotonic. This is UNPREDICTABLE.
13717           VIXL_ABORT();
13718         }
13719         last_active = active;
13720       }
13721       mov(ReadFFR(), pn);
13722       break;
13723     }
13724     default:
13725       VIXL_UNIMPLEMENTED();
13726       break;
13727   }
13728 }
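
// The loop above checks that the source predicate is monotonic: once a lane
// is inactive, every higher-numbered lane must be inactive too, as is always
// the case for a value read from the FFR. Writing a non-monotonic value is
// UNPREDICTABLE, so the simulator aborts rather than picking a behaviour.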
13729 
13730 void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) {
13731   bool is_signed;
13732   switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
13733     case LD1B_z_p_bi_u8:
13734     case LD1B_z_p_bi_u16:
13735     case LD1B_z_p_bi_u32:
13736     case LD1B_z_p_bi_u64:
13737     case LD1H_z_p_bi_u16:
13738     case LD1H_z_p_bi_u32:
13739     case LD1H_z_p_bi_u64:
13740     case LD1W_z_p_bi_u32:
13741     case LD1W_z_p_bi_u64:
13742     case LD1D_z_p_bi_u64:
13743       is_signed = false;
13744       break;
13745     case LD1SB_z_p_bi_s16:
13746     case LD1SB_z_p_bi_s32:
13747     case LD1SB_z_p_bi_s64:
13748     case LD1SH_z_p_bi_s32:
13749     case LD1SH_z_p_bi_s64:
13750     case LD1SW_z_p_bi_s64:
13751       is_signed = true;
13752       break;
13753     default:
13754       // This encoding group is complete, so no other values should be possible.
13755       VIXL_UNREACHABLE();
13756       is_signed = false;
13757       break;
13758   }
13759 
13760   int vl = GetVectorLengthInBytes();
13761   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
13762   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
13763   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
13764   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
13765   uint64_t offset =
13766       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
13767   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
13768   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
13769   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
13770   SVEStructuredLoadHelper(vform,
13771                           ReadPRegister(instr->GetPgLow8()),
13772                           instr->GetRt(),
13773                           addr,
13774                           is_signed);
13775 }
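
// The dtype field decodes to both a memory element size and a register
// element size, and the LD1S* forms sign-extend into the wider lane. For
// example, assuming VL = 32 bytes, `ld1sb {z0.d}, p0/z, [x0, #1, mul vl]`
// reads four bytes from [x0 + 4, x0 + 7] and sign-extends each of them into
// one of the four .d lanes.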
13776 
13777 void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar(
13778     const Instruction* instr) {
13779   bool is_signed;
13780   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
13781     case LD1B_z_p_br_u8:
13782     case LD1B_z_p_br_u16:
13783     case LD1B_z_p_br_u32:
13784     case LD1B_z_p_br_u64:
13785     case LD1H_z_p_br_u16:
13786     case LD1H_z_p_br_u32:
13787     case LD1H_z_p_br_u64:
13788     case LD1W_z_p_br_u32:
13789     case LD1W_z_p_br_u64:
13790     case LD1D_z_p_br_u64:
13791       is_signed = false;
13792       break;
13793     case LD1SB_z_p_br_s16:
13794     case LD1SB_z_p_br_s32:
13795     case LD1SB_z_p_br_s64:
13796     case LD1SH_z_p_br_s32:
13797     case LD1SH_z_p_br_s64:
13798     case LD1SW_z_p_br_s64:
13799       is_signed = true;
13800       break;
13801     default:
13802       // This encoding group is complete, so no other values should be possible.
13803       VIXL_UNREACHABLE();
13804       is_signed = false;
13805       break;
13806   }
13807 
13808   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
13809   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
13810   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
13811   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
13812   uint64_t offset = ReadXRegister(instr->GetRm());
13813   offset <<= msize_in_bytes_log2;
13814   LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset);
13815   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
13816   SVEStructuredLoadHelper(vform,
13817                           ReadPRegister(instr->GetPgLow8()),
13818                           instr->GetRt(),
13819                           addr,
13820                           is_signed);
13821 }
13822 
13823 void Simulator::DoUnreachable(const Instruction* instr) {
13824   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13825               (instr->GetImmException() == kUnreachableOpcode));
13826 
13827   fprintf(stream_,
13828           "Hit UNREACHABLE marker at pc=%p.\n",
13829           reinterpret_cast<const void*>(instr));
13830   abort();
13831 }
13832 
13833 
13834 void Simulator::DoTrace(const Instruction* instr) {
13835   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13836               (instr->GetImmException() == kTraceOpcode));
13837 
13838   // Read the arguments encoded inline in the instruction stream.
13839   uint32_t parameters;
13840   uint32_t command;
13841 
13842   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
13843   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
13844   memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
13845 
13846   switch (command) {
13847     case TRACE_ENABLE:
13848       SetTraceParameters(GetTraceParameters() | parameters);
13849       break;
13850     case TRACE_DISABLE:
13851       SetTraceParameters(GetTraceParameters() & ~parameters);
13852       break;
13853     default:
13854       VIXL_UNREACHABLE();
13855   }
13856 
13857   WritePc(instr->GetInstructionAtOffset(kTraceLength));
13858 }
13859 
13860 
13861 void Simulator::DoLog(const Instruction* instr) {
13862   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13863               (instr->GetImmException() == kLogOpcode));
13864 
13865   // Read the arguments encoded inline in the instruction stream.
13866   uint32_t parameters;
13867 
13868   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
13869   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
13870 
13871   // We don't support a one-shot LOG_DISASM.
13872   VIXL_ASSERT((parameters & LOG_DISASM) == 0);
13873   // Print the requested information.
13874   if (parameters & LOG_SYSREGS) PrintSystemRegisters();
13875   if (parameters & LOG_REGS) PrintRegisters();
13876   if (parameters & LOG_VREGS) PrintVRegisters();
13877 
13878   WritePc(instr->GetInstructionAtOffset(kLogLength));
13879 }
13880 
13881 
13882 void Simulator::DoPrintf(const Instruction* instr) {
13883   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
13884               (instr->GetImmException() == kPrintfOpcode));
13885 
13886   // Read the arguments encoded inline in the instruction stream.
13887   uint32_t arg_count;
13888   uint32_t arg_pattern_list;
13889   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
13890   memcpy(&arg_count, instr + kPrintfArgCountOffset, sizeof(arg_count));
13891   memcpy(&arg_pattern_list,
13892          instr + kPrintfArgPatternListOffset,
13893          sizeof(arg_pattern_list));
13894 
13895   VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
13896   VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
13897 
13898   // We need to call the host printf function with a set of arguments defined by
13899   // arg_pattern_list. Because we don't know the types and sizes of the
13900   // arguments, this is very difficult to do in a robust and portable way. To
13901   // work around the problem, we pick apart the format string, and print one
13902   // format placeholder at a time.
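  // For example, a format string of "x=%d, y=%f" is rewritten in place as
  // "x=" "\0" "%d, y=" "\0" "%f": the leading literal chunk is printed as-is,
  // and each remaining chunk carries exactly one placeholder and consumes one
  // W, X or D register, as selected by arg_pattern_list.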
13903 
13904   // Allocate space for the format string. We take a copy, so we can modify it.
13905   // Leave enough space for one extra character per expected argument (plus the
13906   // '\0' termination).
13907   const char* format_base = ReadRegister<const char*>(0);
13908   VIXL_ASSERT(format_base != NULL);
13909   size_t length = strlen(format_base) + 1;
13910   char* const format = allocator_.New<char[]>(length + arg_count);
13911   // A list of chunks, each with exactly one format placeholder.
13912   const char* chunks[kPrintfMaxArgCount];
13913 
13914   // Copy the format string and search for format placeholders.
13915   uint32_t placeholder_count = 0;
13916   char* format_scratch = format;
13917   for (size_t i = 0; i < length; i++) {
13918     if (format_base[i] != '%') {
13919       *format_scratch++ = format_base[i];
13920     } else {
13921       if (format_base[i + 1] == '%') {
13922         // Ignore explicit "%%" sequences.
13923         *format_scratch++ = format_base[i];
13924         i++;
13925         // Chunks after the first are passed as format strings to printf, so we
13926         // need to escape '%' characters in those chunks.
13927         if (placeholder_count > 0) *format_scratch++ = format_base[i];
13928       } else {
13929         VIXL_CHECK(placeholder_count < arg_count);
13930         // Insert '\0' before placeholders, and store their locations.
13931         *format_scratch++ = '\0';
13932         chunks[placeholder_count++] = format_scratch;
13933         *format_scratch++ = format_base[i];
13934       }
13935     }
13936   }
13937   VIXL_CHECK(placeholder_count == arg_count);
13938 
13939   // Finally, call printf with each chunk, passing the appropriate register
13940   // argument. Normally, printf returns the number of bytes transmitted, so we
13941   // can emulate a single printf call by adding the result from each chunk. If
13942   // any call returns a negative (error) value, though, just return that value.
13943 
13944   printf("%s", clr_printf);
13945 
13946   // Because '\0' is inserted before each placeholder, the first string in
13947   // 'format' contains no format placeholders and should be printed literally.
13948   int result = printf("%s", format);
13949   int pcs_r = 1;  // Start at x1. x0 holds the format string.
13950   int pcs_f = 0;  // Start at d0.
  if (result >= 0) {
    for (uint32_t i = 0; i < placeholder_count; i++) {
      int part_result = -1;

      uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
      arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
      switch (arg_pattern) {
        case kPrintfArgW:
          part_result = printf(chunks[i], ReadWRegister(pcs_r++));
          break;
        case kPrintfArgX:
          part_result = printf(chunks[i], ReadXRegister(pcs_r++));
          break;
        case kPrintfArgD:
          part_result = printf(chunks[i], ReadDRegister(pcs_f++));
          break;
        default:
          VIXL_UNREACHABLE();
      }

      if (part_result < 0) {
        // Handle error values.
        result = part_result;
        break;
      }

      result += part_result;
    }
  }

  printf("%s", clr_normal);

  // Printf returns its result in x0 (just like the C library's printf).
  WriteXRegister(0, result);

  // The printf parameters are inlined in the code, so skip them.
  WritePc(instr->GetInstructionAtOffset(kPrintfLength));

  // Set LR as if we'd just called a native printf function.
  WriteLr(ReadPc());
  allocator_.DeleteArray(format);
}


#ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
void Simulator::DoRuntimeCall(const Instruction* instr) {
  VIXL_STATIC_ASSERT(kRuntimeCallAddressSize == sizeof(uintptr_t));
  // The appropriate `Simulator::SimulateRuntimeCall()` wrapper and the function
  // to call are passed inlined in the assembly.
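  // The pseudo-instruction is followed by a small data block: the wrapper
  // address at kRuntimeCallWrapperOffset, the target function address at
  // kRuntimeCallFunctionOffset, and the call type at kRuntimeCallTypeOffset.
  // That is why the values below are read with MemRead relative to `instr`.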
  uintptr_t call_wrapper_address =
      MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset);
  uintptr_t function_address =
      MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset);
  RuntimeCallType call_type = static_cast<RuntimeCallType>(
      MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
  auto runtime_call_wrapper =
      reinterpret_cast<void (*)(Simulator*, uintptr_t)>(call_wrapper_address);
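  // The wrapper was generated for the callee's real C++ signature; it is
  // responsible for marshalling arguments out of the simulated registers,
  // making the native call, and writing any result back into simulated state.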

  if (call_type == kCallRuntime) {
    WriteRegister(kLinkRegCode,
                  instr->GetInstructionAtOffset(kRuntimeCallLength));
  }
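  // For a plain call, LR is set to the instruction following the inlined call
  // data, so simulation resumes there afterwards. Other call types leave LR
  // untouched, so execution resumes at the caller's existing return address.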
  runtime_call_wrapper(this, function_address);
  // Read the return address from `lr` and write it into `pc`.
  WritePc(ReadRegister<Instruction*>(kLinkRegCode));
}
#else
void Simulator::DoRuntimeCall(const Instruction* instr) {
  USE(instr);
  VIXL_UNREACHABLE();
}
#endif


void Simulator::DoConfigureCPUFeatures(const Instruction* instr) {
  VIXL_ASSERT(instr->Mask(ExceptionMask) == HLT);

  typedef ConfigureCPUFeaturesElementType ElementType;
  VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <
              std::numeric_limits<ElementType>::max());

  // k{Set,Enable,Disable}CPUFeatures have the same parameter encoding.
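  // The HLT is followed by a list of ElementType values, one per feature,
  // terminated by kNone; the list starts at kConfigureCPUFeaturesListOffset
  // from the instruction and is padded up to the next instruction boundary.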

  size_t element_size = sizeof(ElementType);
  size_t offset = kConfigureCPUFeaturesListOffset;

  // Read the kNone-terminated list of features.
  CPUFeatures parameters;
  while (true) {
    ElementType feature = MemRead<ElementType>(instr + offset);
    offset += element_size;
    if (feature == static_cast<ElementType>(CPUFeatures::kNone)) break;
    parameters.Combine(static_cast<CPUFeatures::Feature>(feature));
  }

  switch (instr->GetImmException()) {
    case kSetCPUFeaturesOpcode:
      SetCPUFeatures(parameters);
      break;
    case kEnableCPUFeaturesOpcode:
      GetCPUFeatures()->Combine(parameters);
      break;
    case kDisableCPUFeaturesOpcode:
      GetCPUFeatures()->Remove(parameters);
      break;
    default:
      VIXL_UNREACHABLE();
      break;
  }

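  // `offset` now spans the inlined list (including the kNone terminator), so
  // resume execution at the next instruction boundary past it.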
  WritePc(instr->GetInstructionAtOffset(AlignUp(offset, kInstructionSize)));
}


void Simulator::DoSaveCPUFeatures(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kSaveCPUFeaturesOpcode));
  USE(instr);

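  // Saved feature sets are stacked, so nested save/restore pairs behave as
  // expected: each save should be matched by a later DoRestoreCPUFeatures.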
  saved_cpu_features_.push_back(*GetCPUFeatures());
}


void Simulator::DoRestoreCPUFeatures(const Instruction* instr) {
  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
              (instr->GetImmException() == kRestoreCPUFeaturesOpcode));
  USE(instr);

  SetCPUFeatures(saved_cpu_features_.back());
  saved_cpu_features_.pop_back();
}


}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64