• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A9 processors.
11//
12//===----------------------------------------------------------------------===//
13
14//
15// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
16// Reference Manual".
17//
18// Functional units
19def A9_Issue0  : FuncUnit; // Issue unit 0 (IIC_Br claims both issue units)
20def A9_Issue1  : FuncUnit; // Issue unit 1
21def A9_Branch  : FuncUnit; // Branch unit
22def A9_ALU0    : FuncUnit; // ALU pipeline 0; the only pipeline used by MUL/MAC
23def A9_ALU1    : FuncUnit; // ALU pipeline 1
24def A9_AGU     : FuncUnit; // Address generation unit for ld / st
25def A9_NPipe   : FuncUnit; // NEON pipeline (VFP itineraries execute here too)
26def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27def A9_LSUnit  : FuncUnit; // Load/store unit
28def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU)
29def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial FU)
30
31// Bypasses
32def A9_LdBypass : Bypass; // Named by load itineraries and ALU/MVN/compare sources
33
34def CortexA9Itineraries : ProcessorItineraries<
35  [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36   A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37  [A9_LdBypass], [
38  // Two fully-pipelined integer ALU pipelines
39
40  //
41  // Move instructions, unconditional
42  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50  InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
54                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
55                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                                  InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
57  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                               InstrStage<1, [A9_MUX0], 0>,
61                               InstrStage<1, [A9_AGU], 0>,
62                               InstrStage<1, [A9_LSUnit]>], [5]>,
63  //
64  // MVN instructions
65  InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
67                              [1]>,
68  InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
70                              [1, 1], [NoBypass, A9_LdBypass]>,
71  InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
72                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
73                              [2, 1]>,
74  InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
76                              [3, 1, 1]>,
77  //
78  // No operand cycles
79  InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                               InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
81  //
82  // Binary Instructions that produce a result
83  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
85                            [1, 1], [NoBypass, A9_LdBypass]>,
86  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
88                            [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
89  InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
90                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
91                            [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
92  InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
94                            [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
95  InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                             InstrStage<3, [A9_ALU0, A9_ALU1]>],
97                            [3, 1, 1, 1],
98                            [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
99  //
100  // Bitwise Instructions that produce a result
101  InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
102                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
103  InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
105  InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
107  InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
109  //
110  // Unary Instructions that produce a result
111
112  // CLZ, RBIT, etc.
113  InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
115
116  // BFC, BFI, UBFX, SBFX
117  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
119
120  //
121  // Zero and sign extension instructions
122  InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
123                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
124  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
126  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
128  //
129  // Compare instructions
130  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
132                               [1], [A9_LdBypass]>,
133  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
135                               [1, 1], [A9_LdBypass, A9_LdBypass]>,
136  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
138                                [1, 1], [A9_LdBypass, NoBypass]>,
139  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
141                              [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
142  //
143  // Test instructions
144  InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146  InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148  InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150  InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                               InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
152  //
153  // Move instructions, conditional
154  // FIXME: Correctly model the extra input dep on the destination.
155  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
157  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
159  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
161  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
163  InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
165                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
167
168  // Integer multiply pipeline
169  //
170  InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
171                               InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
172  InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
173                               InstrStage<2, [A9_ALU0]>],
174                              [3, 1, 1, 1]>,
175  InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                               InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
177  InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
178                               InstrStage<2, [A9_ALU0]>],
179                              [4, 1, 1, 1]>,
180  InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                               InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
182  InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
183                               InstrStage<3, [A9_ALU0]>],
184                              [4, 5, 1, 1]>,
185  // Integer load pipeline
186  // FIXME: The timings are some rough approximations
187  //
188  // Immediate offset
189  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
190                                 InstrStage<1, [A9_MUX0], 0>,
191                                 InstrStage<1, [A9_AGU], 0>,
192                                 InstrStage<1, [A9_LSUnit]>],
193                                [3, 1], [A9_LdBypass]>,
194  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
195                                 InstrStage<1, [A9_MUX0], 0>,
196                                 InstrStage<2, [A9_AGU], 0>,
197                                 InstrStage<1, [A9_LSUnit]>],
198                                [4, 1], [A9_LdBypass]>,
199  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
200  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
201                                 InstrStage<1, [A9_MUX0], 0>,
202                                 InstrStage<2, [A9_AGU], 0>,
203                                 InstrStage<1, [A9_LSUnit]>],
204                                [3, 3, 1], [A9_LdBypass]>,
205  //
206  // Register offset
207  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
208                                 InstrStage<1, [A9_MUX0], 0>,
209                                 InstrStage<1, [A9_AGU], 0>,
210                                 InstrStage<1, [A9_LSUnit]>],
211                                [3, 1, 1], [A9_LdBypass]>,
212  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
213                                 InstrStage<1, [A9_MUX0], 0>,
214                                 InstrStage<2, [A9_AGU], 0>,
215                                 InstrStage<1, [A9_LSUnit]>],
216                                [4, 1, 1], [A9_LdBypass]>,
217  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
218                                 InstrStage<1, [A9_MUX0], 0>,
219                                 InstrStage<2, [A9_AGU], 0>,
220                                 InstrStage<1, [A9_LSUnit]>],
221                                [3, 3, 1, 1], [A9_LdBypass]>,
222  //
223  // Scaled register offset
224  InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
225                                 InstrStage<1, [A9_MUX0], 0>,
226                                 InstrStage<1, [A9_AGU], 0>,
227                                 InstrStage<1, [A9_LSUnit], 0>],
228                                [4, 1, 1], [A9_LdBypass]>,
229  InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
230                                 InstrStage<1, [A9_MUX0], 0>,
231                                 InstrStage<2, [A9_AGU], 0>,
232                                 InstrStage<1, [A9_LSUnit]>],
233                                [5, 1, 1], [A9_LdBypass]>,
234  //
235  // Immediate offset with update
236  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
237                                 InstrStage<1, [A9_MUX0], 0>,
238                                 InstrStage<1, [A9_AGU], 0>,
239                                 InstrStage<1, [A9_LSUnit]>],
240                                [3, 2, 1], [A9_LdBypass]>,
241  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
242                                 InstrStage<1, [A9_MUX0], 0>,
243                                 InstrStage<2, [A9_AGU], 0>,
244                                 InstrStage<1, [A9_LSUnit]>],
245                                [4, 3, 1], [A9_LdBypass]>,
246  //
247  // Register offset with update
248  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
249                                 InstrStage<1, [A9_MUX0], 0>,
250                                 InstrStage<1, [A9_AGU], 0>,
251                                 InstrStage<1, [A9_LSUnit]>],
252                                [3, 2, 1, 1], [A9_LdBypass]>,
253  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
254                                 InstrStage<1, [A9_MUX0], 0>,
255                                 InstrStage<2, [A9_AGU], 0>,
256                                 InstrStage<1, [A9_LSUnit]>],
257                                [4, 3, 1, 1], [A9_LdBypass]>,
258  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
259                                 InstrStage<1, [A9_MUX0], 0>,
260                                 InstrStage<2, [A9_AGU], 0>,
261                                 InstrStage<1, [A9_LSUnit]>],
262                                [3, 3, 1, 1], [A9_LdBypass]>,
263  //
264  // Scaled register offset with update
265  InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
266                                 InstrStage<1, [A9_MUX0], 0>,
267                                 InstrStage<1, [A9_AGU], 0>,
268                                 InstrStage<1, [A9_LSUnit]>],
269                                [4, 3, 1, 1], [A9_LdBypass]>,
270  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
271                                  InstrStage<1, [A9_MUX0], 0>,
272                                  InstrStage<2, [A9_AGU], 0>,
273                                  InstrStage<1, [A9_LSUnit]>],
274                                 [5, 4, 1, 1], [A9_LdBypass]>,
275  //
276  // Load multiple, def is the 5th operand.
277  // FIXME: This assumes 3 to 4 registers.
278  InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
279                                InstrStage<1, [A9_MUX0], 0>,
280                                InstrStage<2, [A9_AGU], 1>,
281                                InstrStage<2, [A9_LSUnit]>],
282                               [1, 1, 1, 1, 3],
283                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
284  //
285  // Load multiple + update, defs are the 1st and 5th operands.
286  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
287                                InstrStage<1, [A9_MUX0], 0>,
288                                InstrStage<2, [A9_AGU], 1>,
289                                InstrStage<2, [A9_LSUnit]>],
290                               [2, 1, 1, 1, 3],
291                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
292  //
293  // Load multiple plus branch
294  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
295                                InstrStage<1, [A9_MUX0], 0>,
296                                InstrStage<1, [A9_AGU], 1>,
297                                InstrStage<2, [A9_LSUnit]>,
298                                InstrStage<1, [A9_Branch]>],
299                               [1, 2, 1, 1, 3],
300                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
301  //
302  // Pop, def is the 3rd operand.
303  InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
304                                InstrStage<1, [A9_MUX0], 0>,
305                                InstrStage<2, [A9_AGU], 1>,
306                                InstrStage<2, [A9_LSUnit]>],
307                               [1, 1, 3],
308                               [NoBypass, NoBypass, A9_LdBypass]>,
309  //
310  // Pop + branch, def is the 3rd operand.
311  InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
312                                InstrStage<1, [A9_MUX0], 0>,
313                                InstrStage<2, [A9_AGU], 1>,
314                                InstrStage<2, [A9_LSUnit]>,
315                                InstrStage<1, [A9_Branch]>],
316                               [1, 1, 3],
317                               [NoBypass, NoBypass, A9_LdBypass]>,
318
319  //
320  // iLoadi + iALUr for t2LDRpci_pic.
321  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                InstrStage<1, [A9_MUX0], 0>,
323                                InstrStage<1, [A9_AGU], 0>,
324                                InstrStage<1, [A9_LSUnit]>,
325                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
326                               [2, 1]>,
327
328  // Integer store pipeline
329  //
330  // Immediate offset
331  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332                                 InstrStage<1, [A9_MUX0], 0>,
333                                 InstrStage<1, [A9_AGU], 0>,
334                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
335  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                 InstrStage<1, [A9_MUX0], 0>,
337                                 InstrStage<2, [A9_AGU], 1>,
338                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
339  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
340  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                 InstrStage<1, [A9_MUX0], 0>,
342                                 InstrStage<2, [A9_AGU], 1>,
343                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
344  //
345  // Register offset
346  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                 InstrStage<1, [A9_MUX0], 0>,
348                                 InstrStage<1, [A9_AGU], 0>,
349                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
350  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                 InstrStage<1, [A9_MUX0], 0>,
352                                 InstrStage<2, [A9_AGU], 1>,
353                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                 InstrStage<1, [A9_MUX0], 0>,
356                                 InstrStage<2, [A9_AGU], 1>,
357                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358  //
359  // Scaled register offset
360  InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                  InstrStage<1, [A9_MUX0], 0>,
362                                  InstrStage<1, [A9_AGU], 0>,
363                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
364  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
365                                  InstrStage<1, [A9_MUX0], 0>,
366                                  InstrStage<2, [A9_AGU], 1>,
367                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
368  //
369  // Immediate offset with update
370  InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                  InstrStage<1, [A9_MUX0], 0>,
372                                  InstrStage<1, [A9_AGU], 0>,
373                                  InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
374  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
375                                  InstrStage<1, [A9_MUX0], 0>,
376                                  InstrStage<2, [A9_AGU], 1>,
377                                  InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
378  //
379  // Register offset with update
380  InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
381                                  InstrStage<1, [A9_MUX0], 0>,
382                                  InstrStage<1, [A9_AGU], 0>,
383                                  InstrStage<1, [A9_LSUnit]>],
384                                 [2, 1, 1, 1]>,
385  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
386                                  InstrStage<1, [A9_MUX0], 0>,
387                                  InstrStage<2, [A9_AGU], 1>,
388                                  InstrStage<1, [A9_LSUnit]>],
389                                 [3, 1, 1, 1]>,
390  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
391                                  InstrStage<1, [A9_MUX0], 0>,
392                                  InstrStage<2, [A9_AGU], 1>,
393                                  InstrStage<1, [A9_LSUnit]>],
394                                 [3, 1, 1, 1]>,
395  //
396  // Scaled register offset with update
397  InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
398                                    InstrStage<1, [A9_MUX0], 0>,
399                                    InstrStage<1, [A9_AGU], 0>,
400                                    InstrStage<1, [A9_LSUnit]>],
401                                   [2, 1, 1, 1]>,
402  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
403                                    InstrStage<1, [A9_MUX0], 0>,
404                                    InstrStage<2, [A9_AGU], 1>,
405                                    InstrStage<1, [A9_LSUnit]>],
406                                   [3, 1, 1, 1]>,
407  //
408  // Store multiple
409  InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
410                                InstrStage<1, [A9_MUX0], 0>,
411                                InstrStage<1, [A9_AGU], 0>,
412                                InstrStage<2, [A9_LSUnit]>]>,
413  //
414  // Store multiple + update
415  InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
416                                InstrStage<1, [A9_MUX0], 0>,
417                                InstrStage<1, [A9_AGU], 0>,
418                                InstrStage<2, [A9_LSUnit]>], [2]>,
419
420  //
421  // Preload
422  InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
423
424  // Branch
425  //
426  // no delay slots, so the latency of a branch is unimportant
427  InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
428                                InstrStage<1, [A9_Issue1], 0>,
429                                InstrStage<1, [A9_Branch]>]>,
430
431  // VFP and NEON share the same register file. This means that every VFP
432  // instruction should wait for full completion of the consecutive NEON
433  // instruction and vice-versa. We model this behavior with two artificial FUs:
434  // DRegsVFP and DRegsN.
435  //
436  // Every VFP instruction:
437  //  - Acquires DRegsVFP resource for 1 cycle
438  //  - Reserves DRegsN resource for the whole duration (including time to
439  //    register file writeback!).
440  // Every NEON instruction does the same but with FUs swapped.
441  //
442  // Since the reserved FU cannot be acquired, this models precisely
443  // "cross-domain" stalls.
444
445  // VFP
446  // Issue through integer pipeline, and execute in NEON unit.
447
448  // FP Special Register to Integer Register File Move
449  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
450                              InstrStage<1, [A9_MUX0], 0>,
451                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
452                              InstrStage<2, [A9_DRegsN],   0, Reserved>,
453                              InstrStage<1, [A9_NPipe]>],
454                             [1]>,
455  //
456  // Single-precision FP Unary
457  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
458                               InstrStage<1, [A9_MUX0], 0>,
459                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
460                               // Extra latency cycles since wbck is 2 cycles
461                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
462                               InstrStage<1, [A9_NPipe]>],
463                              [1, 1]>,
464  //
465  // Double-precision FP Unary
466  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                               InstrStage<1, [A9_MUX0], 0>,
468                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                               // Extra latency cycles since wbck is 2 cycles
470                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
471                               InstrStage<1, [A9_NPipe]>],
472                              [1, 1]>,
473
474  //
475  // Single-precision FP Compare
476  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
477                               InstrStage<1, [A9_MUX0], 0>,
478                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
479                               // Extra latency cycles since wbck is 4 cycles
480                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
481                               InstrStage<1, [A9_NPipe]>],
482                              [1, 1]>,
483  //
484  // Double-precision FP Compare
485  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                               InstrStage<1, [A9_MUX0], 0>,
487                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
488                               // Extra latency cycles since wbck is 4 cycles
489                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
490                               InstrStage<1, [A9_NPipe]>],
491                              [1, 1]>,
492  //
493  // Single to Double FP Convert
494  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                               InstrStage<1, [A9_MUX0], 0>,
496                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
497                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
498                               InstrStage<1, [A9_NPipe]>],
499                              [4, 1]>,
500  //
501  // Double to Single FP Convert
502  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                               InstrStage<1, [A9_MUX0], 0>,
504                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
505                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
506                               InstrStage<1, [A9_NPipe]>],
507                              [4, 1]>,
508
509  //
510  // Single to Half FP Convert
511  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                               InstrStage<1, [A9_MUX0], 0>,
513                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
514                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
515                               InstrStage<1, [A9_NPipe]>],
516                              [4, 1]>,
517  //
518  // Half to Single FP Convert
519  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                               InstrStage<1, [A9_MUX0], 0>,
521                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
522                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
523                               InstrStage<1, [A9_NPipe]>],
524                              [2, 1]>,
525
526  //
527  // Single-Precision FP to Integer Convert
528  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529                               InstrStage<1, [A9_MUX0], 0>,
530                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
531                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
532                               InstrStage<1, [A9_NPipe]>],
533                              [4, 1]>,
534  //
535  // Double-Precision FP to Integer Convert
536  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
537                               InstrStage<1, [A9_MUX0], 0>,
538                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
539                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
540                               InstrStage<1, [A9_NPipe]>],
541                              [4, 1]>,
542  //
543  // Integer to Single-Precision FP Convert
544  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
545                               InstrStage<1, [A9_MUX0], 0>,
546                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
547                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
548                               InstrStage<1, [A9_NPipe]>],
549                              [4, 1]>,
550  //
551  // Integer to Double-Precision FP Convert
552  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553                               InstrStage<1, [A9_MUX0], 0>,
554                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
555                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
556                               InstrStage<1, [A9_NPipe]>],
557                              [4, 1]>,
558  //
559  // Single-precision FP ALU
560  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
561                               InstrStage<1, [A9_MUX0], 0>,
562                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
563                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
564                               InstrStage<1, [A9_NPipe]>],
565                              [4, 1, 1]>,
566  //
567  // Double-precision FP ALU
568  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
569                               InstrStage<1, [A9_MUX0], 0>,
570                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
571                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
572                               InstrStage<1, [A9_NPipe]>],
573                              [4, 1, 1]>,
574  //
575  // Single-precision FP Multiply
576  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
577                               InstrStage<1, [A9_MUX0], 0>,
578                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
579                               InstrStage<6, [A9_DRegsN],   0, Reserved>,
580                               InstrStage<1, [A9_NPipe]>],
581                              [5, 1, 1]>,
582  //
583  // Double-precision FP Multiply
584  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
585                               InstrStage<1, [A9_MUX0], 0>,
586                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
587                               InstrStage<7, [A9_DRegsN],   0, Reserved>,
588                               InstrStage<2, [A9_NPipe]>],
589                              [6, 1, 1]>,
590  //
591  // Single-precision FP MAC
592  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
593                               InstrStage<1, [A9_MUX0], 0>,
594                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
595                               InstrStage<9, [A9_DRegsN],   0, Reserved>,
596                               InstrStage<1, [A9_NPipe]>],
597                              [8, 1, 1, 1]>,
598  //
599  // Double-precision FP MAC
600  InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
601                               InstrStage<1,  [A9_MUX0], 0>,
602                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
603                               InstrStage<10, [A9_DRegsN],  0, Reserved>,
604                               InstrStage<2,  [A9_NPipe]>],
605                              [9, 1, 1, 1]>,
606  //
607  // Single-precision Fused FP MAC
608  InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
609                               InstrStage<1, [A9_MUX0], 0>,
610                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
611                               InstrStage<9, [A9_DRegsN],   0, Reserved>,
612                               InstrStage<1, [A9_NPipe]>],
613                              [8, 1, 1, 1]>,
614  //
615  // Double-precision Fused FP MAC
616  InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
617                               InstrStage<1,  [A9_MUX0], 0>,
618                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
619                               InstrStage<10, [A9_DRegsN],  0, Reserved>,
620                               InstrStage<2,  [A9_NPipe]>],
621                              [9, 1, 1, 1]>,
622  //
623  // Single-precision FP DIV
624  InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
625                               InstrStage<1,  [A9_MUX0], 0>,
626                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
627                               InstrStage<16, [A9_DRegsN],  0, Reserved>,
628                               InstrStage<10, [A9_NPipe]>],
629                              [15, 1, 1]>,
630  //
631  // Double-precision FP DIV
632  InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
633                               InstrStage<1,  [A9_MUX0], 0>,
634                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
635                               InstrStage<26, [A9_DRegsN],  0, Reserved>,
636                               InstrStage<20, [A9_NPipe]>],
637                              [25, 1, 1]>,
638  //
639  // Single-precision FP SQRT
640  InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
641                               InstrStage<1,  [A9_MUX0], 0>,
642                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
643                               InstrStage<18, [A9_DRegsN],   0, Reserved>,
644                               InstrStage<13, [A9_NPipe]>],
645                              [17, 1]>,
646  //
647  // Double-precision FP SQRT
648  InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
649                               InstrStage<1,  [A9_MUX0], 0>,
650                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
651                               InstrStage<33, [A9_DRegsN],   0, Reserved>,
652                               InstrStage<28, [A9_NPipe]>],
653                              [32, 1]>,
654
655  //
656  // Integer to Single-precision Move
657  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
658                               InstrStage<1, [A9_MUX0], 0>,
659                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
660                               // 1 extra latency cycle: writeback is 2 cycles
661                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
662                               InstrStage<1, [A9_NPipe]>],
663                              [1, 1]>,
664  //
665  // Integer to Double-precision Move
666  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
667                               InstrStage<1, [A9_MUX0], 0>,
668                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
669                               // 1 extra latency cycle: writeback is 2 cycles
670                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
671                               InstrStage<1, [A9_NPipe]>],
672                              [1, 1, 1]>,
673  //
674  // Single-precision to Integer Move
675  //
676  // On A9 move-from-VFP is free to issue with no stall if other VFP
677  // operations are in flight. I assume it still can't dual-issue though.
678  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
679                               InstrStage<1, [A9_MUX0], 0>],
680                              [2, 1]>,
681  //
682  // Double-precision to Integer Move
683  //
684  // On A9 move-from-VFP is free to issue with no stall if other VFP
685  // operations are in flight. I assume it still can't dual-issue though.
686  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
687                               InstrStage<1, [A9_MUX0], 0>],
688                              [2, 1, 1]>,
689  //
690  // Single-precision FP Load
691  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
692                               InstrStage<1, [A9_MUX0], 0>,
693                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
694                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
695                               InstrStage<1, [A9_NPipe], 0>,
696                               InstrStage<1, [A9_LSUnit]>],
697                              [1, 1]>,
698  //
699  // Double-precision FP Load
700  // FIXME: Result latency is 1 if address is 64-bit aligned.
701  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
702                               InstrStage<1, [A9_MUX0], 0>,
703                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
704                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
705                               InstrStage<1, [A9_NPipe], 0>,
706                               InstrStage<1, [A9_LSUnit]>],
707                              [2, 1]>,
708  //
709  // FP Load Multiple
710  // FIXME: assumes 2 doubles which requires 2 LS cycles.
711  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
712                               InstrStage<1, [A9_MUX0], 0>,
713                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
714                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
715                               InstrStage<1, [A9_NPipe], 0>,
716                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
717  //
718  // FP Load Multiple + update
719  // FIXME: assumes 2 doubles which requires 2 LS cycles.
720  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721                               InstrStage<1, [A9_MUX0], 0>,
722                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
723                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
724                               InstrStage<1, [A9_NPipe], 0>,
725                               InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
726  //
727  // Single-precision FP Store
728  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
729                               InstrStage<1, [A9_MUX0], 0>,
730                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
731                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
732                               InstrStage<1, [A9_NPipe], 0>,
733                               InstrStage<1, [A9_LSUnit]>],
734                              [1, 1]>,
735  //
736  // Double-precision FP Store
737  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
738                               InstrStage<1, [A9_MUX0], 0>,
739                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
740                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
741                               InstrStage<1, [A9_NPipe], 0>,
742                               InstrStage<1, [A9_LSUnit]>],
743                              [1, 1]>,
744  //
745  // FP Store Multiple
746  // FIXME: assumes 2 doubles which requires 2 LS cycles.
747  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
748                               InstrStage<1, [A9_MUX0], 0>,
749                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
750                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
751                               InstrStage<1, [A9_NPipe], 0>,
752                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
753  //
754  // FP Store Multiple + update
755  // FIXME: assumes 2 doubles which requires 2 LS cycles.
756  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
757                                InstrStage<1, [A9_MUX0], 0>,
758                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
759                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
760                                InstrStage<1, [A9_NPipe], 0>,
761                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
762  // NEON
763  // VLD1
764  InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
765                               InstrStage<1, [A9_MUX0], 0>,
766                               InstrStage<1, [A9_DRegsN],   0, Required>,
767                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
768                               InstrStage<1, [A9_NPipe], 0>,
769                               InstrStage<1, [A9_LSUnit]>],
770                              [1, 1]>,
771  // VLD1x2
772  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
773                               InstrStage<1, [A9_MUX0], 0>,
774                               InstrStage<1, [A9_DRegsN],   0, Required>,
775                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
776                               InstrStage<1, [A9_NPipe], 0>,
777                               InstrStage<1, [A9_LSUnit]>],
778                              [1, 1, 1]>,
779  // VLD1x3
780  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
781                               InstrStage<1, [A9_MUX0], 0>,
782                               InstrStage<1, [A9_DRegsN],   0, Required>,
783                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
784                               InstrStage<2, [A9_NPipe], 0>,
785                               InstrStage<2, [A9_LSUnit]>],
786                              [1, 1, 2, 1]>,
787  // VLD1x4
788  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
789                               InstrStage<1, [A9_MUX0], 0>,
790                               InstrStage<1, [A9_DRegsN],   0, Required>,
791                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
792                               InstrStage<2, [A9_NPipe], 0>,
793                               InstrStage<2, [A9_LSUnit]>],
794                              [1, 1, 2, 2, 1]>,
795  // VLD1u
796  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
797                               InstrStage<1, [A9_MUX0], 0>,
798                               InstrStage<1, [A9_DRegsN],   0, Required>,
799                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
800                               InstrStage<1, [A9_NPipe], 0>,
801                               InstrStage<1, [A9_LSUnit]>],
802                              [1, 2, 1]>,
803  // VLD1x2u
804  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
805                               InstrStage<1, [A9_MUX0], 0>,
806                               InstrStage<1, [A9_DRegsN],   0, Required>,
807                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
808                               InstrStage<1, [A9_NPipe], 0>,
809                               InstrStage<1, [A9_LSUnit]>],
810                              [1, 1, 2, 1]>,
811  // VLD1x3u
812  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
813                               InstrStage<1, [A9_MUX0], 0>,
814                               InstrStage<1, [A9_DRegsN],   0, Required>,
815                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
816                               InstrStage<2, [A9_NPipe], 0>,
817                               InstrStage<2, [A9_LSUnit]>],
818                              [1, 1, 2, 2, 1]>,
819  // VLD1x4u
820  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
821                               InstrStage<1, [A9_MUX0], 0>,
822                               InstrStage<1, [A9_DRegsN],   0, Required>,
823                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
824                               InstrStage<2, [A9_NPipe], 0>,
825                               InstrStage<2, [A9_LSUnit]>],
826                              [1, 1, 2, 2, 2, 1]>,
827  //
828  // VLD1ln
829  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
830                               InstrStage<1, [A9_MUX0], 0>,
831                               InstrStage<1, [A9_DRegsN],   0, Required>,
832                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
833                               InstrStage<2, [A9_NPipe], 0>,
834                               InstrStage<2, [A9_LSUnit]>],
835                              [3, 1, 1, 1]>,
836  //
837  // VLD1lnu
838  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
839                               InstrStage<1, [A9_MUX0], 0>,
840                               InstrStage<1, [A9_DRegsN],   0, Required>,
841                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
842                               InstrStage<2, [A9_NPipe], 0>,
843                               InstrStage<2, [A9_LSUnit]>],
844                              [3, 2, 1, 1, 1, 1]>,
845  //
846  // VLD1dup
847  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
848                               InstrStage<1, [A9_MUX0], 0>,
849                               InstrStage<1, [A9_DRegsN],   0, Required>,
850                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
851                               InstrStage<1, [A9_NPipe], 0>,
852                               InstrStage<1, [A9_LSUnit]>],
853                              [2, 1]>,
854  //
855  // VLD1dupu
856  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
857                               InstrStage<1, [A9_MUX0], 0>,
858                               InstrStage<1, [A9_DRegsN],   0, Required>,
859                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
860                               InstrStage<1, [A9_NPipe], 0>,
861                               InstrStage<1, [A9_LSUnit]>],
862                              [2, 2, 1, 1]>,
863  //
864  // VLD2
865  InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
866                               InstrStage<1, [A9_MUX0], 0>,
867                               InstrStage<1, [A9_DRegsN],   0, Required>,
868                               // Extra latency cycles: writeback is 7 cycles
869                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
870                               InstrStage<1, [A9_NPipe], 0>,
871                               InstrStage<1, [A9_LSUnit]>],
872                              [2, 2, 1]>,
873  //
874  // VLD2x2
875  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
876                               InstrStage<1, [A9_MUX0], 0>,
877                               InstrStage<1, [A9_DRegsN],   0, Required>,
878                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
879                               InstrStage<2, [A9_NPipe], 0>,
880                               InstrStage<2, [A9_LSUnit]>],
881                              [2, 3, 2, 3, 1]>,
882  //
883  // VLD2ln
884  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
885                               InstrStage<1, [A9_MUX0], 0>,
886                               InstrStage<1, [A9_DRegsN],   0, Required>,
887                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
888                               InstrStage<2, [A9_NPipe], 0>,
889                               InstrStage<2, [A9_LSUnit]>],
890                              [3, 3, 1, 1, 1, 1]>,
891  //
892  // VLD2u
893  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
894                               InstrStage<1, [A9_MUX0], 0>,
895                               InstrStage<1, [A9_DRegsN],   0, Required>,
896                               // Extra latency cycles: writeback is 7 cycles
897                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
898                               InstrStage<1, [A9_NPipe], 0>,
899                               InstrStage<1, [A9_LSUnit]>],
900                              [2, 2, 2, 1, 1, 1]>,
901  //
902  // VLD2x2u
903  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
904                               InstrStage<1, [A9_MUX0], 0>,
905                               InstrStage<1, [A9_DRegsN],   0, Required>,
906                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
907                               InstrStage<2, [A9_NPipe], 0>,
908                               InstrStage<2, [A9_LSUnit]>],
909                              [2, 3, 2, 3, 2, 1]>,
910  //
911  // VLD2lnu
912  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
913                               InstrStage<1, [A9_MUX0], 0>,
914                               InstrStage<1, [A9_DRegsN],   0, Required>,
915                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
916                               InstrStage<2, [A9_NPipe], 0>,
917                               InstrStage<2, [A9_LSUnit]>],
918                              [3, 3, 2, 1, 1, 1, 1, 1]>,
919  //
920  // VLD2dup
921  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
922                               InstrStage<1, [A9_MUX0], 0>,
923                               InstrStage<1, [A9_DRegsN],   0, Required>,
924                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
925                               InstrStage<1, [A9_NPipe], 0>,
926                               InstrStage<1, [A9_LSUnit]>],
927                              [2, 2, 1]>,
928  //
929  // VLD2dupu
930  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
931                               InstrStage<1, [A9_MUX0], 0>,
932                               InstrStage<1, [A9_DRegsN],   0, Required>,
933                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
934                               InstrStage<1, [A9_NPipe], 0>,
935                               InstrStage<1, [A9_LSUnit]>],
936                              [2, 2, 2, 1, 1]>,
937  //
938  // VLD3
939  InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
940                               InstrStage<1, [A9_MUX0], 0>,
941                               InstrStage<1, [A9_DRegsN],   0, Required>,
942                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
943                               InstrStage<3, [A9_NPipe], 0>,
944                               InstrStage<3, [A9_LSUnit]>],
945                              [3, 3, 4, 1]>,
946  //
947  // VLD3ln
948  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
949                               InstrStage<1, [A9_MUX0], 0>,
950                               InstrStage<1, [A9_DRegsN],   0, Required>,
951                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
952                               InstrStage<5, [A9_NPipe], 0>,
953                               InstrStage<5, [A9_LSUnit]>],
954                              [5, 5, 6, 1, 1, 1, 1, 2]>,
955  //
956  // VLD3u
957  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
958                               InstrStage<1, [A9_MUX0], 0>,
959                               InstrStage<1, [A9_DRegsN],   0, Required>,
960                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
961                               InstrStage<3, [A9_NPipe], 0>,
962                               InstrStage<3, [A9_LSUnit]>],
963                              [3, 3, 4, 2, 1]>,
964  //
965  // VLD3lnu
966  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
967                               InstrStage<1, [A9_MUX0], 0>,
968                               InstrStage<1, [A9_DRegsN],   0, Required>,
969                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
970                               InstrStage<5, [A9_NPipe], 0>,
971                               InstrStage<5, [A9_LSUnit]>],
972                              [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
973  //
974  // VLD3dup
975  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
976                               InstrStage<1, [A9_MUX0], 0>,
977                               InstrStage<1, [A9_DRegsN],   0, Required>,
978                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
979                               InstrStage<3, [A9_NPipe], 0>,
980                               InstrStage<3, [A9_LSUnit]>],
981                              [3, 3, 4, 1]>,
982  //
983  // VLD3dupu
984  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
985                               InstrStage<1, [A9_MUX0], 0>,
986                               InstrStage<1, [A9_DRegsN],   0, Required>,
987                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
988                               InstrStage<3, [A9_NPipe], 0>,
989                               InstrStage<3, [A9_LSUnit]>],
990                              [3, 3, 4, 2, 1, 1]>,
991  //
992  // VLD4
993  InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
994                               InstrStage<1, [A9_MUX0], 0>,
995                               InstrStage<1, [A9_DRegsN],   0, Required>,
996                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
997                               InstrStage<3, [A9_NPipe], 0>,
998                               InstrStage<3, [A9_LSUnit]>],
999                              [3, 3, 4, 4, 1]>,
1000  //
1001  // VLD4ln
1002  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1003                               InstrStage<1, [A9_MUX0], 0>,
1004                               InstrStage<1, [A9_DRegsN],   0, Required>,
1005                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1006                               InstrStage<4, [A9_NPipe], 0>,
1007                               InstrStage<4, [A9_LSUnit]>],
1008                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1009  //
1010  // VLD4u
1011  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1012                               InstrStage<1, [A9_MUX0], 0>,
1013                               InstrStage<1, [A9_DRegsN],   0, Required>,
1014                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1015                               InstrStage<3, [A9_NPipe], 0>,
1016                               InstrStage<3, [A9_LSUnit]>],
1017                              [3, 3, 4, 4, 2, 1]>,
1018  //
1019  // VLD4lnu
1020  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1021                               InstrStage<1, [A9_MUX0], 0>,
1022                               InstrStage<1, [A9_DRegsN],   0, Required>,
1023                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1024                               InstrStage<4, [A9_NPipe], 0>,
1025                               InstrStage<4, [A9_LSUnit]>],
1026                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1027  //
1028  // VLD4dup
1029  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1030                               InstrStage<1, [A9_MUX0], 0>,
1031                               InstrStage<1, [A9_DRegsN],   0, Required>,
1032                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1033                               InstrStage<2, [A9_NPipe], 0>,
1034                               InstrStage<2, [A9_LSUnit]>],
1035                              [2, 2, 3, 3, 1]>,
1036  //
1037  // VLD4dupu
1038  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1039                               InstrStage<1, [A9_MUX0], 0>,
1040                               InstrStage<1, [A9_DRegsN],   0, Required>,
1041                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1042                               InstrStage<2, [A9_NPipe], 0>,
1043                               InstrStage<2, [A9_LSUnit]>],
1044                              [2, 2, 3, 3, 2, 1, 1]>,
1045  //
1046  // VST1
1047  InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1048                               InstrStage<1, [A9_MUX0], 0>,
1049                               InstrStage<1, [A9_DRegsN],   0, Required>,
1050                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1051                               InstrStage<1, [A9_NPipe], 0>,
1052                               InstrStage<1, [A9_LSUnit]>],
1053                              [1, 1, 1]>,
1054  //
1055  // VST1x2
1056  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1057                               InstrStage<1, [A9_MUX0], 0>,
1058                               InstrStage<1, [A9_DRegsN],   0, Required>,
1059                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1060                               InstrStage<1, [A9_NPipe], 0>,
1061                               InstrStage<1, [A9_LSUnit]>],
1062                              [1, 1, 1, 1]>,
1063  //
1064  // VST1x3
1065  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1066                               InstrStage<1, [A9_MUX0], 0>,
1067                               InstrStage<1, [A9_DRegsN],   0, Required>,
1068                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1069                               InstrStage<2, [A9_NPipe], 0>,
1070                               InstrStage<2, [A9_LSUnit]>],
1071                              [1, 1, 1, 1, 2]>,
1072  //
1073  // VST1x4
1074  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1075                               InstrStage<1, [A9_MUX0], 0>,
1076                               InstrStage<1, [A9_DRegsN],   0, Required>,
1077                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1078                               InstrStage<2, [A9_NPipe], 0>,
1079                               InstrStage<2, [A9_LSUnit]>],
1080                              [1, 1, 1, 1, 2, 2]>,
1081  //
1082  // VST1u
1083  InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1084                               InstrStage<1, [A9_MUX0], 0>,
1085                               InstrStage<1, [A9_DRegsN],   0, Required>,
1086                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1087                               InstrStage<1, [A9_NPipe], 0>,
1088                               InstrStage<1, [A9_LSUnit]>],
1089                              [2, 1, 1, 1, 1]>,
1090  //
1091  // VST1x2u
1092  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1093                               InstrStage<1, [A9_MUX0], 0>,
1094                               InstrStage<1, [A9_DRegsN],   0, Required>,
1095                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1096                               InstrStage<1, [A9_NPipe], 0>,
1097                               InstrStage<1, [A9_LSUnit]>],
1098                              [2, 1, 1, 1, 1, 1]>,
1099  //
1100  // VST1x3u
1101  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1102                               InstrStage<1, [A9_MUX0], 0>,
1103                               InstrStage<1, [A9_DRegsN],   0, Required>,
1104                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1105                               InstrStage<2, [A9_NPipe], 0>,
1106                               InstrStage<2, [A9_LSUnit]>],
1107                              [2, 1, 1, 1, 1, 1, 2]>,
1108  //
1109  // VST1x4u
1110  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1111                               InstrStage<1, [A9_MUX0], 0>,
1112                               InstrStage<1, [A9_DRegsN],   0, Required>,
1113                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1114                               InstrStage<2, [A9_NPipe], 0>,
1115                               InstrStage<2, [A9_LSUnit]>],
1116                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1117  //
1118  // VST1ln
1119  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1120                               InstrStage<1, [A9_MUX0], 0>,
1121                               InstrStage<1, [A9_DRegsN],   0, Required>,
1122                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1123                               InstrStage<1, [A9_NPipe], 0>,
1124                               InstrStage<1, [A9_LSUnit]>],
1125                              [1, 1, 1]>,
1126  //
1127  // VST1lnu
1128  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1129                               InstrStage<1, [A9_MUX0], 0>,
1130                               InstrStage<1, [A9_DRegsN],   0, Required>,
1131                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1132                               InstrStage<1, [A9_NPipe], 0>,
1133                               InstrStage<1, [A9_LSUnit]>],
1134                              [2, 1, 1, 1, 1]>,
1135  //
1136  // VST2
1137  InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1138                               InstrStage<1, [A9_MUX0], 0>,
1139                               InstrStage<1, [A9_DRegsN],   0, Required>,
1140                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1141                               InstrStage<1, [A9_NPipe], 0>,
1142                               InstrStage<1, [A9_LSUnit]>],
1143                              [1, 1, 1, 1]>,
1144  //
1145  // VST2x2
1146  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1147                               InstrStage<1, [A9_MUX0], 0>,
1148                               InstrStage<1, [A9_DRegsN],   0, Required>,
1149                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1150                               InstrStage<3, [A9_NPipe], 0>,
1151                               InstrStage<3, [A9_LSUnit]>],
1152                              [1, 1, 1, 1, 2, 2]>,
1153  //
1154  // VST2u
1155  InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1156                               InstrStage<1, [A9_MUX0], 0>,
1157                               InstrStage<1, [A9_DRegsN],   0, Required>,
1158                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1159                               InstrStage<1, [A9_NPipe], 0>,
1160                               InstrStage<1, [A9_LSUnit]>],
1161                              [2, 1, 1, 1, 1, 1]>,
1162  //
1163  // VST2x2u
1164  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1165                               InstrStage<1, [A9_MUX0], 0>,
1166                               InstrStage<1, [A9_DRegsN],   0, Required>,
1167                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1168                               InstrStage<3, [A9_NPipe], 0>,
1169                               InstrStage<3, [A9_LSUnit]>],
1170                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1171  //
1172  // VST2ln
1173  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1174                               InstrStage<1, [A9_MUX0], 0>,
1175                               InstrStage<1, [A9_DRegsN],   0, Required>,
1176                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1177                               InstrStage<1, [A9_NPipe], 0>,
1178                               InstrStage<1, [A9_LSUnit]>],
1179                              [1, 1, 1, 1]>,
1180  //
1181  // VST2lnu
1182  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1183                               InstrStage<1, [A9_MUX0], 0>,
1184                               InstrStage<1, [A9_DRegsN],   0, Required>,
1185                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1186                               InstrStage<1, [A9_NPipe], 0>,
1187                               InstrStage<1, [A9_LSUnit]>],
1188                              [2, 1, 1, 1, 1, 1]>,
1189  //
1190  // VST3
1191  InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1192                               InstrStage<1, [A9_MUX0], 0>,
1193                               InstrStage<1, [A9_DRegsN],   0, Required>,
1194                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1195                               InstrStage<2, [A9_NPipe], 0>,
1196                               InstrStage<2, [A9_LSUnit]>],
1197                              [1, 1, 1, 1, 2]>,
1198  //
1199  // VST3u
1200  InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1201                               InstrStage<1, [A9_MUX0], 0>,
1202                               InstrStage<1, [A9_DRegsN],   0, Required>,
1203                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1204                               InstrStage<2, [A9_NPipe], 0>,
1205                               InstrStage<2, [A9_LSUnit]>],
1206                              [2, 1, 1, 1, 1, 1, 2]>,
1207  //
1208  // VST3ln
1209  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1210                               InstrStage<1, [A9_MUX0], 0>,
1211                               InstrStage<1, [A9_DRegsN],   0, Required>,
1212                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1213                               InstrStage<3, [A9_NPipe], 0>,
1214                               InstrStage<3, [A9_LSUnit]>],
1215                              [1, 1, 1, 1, 2]>,
1216  //
1217  // VST3lnu
1218  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1219                               InstrStage<1, [A9_MUX0], 0>,
1220                               InstrStage<1, [A9_DRegsN],   0, Required>,
1221                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1222                               InstrStage<3, [A9_NPipe], 0>,
1223                               InstrStage<3, [A9_LSUnit]>],
1224                              [2, 1, 1, 1, 1, 1, 2]>,
1225  //
1226  // VST4
1227  InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1228                               InstrStage<1, [A9_MUX0], 0>,
1229                               InstrStage<1, [A9_DRegsN],   0, Required>,
1230                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1231                               InstrStage<2, [A9_NPipe], 0>,
1232                               InstrStage<2, [A9_LSUnit]>],
1233                              [1, 1, 1, 1, 2, 2]>,
1234  //
1235  // VST4u
1236  InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1237                               InstrStage<1, [A9_MUX0], 0>,
1238                               InstrStage<1, [A9_DRegsN],   0, Required>,
1239                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1240                               InstrStage<2, [A9_NPipe], 0>,
1241                               InstrStage<2, [A9_LSUnit]>],
1242                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1243  //
1244  // VST4ln
1245  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1246                               InstrStage<1, [A9_MUX0], 0>,
1247                               InstrStage<1, [A9_DRegsN],   0, Required>,
1248                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1249                               InstrStage<2, [A9_NPipe], 0>,
1250                               InstrStage<2, [A9_LSUnit]>],
1251                              [1, 1, 1, 1, 2, 2]>,
1252  //
1253  // VST4lnu
1254  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1255                               InstrStage<1, [A9_MUX0], 0>,
1256                               InstrStage<1, [A9_DRegsN],   0, Required>,
1257                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1258                               InstrStage<2, [A9_NPipe], 0>,
1259                               InstrStage<2, [A9_LSUnit]>],
1260                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1261
1262  //
1263  // Double-register Integer Unary
1264  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1265                               InstrStage<1, [A9_MUX0], 0>,
1266                               InstrStage<1, [A9_DRegsN],   0, Required>,
1267                               // Extra latency cycles since wbck is 6 cycles
1268                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1269                               InstrStage<1, [A9_NPipe]>],
1270                              [4, 2]>,
1271  //
1272  // Quad-register Integer Unary
1273  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1274                               InstrStage<1, [A9_MUX0], 0>,
1275                               InstrStage<1, [A9_DRegsN],   0, Required>,
1276                               // Extra latency cycles since wbck is 6 cycles
1277                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1278                               InstrStage<1, [A9_NPipe]>],
1279                              [4, 2]>,
1280  //
1281  // Double-register Integer Q-Unary
1282  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1283                               InstrStage<1, [A9_MUX0], 0>,
1284                               InstrStage<1, [A9_DRegsN],   0, Required>,
1285                               // Extra latency cycles since wbck is 6 cycles
1286                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1287                               InstrStage<1, [A9_NPipe]>],
1288                              [4, 1]>,
1289  //
1290  // Quad-register Integer Q-Unary
1291  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1292                               InstrStage<1, [A9_MUX0], 0>,
1293                               InstrStage<1, [A9_DRegsN],   0, Required>,
1294                               // Extra latency cycles since wbck is 6 cycles
1295                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1296                               InstrStage<1, [A9_NPipe]>],
1297                              [4, 1]>,
1298  //
1299  // Double-register Integer Binary
1300  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1301                               InstrStage<1, [A9_MUX0], 0>,
1302                               InstrStage<1, [A9_DRegsN],   0, Required>,
1303                               // Extra latency cycles since wbck is 6 cycles
1304                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1305                               InstrStage<1, [A9_NPipe]>],
1306                              [3, 2, 2]>,
1307  //
1308  // Quad-register Integer Binary
1309  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1310                               InstrStage<1, [A9_MUX0], 0>,
1311                               InstrStage<1, [A9_DRegsN],   0, Required>,
1312                               // Extra latency cycles since wbck is 6 cycles
1313                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1314                               InstrStage<1, [A9_NPipe]>],
1315                              [3, 2, 2]>,
1316  //
1317  // Double-register Integer Subtract
1318  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1319                               InstrStage<1, [A9_MUX0], 0>,
1320                               InstrStage<1, [A9_DRegsN],   0, Required>,
1321                               // Extra latency cycles since wbck is 6 cycles
1322                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1323                               InstrStage<1, [A9_NPipe]>],
1324                              [3, 2, 1]>,
1325  //
1326  // Quad-register Integer Subtract
1327  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1328                               InstrStage<1, [A9_MUX0], 0>,
1329                               InstrStage<1, [A9_DRegsN],   0, Required>,
1330                               // Extra latency cycles since wbck is 6 cycles
1331                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1332                               InstrStage<1, [A9_NPipe]>],
1333                              [3, 2, 1]>,
1334  //
1335  // Double-register Integer Shift
1336  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1337                               InstrStage<1, [A9_MUX0], 0>,
1338                               InstrStage<1, [A9_DRegsN],   0, Required>,
1339                               // Extra latency cycles since wbck is 6 cycles
1340                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1341                               InstrStage<1, [A9_NPipe]>],
1342                              [3, 1, 1]>,
1343  //
1344  // Quad-register Integer Shift
1345  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1346                               InstrStage<1, [A9_MUX0], 0>,
1347                               InstrStage<1, [A9_DRegsN],   0, Required>,
1348                               // Extra latency cycles since wbck is 6 cycles
1349                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1350                               InstrStage<1, [A9_NPipe]>],
1351                              [3, 1, 1]>,
1352  //
1353  // Double-register Integer Shift (4 cycle)
1354  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1355                               InstrStage<1, [A9_MUX0], 0>,
1356                               InstrStage<1, [A9_DRegsN],   0, Required>,
1357                               // Extra latency cycles since wbck is 6 cycles
1358                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1359                               InstrStage<1, [A9_NPipe]>],
1360                              [4, 1, 1]>,
1361  //
1362  // Quad-register Integer Shift (4 cycle)
1363  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1364                               InstrStage<1, [A9_MUX0], 0>,
1365                               InstrStage<1, [A9_DRegsN],   0, Required>,
1366                               // Extra latency cycles since wbck is 6 cycles
1367                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1368                               InstrStage<1, [A9_NPipe]>],
1369                              [4, 1, 1]>,
1370  //
1371  // Double-register Integer Binary (4 cycle)
1372  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1373                               InstrStage<1, [A9_MUX0], 0>,
1374                               InstrStage<1, [A9_DRegsN],   0, Required>,
1375                               // Extra latency cycles since wbck is 6 cycles
1376                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1377                               InstrStage<1, [A9_NPipe]>],
1378                              [4, 2, 2]>,
1379  //
1380  // Quad-register Integer Binary (4 cycle)
1381  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1382                               InstrStage<1, [A9_MUX0], 0>,
1383                               InstrStage<1, [A9_DRegsN],   0, Required>,
1384                               // Extra latency cycles since wbck is 6 cycles
1385                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1386                               InstrStage<1, [A9_NPipe]>],
1387                              [4, 2, 2]>,
1388  //
1389  // Double-register Integer Subtract (4 cycle)
1390  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1391                               InstrStage<1, [A9_MUX0], 0>,
1392                               InstrStage<1, [A9_DRegsN],   0, Required>,
1393                               // Extra latency cycles since wbck is 6 cycles
1394                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1395                               InstrStage<1, [A9_NPipe]>],
1396                              [4, 2, 1]>,
1397  //
1398  // Quad-register Integer Subtract (4 cycle)
1399  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400                               InstrStage<1, [A9_MUX0], 0>,
1401                               InstrStage<1, [A9_DRegsN],   0, Required>,
1402                               // Extra latency cycles since wbck is 6 cycles
1403                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1404                               InstrStage<1, [A9_NPipe]>],
1405                              [4, 2, 1]>,
1406
1407  //
1408  // Double-register Integer Count
1409  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1410                               InstrStage<1, [A9_MUX0], 0>,
1411                               InstrStage<1, [A9_DRegsN],   0, Required>,
1412                               // Extra latency cycles since wbck is 6 cycles
1413                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1414                               InstrStage<1, [A9_NPipe]>],
1415                              [3, 2, 2]>,
1416  //
1417  // Quad-register Integer Count
1418  // Result written in N3, but that is relative to the last cycle of multicycle,
1419  // so we use 4 for those cases
1420  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1421                               InstrStage<1, [A9_MUX0], 0>,
1422                               InstrStage<1, [A9_DRegsN],   0, Required>,
1423                               // Extra latency cycles since wbck is 7 cycles
1424                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1425                               InstrStage<2, [A9_NPipe]>],
1426                              [4, 2, 2]>,
1427  //
1428  // Double-register Absolute Difference and Accumulate
1429  InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1430                               InstrStage<1, [A9_MUX0], 0>,
1431                               InstrStage<1, [A9_DRegsN],   0, Required>,
1432                               // Extra latency cycles since wbck is 6 cycles
1433                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1434                               InstrStage<1, [A9_NPipe]>],
1435                              [6, 3, 2, 1]>,
1436  //
1437  // Quad-register Absolute Difference and Accumulate
1438  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1439                               InstrStage<1, [A9_MUX0], 0>,
1440                               InstrStage<1, [A9_DRegsN],   0, Required>,
1441                               // Extra latency cycles since wbck is 6 cycles
1442                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1443                               InstrStage<2, [A9_NPipe]>],
1444                              [6, 3, 2, 1]>,
1445  //
1446  // Double-register Integer Pair Add Long
1447  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1448                               InstrStage<1, [A9_MUX0], 0>,
1449                               InstrStage<1, [A9_DRegsN],   0, Required>,
1450                               // Extra latency cycles since wbck is 6 cycles
1451                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1452                               InstrStage<1, [A9_NPipe]>],
1453                              [6, 3, 1]>,
1454  //
1455  // Quad-register Integer Pair Add Long
1456  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1457                               InstrStage<1, [A9_MUX0], 0>,
1458                               InstrStage<1, [A9_DRegsN],   0, Required>,
1459                               // Extra latency cycles since wbck is 6 cycles
1460                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1461                               InstrStage<2, [A9_NPipe]>],
1462                              [6, 3, 1]>,
1463
1464  //
1465  // Double-register Integer Multiply (.8, .16)
1466  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1467                               InstrStage<1, [A9_MUX0], 0>,
1468                               InstrStage<1, [A9_DRegsN],   0, Required>,
1469                               // Extra latency cycles since wbck is 6 cycles
1470                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1471                               InstrStage<1, [A9_NPipe]>],
1472                              [6, 2, 2]>,
1473  //
1474  // Quad-register Integer Multiply (.8, .16)
1475  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1476                               InstrStage<1, [A9_MUX0], 0>,
1477                               InstrStage<1, [A9_DRegsN],   0, Required>,
1478                               // Extra latency cycles since wbck is 7 cycles
1479                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1480                               InstrStage<2, [A9_NPipe]>],
1481                              [7, 2, 2]>,
1482
1483  //
1484  // Double-register Integer Multiply (.32)
1485  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1486                               InstrStage<1, [A9_MUX0], 0>,
1487                               InstrStage<1, [A9_DRegsN],   0, Required>,
1488                               // Extra latency cycles since wbck is 7 cycles
1489                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1490                               InstrStage<2, [A9_NPipe]>],
1491                              [7, 2, 1]>,
1492  //
1493  // Quad-register Integer Multiply (.32)
1494  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1495                               InstrStage<1, [A9_MUX0], 0>,
1496                               InstrStage<1, [A9_DRegsN],   0, Required>,
1497                               // Extra latency cycles since wbck is 9 cycles
1498                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1499                               InstrStage<4, [A9_NPipe]>],
1500                              [9, 2, 1]>,
1501  //
1502  // Double-register Integer Multiply-Accumulate (.8, .16)
1503  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1504                               InstrStage<1, [A9_MUX0], 0>,
1505                               InstrStage<1, [A9_DRegsN],   0, Required>,
1506                               // Extra latency cycles since wbck is 6 cycles
1507                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1508                               InstrStage<1, [A9_NPipe]>],
1509                              [6, 3, 2, 2]>,
1510  //
1511  // Double-register Integer Multiply-Accumulate (.32)
1512  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1513                               InstrStage<1, [A9_MUX0], 0>,
1514                               InstrStage<1, [A9_DRegsN],   0, Required>,
1515                               // Extra latency cycles since wbck is 7 cycles
1516                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1517                               InstrStage<2, [A9_NPipe]>],
1518                              [7, 3, 2, 1]>,
1519  //
1520  // Quad-register Integer Multiply-Accumulate (.8, .16)
1521  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1522                               InstrStage<1, [A9_MUX0], 0>,
1523                               InstrStage<1, [A9_DRegsN],   0, Required>,
1524                               // Extra latency cycles since wbck is 7 cycles
1525                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1526                               InstrStage<2, [A9_NPipe]>],
1527                              [7, 3, 2, 2]>,
1528  //
1529  // Quad-register Integer Multiply-Accumulate (.32)
1530  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1531                               InstrStage<1, [A9_MUX0], 0>,
1532                               InstrStage<1, [A9_DRegsN],   0, Required>,
1533                               // Extra latency cycles since wbck is 9 cycles
1534                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1535                               InstrStage<4, [A9_NPipe]>],
1536                              [9, 3, 2, 1]>,
1537
1538  //
1539  // Move
1540  InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1541                               InstrStage<1, [A9_MUX0], 0>,
1542                               InstrStage<1, [A9_DRegsN],   0, Required>,
1543                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1544                               InstrStage<1, [A9_NPipe]>],
1545                              [1,1]>,
1546  //
1547  // Move Immediate
1548  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1549                               InstrStage<1, [A9_MUX0], 0>,
1550                               InstrStage<1, [A9_DRegsN],   0, Required>,
1551                               // Extra latency cycles since wbck is 6 cycles
1552                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1553                               InstrStage<1, [A9_NPipe]>],
1554                              [3]>,
1555  //
1556  // Double-register Permute Move
1557  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1558                               InstrStage<1, [A9_MUX0], 0>,
1559                               InstrStage<1, [A9_DRegsN],   0, Required>,
1560                               // Extra latency cycles since wbck is 6 cycles
1561                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1562                               InstrStage<1, [A9_NPipe]>],
1563                              [2, 1]>,
1564  //
1565  // Quad-register Permute Move
1566  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1567                               InstrStage<1, [A9_MUX0], 0>,
1568                               InstrStage<1, [A9_DRegsN],   0, Required>,
1569                               // Extra latency cycles since wbck is 6 cycles
1570                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1571                               InstrStage<1, [A9_NPipe]>],
1572                              [2, 1]>,
1573  //
1574  // Integer to Single-precision Move
1575  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1576                               InstrStage<1, [A9_MUX0], 0>,
1577                               InstrStage<1, [A9_DRegsN],   0, Required>,
1578                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1579                               InstrStage<1, [A9_NPipe]>],
1580                              [1, 1]>,
1581  //
1582  // Integer to Double-precision Move
1583  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1584                               InstrStage<1, [A9_MUX0], 0>,
1585                               InstrStage<1, [A9_DRegsN],   0, Required>,
1586                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1587                               InstrStage<1, [A9_NPipe]>],
1588                              [1, 1, 1]>,
1589  //
1590  // Single-precision to Integer Move
1591  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1592                               InstrStage<1, [A9_MUX0], 0>,
1593                               InstrStage<1, [A9_DRegsN],   0, Required>,
1594                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1595                               InstrStage<1, [A9_NPipe]>],
1596                              [2, 1]>,
1597  //
1598  // Double-precision to Integer Move
1599  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1600                               InstrStage<1, [A9_MUX0], 0>,
1601                               InstrStage<1, [A9_DRegsN],   0, Required>,
1602                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1603                               InstrStage<1, [A9_NPipe]>],
1604                              [2, 2, 1]>,
1605  //
1606  // Integer to Lane Move
1607  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1608                               InstrStage<1, [A9_MUX0], 0>,
1609                               InstrStage<1, [A9_DRegsN],   0, Required>,
1610                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1611                               InstrStage<2, [A9_NPipe]>],
1612                              [3, 1, 1]>,
1613
1614  //
1615  // Vector narrow move
1616  InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1617                               InstrStage<1, [A9_MUX0], 0>,
1618                               InstrStage<1, [A9_DRegsN],   0, Required>,
1619                               // Extra latency cycles since wbck is 6 cycles
1620                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1621                               InstrStage<1, [A9_NPipe]>],
1622                              [3, 1]>,
1623  //
1624  // Double-register FP Unary
1625  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1626                               InstrStage<1, [A9_MUX0], 0>,
1627                               InstrStage<1, [A9_DRegsN],   0, Required>,
1628                               // Extra latency cycles since wbck is 6 cycles
1629                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1630                               InstrStage<1, [A9_NPipe]>],
1631                              [5, 2]>,
1632  //
1633  // Quad-register FP Unary
1634  // Result written in N5, but that is relative to the last cycle of multicycle,
1635  // so we use 6 for those cases
1636  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1637                               InstrStage<1, [A9_MUX0], 0>,
1638                               InstrStage<1, [A9_DRegsN],   0, Required>,
1639                               // Extra latency cycles since wbck is 7 cycles
1640                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1641                               InstrStage<2, [A9_NPipe]>],
1642                              [6, 2]>,
1643  //
1644  // Double-register FP Binary
1645  // FIXME: We're using this itin for many instructions, and the [2, 2] cycles
1646  // given for the two source operands (the last two entries below) are too
1646  // optimistic.
1647  InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1648                               InstrStage<1, [A9_MUX0], 0>,
1649                               InstrStage<1, [A9_DRegsN],   0, Required>,
1650                               // Extra latency cycles since wbck is 6 cycles
1651                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1652                               InstrStage<1, [A9_NPipe]>],
1653                              [5, 2, 2]>,
1654
1655  //
1656  // VPADD, etc.
1657  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1658                               InstrStage<1, [A9_MUX0], 0>,
1659                               InstrStage<1, [A9_DRegsN],   0, Required>,
1660                               // Extra latency cycles since wbck is 6 cycles
1661                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1662                               InstrStage<1, [A9_NPipe]>],
1663                              [5, 1, 1]>,
1664  //
1665  // Double-register FP VMUL
1666  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1667                               InstrStage<1, [A9_MUX0], 0>,
1668                               InstrStage<1, [A9_DRegsN],   0, Required>,
1669                               // Extra latency cycles since wbck is 6 cycles
1670                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1671                               InstrStage<1, [A9_NPipe]>],
1672                              [5, 2, 1]>,
1673  //
1674  // Quad-register FP Binary
1675  // Result written in N5, but that is relative to the last cycle of multicycle,
1676  // so we use 6 for those cases
1677  // FIXME: We're using this itin for many instructions, and the [2, 2] cycles
1678  // given for the two source operands (the last two entries below) are too
1678  // optimistic.
1679  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680                               InstrStage<1, [A9_MUX0], 0>,
1681                               InstrStage<1, [A9_DRegsN],   0, Required>,
1682                               // Extra latency cycles since wbck is 7 cycles
1683                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1684                               InstrStage<2, [A9_NPipe]>],
1685                              [6, 2, 2]>,
1686  //
1687  // Quad-register FP VMUL
1688  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1689                               InstrStage<1, [A9_MUX0], 0>,
1690                               InstrStage<1, [A9_DRegsN],   0, Required>,
1691                               // Extra latency cycles since wbck is 7 cycles
1692                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1693                               InstrStage<1, [A9_NPipe]>],
1694                              [6, 2, 1]>,
1695  //
1696  // Double-register FP Multiply-Accumulate
1697  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1698                               InstrStage<1, [A9_MUX0], 0>,
1699                               InstrStage<1, [A9_DRegsN],   0, Required>,
1700                               // Extra latency cycles since wbck is 7 cycles
1701                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1702                               InstrStage<2, [A9_NPipe]>],
1703                              [6, 3, 2, 1]>,
1704  //
1705  // Quad-register FP Multiply-Accumulate
1706  // Result written in N9, but that is relative to the last cycle of multicycle,
1707  // so we use 10 for those cases
1708  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1709                               InstrStage<1, [A9_MUX0], 0>,
1710                               InstrStage<1, [A9_DRegsN],   0, Required>,
1711                               // Extra latency cycles since wbck is 9 cycles
1712                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1713                               InstrStage<4, [A9_NPipe]>],
1714                              [8, 4, 2, 1]>,
1715  //
1716  // Double-register Fused FP Multiply-Accumulate
1717  InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1718                               InstrStage<1, [A9_MUX0], 0>,
1719                               InstrStage<1, [A9_DRegsN],   0, Required>,
1720                               // Extra latency cycles since wbck is 7 cycles
1721                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1722                               InstrStage<2, [A9_NPipe]>],
1723                              [6, 3, 2, 1]>,
1724  //
1725  // Quad-register Fused FP Multiply-Accumulate
1726  // Result written in N9, but that is relative to the last cycle of multicycle,
1727  // so we use 10 for those cases
1728  InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1729                               InstrStage<1, [A9_MUX0], 0>,
1730                               InstrStage<1, [A9_DRegsN],   0, Required>,
1731                               // Extra latency cycles since wbck is 9 cycles
1732                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1733                               InstrStage<4, [A9_NPipe]>],
1734                              [8, 4, 2, 1]>,
1735  //
1736  // Double-register Reciprocal Step
1737  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1738                               InstrStage<1, [A9_MUX0], 0>,
1739                               InstrStage<1, [A9_DRegsN],   0, Required>,
1740                               // Extra latency cycles since wbck is 10 cycles
1741                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1742                               InstrStage<1, [A9_NPipe]>],
1743                              [9, 2, 2]>,
1744  //
1745  // Quad-register Reciprocal Step
1746  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1747                               InstrStage<1, [A9_MUX0], 0>,
1748                               InstrStage<1, [A9_DRegsN],   0, Required>,
1749                               // Extra latency cycles since wbck is 11 cycles
1750                               InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1751                               InstrStage<2, [A9_NPipe]>],
1752                              [10, 2, 2]>,
1753  //
1754  // Double-register Permute
1755  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1756                               InstrStage<1, [A9_MUX0], 0>,
1757                               InstrStage<1, [A9_DRegsN],   0, Required>,
1758                               // Extra latency cycles since wbck is 6 cycles
1759                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1760                               InstrStage<1, [A9_NPipe]>],
1761                              [2, 2, 1, 1]>,
1762  //
1763  // Quad-register Permute
1764  // Result written in N2, but that is relative to the last cycle of multicycle,
1765  // so we use 3 for those cases
1766  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1767                               InstrStage<1, [A9_MUX0], 0>,
1768                               InstrStage<1, [A9_DRegsN],   0, Required>,
1769                               // Extra latency cycles since wbck is 7 cycles
1770                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1771                               InstrStage<2, [A9_NPipe]>],
1772                              [3, 3, 1, 1]>,
1773  //
1774  // Quad-register Permute (3 cycle issue)
1775  // Result written in N2, but that is relative to the last cycle of multicycle,
1776  // so we use 4 for those cases
1777  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1778                               InstrStage<1, [A9_MUX0], 0>,
1779                               InstrStage<1, [A9_DRegsN],   0, Required>,
1780                               // Extra latency cycles since wbck is 8 cycles
1781                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1782                               InstrStage<3, [A9_NPipe]>],
1783                              [4, 4, 1, 1]>,
1784
1785  //
1786  // Double-register VEXT
1787  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1788                               InstrStage<1, [A9_MUX0], 0>,
1789                               InstrStage<1, [A9_DRegsN],   0, Required>,
1790                               // Extra latency cycles since wbck is 6 cycles
1791                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1792                               InstrStage<1, [A9_NPipe]>],
1793                              [2, 1, 1]>,
1794  //
1795  // Quad-register VEXT
1796  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1797                               InstrStage<1, [A9_MUX0], 0>,
1798                               InstrStage<1, [A9_DRegsN],   0, Required>,
1799                               // Extra latency cycles since wbck is 7 cycles
1800                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1801                               InstrStage<2, [A9_NPipe]>],
1802                              [3, 1, 2]>,
1803  //
1804  // VTB
1805  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1806                               InstrStage<1, [A9_MUX0], 0>,
1807                               InstrStage<1, [A9_DRegsN],   0, Required>,
1808                               // Extra latency cycles since wbck is 7 cycles
1809                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1810                               InstrStage<2, [A9_NPipe]>],
1811                              [3, 2, 1]>,
1812  InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1813                               InstrStage<1, [A9_MUX0], 0>,
1814                               InstrStage<2, [A9_DRegsN],   0, Required>,
1815                               // Extra latency cycles since wbck is 7 cycles
1816                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1817                               InstrStage<2, [A9_NPipe]>],
1818                              [3, 2, 2, 1]>,
1819  InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1820                               InstrStage<1, [A9_MUX0], 0>,
1821                               InstrStage<2, [A9_DRegsN],   0, Required>,
1822                               // Extra latency cycles since wbck is 8 cycles
1823                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1824                               InstrStage<3, [A9_NPipe]>],
1825                              [4, 2, 2, 3, 1]>,
1826  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1827                               InstrStage<1, [A9_MUX0], 0>,
1828                               InstrStage<1, [A9_DRegsN],   0, Required>,
1829                               // Extra latency cycles since wbck is 8 cycles
1830                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1831                               InstrStage<3, [A9_NPipe]>],
1832                              [4, 2, 2, 3, 3, 1]>,
1833  //
1834  // VTBX
1835  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1836                               InstrStage<1, [A9_MUX0], 0>,
1837                               InstrStage<1, [A9_DRegsN],   0, Required>,
1838                               // Extra latency cycles since wbck is 7 cycles
1839                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1840                               InstrStage<2, [A9_NPipe]>],
1841                              [3, 1, 2, 1]>,
1842  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1843                               InstrStage<1, [A9_MUX0], 0>,
1844                               InstrStage<1, [A9_DRegsN],   0, Required>,
1845                               // Extra latency cycles since wbck is 7 cycles
1846                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1847                               InstrStage<2, [A9_NPipe]>],
1848                              [3, 1, 2, 2, 1]>,
1849  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1850                               InstrStage<1, [A9_MUX0], 0>,
1851                               InstrStage<1, [A9_DRegsN],   0, Required>,
1852                               // Extra latency cycles since wbck is 8 cycles
1853                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1854                               InstrStage<3, [A9_NPipe]>],
1855                              [4, 1, 2, 2, 3, 1]>,
1856  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1857                               InstrStage<1, [A9_MUX0], 0>,
1858                               InstrStage<1, [A9_DRegsN],   0, Required>,
1859                               // Extra latency cycles since wbck is 8 cycles
1860                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1861                               InstrStage<2, [A9_NPipe]>],
1862                              [4, 1, 2, 2, 3, 3, 1]>
1863]>;
1864