//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the POWER7 processor.
//
//===----------------------------------------------------------------------===//

// Primary reference:
// IBM POWER7 multicore server processor
// B. Sinharoy, et al.
// IBM J. Res. & Dev. (55) 3. May/June 2011.

// Scheduling for the P7 involves tracking two types of resources:
//  1. The dispatch bundle slots
//  2. The functional unit resources

// Dispatch units (one record per dispatch bundle slot).
def P7_DU1 : FuncUnit;
def P7_DU2 : FuncUnit;
def P7_DU3 : FuncUnit;
def P7_DU4 : FuncUnit;
def P7_DU5 : FuncUnit;
def P7_DU6 : FuncUnit;

// Load/Store pipelines 1 and 2.
def P7_LS1 : FuncUnit;
def P7_LS2 : FuncUnit;

// FX pipelines 1 and 2.
def P7_FX1 : FuncUnit;
def P7_FX2 : FuncUnit;

// VS pipeline 1 (vector integer ops. always here).
def P7_VS1 : FuncUnit;
// VS pipeline 2 (128-bit stores and perms. here).
def P7_VS2 : FuncUnit;

// CR unit (CR logicals and move-from-SPRs).
def P7_CRU : FuncUnit;
// BR unit.
def P7_BRU : FuncUnit;

// Notes:
// Each LSU pipeline can also execute FX add and logical instructions.
// Each LSU pipeline can complete a load or store in one cycle.
//
// Each store is broken into two parts, AGEN goes to the LSU while a
// "data steering" op. goes to the FXU or VSU.
//
// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
//
// Frequent FX ops.
// take only one cycle and results can be used again in the
// next cycle (there is a self-bypass). Getting results from the other FX
// pipeline takes an additional cycle.
//
// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
// Dispatch of an instruction to VS1 that uses four single prec. inputs
// (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any
// floating point instruction.
//
// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
// (unlike on the POWER6).
//
// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
// share the same write-back, and have a 5-cycle latency difference, so the
// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
// op. has been dispatched to VS1.
//
// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
//
// Instruction dispatch groups have (at most) four non-branch instructions, and
// two branches. Unlike on the POWER4/5, a branch does not automatically
// end the dispatch group, but a second branch must be the last in the group.

// Itinerary table for the POWER7.
//
// The first template argument lists every functional unit referenced by the
// stages below. The second argument is left empty (presumably the bypass
// list -- confirm against TargetSchedule.td). The third is one InstrItinData
// per itinerary class: a list of InstrStage records (dispatch slot(s) first,
// then execution pipeline(s)) followed by a list of per-operand cycle counts.
//
// Each itinerary class appears exactly once in this table.
def P7Itineraries : ProcessorItineraries<
  [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
   P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
  // --- Integer -------------------------------------------------------------
  InstrItinData<IIC_IntSimple,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2, P7_LS1, P7_LS2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_IntGeneral,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_IntCompare,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  // FIXME: Add record-form itinerary data.
  InstrItinData<IIC_IntDivW,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<36, [P7_FX1, P7_FX2]>],
                [36, 1, 1]>,
  InstrItinData<IIC_IntDivD,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<68, [P7_FX1, P7_FX2]>],
                [68, 1, 1]>,
  InstrItinData<IIC_IntMulHW,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [4, 1, 1]>,
  InstrItinData<IIC_IntMulHWU,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [4, 1, 1]>,
  InstrItinData<IIC_IntMulLI,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [4, 1, 1]>,
  InstrItinData<IIC_IntRotate,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_IntRotateD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_IntShift,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_IntTrapW,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1]>,
  InstrItinData<IIC_IntTrapD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1]>,

  // --- Branch / CR ---------------------------------------------------------
  InstrItinData<IIC_BrB,
                [InstrStage<1, [P7_DU5, P7_DU6], 0>,
                 InstrStage<1, [P7_BRU]>],
                [3, 1, 1]>,
  InstrItinData<IIC_BrCR,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_CRU]>],
                [3, 1, 1]>,
  InstrItinData<IIC_BrMCR,
                [InstrStage<1, [P7_DU5, P7_DU6], 0>,
                 InstrStage<1, [P7_BRU]>],
                [3, 1, 1]>,

  // --- Loads ---------------------------------------------------------------
  InstrItinData<IIC_LdStLoad,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [2, 1, 1]>,
  InstrItinData<IIC_LdStLoadUpd,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [2, 2, 1, 1]>,
  InstrItinData<IIC_LdStLoadUpdX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [2, 1, 1]>,
  InstrItinData<IIC_LdStLDU,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [2, 2, 1, 1]>,
  InstrItinData<IIC_LdStLDUX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLFD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [3, 1, 1]>,
  InstrItinData<IIC_LdStLVecX,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [3, 1, 1]>,
  InstrItinData<IIC_LdStLFDU,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLFDUX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLHA,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 1, 1]>,
  InstrItinData<IIC_LdStLHAU,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [4, 4, 1, 1]>,
  InstrItinData<IIC_LdStLHAUX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [4, 4, 1, 1]>,
  InstrItinData<IIC_LdStLWA,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 1, 1]>,
  InstrItinData<IIC_LdStLWARX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [3, 1, 1]>,
  InstrItinData<IIC_LdStLDARX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [3, 1, 1]>,
  InstrItinData<IIC_LdStLMW,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [2, 1, 1]>,

  // --- Stores --------------------------------------------------------------
  InstrItinData<IIC_LdStStore,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_LdStSTD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_LdStSTDU,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [2, 1, 1, 1]>,
  InstrItinData<IIC_LdStSTDUX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [2, 1, 1, 1]>,
  InstrItinData<IIC_LdStSTFD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_LdStSTFDU,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_FX1, P7_FX2], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [2, 1, 1, 1]>,
  InstrItinData<IIC_LdStSTVEBX,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2], 0>,
                 InstrStage<1, [P7_VS2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_LdStSTDCX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [1, 1, 1]>,
  InstrItinData<IIC_LdStSTWCX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_LS1, P7_LS2]>],
                [1, 1, 1]>,

  // --- CR / SPR moves ------------------------------------------------------
  // NOTE(review): this is now the only IIC_BrMCRX entry. An earlier duplicate
  // (DU5/DU6 -> BRU, [3, 1, 1]) was dropped: two entries for one itinerary
  // class are ambiguous and, as far as I can tell, TableGen keeps only the
  // later one -- TODO confirm against the subtarget emitter.
  InstrItinData<IIC_BrMCRX,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_DU2], 0>,
                 InstrStage<1, [P7_DU3], 0>,
                 InstrStage<1, [P7_DU4], 0>,
                 InstrStage<1, [P7_CRU]>,
                 InstrStage<1, [P7_FX1, P7_FX2]>],
                [3, 1]>, // mtcr
  InstrItinData<IIC_SprMFCR,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_CRU]>],
                [6, 1]>,
  InstrItinData<IIC_SprMFCRF,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_CRU]>],
                [3, 1]>,
  InstrItinData<IIC_SprMTSPR,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_FX1]>],
                [4, 1]>, // mtctr

  // --- Scalar floating point -----------------------------------------------
  InstrItinData<IIC_FPGeneral,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [5, 1, 1]>,
  InstrItinData<IIC_FPCompare,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [8, 1, 1]>,
  InstrItinData<IIC_FPDivD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [33, 1, 1]>,
  InstrItinData<IIC_FPDivS,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [27, 1, 1]>,
  InstrItinData<IIC_FPSqrtD,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [44, 1, 1]>,
  InstrItinData<IIC_FPSqrtS,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [32, 1, 1]>,
  InstrItinData<IIC_FPFused,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [5, 1, 1, 1]>,
  InstrItinData<IIC_FPRes,
                [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [5, 1, 1]>,

  // --- Vector --------------------------------------------------------------
  InstrItinData<IIC_VecGeneral,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1]>],
                [2, 1, 1]>,
  InstrItinData<IIC_VecVSL,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1]>],
                [2, 1, 1]>,
  InstrItinData<IIC_VecVSR,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1]>],
                [2, 1, 1]>,
  InstrItinData<IIC_VecFP,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [6, 1, 1]>,
  InstrItinData<IIC_VecFPCompare,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [6, 1, 1]>,
  InstrItinData<IIC_VecFPRound,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1, P7_VS2]>],
                [6, 1, 1]>,
  InstrItinData<IIC_VecComplex,
                [InstrStage<1, [P7_DU1], 0>,
                 InstrStage<1, [P7_VS1]>],
                [7, 1, 1]>,
  InstrItinData<IIC_VecPerm,
                [InstrStage<1, [P7_DU1, P7_DU2], 0>,
                 InstrStage<1, [P7_VS2]>],
                [3, 1, 1]>
]>;

// ===---------------------------------------------------------------------===//
// P7 machine model for scheduling and other
// instruction cost heuristics.

def P7Model : SchedMachineModel {
  // Use the P7 itinerary data (P7Itineraries) for this model.
  let Itineraries = P7Itineraries;

  // 4 (non-branch) instructions are dispatched per cycle. Note that the
  // dispatch bundle size is 6 (including branches), but the total internal
  // issue bandwidth per cycle (from all queues) is 8.
  let IssueWidth = 6;

  // Out-of-order dispatch.
  let MinLatency = 0;

  // Optimistic load latency assuming bypass. This is overridden by
  // OperandCycles if the Itineraries are queried instead.
  let LoadLatency = 3;

  let MispredictPenalty = 16;
}