1//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8//===----------------------------------------------------------------------===// 9// Instruction scheduling annotations for in-order and out-of-order CPUs. 10// These annotations are independent of the itinerary class defined below. 11// Here we define the subtarget independent read/write per-operand resources. 12// The subtarget schedule definitions will then map these to the subtarget's 13// resource usages. 14// For example: 15// The instruction cycle timings table might contain an entry for an operation 16// like the following: 17// Rd <- ADD Rn, Rm, <shift> Rs 18// Uops | Latency from register | Uops - resource requirements - latency 19// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3 20// | | uopc Rd, Rn, T0 - P01 - 1 21// This is telling us that the result will be available in destination register 22// Rd after a minimum of four cycles after the result in Rm and Rs is available 23// and one cycle after the result in Rn is available. The micro-ops can execute 24// on resource P01. 25// To model this, we need to express that we need to dispatch two micro-ops, 26// that the resource P01 is needed and that the latency to Rn is different than 27// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by 28// three. 29// We will do this by assigning (abstract) resources to register defs/uses. 
30// ARMSchedule.td: 31// def WriteALUsr : SchedWrite; 32// def ReadAdvanceALUsr : SchedRead; 33// 34// ARMInstrInfo.td: 35// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault, 36// ReadDefault]> { ...} 37// ReadAdvance read resources allow us to define "pipeline by-passes" or 38// shorter latencies to certain registers as needed in the example above. 39// The "ReadDefault" can be omitted. 40// Next, the subtarget td file assigns resources to the abstract resources 41// defined here. 42// ARMScheduleSubtarget.td: 43// // Resources. 44// def P01 : ProcResource<3>; // ALU unit (3 of them). 45// ... 46// // Resource usages. 47// def : WriteRes<WriteALUsr, [P01, P01]> { 48// Latency = 4; // Latency of 4. 49// NumMicroOps = 2; // Dispatch 2 micro-ops. 50// // The two instances of resource P01 are occupied for one cycle. It is one 51// // cycle because these resources happen to be pipelined. 52// ResourceCycles = [1, 1]; 53// } 54// def : ReadAdvance<ReadAdvanceALUsr, 3>; 55 56//===----------------------------------------------------------------------===// 57// Sched definitions for integer pipeline instructions 58// 59// Basic ALU operation. 60def WriteALU : SchedWrite; 61def ReadALU : SchedRead; 62 63// Basic ALU with shifts. 64def WriteALUsi : SchedWrite; // Shift by immediate. 65def WriteALUsr : SchedWrite; // Shift by register. 66def WriteALUSsr : SchedWrite; // Shift by register (flag setting). 67def ReadALUsr : SchedRead; // Some operands are read later. 68 69// Compares. 70def WriteCMP : SchedWrite; 71def WriteCMPsi : SchedWrite; 72def WriteCMPsr : SchedWrite; 73 74// Multiplies. 75def WriteMUL16 : SchedWrite; // 16-bit multiply. 76def WriteMUL32 : SchedWrite; // 32-bit multiply. 77def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg. 78def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg. 79def ReadMUL : SchedRead; 80 81// Multiply-accumulates. 82def WriteMAC16 : SchedWrite; // 16-bit mac. 83def WriteMAC32 : SchedWrite; // 32-bit mac. 
84def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg. 85def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg. 86def ReadMAC : SchedRead; 87 88// Divisions. 89def WriteDIV : SchedWrite; 90 91// Loads/Stores. 92def WriteLd : SchedWrite; 93def WritePreLd : SchedWrite; 94def WriteST : SchedWrite; 95 96// Branches. 97def WriteBr : SchedWrite; 98def WriteBrL : SchedWrite; 99def WriteBrTbl : SchedWrite; 100 101// Noop. 102def WriteNoop : SchedWrite; 103 104//===----------------------------------------------------------------------===// 105// Sched definitions for floating-point and NEON instructions 106// 107// Floating point conversions 108def WriteFPCVT : SchedWrite; 109def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa 110 111// ALU operations (32/64-bit) 112def WriteFPALU32 : SchedWrite; 113def WriteFPALU64 : SchedWrite; 114 115// Multiplication 116def WriteFPMUL32 : SchedWrite; 117def WriteFPMUL64 : SchedWrite; 118def ReadFPMUL : SchedRead; // multiplier read 119def ReadFPMAC : SchedRead; // accumulator read 120 121// Multiply-accumulate 122def WriteFPMAC32 : SchedWrite; 123def WriteFPMAC64 : SchedWrite; 124 125// Division 126def WriteFPDIV32 : SchedWrite; 127def WriteFPDIV64 : SchedWrite; 128 129// Square-root 130def WriteFPSQRT32 : SchedWrite; 131def WriteFPSQRT64 : SchedWrite; 132 133// Vector loads and stores 134def WriteVLD1 : SchedWrite; 135def WriteVLD2 : SchedWrite; 136def WriteVLD3 : SchedWrite; 137def WriteVLD4 : SchedWrite; 138def WriteVST1 : SchedWrite; 139def WriteVST2 : SchedWrite; 140def WriteVST3 : SchedWrite; 141def WriteVST4 : SchedWrite; 142 143 144// Define TII and STI for use in SchedVariant Predicates. 
145def : PredicateProlog<[{ 146 const ARMBaseInstrInfo *TII = 147 static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); 148 (void)TII; 149 const ARMSubtarget *STI = 150 static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo()); 151 (void)STI; 152}]>; 153 154def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; 155 156//===----------------------------------------------------------------------===// 157// Instruction itinerary classes used for ARM. Each def below declares one InstrItinClass record. 158// 159def IIC_iALUx : InstrItinClass; 160def IIC_iALUi : InstrItinClass; 161def IIC_iALUr : InstrItinClass; 162def IIC_iALUsi : InstrItinClass; 163def IIC_iALUsir : InstrItinClass; 164def IIC_iALUsr : InstrItinClass; 165def IIC_iBITi : InstrItinClass; 166def IIC_iBITr : InstrItinClass; 167def IIC_iBITsi : InstrItinClass; 168def IIC_iBITsr : InstrItinClass; 169def IIC_iUNAr : InstrItinClass; 170def IIC_iUNAsi : InstrItinClass; 171def IIC_iEXTr : InstrItinClass; 172def IIC_iEXTAr : InstrItinClass; 173def IIC_iEXTAsr : InstrItinClass; 174def IIC_iCMPi : InstrItinClass; 175def IIC_iCMPr : InstrItinClass; 176def IIC_iCMPsi : InstrItinClass; 177def IIC_iCMPsr : InstrItinClass; 178def IIC_iTSTi : InstrItinClass; 179def IIC_iTSTr : InstrItinClass; 180def IIC_iTSTsi : InstrItinClass; 181def IIC_iTSTsr : InstrItinClass; 182def IIC_iMOVi : InstrItinClass; 183def IIC_iMOVr : InstrItinClass; 184def IIC_iMOVsi : InstrItinClass; 185def IIC_iMOVsr : InstrItinClass; 186def IIC_iMOVix2 : InstrItinClass; 187def IIC_iMOVix2addpc : InstrItinClass; 188def IIC_iMOVix2ld : InstrItinClass; 189def IIC_iMVNi : InstrItinClass; 190def IIC_iMVNr : InstrItinClass; 191def IIC_iMVNsi : InstrItinClass; 192def IIC_iMVNsr : InstrItinClass; 193def IIC_iCMOVi : InstrItinClass; 194def IIC_iCMOVr : InstrItinClass; 195def IIC_iCMOVsi : InstrItinClass; 196def IIC_iCMOVsr : InstrItinClass; 197def IIC_iCMOVix2 : InstrItinClass; 198def IIC_iMUL16 : InstrItinClass; 199def IIC_iMAC16 : InstrItinClass; 200def IIC_iMUL32 : InstrItinClass; 
201def IIC_iMAC32 : InstrItinClass; 202def IIC_iMUL64 : InstrItinClass; 203def IIC_iMAC64 : InstrItinClass; 204def IIC_iDIV : InstrItinClass; 205def IIC_iLoad_i : InstrItinClass; 206def IIC_iLoad_r : InstrItinClass; 207def IIC_iLoad_si : InstrItinClass; 208def IIC_iLoad_iu : InstrItinClass; 209def IIC_iLoad_ru : InstrItinClass; 210def IIC_iLoad_siu : InstrItinClass; 211def IIC_iLoad_bh_i : InstrItinClass; 212def IIC_iLoad_bh_r : InstrItinClass; 213def IIC_iLoad_bh_si : InstrItinClass; 214def IIC_iLoad_bh_iu : InstrItinClass; 215def IIC_iLoad_bh_ru : InstrItinClass; 216def IIC_iLoad_bh_siu : InstrItinClass; 217def IIC_iLoad_d_i : InstrItinClass; 218def IIC_iLoad_d_r : InstrItinClass; 219def IIC_iLoad_d_ru : InstrItinClass; 220def IIC_iLoad_m : InstrItinClass; 221def IIC_iLoad_mu : InstrItinClass; 222def IIC_iLoad_mBr : InstrItinClass; 223def IIC_iPop : InstrItinClass; 224def IIC_iPop_Br : InstrItinClass; 225def IIC_iLoadiALU : InstrItinClass; 226def IIC_iStore_i : InstrItinClass; 227def IIC_iStore_r : InstrItinClass; 228def IIC_iStore_si : InstrItinClass; 229def IIC_iStore_iu : InstrItinClass; 230def IIC_iStore_ru : InstrItinClass; 231def IIC_iStore_siu : InstrItinClass; 232def IIC_iStore_bh_i : InstrItinClass; 233def IIC_iStore_bh_r : InstrItinClass; 234def IIC_iStore_bh_si : InstrItinClass; 235def IIC_iStore_bh_iu : InstrItinClass; 236def IIC_iStore_bh_ru : InstrItinClass; 237def IIC_iStore_bh_siu : InstrItinClass; 238def IIC_iStore_d_i : InstrItinClass; 239def IIC_iStore_d_r : InstrItinClass; 240def IIC_iStore_d_ru : InstrItinClass; 241def IIC_iStore_m : InstrItinClass; 242def IIC_iStore_mu : InstrItinClass; 243def IIC_Preload : InstrItinClass; 244def IIC_Br : InstrItinClass; 245def IIC_fpSTAT : InstrItinClass; 246def IIC_fpUNA16 : InstrItinClass; 247def IIC_fpUNA32 : InstrItinClass; 248def IIC_fpUNA64 : InstrItinClass; 249def IIC_fpCMP16 : InstrItinClass; 250def IIC_fpCMP32 : InstrItinClass; 251def IIC_fpCMP64 : InstrItinClass; 252def IIC_fpCVTSD : 
InstrItinClass; 253def IIC_fpCVTDS : InstrItinClass; 254def IIC_fpCVTSH : InstrItinClass; 255def IIC_fpCVTHS : InstrItinClass; 256def IIC_fpCVTIH : InstrItinClass; 257def IIC_fpCVTIS : InstrItinClass; 258def IIC_fpCVTID : InstrItinClass; 259def IIC_fpCVTHI : InstrItinClass; 260def IIC_fpCVTSI : InstrItinClass; 261def IIC_fpCVTDI : InstrItinClass; 262def IIC_fpMOVIS : InstrItinClass; 263def IIC_fpMOVID : InstrItinClass; 264def IIC_fpMOVSI : InstrItinClass; 265def IIC_fpMOVDI : InstrItinClass; 266def IIC_fpALU16 : InstrItinClass; 267def IIC_fpALU32 : InstrItinClass; 268def IIC_fpALU64 : InstrItinClass; 269def IIC_fpMUL16 : InstrItinClass; 270def IIC_fpMUL32 : InstrItinClass; 271def IIC_fpMUL64 : InstrItinClass; 272def IIC_fpMAC16 : InstrItinClass; 273def IIC_fpMAC32 : InstrItinClass; 274def IIC_fpMAC64 : InstrItinClass; 275def IIC_fpFMAC16 : InstrItinClass; 276def IIC_fpFMAC32 : InstrItinClass; 277def IIC_fpFMAC64 : InstrItinClass; 278def IIC_fpDIV16 : InstrItinClass; 279def IIC_fpDIV32 : InstrItinClass; 280def IIC_fpDIV64 : InstrItinClass; 281def IIC_fpSQRT16 : InstrItinClass; 282def IIC_fpSQRT32 : InstrItinClass; 283def IIC_fpSQRT64 : InstrItinClass; 284def IIC_fpLoad16 : InstrItinClass; 285def IIC_fpLoad32 : InstrItinClass; 286def IIC_fpLoad64 : InstrItinClass; 287def IIC_fpLoad_m : InstrItinClass; 288def IIC_fpLoad_mu : InstrItinClass; 289def IIC_fpStore16 : InstrItinClass; 290def IIC_fpStore32 : InstrItinClass; 291def IIC_fpStore64 : InstrItinClass; 292def IIC_fpStore_m : InstrItinClass; 293def IIC_fpStore_mu : InstrItinClass; 294def IIC_VLD1 : InstrItinClass; 295def IIC_VLD1x2 : InstrItinClass; 296def IIC_VLD1x3 : InstrItinClass; 297def IIC_VLD1x4 : InstrItinClass; 298def IIC_VLD1u : InstrItinClass; 299def IIC_VLD1x2u : InstrItinClass; 300def IIC_VLD1x3u : InstrItinClass; 301def IIC_VLD1x4u : InstrItinClass; 302def IIC_VLD1ln : InstrItinClass; 303def IIC_VLD1lnu : InstrItinClass; 304def IIC_VLD1dup : InstrItinClass; 305def IIC_VLD1dupu : InstrItinClass; 306def 
IIC_VLD2 : InstrItinClass; 307def IIC_VLD2x2 : InstrItinClass; 308def IIC_VLD2u : InstrItinClass; 309def IIC_VLD2x2u : InstrItinClass; 310def IIC_VLD2ln : InstrItinClass; 311def IIC_VLD2lnu : InstrItinClass; 312def IIC_VLD2dup : InstrItinClass; 313def IIC_VLD2dupu : InstrItinClass; 314def IIC_VLD3 : InstrItinClass; 315def IIC_VLD3ln : InstrItinClass; 316def IIC_VLD3u : InstrItinClass; 317def IIC_VLD3lnu : InstrItinClass; 318def IIC_VLD3dup : InstrItinClass; 319def IIC_VLD3dupu : InstrItinClass; 320def IIC_VLD4 : InstrItinClass; 321def IIC_VLD4ln : InstrItinClass; 322def IIC_VLD4u : InstrItinClass; 323def IIC_VLD4lnu : InstrItinClass; 324def IIC_VLD4dup : InstrItinClass; 325def IIC_VLD4dupu : InstrItinClass; 326def IIC_VST1 : InstrItinClass; 327def IIC_VST1x2 : InstrItinClass; 328def IIC_VST1x3 : InstrItinClass; 329def IIC_VST1x4 : InstrItinClass; 330def IIC_VST1u : InstrItinClass; 331def IIC_VST1x2u : InstrItinClass; 332def IIC_VST1x3u : InstrItinClass; 333def IIC_VST1x4u : InstrItinClass; 334def IIC_VST1ln : InstrItinClass; 335def IIC_VST1lnu : InstrItinClass; 336def IIC_VST2 : InstrItinClass; 337def IIC_VST2x2 : InstrItinClass; 338def IIC_VST2u : InstrItinClass; 339def IIC_VST2x2u : InstrItinClass; 340def IIC_VST2ln : InstrItinClass; 341def IIC_VST2lnu : InstrItinClass; 342def IIC_VST3 : InstrItinClass; 343def IIC_VST3u : InstrItinClass; 344def IIC_VST3ln : InstrItinClass; 345def IIC_VST3lnu : InstrItinClass; 346def IIC_VST4 : InstrItinClass; 347def IIC_VST4u : InstrItinClass; 348def IIC_VST4ln : InstrItinClass; 349def IIC_VST4lnu : InstrItinClass; 350def IIC_VUNAD : InstrItinClass; 351def IIC_VUNAQ : InstrItinClass; 352def IIC_VBIND : InstrItinClass; 353def IIC_VBINQ : InstrItinClass; 354def IIC_VPBIND : InstrItinClass; 355def IIC_VFMULD : InstrItinClass; 356def IIC_VFMULQ : InstrItinClass; 357def IIC_VMOV : InstrItinClass; 358def IIC_VMOVImm : InstrItinClass; 359def IIC_VMOVD : InstrItinClass; 360def IIC_VMOVQ : InstrItinClass; 361def IIC_VMOVIS : 
InstrItinClass; 362def IIC_VMOVID : InstrItinClass; 363def IIC_VMOVISL : InstrItinClass; 364def IIC_VMOVSI : InstrItinClass; 365def IIC_VMOVDI : InstrItinClass; 366def IIC_VMOVN : InstrItinClass; 367def IIC_VPERMD : InstrItinClass; 368def IIC_VPERMQ : InstrItinClass; 369def IIC_VPERMQ3 : InstrItinClass; 370def IIC_VMACD : InstrItinClass; 371def IIC_VMACQ : InstrItinClass; 372def IIC_VFMACD : InstrItinClass; 373def IIC_VFMACQ : InstrItinClass; 374def IIC_VRECSD : InstrItinClass; 375def IIC_VRECSQ : InstrItinClass; 376def IIC_VCNTiD : InstrItinClass; 377def IIC_VCNTiQ : InstrItinClass; 378def IIC_VUNAiD : InstrItinClass; 379def IIC_VUNAiQ : InstrItinClass; 380def IIC_VQUNAiD : InstrItinClass; 381def IIC_VQUNAiQ : InstrItinClass; 382def IIC_VBINiD : InstrItinClass; 383def IIC_VBINiQ : InstrItinClass; 384def IIC_VSUBiD : InstrItinClass; 385def IIC_VSUBiQ : InstrItinClass; 386def IIC_VBINi4D : InstrItinClass; 387def IIC_VBINi4Q : InstrItinClass; 388def IIC_VSUBi4D : InstrItinClass; 389def IIC_VSUBi4Q : InstrItinClass; 390def IIC_VABAD : InstrItinClass; 391def IIC_VABAQ : InstrItinClass; 392def IIC_VSHLiD : InstrItinClass; 393def IIC_VSHLiQ : InstrItinClass; 394def IIC_VSHLi4D : InstrItinClass; 395def IIC_VSHLi4Q : InstrItinClass; 396def IIC_VPALiD : InstrItinClass; 397def IIC_VPALiQ : InstrItinClass; 398def IIC_VMULi16D : InstrItinClass; 399def IIC_VMULi32D : InstrItinClass; 400def IIC_VMULi16Q : InstrItinClass; 401def IIC_VMULi32Q : InstrItinClass; 402def IIC_VMACi16D : InstrItinClass; 403def IIC_VMACi32D : InstrItinClass; 404def IIC_VMACi16Q : InstrItinClass; 405def IIC_VMACi32Q : InstrItinClass; 406def IIC_VEXTD : InstrItinClass; 407def IIC_VEXTQ : InstrItinClass; 408def IIC_VTB1 : InstrItinClass; 409def IIC_VTB2 : InstrItinClass; 410def IIC_VTB3 : InstrItinClass; 411def IIC_VTB4 : InstrItinClass; 412def IIC_VTBX1 : InstrItinClass; 413def IIC_VTBX2 : InstrItinClass; 414def IIC_VTBX3 : InstrItinClass; 415def IIC_VTBX4 : InstrItinClass; 416def IIC_VDOTPROD : 
InstrItinClass; 417 418//===----------------------------------------------------------------------===// 419// Processor instruction itineraries, pulled in from the .td files included below. 420 421include "ARMScheduleV6.td" 422include "ARMScheduleA8.td" 423include "ARMScheduleA9.td" 424include "ARMScheduleSwift.td" 425include "ARMScheduleR52.td" 426include "ARMScheduleA57.td" 427include "ARMScheduleM4.td" 428