1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling 10 /// for a MachineInstr-based dependency graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 16 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PointerIntPair.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/SparseMultiSet.h" 22 #include "llvm/ADT/SparseSet.h" 23 #include "llvm/CodeGen/LivePhysRegs.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSchedule.h" 28 #include "llvm/MC/LaneBitmask.h" 29 #include <cassert> 30 #include <cstdint> 31 #include <list> 32 #include <utility> 33 #include <vector> 34 35 namespace llvm { 36 37 class AAResults; 38 class LiveIntervals; 39 class MachineFrameInfo; 40 class MachineFunction; 41 class MachineInstr; 42 class MachineLoopInfo; 43 class MachineOperand; 44 struct MCSchedClassDesc; 45 class PressureDiffs; 46 class PseudoSourceValue; 47 class RegPressureTracker; 48 class UndefValue; 49 class Value; 50 51 /// An individual mapping from virtual register number to SUnit. 52 struct VReg2SUnit { 53 unsigned VirtReg; 54 LaneBitmask LaneMask; 55 SUnit *SU; 56 VReg2SUnitVReg2SUnit57 VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) 58 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} 59 getSparseSetIndexVReg2SUnit60 unsigned getSparseSetIndex() const { 61 return Register::virtReg2Index(VirtReg); 62 } 63 }; 64 65 /// Mapping from virtual register to SUnit including an operand index. 66 struct VReg2SUnitOperIdx : public VReg2SUnit { 67 unsigned OperandIndex; 68 VReg2SUnitOperIdxVReg2SUnitOperIdx69 VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, 70 unsigned OperandIndex, SUnit *SU) 71 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} 72 }; 73 74 /// Record a physical register access. 75 /// For non-data-dependent uses, OpIdx == -1. 76 struct PhysRegSUOper { 77 SUnit *SU; 78 int OpIdx; 79 unsigned Reg; 80 PhysRegSUOperPhysRegSUOper81 PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {} 82 getSparseSetIndexPhysRegSUOper83 unsigned getSparseSetIndex() const { return Reg; } 84 }; 85 86 /// Use a SparseMultiSet to track physical registers. Storage is only 87 /// allocated once for the pass. It can be cleared in constant time and reused 88 /// without any frees. 89 using Reg2SUnitsMap = 90 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; 91 92 /// Use SparseSet as a SparseMap by relying on the fact that it never 93 /// compares ValueT's, only unsigned keys. This allows the set to be cleared 94 /// between scheduling regions in constant time as long as ValueT does not 95 /// require a destructor. 96 using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>; 97 98 /// Track local uses of virtual registers. These uses are gathered by the DAG 99 /// builder and may be consulted by the scheduler to avoid iterating an entire 100 /// vreg use list. 101 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; 102 103 using VReg2SUnitOperIdxMultiMap = 104 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; 105 106 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; 107 108 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { UnderlyingObjectUnderlyingObject109 UnderlyingObject(ValueType V, bool MayAlias) 110 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 111 getValueUnderlyingObject112 ValueType getValue() const { return getPointer(); } mayAliasUnderlyingObject113 bool mayAlias() const { return getInt(); } 114 }; 115 116 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; 117 118 /// A ScheduleDAG for scheduling lists of MachineInstr. 119 class ScheduleDAGInstrs : public ScheduleDAG { 120 protected: 121 const MachineLoopInfo *MLI; 122 const MachineFrameInfo &MFI; 123 124 /// TargetSchedModel provides an interface to the machine model. 125 TargetSchedModel SchedModel; 126 127 /// True if the DAG builder should remove kill flags (in preparation for 128 /// rescheduling). 129 bool RemoveKillFlags; 130 131 /// The standard DAG builder does not normally include terminators as DAG 132 /// nodes because it does not create the necessary dependencies to prevent 133 /// reordering. A specialized scheduler can override 134 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate 135 /// it has taken responsibility for scheduling the terminator correctly. 136 bool CanHandleTerminators = false; 137 138 /// Whether lane masks should get tracked. 139 bool TrackLaneMasks = false; 140 141 // State specific to the current scheduling region. 142 // ------------------------------------------------ 143 144 /// The block in which to insert instructions 145 MachineBasicBlock *BB; 146 147 /// The beginning of the range to be scheduled. 148 MachineBasicBlock::iterator RegionBegin; 149 150 /// The end of the range to be scheduled. 151 MachineBasicBlock::iterator RegionEnd; 152 153 /// Instructions in this region (distance(RegionBegin, RegionEnd)). 154 unsigned NumRegionInstrs; 155 156 /// After calling BuildSchedGraph, each machine instruction in the current 157 /// scheduling region is mapped to an SUnit. 158 DenseMap<MachineInstr*, SUnit*> MISUnitMap; 159 160 // State internal to DAG building. 161 // ------------------------------- 162 163 /// Defs, Uses - Remember where defs and uses of each register are as we 164 /// iterate upward through the instructions. This is allocated here instead 165 /// of inside BuildSchedGraph to avoid the need for it to be initialized and 166 /// destructed for each block. 167 Reg2SUnitsMap Defs; 168 Reg2SUnitsMap Uses; 169 170 /// Tracks the last instruction(s) in this region defining each virtual 171 /// register. There may be multiple current definitions for a register with 172 /// disjunct lanemasks. 173 VReg2SUnitMultiMap CurrentVRegDefs; 174 /// Tracks the last instructions in this region using each virtual register. 175 VReg2SUnitOperIdxMultiMap CurrentVRegUses; 176 177 AAResults *AAForDep = nullptr; 178 179 /// Remember a generic side-effecting instruction as we proceed. 180 /// No other SU ever gets scheduled around it (except in the special 181 /// case of a huge region that gets reduced). 182 SUnit *BarrierChain = nullptr; 183 184 public: 185 /// A list of SUnits, used in Value2SUsMap, during DAG construction. 186 /// Note: to gain speed it might be worth investigating an optimized 187 /// implementation of this data structure, such as a singly linked list 188 /// with a memory pool (SmallVector was tried but slow and SparseSet is not 189 /// applicable). 190 using SUList = std::list<SUnit *>; 191 192 protected: 193 /// A map from ValueType to SUList, used during DAG construction, as 194 /// a means of remembering which SUs depend on which memory locations. 195 class Value2SUsMap; 196 197 /// Reduces maps in FIFO order, by N SUs. This is better than turning 198 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since 199 /// it avoids unnecessary edges between seen SUs above the new BarrierChain, 200 /// and those below it. 201 void reduceHugeMemNodeMaps(Value2SUsMap &stores, 202 Value2SUsMap &loads, unsigned N); 203 204 /// Adds a chain edge between SUa and SUb, but only if both 205 /// AAResults and Target fail to deny the dependency. 206 void addChainDependency(SUnit *SUa, SUnit *SUb, 207 unsigned Latency = 0); 208 209 /// Adds dependencies as needed from all SUs in list to SU. addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)210 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { 211 for (SUnit *Entry : SUs) 212 addChainDependency(SU, Entry, Latency); 213 } 214 215 /// Adds dependencies as needed from all SUs in map, to SU. 216 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); 217 218 /// Adds dependencies as needed to SU, from all SUs mapped to V. 219 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, 220 ValueType V); 221 222 /// Adds barrier chain edges from all SUs in map, and then clear the map. 223 /// This is equivalent to insertBarrierChain(), but optimized for the common 224 /// case where the new BarrierChain (a global memory object) has a higher 225 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set 226 /// before calling this. 227 void addBarrierChain(Value2SUsMap &map); 228 229 /// Inserts a barrier chain in a huge region, far below current SU. 230 /// Adds barrier chain edges from all SUs in map with higher NodeNums than 231 /// this new BarrierChain, and remove them from map. It is assumed 232 /// BarrierChain has been set before calling this. 233 void insertBarrierChain(Value2SUsMap &map); 234 235 /// For an unanalyzable memory access, this Value is used in maps. 236 UndefValue *UnknownValue; 237 238 239 /// Topo - A topological ordering for SUnits which permits fast IsReachable 240 /// and similar queries. 241 ScheduleDAGTopologicalSort Topo; 242 243 using DbgValueVector = 244 std::vector<std::pair<MachineInstr *, MachineInstr *>>; 245 /// Remember instruction that precedes DBG_VALUE. 246 /// These are generated by buildSchedGraph but persist so they can be 247 /// referenced when emitting the final schedule. 248 DbgValueVector DbgValues; 249 MachineInstr *FirstDbgValue = nullptr; 250 251 /// Set of live physical registers for updating kill flags. 252 LivePhysRegs LiveRegs; 253 254 public: 255 explicit ScheduleDAGInstrs(MachineFunction &mf, 256 const MachineLoopInfo *mli, 257 bool RemoveKillFlags = false); 258 259 ~ScheduleDAGInstrs() override = default; 260 261 /// Gets the machine model for instruction scheduling. getSchedModel()262 const TargetSchedModel *getSchedModel() const { return &SchedModel; } 263 264 /// Resolves and cache a resolved scheduling class for an SUnit. getSchedClass(SUnit * SU)265 const MCSchedClassDesc *getSchedClass(SUnit *SU) const { 266 if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) 267 SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); 268 return SU->SchedClass; 269 } 270 271 /// Returns an iterator to the top of the current scheduling region. begin()272 MachineBasicBlock::iterator begin() const { return RegionBegin; } 273 274 /// Returns an iterator to the bottom of the current scheduling region. end()275 MachineBasicBlock::iterator end() const { return RegionEnd; } 276 277 /// Creates a new SUnit and return a ptr to it. 278 SUnit *newSUnit(MachineInstr *MI); 279 280 /// Returns an existing SUnit for this MI, or nullptr. 281 SUnit *getSUnit(MachineInstr *MI) const; 282 283 /// If this method returns true, handling of the scheduling regions 284 /// themselves (in case of a scheduling boundary in MBB) will be done 285 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()286 virtual bool doMBBSchedRegionsTopDown() const { return false; } 287 288 /// Prepares to perform scheduling in the given block. 289 virtual void startBlock(MachineBasicBlock *BB); 290 291 /// Cleans up after scheduling in the given block. 292 virtual void finishBlock(); 293 294 /// Initialize the DAG and common scheduler state for a new 295 /// scheduling region. This does not actually create the DAG, only clears 296 /// it. The scheduling driver may call BuildSchedGraph multiple times per 297 /// scheduling region. 298 virtual void enterRegion(MachineBasicBlock *bb, 299 MachineBasicBlock::iterator begin, 300 MachineBasicBlock::iterator end, 301 unsigned regioninstrs); 302 303 /// Called when the scheduler has finished scheduling the current region. 304 virtual void exitRegion(); 305 306 /// Builds SUnits for the current region. 307 /// If \p RPTracker is non-null, compute register pressure as a side effect. 308 /// The DAG builder is an efficient place to do it because it already visits 309 /// operands. 310 void buildSchedGraph(AAResults *AA, 311 RegPressureTracker *RPTracker = nullptr, 312 PressureDiffs *PDiffs = nullptr, 313 LiveIntervals *LIS = nullptr, 314 bool TrackLaneMasks = false); 315 316 /// Adds dependencies from instructions in the current list of 317 /// instructions being scheduled to scheduling barrier. We want to make sure 318 /// instructions which define registers that are either used by the 319 /// terminator or are live-out are properly scheduled. This is especially 320 /// important when the definition latency of the return value(s) are too 321 /// high to be hidden by the branch or when the liveout registers used by 322 /// instructions in the fallthrough block. 323 void addSchedBarrierDeps(); 324 325 /// Orders nodes according to selected style. 326 /// 327 /// Typically, a scheduling algorithm will implement schedule() without 328 /// overriding enterRegion() or exitRegion(). 329 virtual void schedule() = 0; 330 331 /// Allow targets to perform final scheduling actions at the level of the 332 /// whole MachineFunction. By default does nothing. finalizeSchedule()333 virtual void finalizeSchedule() {} 334 335 void dumpNode(const SUnit &SU) const override; 336 void dump() const override; 337 338 /// Returns a label for a DAG node that points to an instruction. 339 std::string getGraphNodeLabel(const SUnit *SU) const override; 340 341 /// Returns a label for the region of code covered by the DAG. 342 std::string getDAGName() const override; 343 344 /// Fixes register kill flags that scheduling has made invalid. 345 void fixupKills(MachineBasicBlock &MBB); 346 347 /// True if an edge can be added from PredSU to SuccSU without creating 348 /// a cycle. 349 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 350 351 /// Add a DAG edge to the given SU with the given predecessor 352 /// dependence data. 353 /// 354 /// \returns true if the edge may be added without creating a cycle OR if an 355 /// equivalent edge already existed (false indicates failure). 356 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 357 358 protected: 359 void initSUnits(); 360 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); 361 void addPhysRegDeps(SUnit *SU, unsigned OperIdx); 362 void addVRegDefDeps(SUnit *SU, unsigned OperIdx); 363 void addVRegUseDeps(SUnit *SU, unsigned OperIdx); 364 365 /// Initializes register live-range state for updating kills. 366 /// PostRA helper for rewriting kill flags. 367 void startBlockForKills(MachineBasicBlock *BB); 368 369 /// Toggles a register operand kill flag. 370 /// 371 /// Other adjustments may be made to the instruction if necessary. Return 372 /// true if the operand has been deleted, false if not. 373 void toggleKillFlag(MachineInstr &MI, MachineOperand &MO); 374 375 /// Returns a mask for which lanes get read/written by the given (register) 376 /// machine operand. 377 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; 378 379 /// Returns true if the def register in \p MO has no uses. 380 bool deadDefHasNoUse(const MachineOperand &MO); 381 }; 382 383 /// Creates a new SUnit and return a ptr to it. newSUnit(MachineInstr * MI)384 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { 385 #ifndef NDEBUG 386 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; 387 #endif 388 SUnits.emplace_back(MI, (unsigned)SUnits.size()); 389 assert((Addr == nullptr || Addr == &SUnits[0]) && 390 "SUnits std::vector reallocated on the fly!"); 391 return &SUnits.back(); 392 } 393 394 /// Returns an existing SUnit for this MI, or nullptr. getSUnit(MachineInstr * MI)395 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { 396 DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI); 397 if (I == MISUnitMap.end()) 398 return nullptr; 399 return I->second; 400 } 401 402 } // end namespace llvm 403 404 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 405