• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling
10 /// for a MachineInstr-based dependency graph.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PointerIntPair.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/SparseMultiSet.h"
22 #include "llvm/ADT/SparseSet.h"
23 #include "llvm/CodeGen/LivePhysRegs.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/ScheduleDAG.h"
26 #include "llvm/CodeGen/TargetRegisterInfo.h"
27 #include "llvm/CodeGen/TargetSchedule.h"
28 #include "llvm/MC/LaneBitmask.h"
29 #include <cassert>
30 #include <cstdint>
31 #include <list>
32 #include <utility>
33 #include <vector>
34 
35 namespace llvm {
36 
37   class AAResults;
38   class LiveIntervals;
39   class MachineFrameInfo;
40   class MachineFunction;
41   class MachineInstr;
42   class MachineLoopInfo;
43   class MachineOperand;
44   struct MCSchedClassDesc;
45   class PressureDiffs;
46   class PseudoSourceValue;
47   class RegPressureTracker;
48   class UndefValue;
49   class Value;
50 
51   /// An individual mapping from virtual register number to SUnit.
52   struct VReg2SUnit {
53     unsigned VirtReg;
54     LaneBitmask LaneMask;
55     SUnit *SU;
56 
VReg2SUnitVReg2SUnit57     VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
58       : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
59 
getSparseSetIndexVReg2SUnit60     unsigned getSparseSetIndex() const {
61       return Register::virtReg2Index(VirtReg);
62     }
63   };
64 
65   /// Mapping from virtual register to SUnit including an operand index.
66   struct VReg2SUnitOperIdx : public VReg2SUnit {
67     unsigned OperandIndex;
68 
VReg2SUnitOperIdxVReg2SUnitOperIdx69     VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
70                       unsigned OperandIndex, SUnit *SU)
71       : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
72   };
73 
74   /// Record a physical register access.
75   /// For non-data-dependent uses, OpIdx == -1.
76   struct PhysRegSUOper {
77     SUnit *SU;
78     int OpIdx;
79     unsigned Reg;
80 
PhysRegSUOperPhysRegSUOper81     PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {}
82 
getSparseSetIndexPhysRegSUOper83     unsigned getSparseSetIndex() const { return Reg; }
84   };
85 
86   /// Use a SparseMultiSet to track physical registers. Storage is only
87   /// allocated once for the pass. It can be cleared in constant time and reused
88   /// without any frees.
89   using Reg2SUnitsMap =
90       SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
91 
92   /// Use SparseSet as a SparseMap by relying on the fact that it never
93   /// compares ValueT's, only unsigned keys. This allows the set to be cleared
94   /// between scheduling regions in constant time as long as ValueT does not
95   /// require a destructor.
96   using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;
97 
98   /// Track local uses of virtual registers. These uses are gathered by the DAG
99   /// builder and may be consulted by the scheduler to avoid iterating an entire
100   /// vreg use list.
101   using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
102 
103   using VReg2SUnitOperIdxMultiMap =
104       SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
105 
106   using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
107 
108   struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
UnderlyingObjectUnderlyingObject109     UnderlyingObject(ValueType V, bool MayAlias)
110         : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
111 
getValueUnderlyingObject112     ValueType getValue() const { return getPointer(); }
mayAliasUnderlyingObject113     bool mayAlias() const { return getInt(); }
114   };
115 
116   using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
117 
118   /// A ScheduleDAG for scheduling lists of MachineInstr.
119   class ScheduleDAGInstrs : public ScheduleDAG {
120   protected:
121     const MachineLoopInfo *MLI;
122     const MachineFrameInfo &MFI;
123 
124     /// TargetSchedModel provides an interface to the machine model.
125     TargetSchedModel SchedModel;
126 
127     /// True if the DAG builder should remove kill flags (in preparation for
128     /// rescheduling).
129     bool RemoveKillFlags;
130 
131     /// The standard DAG builder does not normally include terminators as DAG
132     /// nodes because it does not create the necessary dependencies to prevent
133     /// reordering. A specialized scheduler can override
134     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
135     /// it has taken responsibility for scheduling the terminator correctly.
136     bool CanHandleTerminators = false;
137 
138     /// Whether lane masks should get tracked.
139     bool TrackLaneMasks = false;
140 
141     // State specific to the current scheduling region.
142     // ------------------------------------------------
143 
144     /// The block in which to insert instructions
145     MachineBasicBlock *BB;
146 
147     /// The beginning of the range to be scheduled.
148     MachineBasicBlock::iterator RegionBegin;
149 
150     /// The end of the range to be scheduled.
151     MachineBasicBlock::iterator RegionEnd;
152 
153     /// Instructions in this region (distance(RegionBegin, RegionEnd)).
154     unsigned NumRegionInstrs;
155 
156     /// After calling BuildSchedGraph, each machine instruction in the current
157     /// scheduling region is mapped to an SUnit.
158     DenseMap<MachineInstr*, SUnit*> MISUnitMap;
159 
160     // State internal to DAG building.
161     // -------------------------------
162 
163     /// Defs, Uses - Remember where defs and uses of each register are as we
164     /// iterate upward through the instructions. This is allocated here instead
165     /// of inside BuildSchedGraph to avoid the need for it to be initialized and
166     /// destructed for each block.
167     Reg2SUnitsMap Defs;
168     Reg2SUnitsMap Uses;
169 
170     /// Tracks the last instruction(s) in this region defining each virtual
171     /// register. There may be multiple current definitions for a register with
172     /// disjunct lanemasks.
173     VReg2SUnitMultiMap CurrentVRegDefs;
174     /// Tracks the last instructions in this region using each virtual register.
175     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
176 
177     AAResults *AAForDep = nullptr;
178 
179     /// Remember a generic side-effecting instruction as we proceed.
180     /// No other SU ever gets scheduled around it (except in the special
181     /// case of a huge region that gets reduced).
182     SUnit *BarrierChain = nullptr;
183 
184   public:
185     /// A list of SUnits, used in Value2SUsMap, during DAG construction.
186     /// Note: to gain speed it might be worth investigating an optimized
187     /// implementation of this data structure, such as a singly linked list
188     /// with a memory pool (SmallVector was tried but slow and SparseSet is not
189     /// applicable).
190     using SUList = std::list<SUnit *>;
191 
192   protected:
193     /// A map from ValueType to SUList, used during DAG construction, as
194     /// a means of remembering which SUs depend on which memory locations.
195     class Value2SUsMap;
196 
197     /// Reduces maps in FIFO order, by N SUs. This is better than turning
198     /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
199     /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
200     /// and those below it.
201     void reduceHugeMemNodeMaps(Value2SUsMap &stores,
202                                Value2SUsMap &loads, unsigned N);
203 
204     /// Adds a chain edge between SUa and SUb, but only if both
205     /// AAResults and Target fail to deny the dependency.
206     void addChainDependency(SUnit *SUa, SUnit *SUb,
207                             unsigned Latency = 0);
208 
209     /// Adds dependencies as needed from all SUs in list to SU.
addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)210     void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
211       for (SUnit *Entry : SUs)
212         addChainDependency(SU, Entry, Latency);
213     }
214 
215     /// Adds dependencies as needed from all SUs in map, to SU.
216     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
217 
218     /// Adds dependencies as needed to SU, from all SUs mapped to V.
219     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
220                               ValueType V);
221 
222     /// Adds barrier chain edges from all SUs in map, and then clear the map.
223     /// This is equivalent to insertBarrierChain(), but optimized for the common
224     /// case where the new BarrierChain (a global memory object) has a higher
225     /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
226     /// before calling this.
227     void addBarrierChain(Value2SUsMap &map);
228 
229     /// Inserts a barrier chain in a huge region, far below current SU.
230     /// Adds barrier chain edges from all SUs in map with higher NodeNums than
231     /// this new BarrierChain, and remove them from map. It is assumed
232     /// BarrierChain has been set before calling this.
233     void insertBarrierChain(Value2SUsMap &map);
234 
235     /// For an unanalyzable memory access, this Value is used in maps.
236     UndefValue *UnknownValue;
237 
238 
239     /// Topo - A topological ordering for SUnits which permits fast IsReachable
240     /// and similar queries.
241     ScheduleDAGTopologicalSort Topo;
242 
243     using DbgValueVector =
244         std::vector<std::pair<MachineInstr *, MachineInstr *>>;
245     /// Remember instruction that precedes DBG_VALUE.
246     /// These are generated by buildSchedGraph but persist so they can be
247     /// referenced when emitting the final schedule.
248     DbgValueVector DbgValues;
249     MachineInstr *FirstDbgValue = nullptr;
250 
251     /// Set of live physical registers for updating kill flags.
252     LivePhysRegs LiveRegs;
253 
254   public:
255     explicit ScheduleDAGInstrs(MachineFunction &mf,
256                                const MachineLoopInfo *mli,
257                                bool RemoveKillFlags = false);
258 
259     ~ScheduleDAGInstrs() override = default;
260 
261     /// Gets the machine model for instruction scheduling.
getSchedModel()262     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
263 
264     /// Resolves and cache a resolved scheduling class for an SUnit.
getSchedClass(SUnit * SU)265     const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
266       if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
267         SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
268       return SU->SchedClass;
269     }
270 
271     /// Returns an iterator to the top of the current scheduling region.
begin()272     MachineBasicBlock::iterator begin() const { return RegionBegin; }
273 
274     /// Returns an iterator to the bottom of the current scheduling region.
end()275     MachineBasicBlock::iterator end() const { return RegionEnd; }
276 
277     /// Creates a new SUnit and return a ptr to it.
278     SUnit *newSUnit(MachineInstr *MI);
279 
280     /// Returns an existing SUnit for this MI, or nullptr.
281     SUnit *getSUnit(MachineInstr *MI) const;
282 
283     /// If this method returns true, handling of the scheduling regions
284     /// themselves (in case of a scheduling boundary in MBB) will be done
285     /// beginning with the topmost region of MBB.
doMBBSchedRegionsTopDown()286     virtual bool doMBBSchedRegionsTopDown() const { return false; }
287 
288     /// Prepares to perform scheduling in the given block.
289     virtual void startBlock(MachineBasicBlock *BB);
290 
291     /// Cleans up after scheduling in the given block.
292     virtual void finishBlock();
293 
294     /// Initialize the DAG and common scheduler state for a new
295     /// scheduling region. This does not actually create the DAG, only clears
296     /// it. The scheduling driver may call BuildSchedGraph multiple times per
297     /// scheduling region.
298     virtual void enterRegion(MachineBasicBlock *bb,
299                              MachineBasicBlock::iterator begin,
300                              MachineBasicBlock::iterator end,
301                              unsigned regioninstrs);
302 
303     /// Called when the scheduler has finished scheduling the current region.
304     virtual void exitRegion();
305 
306     /// Builds SUnits for the current region.
307     /// If \p RPTracker is non-null, compute register pressure as a side effect.
308     /// The DAG builder is an efficient place to do it because it already visits
309     /// operands.
310     void buildSchedGraph(AAResults *AA,
311                          RegPressureTracker *RPTracker = nullptr,
312                          PressureDiffs *PDiffs = nullptr,
313                          LiveIntervals *LIS = nullptr,
314                          bool TrackLaneMasks = false);
315 
316     /// Adds dependencies from instructions in the current list of
317     /// instructions being scheduled to scheduling barrier. We want to make sure
318     /// instructions which define registers that are either used by the
319     /// terminator or are live-out are properly scheduled. This is especially
320     /// important when the definition latency of the return value(s) are too
321     /// high to be hidden by the branch or when the liveout registers used by
322     /// instructions in the fallthrough block.
323     void addSchedBarrierDeps();
324 
325     /// Orders nodes according to selected style.
326     ///
327     /// Typically, a scheduling algorithm will implement schedule() without
328     /// overriding enterRegion() or exitRegion().
329     virtual void schedule() = 0;
330 
331     /// Allow targets to perform final scheduling actions at the level of the
332     /// whole MachineFunction. By default does nothing.
finalizeSchedule()333     virtual void finalizeSchedule() {}
334 
335     void dumpNode(const SUnit &SU) const override;
336     void dump() const override;
337 
338     /// Returns a label for a DAG node that points to an instruction.
339     std::string getGraphNodeLabel(const SUnit *SU) const override;
340 
341     /// Returns a label for the region of code covered by the DAG.
342     std::string getDAGName() const override;
343 
344     /// Fixes register kill flags that scheduling has made invalid.
345     void fixupKills(MachineBasicBlock &MBB);
346 
347     /// True if an edge can be added from PredSU to SuccSU without creating
348     /// a cycle.
349     bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
350 
351     /// Add a DAG edge to the given SU with the given predecessor
352     /// dependence data.
353     ///
354     /// \returns true if the edge may be added without creating a cycle OR if an
355     /// equivalent edge already existed (false indicates failure).
356     bool addEdge(SUnit *SuccSU, const SDep &PredDep);
357 
358   protected:
359     void initSUnits();
360     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
361     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
362     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
363     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
364 
365     /// Initializes register live-range state for updating kills.
366     /// PostRA helper for rewriting kill flags.
367     void startBlockForKills(MachineBasicBlock *BB);
368 
369     /// Toggles a register operand kill flag.
370     ///
371     /// Other adjustments may be made to the instruction if necessary. Return
372     /// true if the operand has been deleted, false if not.
373     void toggleKillFlag(MachineInstr &MI, MachineOperand &MO);
374 
375     /// Returns a mask for which lanes get read/written by the given (register)
376     /// machine operand.
377     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
378 
379     /// Returns true if the def register in \p MO has no uses.
380     bool deadDefHasNoUse(const MachineOperand &MO);
381   };
382 
383   /// Creates a new SUnit and return a ptr to it.
newSUnit(MachineInstr * MI)384   inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
385 #ifndef NDEBUG
386     const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
387 #endif
388     SUnits.emplace_back(MI, (unsigned)SUnits.size());
389     assert((Addr == nullptr || Addr == &SUnits[0]) &&
390            "SUnits std::vector reallocated on the fly!");
391     return &SUnits.back();
392   }
393 
394   /// Returns an existing SUnit for this MI, or nullptr.
getSUnit(MachineInstr * MI)395   inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
396     DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
397     if (I == MISUnitMap.end())
398       return nullptr;
399     return I->second;
400   }
401 
402 } // end namespace llvm
403 
404 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
405