• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Routine to compute the strongly connected components (SCCs) of a graph.
2 //!
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O*(*n*) time.
7 
8 use crate::fx::FxHashSet;
9 use crate::graph::vec_graph::VecGraph;
10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors};
11 use rustc_index::{Idx, IndexSlice, IndexVec};
12 use std::ops::Range;
13 
14 #[cfg(test)]
15 mod tests;
16 
17 /// Strongly connected components (SCC) of a graph. The type `N` is
18 /// the index type for the graph nodes and `S` is the index type for
19 /// the SCCs. We can map from each node to the SCC that it
20 /// participates in, and we also have the successors of each SCC.
21 pub struct Sccs<N: Idx, S: Idx> {
22     /// For each node, what is the SCC index of the SCC to which it
23     /// belongs.
24     scc_indices: IndexVec<N, S>,
25 
26     /// Data about each SCC.
27     scc_data: SccData<S>,
28 }
29 
30 pub struct SccData<S: Idx> {
31     /// For each SCC, the range of `all_successors` where its
32     /// successors can be found.
33     ranges: IndexVec<S, Range<usize>>,
34 
35     /// Contains the successors for all the Sccs, concatenated. The
36     /// range of indices corresponding to a given SCC is found in its
37     /// SccData.
38     all_successors: Vec<S>,
39 }
40 
41 impl<N: Idx, S: Idx + Ord> Sccs<N, S> {
new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self42     pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
43         SccsConstruction::construct(graph)
44     }
45 
scc_indices(&self) -> &IndexSlice<N, S>46     pub fn scc_indices(&self) -> &IndexSlice<N, S> {
47         &self.scc_indices
48     }
49 
scc_data(&self) -> &SccData<S>50     pub fn scc_data(&self) -> &SccData<S> {
51         &self.scc_data
52     }
53 
54     /// Returns the number of SCCs in the graph.
num_sccs(&self) -> usize55     pub fn num_sccs(&self) -> usize {
56         self.scc_data.len()
57     }
58 
59     /// Returns an iterator over the SCCs in the graph.
60     ///
61     /// The SCCs will be iterated in **dependency order** (or **post order**),
62     /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
63     /// This is convenient when the edges represent dependencies: when you visit
64     /// `S1`, the value for `S2` will already have been computed.
all_sccs(&self) -> impl Iterator<Item = S>65     pub fn all_sccs(&self) -> impl Iterator<Item = S> {
66         (0..self.scc_data.len()).map(S::new)
67     }
68 
69     /// Returns the SCC to which a node `r` belongs.
scc(&self, r: N) -> S70     pub fn scc(&self, r: N) -> S {
71         self.scc_indices[r]
72     }
73 
74     /// Returns the successors of the given SCC.
successors(&self, scc: S) -> &[S]75     pub fn successors(&self, scc: S) -> &[S] {
76         self.scc_data.successors(scc)
77     }
78 
79     /// Construct the reverse graph of the SCC graph.
reverse(&self) -> VecGraph<S>80     pub fn reverse(&self) -> VecGraph<S> {
81         VecGraph::new(
82             self.num_sccs(),
83             self.all_sccs()
84                 .flat_map(|source| {
85                     self.successors(source).iter().map(move |&target| (target, source))
86                 })
87                 .collect(),
88         )
89     }
90 }
91 
92 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> {
93     type Node = S;
94 }
95 
96 impl<N: Idx, S: Idx + Ord> WithNumNodes for Sccs<N, S> {
num_nodes(&self) -> usize97     fn num_nodes(&self) -> usize {
98         self.num_sccs()
99     }
100 }
101 
102 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> {
num_edges(&self) -> usize103     fn num_edges(&self) -> usize {
104         self.scc_data.all_successors.len()
105     }
106 }
107 
108 impl<'graph, N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> {
109     type Item = S;
110 
111     type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>;
112 }
113 
114 impl<N: Idx, S: Idx + Ord> WithSuccessors for Sccs<N, S> {
successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter115     fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter {
116         self.successors(node).iter().cloned()
117     }
118 }
119 
120 impl<S: Idx> SccData<S> {
121     /// Number of SCCs,
len(&self) -> usize122     fn len(&self) -> usize {
123         self.ranges.len()
124     }
125 
ranges(&self) -> &IndexSlice<S, Range<usize>>126     pub fn ranges(&self) -> &IndexSlice<S, Range<usize>> {
127         &self.ranges
128     }
129 
all_successors(&self) -> &Vec<S>130     pub fn all_successors(&self) -> &Vec<S> {
131         &self.all_successors
132     }
133 
134     /// Returns the successors of the given SCC.
successors(&self, scc: S) -> &[S]135     fn successors(&self, scc: S) -> &[S] {
136         // Annoyingly, `range` does not implement `Copy`, so we have
137         // to do `range.start..range.end`:
138         let range = &self.ranges[scc];
139         &self.all_successors[range.start..range.end]
140     }
141 
142     /// Creates a new SCC with `successors` as its successors and
143     /// returns the resulting index.
create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S144     fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
145         // Store the successors on `scc_successors_vec`, remembering
146         // the range of indices.
147         let all_successors_start = self.all_successors.len();
148         self.all_successors.extend(successors);
149         let all_successors_end = self.all_successors.len();
150 
151         debug!(
152             "create_scc({:?}) successors={:?}",
153             self.ranges.len(),
154             &self.all_successors[all_successors_start..all_successors_end],
155         );
156 
157         self.ranges.push(all_successors_start..all_successors_end)
158     }
159 }
160 
161 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> {
162     graph: &'c G,
163 
164     /// The state of each node; used during walk to record the stack
165     /// and after walk to record what cycle each node ended up being
166     /// in.
167     node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
168 
169     /// The stack of nodes that we are visiting as part of the DFS.
170     node_stack: Vec<G::Node>,
171 
172     /// The stack of successors: as we visit a node, we mark our
173     /// position in this stack, and when we encounter a successor SCC,
174     /// we push it on the stack. When we complete an SCC, we can pop
175     /// everything off the stack that was found along the way.
176     successors_stack: Vec<S>,
177 
178     /// A set used to strip duplicates. As we accumulate successors
179     /// into the successors_stack, we sometimes get duplicate entries.
180     /// We use this set to remove those -- we also keep its storage
181     /// around between successors to amortize memory allocation costs.
182     duplicate_set: FxHashSet<S>,
183 
184     scc_data: SccData<S>,
185 }
186 
187 #[derive(Copy, Clone, Debug)]
188 enum NodeState<N, S> {
189     /// This node has not yet been visited as part of the DFS.
190     ///
191     /// After SCC construction is complete, this state ought to be
192     /// impossible.
193     NotVisited,
194 
195     /// This node is currently being walk as part of our DFS. It is on
196     /// the stack at the depth `depth`.
197     ///
198     /// After SCC construction is complete, this state ought to be
199     /// impossible.
200     BeingVisited { depth: usize },
201 
202     /// Indicates that this node is a member of the given cycle.
203     InCycle { scc_index: S },
204 
205     /// Indicates that this node is a member of whatever cycle
206     /// `parent` is a member of. This state is transient: whenever we
207     /// see it, we try to overwrite it with the current state of
208     /// `parent` (this is the "path compression" step of a union-find
209     /// algorithm).
210     InCycleWith { parent: N },
211 }
212 
213 #[derive(Copy, Clone, Debug)]
214 enum WalkReturn<S> {
215     Cycle { min_depth: usize },
216     Complete { scc_index: S },
217 }
218 
219 impl<'c, G, S> SccsConstruction<'c, G, S>
220 where
221     G: DirectedGraph + WithNumNodes + WithSuccessors,
222     S: Idx,
223 {
224     /// Identifies SCCs in the graph `G` and computes the resulting
225     /// DAG. This uses a variant of [Tarjan's
226     /// algorithm][wikipedia]. The high-level summary of the algorithm
227     /// is that we do a depth-first search. Along the way, we keep a
228     /// stack of each node whose successors are being visited. We
229     /// track the depth of each node on this stack (there is no depth
230     /// if the node is not on the stack). When we find that some node
231     /// N with depth D can reach some other node N' with lower depth
232     /// D' (i.e., D' < D), we know that N, N', and all nodes in
233     /// between them on the stack are part of an SCC.
234     ///
235     /// [wikipedia]: https://bit.ly/2EZIx84
construct(graph: &'c G) -> Sccs<G::Node, S>236     fn construct(graph: &'c G) -> Sccs<G::Node, S> {
237         let num_nodes = graph.num_nodes();
238 
239         let mut this = Self {
240             graph,
241             node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
242             node_stack: Vec::with_capacity(num_nodes),
243             successors_stack: Vec::new(),
244             scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
245             duplicate_set: FxHashSet::default(),
246         };
247 
248         let scc_indices = (0..num_nodes)
249             .map(G::Node::new)
250             .map(|node| match this.start_walk_from(node) {
251                 WalkReturn::Complete { scc_index } => scc_index,
252                 WalkReturn::Cycle { min_depth } => {
253                     panic!("`start_walk_node({node:?})` returned cycle with depth {min_depth:?}")
254                 }
255             })
256             .collect();
257 
258         Sccs { scc_indices, scc_data: this.scc_data }
259     }
260 
start_walk_from(&mut self, node: G::Node) -> WalkReturn<S>261     fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
262         if let Some(result) = self.inspect_node(node) {
263             result
264         } else {
265             self.walk_unvisited_node(node)
266         }
267     }
268 
269     /// Inspect a node during the DFS. We first examine its current
270     /// state -- if it is not yet visited (`NotVisited`), return `None` so
271     /// that the caller might push it onto the stack and start walking its
272     /// successors.
273     ///
274     /// If it is already on the DFS stack it will be in the state
275     /// `BeingVisited`. In that case, we have found a cycle and we
276     /// return the depth from the stack.
277     ///
278     /// Otherwise, we are looking at a node that has already been
279     /// completely visited. We therefore return `WalkReturn::Complete`
280     /// with its associated SCC index.
inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>>281     fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
282         Some(match self.find_state(node) {
283             NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
284 
285             NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
286 
287             NodeState::NotVisited => return None,
288 
289             NodeState::InCycleWith { parent } => panic!(
290                 "`find_state` returned `InCycleWith({parent:?})`, which ought to be impossible"
291             ),
292         })
293     }
294 
295     /// Fetches the state of the node `r`. If `r` is recorded as being
296     /// in a cycle with some other node `r2`, then fetches the state
297     /// of `r2` (and updates `r` to reflect current result). This is
298     /// basically the "find" part of a standard union-find algorithm
299     /// (with path compression).
find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S>300     fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
301         // To avoid recursion we temporarily reuse the `parent` of each
302         // InCycleWith link to encode a downwards link while compressing
303         // the path. After we have found the root or deepest node being
304         // visited, we traverse the reverse links and correct the node
305         // states on the way.
306         //
307         // **Note**: This mutation requires that this is a leaf function
308         // or at least that none of the called functions inspects the
309         // current node states. Luckily, we are a leaf.
310 
311         // Remember one previous link. The termination condition when
312         // following links downwards is then simply as soon as we have
313         // found the initial self-loop.
314         let mut previous_node = node;
315 
316         // Ultimately assigned by the parent when following
317         // `InCycleWith` upwards.
318         let node_state = loop {
319             debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
320             match self.node_states[node] {
321                 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
322                 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
323                 NodeState::NotVisited => break NodeState::NotVisited,
324                 NodeState::InCycleWith { parent } => {
325                     // We test this, to be extremely sure that we never
326                     // ever break our termination condition for the
327                     // reverse iteration loop.
328                     assert!(node != parent, "Node can not be in cycle with itself");
329                     // Store the previous node as an inverted list link
330                     self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
331                     // Update to parent node.
332                     previous_node = node;
333                     node = parent;
334                 }
335             }
336         };
337 
338         // The states form a graph where up to one outgoing link is stored at
339         // each node. Initially in general,
340         //
341         //                                                  E
342         //                                                  ^
343         //                                                  |
344         //                                InCycleWith/BeingVisited/NotVisited
345         //                                                  |
346         //   A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
347         //   |
348         //   = node, previous_node
349         //
350         // After the first loop, this will look like
351         //                                                  E
352         //                                                  ^
353         //                                                  |
354         //                                InCycleWith/BeingVisited/NotVisited
355         //                                                  |
356         // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
357         // | |                             |              |
358         // | InCycleWith                   |              = node
359         // +-+                             =previous_node
360         //
361         // Note in particular that A will be linked to itself in a self-cycle
362         // and no other self-cycles occur due to how InCycleWith is assigned in
363         // the find phase implemented by `walk_unvisited_node`.
364         //
365         // We now want to compress the path, that is assign the state of the
366         // link D-E to all other links.
367         //
368         // We can then walk backwards, starting from `previous_node`, and assign
369         // each node in the list with the updated state. The loop terminates
370         // when we reach the self-cycle.
371 
372         // Move backwards until we found the node where we started. We
373         // will know when we hit the state where previous_node == node.
374         loop {
375             // Back at the beginning, we can return.
376             if previous_node == node {
377                 return node_state;
378             }
379             // Update to previous node in the link.
380             match self.node_states[previous_node] {
381                 NodeState::InCycleWith { parent: previous } => {
382                     node = previous_node;
383                     previous_node = previous;
384                 }
385                 // Only InCycleWith nodes were added to the reverse linked list.
386                 other => panic!("Invalid previous link while compressing cycle: {other:?}"),
387             }
388 
389             debug!("find_state: parent_state = {:?}", node_state);
390 
391             // Update the node state from the parent state. The assigned
392             // state is actually a loop invariant but it will only be
393             // evaluated if there is at least one backlink to follow.
394             // Fully trusting llvm here to find this loop optimization.
395             match node_state {
396                 // Path compression, make current node point to the same root.
397                 NodeState::InCycle { .. } => {
398                     self.node_states[node] = node_state;
399                 }
400                 // Still visiting nodes, compress to cycle to the node
401                 // at that depth.
402                 NodeState::BeingVisited { depth } => {
403                     self.node_states[node] =
404                         NodeState::InCycleWith { parent: self.node_stack[depth] };
405                 }
406                 // These are never allowed as parent nodes. InCycleWith
407                 // should have been followed to a real parent and
408                 // NotVisited can not be part of a cycle since it should
409                 // have instead gotten explored.
410                 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
411                     panic!("invalid parent state: {node_state:?}")
412                 }
413             }
414         }
415     }
416 
417     /// Walks a node that has never been visited before.
418     ///
419     /// Call this method when `inspect_node` has returned `None`. Having the
420     /// caller decide avoids mutual recursion between the two methods and allows
421     /// us to maintain an allocated stack for nodes on the path between calls.
422     #[instrument(skip(self, initial), level = "debug")]
walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S>423     fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
424         struct VisitingNodeFrame<G: DirectedGraph, Successors> {
425             node: G::Node,
426             iter: Option<Successors>,
427             depth: usize,
428             min_depth: usize,
429             successors_len: usize,
430             min_cycle_root: G::Node,
431             successor_node: G::Node,
432         }
433 
434         // Move the stack to a local variable. We want to utilize the existing allocation and
435         // mutably borrow it without borrowing self at the same time.
436         let mut successors_stack = core::mem::take(&mut self.successors_stack);
437         debug_assert_eq!(successors_stack.len(), 0);
438 
439         let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
440             node: initial,
441             depth: 0,
442             min_depth: 0,
443             iter: None,
444             successors_len: 0,
445             min_cycle_root: initial,
446             successor_node: initial,
447         }];
448 
449         let mut return_value = None;
450 
451         'recurse: while let Some(frame) = stack.last_mut() {
452             let VisitingNodeFrame {
453                 node,
454                 depth,
455                 iter,
456                 successors_len,
457                 min_depth,
458                 min_cycle_root,
459                 successor_node,
460             } = frame;
461 
462             let node = *node;
463             let depth = *depth;
464 
465             let successors = match iter {
466                 Some(iter) => iter,
467                 None => {
468                     // This None marks that we still have the initialize this node's frame.
469                     debug!(?depth, ?node);
470 
471                     debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
472 
473                     // Push `node` onto the stack.
474                     self.node_states[node] = NodeState::BeingVisited { depth };
475                     self.node_stack.push(node);
476 
477                     // Walk each successor of the node, looking to see if any of
478                     // them can reach a node that is presently on the stack. If
479                     // so, that means they can also reach us.
480                     *successors_len = successors_stack.len();
481                     // Set and return a reference, this is currently empty.
482                     iter.get_or_insert(self.graph.successors(node))
483                 }
484             };
485 
486             // Now that iter is initialized, this is a constant for this frame.
487             let successors_len = *successors_len;
488 
489             // Construct iterators for the nodes and walk results. There are two cases:
490             // * The walk of a successor node returned.
491             // * The remaining successor nodes.
492             let returned_walk =
493                 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
494 
495             let successor_walk = successors.by_ref().map(|successor_node| {
496                 debug!(?node, ?successor_node);
497                 (successor_node, self.inspect_node(successor_node))
498             });
499 
500             for (successor_node, walk) in returned_walk.chain(successor_walk) {
501                 match walk {
502                     Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
503                         // Track the minimum depth we can reach.
504                         assert!(successor_min_depth <= depth);
505                         if successor_min_depth < *min_depth {
506                             debug!(?node, ?successor_min_depth);
507                             *min_depth = successor_min_depth;
508                             *min_cycle_root = successor_node;
509                         }
510                     }
511 
512                     Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
513                         // Push the completed SCC indices onto
514                         // the `successors_stack` for later.
515                         debug!(?node, ?successor_scc_index);
516                         successors_stack.push(successor_scc_index);
517                     }
518 
519                     None => {
520                         let depth = depth + 1;
521                         debug!(?depth, ?successor_node);
522                         // Remember which node the return value will come from.
523                         frame.successor_node = successor_node;
524                         // Start a new stack frame the step into it.
525                         stack.push(VisitingNodeFrame {
526                             node: successor_node,
527                             depth,
528                             iter: None,
529                             successors_len: 0,
530                             min_depth: depth,
531                             min_cycle_root: successor_node,
532                             successor_node,
533                         });
534                         continue 'recurse;
535                     }
536                 }
537             }
538 
539             // Completed walk, remove `node` from the stack.
540             let r = self.node_stack.pop();
541             debug_assert_eq!(r, Some(node));
542 
543             // Remove the frame, it's done.
544             let frame = stack.pop().unwrap();
545 
546             // If `min_depth == depth`, then we are the root of the
547             // cycle: we can't reach anyone further down the stack.
548 
549             // Pass the 'return value' down the stack.
550             // We return one frame at a time so there can't be another return value.
551             debug_assert!(return_value.is_none());
552             return_value = Some(if frame.min_depth == depth {
553                 // Note that successor stack may have duplicates, so we
554                 // want to remove those:
555                 let deduplicated_successors = {
556                     let duplicate_set = &mut self.duplicate_set;
557                     duplicate_set.clear();
558                     successors_stack
559                         .drain(successors_len..)
560                         .filter(move |&i| duplicate_set.insert(i))
561                 };
562                 let scc_index = self.scc_data.create_scc(deduplicated_successors);
563                 self.node_states[node] = NodeState::InCycle { scc_index };
564                 WalkReturn::Complete { scc_index }
565             } else {
566                 // We are not the head of the cycle. Return back to our
567                 // caller. They will take ownership of the
568                 // `self.successors` data that we pushed.
569                 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
570                 WalkReturn::Cycle { min_depth: frame.min_depth }
571             });
572         }
573 
574         // Keep the allocation we used for successors_stack.
575         self.successors_stack = successors_stack;
576         debug_assert_eq!(self.successors_stack.len(), 0);
577 
578         return_value.unwrap()
579     }
580 }
581