1 //! Routine to compute the strongly connected components (SCCs) of a graph. 2 //! 3 //! Also computes as the resulting DAG if each SCC is replaced with a 4 //! node in the graph. This uses [Tarjan's algorithm]( 5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) 6 //! that completes in *O*(*n*) time. 7 8 use crate::fx::FxHashSet; 9 use crate::graph::vec_graph::VecGraph; 10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; 11 use rustc_index::{Idx, IndexSlice, IndexVec}; 12 use std::ops::Range; 13 14 #[cfg(test)] 15 mod tests; 16 17 /// Strongly connected components (SCC) of a graph. The type `N` is 18 /// the index type for the graph nodes and `S` is the index type for 19 /// the SCCs. We can map from each node to the SCC that it 20 /// participates in, and we also have the successors of each SCC. 21 pub struct Sccs<N: Idx, S: Idx> { 22 /// For each node, what is the SCC index of the SCC to which it 23 /// belongs. 24 scc_indices: IndexVec<N, S>, 25 26 /// Data about each SCC. 27 scc_data: SccData<S>, 28 } 29 30 pub struct SccData<S: Idx> { 31 /// For each SCC, the range of `all_successors` where its 32 /// successors can be found. 33 ranges: IndexVec<S, Range<usize>>, 34 35 /// Contains the successors for all the Sccs, concatenated. The 36 /// range of indices corresponding to a given SCC is found in its 37 /// SccData. 38 all_successors: Vec<S>, 39 } 40 41 impl<N: Idx, S: Idx + Ord> Sccs<N, S> { new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self42 pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self { 43 SccsConstruction::construct(graph) 44 } 45 scc_indices(&self) -> &IndexSlice<N, S>46 pub fn scc_indices(&self) -> &IndexSlice<N, S> { 47 &self.scc_indices 48 } 49 scc_data(&self) -> &SccData<S>50 pub fn scc_data(&self) -> &SccData<S> { 51 &self.scc_data 52 } 53 54 /// Returns the number of SCCs in the graph. num_sccs(&self) -> usize55 pub fn num_sccs(&self) -> usize { 56 self.scc_data.len() 57 } 58 59 /// Returns an iterator over the SCCs in the graph. 60 /// 61 /// The SCCs will be iterated in **dependency order** (or **post order**), 62 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after. 63 /// This is convenient when the edges represent dependencies: when you visit 64 /// `S1`, the value for `S2` will already have been computed. all_sccs(&self) -> impl Iterator<Item = S>65 pub fn all_sccs(&self) -> impl Iterator<Item = S> { 66 (0..self.scc_data.len()).map(S::new) 67 } 68 69 /// Returns the SCC to which a node `r` belongs. scc(&self, r: N) -> S70 pub fn scc(&self, r: N) -> S { 71 self.scc_indices[r] 72 } 73 74 /// Returns the successors of the given SCC. successors(&self, scc: S) -> &[S]75 pub fn successors(&self, scc: S) -> &[S] { 76 self.scc_data.successors(scc) 77 } 78 79 /// Construct the reverse graph of the SCC graph. reverse(&self) -> VecGraph<S>80 pub fn reverse(&self) -> VecGraph<S> { 81 VecGraph::new( 82 self.num_sccs(), 83 self.all_sccs() 84 .flat_map(|source| { 85 self.successors(source).iter().map(move |&target| (target, source)) 86 }) 87 .collect(), 88 ) 89 } 90 } 91 92 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> { 93 type Node = S; 94 } 95 96 impl<N: Idx, S: Idx + Ord> WithNumNodes for Sccs<N, S> { num_nodes(&self) -> usize97 fn num_nodes(&self) -> usize { 98 self.num_sccs() 99 } 100 } 101 102 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> { num_edges(&self) -> usize103 fn num_edges(&self) -> usize { 104 self.scc_data.all_successors.len() 105 } 106 } 107 108 impl<'graph, N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> { 109 type Item = S; 110 111 type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>; 112 } 113 114 impl<N: Idx, S: Idx + Ord> WithSuccessors for Sccs<N, S> { successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter115 fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter { 116 self.successors(node).iter().cloned() 117 } 118 } 119 120 impl<S: Idx> SccData<S> { 121 /// Number of SCCs, len(&self) -> usize122 fn len(&self) -> usize { 123 self.ranges.len() 124 } 125 ranges(&self) -> &IndexSlice<S, Range<usize>>126 pub fn ranges(&self) -> &IndexSlice<S, Range<usize>> { 127 &self.ranges 128 } 129 all_successors(&self) -> &Vec<S>130 pub fn all_successors(&self) -> &Vec<S> { 131 &self.all_successors 132 } 133 134 /// Returns the successors of the given SCC. successors(&self, scc: S) -> &[S]135 fn successors(&self, scc: S) -> &[S] { 136 // Annoyingly, `range` does not implement `Copy`, so we have 137 // to do `range.start..range.end`: 138 let range = &self.ranges[scc]; 139 &self.all_successors[range.start..range.end] 140 } 141 142 /// Creates a new SCC with `successors` as its successors and 143 /// returns the resulting index. create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S144 fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S { 145 // Store the successors on `scc_successors_vec`, remembering 146 // the range of indices. 147 let all_successors_start = self.all_successors.len(); 148 self.all_successors.extend(successors); 149 let all_successors_end = self.all_successors.len(); 150 151 debug!( 152 "create_scc({:?}) successors={:?}", 153 self.ranges.len(), 154 &self.all_successors[all_successors_start..all_successors_end], 155 ); 156 157 self.ranges.push(all_successors_start..all_successors_end) 158 } 159 } 160 161 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> { 162 graph: &'c G, 163 164 /// The state of each node; used during walk to record the stack 165 /// and after walk to record what cycle each node ended up being 166 /// in. 167 node_states: IndexVec<G::Node, NodeState<G::Node, S>>, 168 169 /// The stack of nodes that we are visiting as part of the DFS. 170 node_stack: Vec<G::Node>, 171 172 /// The stack of successors: as we visit a node, we mark our 173 /// position in this stack, and when we encounter a successor SCC, 174 /// we push it on the stack. When we complete an SCC, we can pop 175 /// everything off the stack that was found along the way. 176 successors_stack: Vec<S>, 177 178 /// A set used to strip duplicates. As we accumulate successors 179 /// into the successors_stack, we sometimes get duplicate entries. 180 /// We use this set to remove those -- we also keep its storage 181 /// around between successors to amortize memory allocation costs. 182 duplicate_set: FxHashSet<S>, 183 184 scc_data: SccData<S>, 185 } 186 187 #[derive(Copy, Clone, Debug)] 188 enum NodeState<N, S> { 189 /// This node has not yet been visited as part of the DFS. 190 /// 191 /// After SCC construction is complete, this state ought to be 192 /// impossible. 193 NotVisited, 194 195 /// This node is currently being walk as part of our DFS. It is on 196 /// the stack at the depth `depth`. 197 /// 198 /// After SCC construction is complete, this state ought to be 199 /// impossible. 200 BeingVisited { depth: usize }, 201 202 /// Indicates that this node is a member of the given cycle. 203 InCycle { scc_index: S }, 204 205 /// Indicates that this node is a member of whatever cycle 206 /// `parent` is a member of. This state is transient: whenever we 207 /// see it, we try to overwrite it with the current state of 208 /// `parent` (this is the "path compression" step of a union-find 209 /// algorithm). 210 InCycleWith { parent: N }, 211 } 212 213 #[derive(Copy, Clone, Debug)] 214 enum WalkReturn<S> { 215 Cycle { min_depth: usize }, 216 Complete { scc_index: S }, 217 } 218 219 impl<'c, G, S> SccsConstruction<'c, G, S> 220 where 221 G: DirectedGraph + WithNumNodes + WithSuccessors, 222 S: Idx, 223 { 224 /// Identifies SCCs in the graph `G` and computes the resulting 225 /// DAG. This uses a variant of [Tarjan's 226 /// algorithm][wikipedia]. The high-level summary of the algorithm 227 /// is that we do a depth-first search. Along the way, we keep a 228 /// stack of each node whose successors are being visited. We 229 /// track the depth of each node on this stack (there is no depth 230 /// if the node is not on the stack). When we find that some node 231 /// N with depth D can reach some other node N' with lower depth 232 /// D' (i.e., D' < D), we know that N, N', and all nodes in 233 /// between them on the stack are part of an SCC. 234 /// 235 /// [wikipedia]: https://bit.ly/2EZIx84 construct(graph: &'c G) -> Sccs<G::Node, S>236 fn construct(graph: &'c G) -> Sccs<G::Node, S> { 237 let num_nodes = graph.num_nodes(); 238 239 let mut this = Self { 240 graph, 241 node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes), 242 node_stack: Vec::with_capacity(num_nodes), 243 successors_stack: Vec::new(), 244 scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }, 245 duplicate_set: FxHashSet::default(), 246 }; 247 248 let scc_indices = (0..num_nodes) 249 .map(G::Node::new) 250 .map(|node| match this.start_walk_from(node) { 251 WalkReturn::Complete { scc_index } => scc_index, 252 WalkReturn::Cycle { min_depth } => { 253 panic!("`start_walk_node({node:?})` returned cycle with depth {min_depth:?}") 254 } 255 }) 256 .collect(); 257 258 Sccs { scc_indices, scc_data: this.scc_data } 259 } 260 start_walk_from(&mut self, node: G::Node) -> WalkReturn<S>261 fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> { 262 if let Some(result) = self.inspect_node(node) { 263 result 264 } else { 265 self.walk_unvisited_node(node) 266 } 267 } 268 269 /// Inspect a node during the DFS. We first examine its current 270 /// state -- if it is not yet visited (`NotVisited`), return `None` so 271 /// that the caller might push it onto the stack and start walking its 272 /// successors. 273 /// 274 /// If it is already on the DFS stack it will be in the state 275 /// `BeingVisited`. In that case, we have found a cycle and we 276 /// return the depth from the stack. 277 /// 278 /// Otherwise, we are looking at a node that has already been 279 /// completely visited. We therefore return `WalkReturn::Complete` 280 /// with its associated SCC index. inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>>281 fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> { 282 Some(match self.find_state(node) { 283 NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index }, 284 285 NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth }, 286 287 NodeState::NotVisited => return None, 288 289 NodeState::InCycleWith { parent } => panic!( 290 "`find_state` returned `InCycleWith({parent:?})`, which ought to be impossible" 291 ), 292 }) 293 } 294 295 /// Fetches the state of the node `r`. If `r` is recorded as being 296 /// in a cycle with some other node `r2`, then fetches the state 297 /// of `r2` (and updates `r` to reflect current result). This is 298 /// basically the "find" part of a standard union-find algorithm 299 /// (with path compression). find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S>300 fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> { 301 // To avoid recursion we temporarily reuse the `parent` of each 302 // InCycleWith link to encode a downwards link while compressing 303 // the path. After we have found the root or deepest node being 304 // visited, we traverse the reverse links and correct the node 305 // states on the way. 306 // 307 // **Note**: This mutation requires that this is a leaf function 308 // or at least that none of the called functions inspects the 309 // current node states. Luckily, we are a leaf. 310 311 // Remember one previous link. The termination condition when 312 // following links downwards is then simply as soon as we have 313 // found the initial self-loop. 314 let mut previous_node = node; 315 316 // Ultimately assigned by the parent when following 317 // `InCycleWith` upwards. 318 let node_state = loop { 319 debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]); 320 match self.node_states[node] { 321 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index }, 322 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth }, 323 NodeState::NotVisited => break NodeState::NotVisited, 324 NodeState::InCycleWith { parent } => { 325 // We test this, to be extremely sure that we never 326 // ever break our termination condition for the 327 // reverse iteration loop. 328 assert!(node != parent, "Node can not be in cycle with itself"); 329 // Store the previous node as an inverted list link 330 self.node_states[node] = NodeState::InCycleWith { parent: previous_node }; 331 // Update to parent node. 332 previous_node = node; 333 node = parent; 334 } 335 } 336 }; 337 338 // The states form a graph where up to one outgoing link is stored at 339 // each node. Initially in general, 340 // 341 // E 342 // ^ 343 // | 344 // InCycleWith/BeingVisited/NotVisited 345 // | 346 // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+ 347 // | 348 // = node, previous_node 349 // 350 // After the first loop, this will look like 351 // E 352 // ^ 353 // | 354 // InCycleWith/BeingVisited/NotVisited 355 // | 356 // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+ 357 // | | | | 358 // | InCycleWith | = node 359 // +-+ =previous_node 360 // 361 // Note in particular that A will be linked to itself in a self-cycle 362 // and no other self-cycles occur due to how InCycleWith is assigned in 363 // the find phase implemented by `walk_unvisited_node`. 364 // 365 // We now want to compress the path, that is assign the state of the 366 // link D-E to all other links. 367 // 368 // We can then walk backwards, starting from `previous_node`, and assign 369 // each node in the list with the updated state. The loop terminates 370 // when we reach the self-cycle. 371 372 // Move backwards until we found the node where we started. We 373 // will know when we hit the state where previous_node == node. 374 loop { 375 // Back at the beginning, we can return. 376 if previous_node == node { 377 return node_state; 378 } 379 // Update to previous node in the link. 380 match self.node_states[previous_node] { 381 NodeState::InCycleWith { parent: previous } => { 382 node = previous_node; 383 previous_node = previous; 384 } 385 // Only InCycleWith nodes were added to the reverse linked list. 386 other => panic!("Invalid previous link while compressing cycle: {other:?}"), 387 } 388 389 debug!("find_state: parent_state = {:?}", node_state); 390 391 // Update the node state from the parent state. The assigned 392 // state is actually a loop invariant but it will only be 393 // evaluated if there is at least one backlink to follow. 394 // Fully trusting llvm here to find this loop optimization. 395 match node_state { 396 // Path compression, make current node point to the same root. 397 NodeState::InCycle { .. } => { 398 self.node_states[node] = node_state; 399 } 400 // Still visiting nodes, compress to cycle to the node 401 // at that depth. 402 NodeState::BeingVisited { depth } => { 403 self.node_states[node] = 404 NodeState::InCycleWith { parent: self.node_stack[depth] }; 405 } 406 // These are never allowed as parent nodes. InCycleWith 407 // should have been followed to a real parent and 408 // NotVisited can not be part of a cycle since it should 409 // have instead gotten explored. 410 NodeState::NotVisited | NodeState::InCycleWith { .. } => { 411 panic!("invalid parent state: {node_state:?}") 412 } 413 } 414 } 415 } 416 417 /// Walks a node that has never been visited before. 418 /// 419 /// Call this method when `inspect_node` has returned `None`. Having the 420 /// caller decide avoids mutual recursion between the two methods and allows 421 /// us to maintain an allocated stack for nodes on the path between calls. 422 #[instrument(skip(self, initial), level = "debug")] walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S>423 fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> { 424 struct VisitingNodeFrame<G: DirectedGraph, Successors> { 425 node: G::Node, 426 iter: Option<Successors>, 427 depth: usize, 428 min_depth: usize, 429 successors_len: usize, 430 min_cycle_root: G::Node, 431 successor_node: G::Node, 432 } 433 434 // Move the stack to a local variable. We want to utilize the existing allocation and 435 // mutably borrow it without borrowing self at the same time. 436 let mut successors_stack = core::mem::take(&mut self.successors_stack); 437 debug_assert_eq!(successors_stack.len(), 0); 438 439 let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame { 440 node: initial, 441 depth: 0, 442 min_depth: 0, 443 iter: None, 444 successors_len: 0, 445 min_cycle_root: initial, 446 successor_node: initial, 447 }]; 448 449 let mut return_value = None; 450 451 'recurse: while let Some(frame) = stack.last_mut() { 452 let VisitingNodeFrame { 453 node, 454 depth, 455 iter, 456 successors_len, 457 min_depth, 458 min_cycle_root, 459 successor_node, 460 } = frame; 461 462 let node = *node; 463 let depth = *depth; 464 465 let successors = match iter { 466 Some(iter) => iter, 467 None => { 468 // This None marks that we still have the initialize this node's frame. 469 debug!(?depth, ?node); 470 471 debug_assert!(matches!(self.node_states[node], NodeState::NotVisited)); 472 473 // Push `node` onto the stack. 474 self.node_states[node] = NodeState::BeingVisited { depth }; 475 self.node_stack.push(node); 476 477 // Walk each successor of the node, looking to see if any of 478 // them can reach a node that is presently on the stack. If 479 // so, that means they can also reach us. 480 *successors_len = successors_stack.len(); 481 // Set and return a reference, this is currently empty. 482 iter.get_or_insert(self.graph.successors(node)) 483 } 484 }; 485 486 // Now that iter is initialized, this is a constant for this frame. 487 let successors_len = *successors_len; 488 489 // Construct iterators for the nodes and walk results. There are two cases: 490 // * The walk of a successor node returned. 491 // * The remaining successor nodes. 492 let returned_walk = 493 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk))); 494 495 let successor_walk = successors.by_ref().map(|successor_node| { 496 debug!(?node, ?successor_node); 497 (successor_node, self.inspect_node(successor_node)) 498 }); 499 500 for (successor_node, walk) in returned_walk.chain(successor_walk) { 501 match walk { 502 Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => { 503 // Track the minimum depth we can reach. 504 assert!(successor_min_depth <= depth); 505 if successor_min_depth < *min_depth { 506 debug!(?node, ?successor_min_depth); 507 *min_depth = successor_min_depth; 508 *min_cycle_root = successor_node; 509 } 510 } 511 512 Some(WalkReturn::Complete { scc_index: successor_scc_index }) => { 513 // Push the completed SCC indices onto 514 // the `successors_stack` for later. 515 debug!(?node, ?successor_scc_index); 516 successors_stack.push(successor_scc_index); 517 } 518 519 None => { 520 let depth = depth + 1; 521 debug!(?depth, ?successor_node); 522 // Remember which node the return value will come from. 523 frame.successor_node = successor_node; 524 // Start a new stack frame the step into it. 525 stack.push(VisitingNodeFrame { 526 node: successor_node, 527 depth, 528 iter: None, 529 successors_len: 0, 530 min_depth: depth, 531 min_cycle_root: successor_node, 532 successor_node, 533 }); 534 continue 'recurse; 535 } 536 } 537 } 538 539 // Completed walk, remove `node` from the stack. 540 let r = self.node_stack.pop(); 541 debug_assert_eq!(r, Some(node)); 542 543 // Remove the frame, it's done. 544 let frame = stack.pop().unwrap(); 545 546 // If `min_depth == depth`, then we are the root of the 547 // cycle: we can't reach anyone further down the stack. 548 549 // Pass the 'return value' down the stack. 550 // We return one frame at a time so there can't be another return value. 551 debug_assert!(return_value.is_none()); 552 return_value = Some(if frame.min_depth == depth { 553 // Note that successor stack may have duplicates, so we 554 // want to remove those: 555 let deduplicated_successors = { 556 let duplicate_set = &mut self.duplicate_set; 557 duplicate_set.clear(); 558 successors_stack 559 .drain(successors_len..) 560 .filter(move |&i| duplicate_set.insert(i)) 561 }; 562 let scc_index = self.scc_data.create_scc(deduplicated_successors); 563 self.node_states[node] = NodeState::InCycle { scc_index }; 564 WalkReturn::Complete { scc_index } 565 } else { 566 // We are not the head of the cycle. Return back to our 567 // caller. They will take ownership of the 568 // `self.successors` data that we pushed. 569 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root }; 570 WalkReturn::Cycle { min_depth: frame.min_depth } 571 }); 572 } 573 574 // Keep the allocation we used for successors_stack. 575 self.successors_stack = successors_stack; 576 debug_assert_eq!(self.successors_stack.len(), 0); 577 578 return_value.unwrap() 579 } 580 } 581