1#!/usr/bin/env python3 2# 3# ======- check-ninja-deps - build debugging script ----*- python -*--========# 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9# ==------------------------------------------------------------------------==# 10 11"""Script to find missing formal dependencies in a build.ninja file. 12 13Suppose you have a header file that's autogenerated by (for example) Tablegen. 14If a C++ compilation step needs to include that header, then it must be 15executed after the Tablegen build step that generates the header. So the 16dependency graph in build.ninja should have the Tablegen build step as an 17ancestor of the C++ one. If it does not, then there's a latent build-failure 18bug, because depending on the order that ninja chooses to schedule its build 19steps, the C++ build step could run first, and fail because the header it needs 20does not exist yet. 21 22But because that kind of bug can easily be latent or intermittent, you might 23not notice, if your local test build happens to succeed. What you'd like is a 24way to detect problems of this kind reliably, even if they _didn't_ cause a 25failure on your first test. 26 27This script tries to do that. It's specific to the 'ninja' build tool, because 28ninja has useful auxiliary output modes that produce the necessary data: 29 30 - 'ninja -t graph' emits the full DAG of formal dependencies derived from 31 build.ninja (in Graphviz format) 32 33 - 'ninja -t deps' dumps the database of dependencies discovered at build time 34 by finding out which headers each source file actually included 35 36By cross-checking these two sources of data against each other, you can find 37true dependencies shown by 'deps' that are not reflected as formal dependencies 38in 'graph', i.e. a generated header that is required by a given source file but 39not forced to be built first. 40 41To run it: 42 43 - set up a build directory using ninja as the build tool (cmake -G Ninja) 44 45 - in that build directory, run ninja to perform an actual build (populating 46 the dependency database) 47 48 - then, in the same build directory, run this script. No arguments are needed 49 (but -C and -f are accepted, and propagated to ninja for convenience). 50 51Requirements outside core Python: the 'pygraphviz' module, available via pip or 52as the 'python3-pygraphviz' package in Debian and Ubuntu. 53 54""" 55 56import sys 57import argparse 58import subprocess 59import pygraphviz 60 61def toposort(g): 62 """Topologically sort a graph. 63 64 The input g is a pygraphviz graph object representing a DAG. The function 65 yields the vertices of g in an arbitrary order consistent with the edges, 66 so that for any edge v->w, v is output before w.""" 67 68 # Count the number of immediate predecessors *not yet output* for each 69 # vertex. Initially this is simply their in-degrees. 70 ideg = {v: g.in_degree(v) for v in g.nodes_iter()} 71 72 # Set of vertices which can be output next, which is true if they have no 73 # immediate predecessor that has not already been output. 74 ready = {v for v, d in ideg.items() if d == 0} 75 76 # Keep outputting vertices while we have any to output. 77 while len(ready) > 0: 78 v = next(iter(ready)) 79 yield v 80 ready.remove(v) 81 82 # Having output v, find each immediate successor w, and decrement its 83 # 'ideg' value by 1, to indicate that one more of its predecessors has 84 # now been output. 85 for w in g.out_neighbors(v): 86 ideg[w] -= 1 87 if ideg[w] == 0: 88 # If that counter reaches zero, w is ready to output. 89 ready.add(w) 90 91def ancestors(g, translate = lambda x: x): 92 """Form the set of ancestors for each vertex of a graph. 93 94 The input g is a pygraphviz graph object representing a DAG. The function 95 yields a sequence of pairs (vertex, set of proper ancestors). 96 97 The vertex names are all mapped through 'translate' before output. This 98 allows us to produce output referring to the label rather than the 99 identifier of every vertex. 100 """ 101 102 # Store the set of (translated) ancestors for each vertex so far. a[v] 103 # includes (the translation of) v itself. 104 a = {} 105 106 for v in toposort(g): 107 vm = translate(v) 108 109 # Make up a[v], based on a[predecessors of v]. 110 a[v] = {vm} # include v itself 111 for w in g.in_neighbors(v): 112 a[v].update(a[w]) 113 114 # Remove v itself from the set before yielding it, so that the caller 115 # doesn't get the trivial dependency of v on itself. 116 yield vm, a[v].difference({vm}) 117 118def main(): 119 parser = argparse.ArgumentParser( 120 description='Find missing formal dependencies on generated include ' 121 'files in a build.ninja file.') 122 parser.add_argument("-C", "--build-dir", 123 help="Build directory (default cwd)") 124 parser.add_argument("-f", "--build-file", 125 help="Build directory (default build.ninja)") 126 args = parser.parse_args() 127 128 errs = 0 129 130 ninja_prefix = ["ninja"] 131 if args.build_dir is not None: 132 ninja_prefix.extend(["-C", args.build_dir]) 133 if args.build_file is not None: 134 ninja_prefix.extend(["-f", args.build_file]) 135 136 # Get the formal dependency graph and decode it using pygraphviz. 137 g = pygraphviz.AGraph(subprocess.check_output( 138 ninja_prefix + ["-t", "graph"]).decode("UTF-8")) 139 140 # Helper function to ask for the label of a vertex, which is where ninja's 141 # Graphviz output keeps the actual file name of the target. 142 label = lambda v: g.get_node(v).attr["label"] 143 144 # Start by making a list of build targets, i.e. generated files. These are 145 # just any graph vertex with at least one predecessor. 146 targets = set(label(v) for v in g.nodes_iter() if g.in_degree(v) > 0) 147 148 # Find the set of ancestors of each graph vertex. We pass in 'label' as a 149 # translation function, so that this gives us the set of ancestor _files_ 150 # for a given _file_ rather than arbitrary numeric vertex ids. 151 deps = dict(ancestors(g, label)) 152 153 # Fetch the cached dependency data and check it against our formal ancestry 154 # data. 155 currtarget = None 156 for line in (subprocess.check_output(ninja_prefix + ["-t", "deps"]) 157 .decode("UTF-8").splitlines()): 158 # ninja -t deps output consists of stanzas of the following form, 159 # separated by a blank line: 160 # 161 # target: [other information we don't need] 162 # some_file.cpp 163 # some_header.h 164 # other_header.h 165 # 166 # We parse this ad-hoc by detecting the four leading spaces in a 167 # source-file line, and the colon in a target line. 'currtarget' stores 168 # the last target name we saw. 169 if line.startswith(" "): 170 dep = line[4:] 171 assert currtarget is not None, "Source file appeared before target" 172 173 # We're only interested in this dependency if it's a *generated* 174 # file, i.e. it is in our set of targets. Also, we must check that 175 # currtarget is actually a target we know about: the dependency 176 # cache is not cleared when build.ninja changes, so it can contain 177 # stale data from targets that existed only in past builds in the 178 # same directory. 179 if (dep in targets and currtarget in deps and 180 dep not in deps[currtarget]): 181 print("error:", currtarget, "requires", dep, 182 "but has no dependency on it", file=sys.stderr) 183 errs += 1 184 elif ":" in line: 185 currtarget = line.split(":", 1)[0] 186 187 if errs: 188 sys.exit("{:d} errors found".format(errs)) 189 190if __name__ == '__main__': 191 main() 192