#!/usr/bin/env python

# Copyright 2018 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" locs.py - Count lines of code before and after preprocessor expansion
  Consult --help for more information.
"""

# for py2/py3 compatibility
from __future__ import print_function

import argparse
import json
import multiprocessing
import os
import re
import subprocess
import sys
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

# for py2/py3 compatibility
try:
  FileNotFoundError
except NameError:
  FileNotFoundError = IOError

# Raw string: the regexp examples contain '\.' which is an invalid escape
# sequence in a normal string literal (SyntaxWarning on modern Python).
ARGPARSE = argparse.ArgumentParser(
    description=("A script that computes LoC for a build dir"),
    epilog=r"""Examples:
 Count with default settings for build in out/Default:
   locs.py --build-dir out/Default
 Count only a custom group of files settings for build in out/Default:
   tools/locs.py --build-dir out/Default
                 --group src-compiler '\.\./\.\./src/compiler'
                 --only src-compiler
 Report the 10 files with the worst expansion:
   tools/locs.py --build-dir out/Default --worst 10
 Report the 10 files with the worst expansion in src/compiler:
   tools/locs.py --build-dir out/Default --worst 10
                 --group src-compiler '\.\./\.\./src/compiler'
                 --only src-compiler
 Report the 10 largest files after preprocessing:
   tools/locs.py --build-dir out/Default --largest 10
 Report the 10 smallest input files:
   tools/locs.py --build-dir out/Default --smallest 10""",
    formatter_class=argparse.RawTextHelpFormatter
)

ARGPARSE.add_argument(
    '--json',
    action='store_true',
    default=False,
    help="output json instead of short summary")
ARGPARSE.add_argument(
    '--build-dir',
    type=str,
    help="Use specified build dir and generate necessary files",
    required=True)
ARGPARSE.add_argument(
    '--echocmd',
    action='store_true',
    default=False,
    help="output command used to compute LoC")
ARGPARSE.add_argument(
    '--only',
    action='append',
    default=[],
    help="Restrict counting to report group (can be passed multiple times)")
ARGPARSE.add_argument(
    '--not',
    action='append',
    default=[],
    help="Exclude specific group (can be passed multiple times)")
ARGPARSE.add_argument(
    '--list-groups',
    action='store_true',
    default=False,
    help="List groups and associated regular expressions")
ARGPARSE.add_argument(
    '--group',
    nargs=2,
    action='append',
    default=[],
    help="Add a report group (can be passed multiple times)")
ARGPARSE.add_argument(
    '--largest',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output the n largest files after preprocessing")
ARGPARSE.add_argument(
    '--worst',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output the n files with worst expansion by preprocessing")
ARGPARSE.add_argument(
    '--smallest',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output the n smallest input files")
ARGPARSE.add_argument(
    '--files',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output results for each file separately")
ARGPARSE.add_argument(
    '--jobs',
    type=int,
    default=multiprocessing.cpu_count(),
    help="Process specified number of files concurrently")

# Parsed command-line options. Populated by Main() so that importing this
# module (e.g. from tests) has no side effects and needs no argv.
ARGS = {}


def MaxWidth(strings):
  """Return the length of the longest string in *strings* (0 if empty)."""
  max_width = 0
  for s in strings:
    max_width = max(max_width, len(s))
  return max_width


def GenerateCompileCommandsAndBuild(build_dir, out):
  """Build *build_dir* and emit compile_commands.json plus ninja deps.

  Returns (compile_commands_file, ninja_deps_file) paths; exits the
  process on any failure. Progress output goes to *out*.
  """
  if not os.path.isdir(build_dir):
    print("Error: Specified build dir {} is not a directory.".format(
        build_dir), file=sys.stderr)
    sys.exit(1)

  autoninja = "autoninja -C {}".format(build_dir)
  if subprocess.call(autoninja, shell=True, stdout=out) != 0:
    print("Error: Building {} failed.".format(build_dir), file=sys.stderr)
    sys.exit(1)

  compile_commands_file = "{}/compile_commands.json".format(build_dir)
  print("Generating compile commands in {}.".format(
      compile_commands_file), file=out)
  ninja = "ninja -C {} -t compdb cxx cc > {}".format(
      build_dir, compile_commands_file)
  if subprocess.call(ninja, shell=True, stdout=out) != 0:
    print("Error: Could not generate {} for {}.".format(
        compile_commands_file, build_dir), file=sys.stderr)
    sys.exit(1)

  ninja_deps_file = "{}/ninja-deps.txt".format(build_dir)
  print("Generating ninja dependencies in {}.".format(
      ninja_deps_file), file=out)
  ninja = "ninja -C {} -t deps > {}".format(
      build_dir, ninja_deps_file)
  if subprocess.call(ninja, shell=True, stdout=out) != 0:
    print("Error: Could not generate {} for {}.".format(
        ninja_deps_file, build_dir), file=sys.stderr)
    sys.exit(1)

  return compile_commands_file, ninja_deps_file


def fmt_bytes(num_bytes):
  """Return (value, unit) with the value scaled to MB, kB or B."""
  if num_bytes > 1024*1024*1024:
    return int(num_bytes / (1024*1024)), "MB"
  elif num_bytes > 1024*1024:
    return int(num_bytes / (1024)), "kB"
  return int(num_bytes), " B"


class CompilationData:
  """LoC/byte counts of a compilation before and after preprocessing."""

  def __init__(self, loc, in_bytes, expanded, expanded_bytes):
    self.loc = loc
    self.in_bytes = in_bytes
    self.expanded = expanded
    self.expanded_bytes = expanded_bytes

  def ratio(self):
    # +1 avoids division by zero for empty inputs.
    return self.expanded / (self.loc+1)

  def to_string(self):
    exp_bytes, exp_unit = fmt_bytes(self.expanded_bytes)
    in_bytes, in_unit = fmt_bytes(self.in_bytes)
    return "{:>9,} LoC ({:>7,} {}) to {:>12,} LoC ({:>7,} {}) ({:>5.0f}x)".format(
        self.loc, in_bytes, in_unit, self.expanded, exp_bytes, exp_unit, self.ratio())


class File(CompilationData):
  """Counts for a single translation unit (source file and build target)."""

  def __init__(self, file, target, loc, in_bytes, expanded, expanded_bytes):
    super().__init__(loc, in_bytes, expanded, expanded_bytes)
    self.file = file
    self.target = target

  def to_string(self):
    return "{} {} {}".format(super().to_string(), self.file, self.target)


class Group(CompilationData):
  """Aggregated counts for all files whose path matches a regexp."""

  def __init__(self, name, regexp_string):
    super().__init__(0, 0, 0, 0)
    self.name = name
    self.count = 0
    self.regexp = re.compile(regexp_string)

  def account(self, unit):
    """Add *unit*'s counts to this group if its file path matches."""
    if self.regexp.match(unit.file):
      self.loc += unit.loc
      self.in_bytes += unit.in_bytes
      self.expanded += unit.expanded
      self.expanded_bytes += unit.expanded_bytes
      self.count += 1

  def to_string(self, name_width):
    return "{:<{}} ({:>5} files): {}".format(
        self.name, name_width, self.count, super().to_string())


def SetupReportGroups():
  """Build the {name: Group} mapping honoring --group/--only/--not args."""
  default_report_groups = {"total": '.*',
                           "src": '\\.\\./\\.\\./src',
                           "test": '\\.\\./\\.\\./test',
                           "third_party": '\\.\\./\\.\\./third_party',
                           "gen": 'gen'}

  report_groups = default_report_groups.copy()
  report_groups.update(dict(ARGS['group']))

  if ARGS['only']:
    for only_arg in ARGS['only']:
      if only_arg not in report_groups:
        # Report the specific unknown group, not the whole --only list.
        print("Error: specified report group '{}' is not defined.".format(
            only_arg))
        sys.exit(1)
    report_groups = {
        k: v for (k, v) in report_groups.items() if k in ARGS['only']}

  if ARGS['not']:
    report_groups = {
        k: v for (k, v) in report_groups.items() if k not in ARGS['not']}

  if ARGS['list_groups']:
    print_cat_max_width = MaxWidth(list(report_groups.keys()) + ["Category"])
    print("  {:<{}}  {}".format("Category",
                                print_cat_max_width, "Regular expression"))
    for cat, regexp_string in report_groups.items():
      print("  {:<{}}: {}".format(
          cat, print_cat_max_width, regexp_string))

  report_groups = {k: Group(k, v) for (k, v) in report_groups.items()}

  return report_groups


class Results:
  """Collects per-file counts, per-group aggregates and header deps."""

  def __init__(self):
    self.groups = SetupReportGroups()
    self.units = {}
    self.source_dependencies = {}
    self.header_dependents = {}

  def track(self, filename):
    """Return True if *filename* matches at least one report group."""
    for group in self.groups.values():
      if group.regexp.match(filename):
        return True
    return False

  def recordFile(self, filename, targetname, loc, in_bytes, expanded, expanded_bytes):
    """Store counts for one file and account it to every matching group."""
    unit = File(filename, targetname, loc, in_bytes, expanded, expanded_bytes)
    self.units[filename] = unit
    for group in self.groups.values():
      group.account(unit)

  def maxGroupWidth(self):
    return MaxWidth([v.name for v in self.groups.values()])

  def printGroupResults(self, file):
    for key in sorted(self.groups.keys()):
      print(self.groups[key].to_string(self.maxGroupWidth()), file=file)

  def printSorted(self, key, count, reverse, out):
    for unit in sorted(list(self.units.values()), key=key, reverse=reverse)[:count]:
      print(unit.to_string(), file=out)

  def addHeaderDeps(self, source_dependencies, header_dependents):
    self.source_dependencies = source_dependencies
    self.header_dependents = header_dependents


class LocsEncoder(json.JSONEncoder):
  """JSON encoder that knows how to serialize File, Group and Results."""

  def default(self, o):
    if isinstance(o, File):
      return {"file": o.file, "target": o.target, "loc": o.loc, "in_bytes": o.in_bytes,
              "expanded": o.expanded, "expanded_bytes": o.expanded_bytes}
    if isinstance(o, Group):
      return {"name": o.name, "loc": o.loc, "in_bytes": o.in_bytes,
              "expanded": o.expanded, "expanded_bytes": o.expanded_bytes}
    if isinstance(o, Results):
      return {"groups": o.groups, "units": o.units,
              "source_dependencies": o.source_dependencies,
              "header_dependents": o.header_dependents}
    return json.JSONEncoder.default(self, o)


class StatusLine:
  """Single-line progress display that overwrites itself with '\\r'."""

  def __init__(self):
    self.max_width = 0

  def print(self, statusline, end="\r", file=sys.stdout):
    # Pad to the widest line printed so far so shorter lines fully
    # overwrite longer previous ones.
    self.max_width = max(self.max_width, len(statusline))
    print("{0:<{1}}".format(statusline, self.max_width),
          end=end, file=file, flush=True)


class CommandSplitter:
  """Splits a compile_commands.json entry into its interesting parts."""

  def __init__(self):
    self.cmd_pattern = re.compile(
        "([^\\s]*\\s+)?(?P<clangcmd>[^\\s]*clang.*)"
        " -c (?P<infile>.*) -o (?P<outfile>.*)")

  def process(self, compilation_unit):
    """Return (clang command, infile name, absolute infile Path, outfile)."""
    cmd = self.cmd_pattern.match(compilation_unit['command'])
    outfilename = cmd.group('outfile')
    infilename = cmd.group('infile')
    infile = Path(compilation_unit['directory']).joinpath(infilename)
    return (cmd.group('clangcmd'), infilename, infile, outfilename)


def parse_ninja_deps(ninja_deps):
  """Parse `ninja -t deps` output lines.

  Returns (source_dependencies, header_dependents) where the first maps
  each target to its declared #deps count and the second counts how many
  targets depend on each .h/.hpp header.
  """
  source_dependencies = {}
  header_dependents = defaultdict(int)
  current_target = None
  for line in ninja_deps:
    line = line.rstrip()
    # Ignore empty lines
    if not line:
      current_target = None
      continue
    if line[0] == ' ':
      # New dependency: exactly four spaces of indentation, then the path
      # (index 4 is the first path character and must not be a space).
      if len(line) < 5 or line[0:4] != '    ' or line[4] == ' ':
        sys.exit('Lines must have no indentation or exactly four ' +
                 'spaces.')
      dep = line[4:]
      if not re.search(r"\.(h|hpp)$", dep):
        continue
      header_dependents[dep] += 1
      continue
    # New target
    colon_pos = line.find(':')
    if colon_pos < 0:
      sys.exit('Unindented line must have a colon')
    if current_target is not None:
      sys.exit('Missing empty line before new target')
    current_target = line[0:colon_pos]
    match = re.search(r"#deps (\d+)", line)
    if match is None:
      sys.exit('Target line must contain "#deps <number>"')
    source_dependencies[current_target] = int(match.group(1))

  return (source_dependencies, header_dependents)


def Main():
  global ARGS
  ARGS = vars(ARGPARSE.parse_args())

  out = sys.stdout
  if ARGS['json']:
    out = sys.stderr

  compile_commands_file, ninja_deps_file = GenerateCompileCommandsAndBuild(
      ARGS['build_dir'], out)

  result = Results()
  status = StatusLine()

  try:
    with open(compile_commands_file) as file:
      compile_commands = json.load(file)
    with open(ninja_deps_file) as file:
      source_dependencies, header_dependents = parse_ninja_deps(file)
      result.addHeaderDeps(source_dependencies, header_dependents)
  except FileNotFoundError as e:
    # Name the file that actually failed to open, whichever it was.
    print("Error: Cannot read '{}'. Consult --help to get started.".format(
        e.filename))
    sys.exit(1)

  cmd_splitter = CommandSplitter()

  def count_lines_of_unit(ikey):
    i, key = ikey
    if not result.track(key['file']):
      return
    message = "[{}/{}] Counting LoCs of {}".format(
        i, len(compile_commands), key['file'])
    status.print(message, file=out)
    clangcmd, infilename, infile, outfilename = cmd_splitter.process(key)
    if not infile.is_file():
      return

    # First command: preprocess and count non-blank lines/bytes; second:
    # count input lines/bytes with comments and blank lines stripped.
    clangcmd = clangcmd + " -E -P " + \
        str(infile) + " -o /dev/stdout | sed '/^\\s*$/d' | wc -lc"
    loccmd = ("cat {} | sed '\\;^\\s*//;d' | sed '\\;^/\\*;d'"
              " | sed '/^\\*/d' | sed '/^\\s*$/d' | wc -lc")
    loccmd = loccmd.format(infile)
    runcmd = " {} ; {}".format(clangcmd, loccmd)
    if ARGS['echocmd']:
      print(runcmd)
    process = subprocess.Popen(
        runcmd, shell=True, cwd=key['directory'], stdout=subprocess.PIPE)
    output, _ = process.communicate()
    expanded, expanded_bytes, loc, in_bytes = list(map(int, output.split()))
    result.recordFile(infilename, outfilename, loc,
                      in_bytes, expanded, expanded_bytes)

  start = time.time()

  with ThreadPoolExecutor(max_workers=ARGS['jobs']) as executor:
    list(executor.map(count_lines_of_unit, enumerate(compile_commands)))

  end = time.time()
  if ARGS['json']:
    print(json.dumps(result, ensure_ascii=False, cls=LocsEncoder))
  status.print("Processed {:,} files in {:,.2f} sec.".format(
      len(compile_commands), end-start), end="\n", file=out)
  result.printGroupResults(file=out)

  if ARGS['largest']:
    print("Largest {} files after expansion:".format(ARGS['largest']))
    result.printSorted(
        lambda v: v.expanded, ARGS['largest'], reverse=True, out=out)

  if ARGS['worst']:
    print("Worst expansion ({} files):".format(ARGS['worst']))
    result.printSorted(
        lambda v: v.ratio(), ARGS['worst'], reverse=True, out=out)

  if ARGS['smallest']:
    print("Smallest {} input files:".format(ARGS['smallest']))
    result.printSorted(
        lambda v: v.loc, ARGS['smallest'], reverse=False, out=out)

  if ARGS['files']:
    print("List of input files:")
    result.printSorted(
        lambda v: v.file, ARGS['files'], reverse=False, out=out)

  return 0


if __name__ == '__main__':
  sys.exit(Main())