1#!/usr/bin/env python
2
3# Copyright 2018 the V8 project authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7""" locs.py - Count lines of code before and after preprocessor expansion
8  Consult --help for more information.
9"""
10
11# for py2/py3 compatibility
12from __future__ import print_function
13
14import argparse
15import json
16import multiprocessing
17import os
18import re
19import subprocess
20import sys
21import tempfile
22import time
23from collections import defaultdict
24from concurrent.futures import ThreadPoolExecutor
25from pathlib import Path
26
# for py2/py3 compatibility: Python 2 has no FileNotFoundError builtin, so
# referencing the name raises NameError there; in that case alias it to
# IOError, the exception open() raises on Python 2.
try:
  FileNotFoundError
except NameError:
  FileNotFoundError = IOError
32
# Command-line interface.
# NOTE: the epilog is a raw string because the example regular expressions
# contain '\.' sequences, which are invalid escape sequences in a normal
# string literal (a SyntaxWarning on recent Python versions); the raw
# prefix keeps the rendered help text byte-identical.
ARGPARSE = argparse.ArgumentParser(
    description=("A script that computes LoC for a build dir"),
    epilog=r"""Examples:
 Count with default settings for build in out/Default:
   locs.py --build-dir out/Default
 Count only a custom group of files settings for build in out/Default:
   tools/locs.py --build-dir out/Default
                 --group src-compiler '\.\./\.\./src/compiler'
                 --only src-compiler
 Report the 10 files with the worst expansion:
   tools/locs.py --build-dir out/Default --worst 10
 Report the 10 files with the worst expansion in src/compiler:
   tools/locs.py --build-dir out/Default --worst 10
                 --group src-compiler '\.\./\.\./src/compiler'
                 --only src-compiler
 Report the 10 largest files after preprocessing:
   tools/locs.py --build-dir out/Default --largest 10
 Report the 10 smallest input files:
   tools/locs.py --build-dir out/Default --smallest 10""",
    formatter_class=argparse.RawTextHelpFormatter
)

ARGPARSE.add_argument(
    '--json',
    action='store_true',
    default=False,
    help="output json instead of short summary")
ARGPARSE.add_argument(
    '--build-dir',
    type=str,
    help="Use specified build dir and generate necessary files",
    required=True)
ARGPARSE.add_argument(
    '--echocmd',
    action='store_true',
    default=False,
    help="output command used to compute LoC")
ARGPARSE.add_argument(
    '--only',
    action='append',
    default=[],
    help="Restrict counting to report group (can be passed multiple times)")
ARGPARSE.add_argument(
    '--not',
    action='append',
    default=[],
    help="Exclude specific group (can be passed multiple times)")
ARGPARSE.add_argument(
    '--list-groups',
    action='store_true',
    default=False,
    help="List groups and associated regular expressions")
ARGPARSE.add_argument(
    '--group',
    nargs=2,
    action='append',
    default=[],
    help="Add a report group (can be passed multiple times)")
# For --largest/--worst/--smallest/--files, nargs='?' with const=3 means the
# bare flag reports 3 entries while an explicit count overrides it.
ARGPARSE.add_argument(
    '--largest',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output the n largest files after preprocessing")
ARGPARSE.add_argument(
    '--worst',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output the n files with worst expansion by preprocessing")
ARGPARSE.add_argument(
    '--smallest',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output the n smallest input files")
ARGPARSE.add_argument(
    '--files',
    type=int,
    nargs='?',
    default=0,
    const=3,
    help="Output results for each file separately")
ARGPARSE.add_argument(
    '--jobs',
    type=int,
    default=multiprocessing.cpu_count(),
    help="Process specified number of files concurrently")

# Parsed at import time; ARGS is the flat dict of options the rest of the
# script reads.
ARGS = vars(ARGPARSE.parse_args())
126
127
def MaxWidth(strings):
  """Return the length of the longest string (0 for an empty sequence)."""
  return max(map(len, strings), default=0)
133
134
def GenerateCompileCommandsAndBuild(build_dir, out):
  """Build build_dir and generate its compile-commands and ninja-deps files.

  Args:
    build_dir: path of the ninja build directory (must already exist).
    out: stream that receives the build tools' progress output.

  Returns:
    (compile_commands_file, ninja_deps_file) paths inside build_dir.
    Exits the process with status 1 on any failure.
  """
  if not os.path.isdir(build_dir):
    print("Error: Specified build dir {} is not a directory.".format(
        build_dir), file=sys.stderr)
    exit(1)

  # Build first so the compile commands and deps describe an up-to-date
  # build.
  autoninja = "autoninja -C {}".format(build_dir)
  if subprocess.call(autoninja, shell=True, stdout=out) != 0:
    print("Error: Building {} failed.".format(build_dir), file=sys.stderr)
    exit(1)

  compile_commands_file = "{}/compile_commands.json".format(build_dir)
  print("Generating compile commands in {}.".format(
      compile_commands_file), file=out)
  ninja = "ninja -C {} -t compdb cxx cc > {}".format(
      build_dir, compile_commands_file)
  if subprocess.call(ninja, shell=True, stdout=out) != 0:
    # "Could" fixes the "Cound" typo the original message carried.
    print("Error: Could not generate {} for {}.".format(
        compile_commands_file, build_dir), file=sys.stderr)
    exit(1)

  ninja_deps_file = "{}/ninja-deps.txt".format(build_dir)
  print("Generating ninja dependencies in {}.".format(
      ninja_deps_file), file=out)
  ninja = "ninja -C {} -t deps > {}".format(
      build_dir, ninja_deps_file)
  if subprocess.call(ninja, shell=True, stdout=out) != 0:
    print("Error: Could not generate {} for {}.".format(
        ninja_deps_file, build_dir), file=sys.stderr)
    exit(1)

  return compile_commands_file, ninja_deps_file
167
168
def fmt_bytes(num_bytes):
  """Scale a byte count into a (value, unit-label) pair for printing.

  Counts above 1 GiB are reported in MB, counts above 1 MiB in kB, and
  everything else in plain bytes; the unit label is always two characters
  wide so columns line up.
  """
  KiB = 1024
  MiB = KiB * KiB
  GiB = MiB * KiB
  if num_bytes > GiB:
    return int(num_bytes / MiB), "MB"
  if num_bytes > MiB:
    return int(num_bytes / KiB), "kB"
  return int(num_bytes), " B"
175
176
class CompilationData:
  """Line and byte counts for a unit, before and after preprocessing."""

  def __init__(self, loc, in_bytes, expanded, expanded_bytes):
    self.loc = loc
    self.in_bytes = in_bytes
    self.expanded = expanded
    self.expanded_bytes = expanded_bytes

  def ratio(self):
    # The +1 keeps the division defined when loc == 0.
    return self.expanded / (self.loc + 1)

  def to_string(self):
    exp_count, exp_unit = fmt_bytes(self.expanded_bytes)
    in_count, in_unit = fmt_bytes(self.in_bytes)
    template = ("{:>9,} LoC ({:>7,} {}) to {:>12,} LoC"
                " ({:>7,} {}) ({:>5.0f}x)")
    return template.format(self.loc, in_count, in_unit,
                           self.expanded, exp_count, exp_unit, self.ratio())
192
193
class File(CompilationData):
  """Counts for a single source file together with its build target."""

  def __init__(self, file, target, loc, in_bytes, expanded, expanded_bytes):
    super().__init__(loc, in_bytes, expanded, expanded_bytes)
    self.file = file
    self.target = target

  def to_string(self):
    counts = super().to_string()
    return "{} {} {}".format(counts, self.file, self.target)
202
203
class Group(CompilationData):
  """Accumulated counts over every file whose path matches a regexp."""

  def __init__(self, name, regexp_string):
    super().__init__(0, 0, 0, 0)
    self.name = name
    self.count = 0
    self.regexp = re.compile(regexp_string)

  def account(self, unit):
    # Fold the unit in only when its path matches this group's pattern.
    if not self.regexp.match(unit.file):
      return
    self.loc += unit.loc
    self.in_bytes += unit.in_bytes
    self.expanded += unit.expanded
    self.expanded_bytes += unit.expanded_bytes
    self.count += 1

  def to_string(self, name_width):
    return "{:<{}} ({:>5} files): {}".format(
        self.name, name_width, self.count, super().to_string())
222
223
def SetupReportGroups():
  """Build the name -> Group mapping used to attribute files to reports.

  Starts from the default groups, merges user-supplied --group pairs, then
  applies the --only / --not filters and honors --list-groups by printing
  the table of group regular expressions. Exits with status 1 when --only
  names an unknown group.
  """
  default_report_groups = {"total": '.*',
                           "src": '\\.\\./\\.\\./src',
                           "test": '\\.\\./\\.\\./test',
                           "third_party": '\\.\\./\\.\\./third_party',
                           "gen": 'gen'}

  report_groups = default_report_groups.copy()
  report_groups.update(dict(ARGS['group']))

  if ARGS['only']:
    # Validate every requested group first, then narrow the selection once
    # (the original re-filtered inside the loop and printed the whole
    # --only list instead of the single unknown name).
    for only_arg in ARGS['only']:
      if only_arg not in report_groups:
        print("Error: specified report group '{}' is not defined.".format(
            only_arg))
        exit(1)
    report_groups = {
        k: v for (k, v) in report_groups.items() if k in ARGS['only']}

  if ARGS['not']:
    report_groups = {
        k: v for (k, v) in report_groups.items() if k not in ARGS['not']}

  if ARGS['list_groups']:
    print_cat_max_width = MaxWidth(list(report_groups.keys()) + ["Category"])
    print("  {:<{}}  {}".format("Category",
                                print_cat_max_width, "Regular expression"))
    for cat, regexp_string in report_groups.items():
      print("  {:<{}}: {}".format(
          cat, print_cat_max_width, regexp_string))

  report_groups = {k: Group(k, v) for (k, v) in report_groups.items()}

  return report_groups
259
260
class Results:
  """Aggregates per-file counts into the configured report groups."""

  def __init__(self):
    self.groups = SetupReportGroups()
    self.units = {}
    self.source_dependencies = {}
    self.header_dependents = {}

  def track(self, filename):
    # A file is interesting when at least one group's regexp matches it.
    return any(
        group.regexp.match(filename) for group in self.groups.values())

  def recordFile(self, filename, targetname, loc, in_bytes, expanded,
                 expanded_bytes):
    unit = File(filename, targetname, loc, in_bytes, expanded, expanded_bytes)
    self.units[filename] = unit
    for group in self.groups.values():
      group.account(unit)

  def maxGroupWidth(self):
    return MaxWidth([group.name for group in self.groups.values()])

  def printGroupResults(self, file):
    width = self.maxGroupWidth()
    for name in sorted(self.groups):
      print(self.groups[name].to_string(width), file=file)

  def printSorted(self, key, count, reverse, out):
    ranked = sorted(self.units.values(), key=key, reverse=reverse)
    for unit in ranked[:count]:
      print(unit.to_string(), file=out)

  def addHeaderDeps(self, source_dependencies, header_dependents):
    self.source_dependencies = source_dependencies
    self.header_dependents = header_dependents
295
296
class LocsEncoder(json.JSONEncoder):
  """JSON encoder that knows how to serialize File, Group, and Results."""

  def default(self, o):
    # Key order matches the original hand-written dicts so the emitted
    # JSON is byte-identical.
    if isinstance(o, File):
      names = ("file", "target", "loc", "in_bytes", "expanded",
               "expanded_bytes")
      return {name: getattr(o, name) for name in names}
    if isinstance(o, Group):
      names = ("name", "loc", "in_bytes", "expanded", "expanded_bytes")
      return {name: getattr(o, name) for name in names}
    if isinstance(o, Results):
      names = ("groups", "units", "source_dependencies",
               "header_dependents")
      return {name: getattr(o, name) for name in names}
    return super().default(o)
310
311
class StatusLine:
  """Prints transient status messages that overwrite one another."""

  def __init__(self):
    # Widest message seen so far; shorter messages are padded to this
    # width so the carriage-return default fully erases the previous line.
    self.max_width = 0

  def print(self, statusline, end="\r", file=sys.stdout):
    if len(statusline) > self.max_width:
      self.max_width = len(statusline)
    padded = "{0:<{1}}".format(statusline, self.max_width)
    print(padded, end=end, file=file, flush=True)
320
321
class CommandSplitter:
  """Splits a compile_commands.json entry into its interesting pieces."""

  def __init__(self):
    # Matches: [launcher] <clang invocation> -c <input> -o <output>
    self.cmd_pattern = re.compile(
        r"([^\s]*\s+)?(?P<clangcmd>[^\s]*clang.*)"
        r" -c (?P<infile>.*) -o (?P<outfile>.*)")

  def process(self, compilation_unit):
    parts = self.cmd_pattern.match(compilation_unit['command'])
    directory = Path(compilation_unit['directory'])
    relative_name = parts.group('infile')
    return (parts.group('clangcmd'),
            relative_name,
            directory.joinpath(relative_name),
            parts.group('outfile'))
334
335
def parse_ninja_deps(ninja_deps):
  """Parse the output of `ninja -t deps`.

  Args:
    ninja_deps: iterable of output lines (e.g. an open file object).

  Returns:
    (source_dependencies, header_dependents): the first maps each target
    to its '#deps' count, the second maps each .h/.hpp dependency to the
    number of targets that list it.
  """
  source_dependencies = {}
  header_dependents = defaultdict(int)
  current_target = None
  for line in ninja_deps:
    line = line.rstrip()
    # An empty line terminates the current target's dependency list.
    if not line:
      current_target = None
      continue
    if line[0] == ' ':
      # Dependency line: must be indented by exactly four spaces. Checking
      # line[4] (the original checked line[5]) correctly rejects deeper
      # indentation and cannot raise IndexError on five-character lines.
      if len(line) < 5 or line[0:4] != '    ' or line[4] == ' ':
        sys.exit('Lines must have no indentation or exactly four ' +
                 'spaces.')
      dep = line[4:]
      # Only header files are counted.
      if not re.search(r"\.(h|hpp)$", dep):
        continue
      header_dependents[dep] += 1
      continue
    # Anything unindented starts a new target: "target: #deps N ...".
    colon_pos = line.find(':')
    if colon_pos < 0:
      sys.exit('Unindented line must have a colon')
    if current_target is not None:
      sys.exit('Missing empty line before new target')
    current_target = line[0:colon_pos]
    # NOTE(review): assumes every target line carries a "#deps N"
    # annotation as `ninja -t deps` emits; a line without one would raise
    # AttributeError here.
    match = re.search(r"#deps (\d+)", line)
    deps_number = match.group(1)
    source_dependencies[current_target] = int(deps_number)

  return (source_dependencies, header_dependents)
368
369
def Main():
  """Drive the LoC count: build, load compile commands, measure each unit.

  Returns 0 on success; the helpers exit the process early on build or
  setup failures.
  """
  # When JSON goes to stdout, route human-readable progress to stderr.
  out = sys.stdout
  if ARGS['json']:
    out = sys.stderr

  compile_commands_file, ninja_deps_file = GenerateCompileCommandsAndBuild(
      ARGS['build_dir'], out)

  result = Results()
  status = StatusLine()

  try:
    with open(compile_commands_file) as file:
      compile_commands = json.load(file)
    with open(ninja_deps_file) as file:
      source_dependencies, header_dependents = parse_ninja_deps(file)
      result.addHeaderDeps(source_dependencies, header_dependents)
  except FileNotFoundError as e:
    # Name the file that actually failed to open; the previous message
    # always blamed ninja_deps_file even when compile_commands_file was
    # the missing one.
    print("Error: Cannot read '{}'. Consult --help to get started.".format(
        e.filename))
    exit(1)

  cmd_splitter = CommandSplitter()

  def count_lines_of_unit(ikey):
    # Worker-thread body: preprocess one compilation unit and record its
    # line/byte counts before and after expansion.
    i, key = ikey
    if not result.track(key['file']):
      return
    message = "[{}/{}] Counting LoCs of {}".format(
        i, len(compile_commands), key['file'])
    status.print(message, file=out)
    clangcmd, infilename, infile, outfilename = cmd_splitter.process(key)
    if not infile.is_file():
      return

    # Preprocess (-E -P), drop blank lines, and count lines/bytes.
    clangcmd = clangcmd + " -E -P " + \
        str(infile) + " -o /dev/stdout | sed '/^\\s*$/d' | wc -lc"
    # Count the raw input minus comment-only and blank lines.
    loccmd = ("cat {}  | sed '\\;^\\s*//;d' | sed '\\;^/\\*;d'"
              " | sed '/^\\*/d' | sed '/^\\s*$/d' | wc -lc")
    loccmd = loccmd.format(infile)
    runcmd = " {} ; {}".format(clangcmd, loccmd)
    if ARGS['echocmd']:
      print(runcmd)
    process = subprocess.Popen(
        runcmd, shell=True, cwd=key['directory'], stdout=subprocess.PIPE)
    p = {'process': process, 'infile': infilename, 'outfile': outfilename}
    output, _ = p['process'].communicate()
    # Both `wc -lc` invocations emit "<lines> <bytes>" on stdout, in the
    # order the commands ran: expanded counts first, then input counts.
    expanded, expanded_bytes, loc, in_bytes = list(map(int, output.split()))
    result.recordFile(p['infile'], p['outfile'], loc,
                      in_bytes, expanded, expanded_bytes)

  with tempfile.TemporaryDirectory(dir='/tmp/', prefix="locs.") as temp:
    # NOTE(review): 'temp' is never used below — the temporary directory
    # looks vestigial; confirm before removing.
    start = time.time()

    with ThreadPoolExecutor(max_workers=ARGS['jobs']) as executor:
      # list() forces completion (and surfaces worker exceptions) before
      # the results are reported.
      list(executor.map(count_lines_of_unit, enumerate(compile_commands)))

    end = time.time()
    if ARGS['json']:
      print(json.dumps(result, ensure_ascii=False, cls=LocsEncoder))
    status.print("Processed {:,} files in {:,.2f} sec.".format(
        len(compile_commands), end-start), end="\n", file=out)
    result.printGroupResults(file=out)

    if ARGS['largest']:
      print("Largest {} files after expansion:".format(ARGS['largest']))
      result.printSorted(
          lambda v: v.expanded, ARGS['largest'], reverse=True, out=out)

    if ARGS['worst']:
      print("Worst expansion ({} files):".format(ARGS['worst']))
      result.printSorted(
          lambda v: v.ratio(), ARGS['worst'], reverse=True, out=out)

    if ARGS['smallest']:
      print("Smallest {} input files:".format(ARGS['smallest']))
      result.printSorted(
          lambda v: v.loc, ARGS['smallest'], reverse=False, out=out)

    if ARGS['files']:
      print("List of input files:")
      result.printSorted(
          lambda v: v.file, ARGS['files'], reverse=False, out=out)

  return 0
455
456
# Script entry point: run Main() and propagate its status to the shell.
if __name__ == '__main__':
  sys.exit(Main())
459