#! /usr/bin/env python

"""Show file statistics by extension."""

from __future__ import print_function

import os
import sys


class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label (a normalized file extension, or one of
    the pseudo-extensions ``<dir>``, ``<lnk>``, ``<none>``, ``<???>``) to a
    dict of counters such as ``files``, ``bytes``, ``lines`` and ``words``.
    """

    # Minimum width of every numeric column in the report.
    MIN_COLUMN_WIDTH = 6

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for each path in *args*.

        Directories are walked recursively, regular files are examined
        directly, and anything else is reported on stderr and counted as
        ``unknown`` under the ``<???>`` row.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        Symbolic links are counted but not followed.  A directory that
        cannot be listed is reported on stderr and counted as ``unlistable``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = sorted(os.listdir(dir))
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in names:
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        The file is counted under its case-normalized extension (``<none>``
        when it has no extension).  Files containing a NUL byte are counted
        as ``binary`` and get no line or word counts.
        """
        ext = os.path.splitext(filename)[1]
        if ext == os.path.basename(filename):
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            f = open(filename, "rb")
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        # The context manager closes the file even if read() raises.
        with f:
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to the counter *key* of the row *ext*."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as a right-aligned table.

        One row is printed per extension, followed by a ``TOTAL`` row, with
        a header line above and below.  ``self.stats`` is left untouched, so
        the report may be printed more than once.
        """
        exts = sorted(self.stats)
        cols = sorted({key for ext in exts for key in self.stats[ext]})

        # The label column must fit the header and the TOTAL label as well
        # as every extension; this also keeps max() away from an empty list.
        colwidth = {"ext": max(len(label) for label in exts + ["TOTAL", "ext"])}
        totals = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            totals[col] = total
            widths = [len("%d" % value) for value in values]
            colwidth[col] = max(
                [self.MIN_COLUMN_WIDTH, len(col), len(str(total))] + widths)

        def printrow(label, values):
            # Every cell, including the last, is followed by a single space.
            cells = ["%*s" % (colwidth["ext"], label)]
            cells.extend("%*s" % (colwidth[col], values.get(col, ""))
                         for col in cols)
            print(" ".join(cells) + " ")

        header = dict((col, col) for col in cols)
        printrow("ext", header)
        for ext in exts:
            printrow(ext, self.stats[ext])
        printrow("TOTAL", totals)
        printrow("ext", header)  # Another header at the bottom


def main():
    """Report on the paths given on the command line (default: ``.``)."""
    args = sys.argv[1:]
    if not args:
        args = [os.curdir]
    s = Stats()
    s.statargs(args)
    s.report()


if __name__ == "__main__":
    main()