#! /usr/bin/env python3

"""Show file statistics by extension."""

import os
import sys


class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label to a dict of ``{counter name: int}``.
    Row labels are file extensions as returned by ``os.path.splitext``
    (passed through ``os.path.normcase``) or one of the pseudo-labels
    ``"<none>"``, ``"<dir>"``, ``"<lnk>"`` and ``"<???>"``.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for each path in *args*.

        Directories are walked recursively, regular files are examined
        directly, and anything else is reported on stderr and counted
        under the ``"<???>"`` row.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and recurse into its entries.

        Entries are visited in sorted order.  Symbolic links are counted
        under ``"<lnk>"`` and never followed.  A directory that cannot be
        listed is reported on stderr and counted as ``"unlistable"``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            # Test for links first so that a link to a directory is
            # counted as a link instead of being descended into.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Read *filename* and record its counters under its extension.

        Always records ``"files"``.  If the file cannot be read, records
        ``"unopenable"`` and stops.  Otherwise records ``"bytes"``; a file
        containing a NUL byte is recorded as ``"binary"`` and gets no line
        or word counts.  Remaining files get ``"lines"`` and ``"words"``,
        plus ``"empty"`` when they contain no data.
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.basename(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            with open(filename, "rb") as f:
                data = f.read()
        except OSError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # latin-1 maps every byte to a code point, so decoding cannot fail.
        lines = str(data, "latin-1").splitlines()
        self.addstats(ext, "lines", len(lines))
        del lines  # Release the decoded copy before splitting into words
        words = data.split()
        self.addstats(ext, "words", len(words))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the statistics as a right-aligned table on stdout.

        One row is printed per label in sorted order, followed by a
        ``TOTAL`` row, with a header line above and below.  Cells for
        counters a row does not have are left blank.

        ``self.stats`` is only read, never modified, so the method can be
        called repeatedly and also works when nothing has been collected.
        """
        exts = sorted(self.stats)
        # The column keys are the union of all counter names in use.
        cols = sorted({key for ext in exts for key in self.stats[ext]})

        minwidth = 6
        totals = {}
        colwidth = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            totals[col] = total
            widths = [len("%d" % value) for value in values]
            colwidth[col] = max(minwidth, len(col), len(str(total)), *widths)

        # The label column must also fit its header and the TOTAL label.
        labelwidth = max(map(len, exts + ["TOTAL", "ext"]))

        rows = [(ext, self.stats[ext]) for ext in exts]
        rows.append(("TOTAL", totals))

        def printheader():
            print("%*s" % (labelwidth, "ext"), end=' ')
            for col in cols:
                print("%*s" % (colwidth[col], col), end=' ')
            print()

        printheader()
        for label, counters in rows:
            print("%*s" % (labelwidth, label), end=' ')
            for col in cols:
                value = counters.get(col, "")
                print("%*s" % (colwidth[col], value), end=' ')
            print()
        printheader()  # Another header at the bottom


def main():
    """Report statistics for the command-line paths (default: current dir)."""
    args = sys.argv[1:]
    if not args:
        args = [os.curdir]
    s = Stats()
    s.statargs(args)
    s.report()


if __name__ == "__main__":
    main()