#!/usr/bin/env python3
"""Helpers for diffing, colorizing and summarizing line-oriented test output.

The module also carries small utilities for command-line argument handling,
Unicode code point pretty-printing and MANIFEST file maintenance.

Scripts may star-import this module, so every module imported here is
re-exported on purpose; do not add ``__all__`` or drop imports.
"""

import sys
import os
import re
import difflib
import unicodedata
import errno
import html
import itertools
from itertools import *

try:
    # Legacy import, no longer used here: cgi.escape() is gone since
    # Python 3.8 and the module itself since 3.13.  Kept (guarded) only so
    # that star-importing scripts still find the name where it exists.
    import cgi
except ImportError:
    cgi = None

# One marker character per input file, in file order.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight color for the first, second, ... side of a diff.
diff_colors = ['red', 'green', 'blue']


def codepoints(s):
    """Return a generator over the integer code points of string *s*."""
    return (ord(u) for u in s)


class ColorFormatter:
    """Namespace of output formatters.

    Every formatter exposes the same four static methods: ``start_color``,
    ``end_color``, ``escape`` and ``newline``.
    """

    class Null:
        """Formatter that emits no color markup at all."""

        @staticmethod
        def start_color(c):
            return ''

        @staticmethod
        def end_color():
            return ''

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class ANSI:
        """Formatter that emits ANSI terminal escape sequences."""

        @staticmethod
        def start_color(c):
            """Return the escape sequence for color *c* (KeyError if unknown)."""
            return {
                'red': '\033[41;37;1m',
                'green': '\033[42;37;1m',
                'blue': '\033[44;37;1m',
            }[c]

        @staticmethod
        def end_color():
            return '\033[m'

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class HTML:
        """Formatter that emits HTML ``<span>`` markup."""

        @staticmethod
        def start_color(c):
            return '<span style="background:%s">' % c

        @staticmethod
        def end_color():
            return '</span>'

        @staticmethod
        def escape(s):
            """Escape ``&``, ``<`` and ``>`` in *s*.

            quote=False reproduces what cgi.escape() did by default.
            """
            return html.escape(s, quote=False)

        @staticmethod
        def newline():
            return '<br/>\n'

    @staticmethod
    def Auto(argv=None, out=sys.stdout):
        """Pick a formatter class from command-line flags.

        Recognized flags are removed from *argv* in place (first occurrence
        only).  When several are present the one checked last wins, in the
        order ``--format``, ``--format=ansi``, ``--format=html``,
        ``--no-format``.  ANSI is the default.  *out* is accepted but unused.
        """
        if argv is None:
            argv = []
        flags = (
            ("--format", ColorFormatter.ANSI),
            ("--format=ansi", ColorFormatter.ANSI),
            ("--format=html", ColorFormatter.HTML),
            ("--no-format", ColorFormatter.Null),
        )
        fmt = ColorFormatter.ANSI
        for flag, formatter in flags:
            if flag in argv:
                argv.remove(flag)
                fmt = formatter
        return fmt


class DiffColorizer:
    """Highlights the differing words of paired diff lines."""

    # Splits a line into alternating word / single-separator tokens.
    diff_regex = re.compile(r'([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

    # Tags difflib.Differ puts on removed / added entries.  They are fixed by
    # difflib and independent of the configurable ``symbols``.
    _DIFFER_TAGS = "-+"

    def __init__(self, formatter, colors=diff_colors, symbols=diff_symbols):
        self.formatter = formatter
        self.colors = colors
        self.symbols = symbols

    def colorize_lines(self, lines):
        """Return the two *lines* with their differing tokens highlighted.

        *lines* holds exactly two entries (old, new); a falsy entry is
        treated as an empty line.  Each returned string is prefixed with the
        matching entry of ``self.symbols`` and terminated by the formatter's
        newline.  Sides that end up empty are omitted.
        """
        fmt = self.formatter
        lines = (l if l else '' for l in lines)
        # One token per line, so that Differ compares word by word.
        tokens = [self.diff_regex.sub(r'\1\n\2\n', l).splitlines(True)
                  for l in lines]
        out = [[], []]
        colored = [False, False]

        def close(side):
            if colored[side]:
                out[side].append(fmt.end_color())
                colored[side] = False

        for entry in difflib.Differ().compare(*tokens):
            tag, text = entry[0], entry[2:]
            if tag == '?':
                # Intraline hint lines carry no content.
                continue
            if tag == ' ':
                for side in range(2):
                    close(side)
                    out[side].append(fmt.escape(text))
                continue
            side = self._DIFFER_TAGS.find(tag)
            if side < 0:
                continue
            if not colored[side]:
                out[side].append(fmt.start_color(self.colors[side]))
                colored[side] = True
            out[side].append(fmt.escape(text))
        for side in range(2):
            close(side)
        # Drop the token separators inserted above (and the line's own '\n').
        bodies = [''.join(parts).replace('\n', '') for parts in out]
        return [symbol + body + fmt.newline()
                for symbol, body in zip(self.symbols, bodies) if body]

    def colorize_diff(self, f):
        """Yield the lines of diff *f* with old/new pairs colorized.

        Lines starting with the first or second symbol are buffered and
        colorized as a pair.  Every other line, including lines starting
        with a later symbol, is escaped and passed through as soon as it is
        read.
        """
        markers = self.symbols[:2]
        pending = [None, None]
        for l in f:
            marker = l[:1]
            if not marker or marker not in markers:
                yield self.formatter.escape(l).replace(
                    '\n', self.formatter.newline())
                continue
            side = markers.index(marker)
            if pending[side]:
                # Same side twice in a row: flush what we have first.
                yield from self.colorize_lines(pending)
                pending = [None, None]
            pending[side] = l[1:]
            if all(pending):
                yield from self.colorize_lines(pending)
                pending = [None, None]
        if any(pending):
            yield from self.colorize_lines(pending)


class ZipDiffer:
    """Line-by-line (not LCS) diff of several files read in lockstep."""

    @staticmethod
    def diff_files(files, symbols=diff_symbols):
        """Write a positional diff of *files* to standard output.

        Line N of every file is compared.  If all agree it is written once
        with a leading space; otherwise each file's non-empty line is written
        prefixed with that file's symbol.  EPIPE is ignored; any other
        IOError exits the program.
        """
        files = tuple(files)  # in case it's a generator, copy it
        try:
            for lines in itertools.zip_longest(*files):
                first = lines[0]
                if all(first == other for other in lines[1:]):
                    sys.stdout.writelines([" ", first])
                    continue

                for i, l in enumerate(lines):
                    if l:
                        sys.stdout.writelines([symbols[i], l])
        except IOError as e:
            if e.errno != errno.EPIPE:
                sys.exit("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))


class DiffFilters:
    """Generators that filter diff output."""

    @staticmethod
    def filter_failures(f):
        """Yield only the lines of *f* that belong to failed test cases."""
        for key, lines in DiffHelpers.separate_test_cases(f):
            lines = list(lines)
            if not DiffHelpers.test_passed(lines):
                for l in lines:
                    yield l


class Stat:
    """Counter of tests and of their accumulated ``freq``."""

    def __init__(self):
        self.count = 0
        self.freq = 0

    def add(self, test):
        """Account for one *test* (an object with a ``freq`` attribute)."""
        self.count += 1
        self.freq += test.freq


class Stats:
    """Pass/fail statistics over a set of tests."""

    def __init__(self):
        self.passed = Stat()
        self.failed = Stat()
        self.total = Stat()

    def add(self, test):
        """Account for *test* in the total and in its pass/fail bucket."""
        self.total.add(test)
        if test.passed:
            self.passed.add(test)
        else:
            self.failed.add(test)

    def mean(self):
        """Return the fraction of passed tests.

        Raises ZeroDivisionError when no test was added.
        """
        return float(self.passed.count) / self.total.count

    def variance(self):
        """Return pass fraction times fail fraction.

        Raises ZeroDivisionError when no test was added.
        """
        return (float(self.passed.count) / self.total.count) * \
               (float(self.failed.count) / self.total.count)

    def stddev(self):
        """Return the square root of variance()."""
        return self.variance() ** .5

    def zscore(self, population):
        """Calculate the standard score.
        Population is the Stats for population.
        Self is Stats for sample.
        Returns larger absolute value if sample is highly unlikely to be random.
        Anything outside of -3..+3 is very unlikely to be random.
        See: https://en.wikipedia.org/wiki/Standard_score"""

        return (self.mean() - population.mean()) / population.stddev()


class DiffSinks:
    """Consumers that reduce diff output to a summary."""

    @staticmethod
    def print_stat(f):
        """Print how many test cases in diff *f* passed and failed."""
        passed = 0
        failed = 0
        # XXX port to Stats, but that would really slow us down here
        for key, lines in DiffHelpers.separate_test_cases(f):
            if DiffHelpers.test_passed(lines):
                passed += 1
            else:
                failed += 1
        total = passed + failed
        # Empty input has no failure rate; report 0% instead of dividing by 0.
        percent = 100. * failed / total if total else 0.
        print("%d out of %d tests passed. %d failed (%g%%)"
              % (passed, total, failed, percent))


class Test:
    """One test case parsed from its diff lines.

    Each line is a marker character, an optional ``identifier:`` prefix and a
    bracketed payload: ``(text)``, ``<unicodes>`` or ``[glyphs]``.
    """

    def __init__(self, lines):
        self.freq = 1
        self.passed = True
        self.identifier = None
        self.text = None
        self.unicodes = None
        self.glyphs = None
        for l in lines:
            if not l:
                continue
            if l[0] != ' ':
                # Any non-context marker means the outputs differed.
                self.passed = False
            i = 1
            if ':' in l:
                i = l.index(':')
                if not self.identifier:
                    self.identifier = l[1:i]
                i = i + 2  # Skip colon and space
            # Index of the closing bracket: last char, ignoring a newline.
            j = -1
            if l[j] == '\n':
                j -= 1
            closing = l[j] if len(l) >= -j else ''
            brackets = l[i:i + 1] + closing
            payload = l[i + 1:j]
            if brackets == '()':
                self.text = payload
            elif brackets == '<>':
                self.unicodes = Unicode.parse(payload)
            elif brackets == '[]':
                # XXX we don't handle failed tests here
                self.glyphs = payload


class DiffHelpers:
    """Helpers for grouping and judging diff lines."""

    @staticmethod
    def separate_test_cases(f):
        '''Reads lines from f, and if the lines have identifiers, ie.
        have a colon character, groups them by identifier,
        yielding lists of all lines with the same identifier.'''

        def identifier(l):
            if ':' in l[1:]:
                # Search from column 1 so a colon used as marker is skipped.
                return l[1:l.index(':', 1)]
            return l
        return itertools.groupby(f, key=identifier)

    @staticmethod
    def test_passed(lines):
        """Return True if the test case made of *lines* counts as passed.

        A case passes when all its lines are context lines, or when any
        ``+`` line contains one of a few known substrings.
        """
        lines = list(lines)
        # XXX This is a hack, but does the job for now.
        hints = ("space+0|space+0", "uni25CC", "dottedcircle",
                 "glyph0", "gid0", "notdef")
        for l in lines:
            if l.startswith('+') and any(hint in l for hint in hints):
                return True
        return all(l.startswith(' ') for l in lines)


class FilterHelpers:
    """Adapters that turn a filter generator into a printing callback."""

    @staticmethod
    def filter_printer_function(filter_callback):
        """Return a function printing each filtered item on its own line."""
        def printer(f):
            for line in filter_callback(f):
                print(line)
        return printer

    @staticmethod
    def filter_printer_function_no_newline(filter_callback):
        """Return a function writing each filtered item without a newline."""
        def printer(f):
            for line in filter_callback(f):
                sys.stdout.writelines([line])
        return printer


class Ngram:
    """Sliding-window n-gram generation."""

    @staticmethod
    def generator(n):
        """Return a generator function yielding all n-tuples of its input.

        The returned function carries *n* as its ``n`` attribute.
        """

        def gen(f):
            window = []
            for x in f:
                window.append(x)
                if len(window) == n:
                    yield tuple(window)
                    del window[:1]

        gen.n = n
        return gen


class UtilMains:
    """Ready-made ``main`` bodies for small command-line tools."""

    @staticmethod
    def process_multiple_files(callback, mnemonic="FILE"):
        """Call *callback* with each file named in sys.argv, opened for reading.

        Without arguments standard input is used; so is ``-``.  Each file is
        closed once its callback returns.  EPIPE is ignored; any other
        IOError exits the program.
        """

        if "--help" in sys.argv:
            sys.exit("Usage: %s %s..." % (sys.argv[0], mnemonic))

        try:
            files = sys.argv[1:] if len(sys.argv) > 1 else ['-']
            for s in files:
                f = FileHelpers.open_file_or_stdin(s)
                try:
                    callback(f)
                finally:
                    if f is not sys.stdin:
                        f.close()
        except IOError as e:
            if e.errno != errno.EPIPE:
                sys.exit("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))

    @staticmethod
    def process_multiple_args(callback, mnemonic):
        """Call *callback* with each command-line argument in turn.

        Exits with a usage message when called without arguments.
        """

        if len(sys.argv) == 1 or "--help" in sys.argv:
            sys.exit("Usage: %s %s..." % (sys.argv[0], mnemonic))

        try:
            for s in sys.argv[1:]:
                callback(s)
        except IOError as e:
            if e.errno != errno.EPIPE:
                sys.exit("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))

    @staticmethod
    def filter_multiple_strings_or_stdin(callback, mnemonic,
                                         separator=" ",
                                         concat_separator=False):
        """Print *callback* applied to the arguments or to stdin lines.

        With arguments, the results are joined with *separator* and printed
        on one line; if *concat_separator* is given, the arguments are first
        joined with it into a single string.  Without arguments, each line
        of standard input (minus its newline) is transformed and printed.
        """

        if "--help" in sys.argv:
            sys.exit("""Usage:
  %s %s...
or:
  %s
When called with no arguments, input is read from standard input.
""" % (sys.argv[0], mnemonic, sys.argv[0]))

        try:
            if len(sys.argv) == 1:
                for line in sys.stdin:
                    if line[-1] == '\n':
                        line = line[:-1]
                    print(callback(line))
            else:
                args = sys.argv[1:]
                # "!=" rather than "is not": 0 also disables concatenation.
                if concat_separator != False:
                    args = [concat_separator.join(args)]
                print(separator.join(callback(x) for x in args))
        except IOError as e:
            if e.errno != errno.EPIPE:
                sys.exit("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))


class Unicode:
    """Conversions between strings and textual code point notations."""

    @staticmethod
    def decode(s):
        """Return *s* as comma-separated ``U+XXXX`` code points."""
        return ','.join("U+%04X" % cp for cp in codepoints(s))

    @staticmethod
    def parse(s):
        """Return the list of integers for the hex code points found in *s*.

        ``0x`` prefixes and the characters ``<+>{},;&#\\xXuUnNiI`` are
        treated as separators.  Raises ValueError on non-hex tokens.
        """
        s = re.sub(r"0[xX]", " ", s)
        s = re.sub(r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
        return [int(x, 16) for x in s.split()]

    @staticmethod
    def encode(s):
        """Return the string made of the code points written in *s*."""
        return ''.join(chr(x) for x in Unicode.parse(s))

    shorthands = {
        "ZERO WIDTH NON-JOINER": "ZWNJ",
        "ZERO WIDTH JOINER": "ZWJ",
        "NARROW NO-BREAK SPACE": "NNBSP",
        "COMBINING GRAPHEME JOINER": "CGJ",
        "LEFT-TO-RIGHT MARK": "LRM",
        "RIGHT-TO-LEFT MARK": "RLM",
        "LEFT-TO-RIGHT EMBEDDING": "LRE",
        "RIGHT-TO-LEFT EMBEDDING": "RLE",
        "POP DIRECTIONAL FORMATTING": "PDF",
        "LEFT-TO-RIGHT OVERRIDE": "LRO",
        "RIGHT-TO-LEFT OVERRIDE": "RLO",
    }

    @staticmethod
    def pretty_name(u):
        """Return a shortened Unicode name for character *u*.

        Returns ``"XXX"`` for characters without a name.
        """
        try:
            s = unicodedata.name(u)
        except ValueError:
            return "XXX"
        # The substitutions are order-dependent; keep them in this order.
        s = re.sub(".* LETTER ", "", s)
        s = re.sub(".* VOWEL SIGN (.*)", r"\1-MATRA", s)
        s = re.sub(".* SIGN ", "", s)
        s = re.sub(".* COMBINING ", "", s)
        if re.match(".* VIRAMA", s):
            s = "HALANT"
        if s in Unicode.shorthands:
            s = Unicode.shorthands[s]
        return s

    @staticmethod
    def pretty_names(s):
        """Return the pretty names of the code points in *s*, joined by `` + ``."""
        s = re.sub(r"[<+>\\uU]", " ", s)
        s = re.sub(r"0[xX]", " ", s)
        chars = [chr(int(x, 16)) for x in re.split('[, \n]', s) if len(x)]
        return ' + '.join(Unicode.pretty_name(x) for x in chars)


class FileHelpers:
    """File opening helpers."""

    @staticmethod
    def open_file_or_stdin(f):
        """Return sys.stdin for ``-``, otherwise *f* opened for reading."""
        if f == '-':
            return sys.stdin
        return open(f)


class Manifest:
    """Reading and generating per-directory MANIFEST files."""

    @staticmethod
    def read(s, strict=True):
        """Yield the files reachable from path *s*.

        A file yields itself.  A directory yields, recursively, the entries
        listed one per line in its MANIFEST file; blank lines are ignored.
        With *strict*, a missing path or MANIFEST exits the program;
        otherwise it yields nothing.  Entries found inside a MANIFEST are
        always read strictly.
        """

        if not os.path.exists(s):
            if strict:
                sys.exit("%s: %s does not exist" % (sys.argv[0], s))
            return

        s = os.path.normpath(s)

        if not os.path.isdir(s):
            yield s
            return

        manifest = os.path.join(s, "MANIFEST")
        try:
            with open(manifest) as m:
                items = [x.strip() for x in m]
        except IOError:
            if strict:
                sys.exit("%s: %s does not exist" % (sys.argv[0], manifest))
            return

        for f in items:
            if not f:
                # A blank entry would resolve to this very directory and
                # recurse forever.
                continue
            yield from Manifest.read(os.path.join(s, f))

    @staticmethod
    def update_recursive(s):
        """Write a MANIFEST file into *s* and every directory below it.

        Each MANIFEST lists the directory's files, then its subdirectories,
        both sorted, skipping a fixed set of housekeeping names.
        Subdirectories with such a name are not descended into.
        """
        skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS",
                   "SOURCES", "ChangeLog"]

        # os.walk already visits every subdirectory; pruning dirnames in
        # place keeps it out of the skipped ones.
        for dirpath, dirnames, filenames in os.walk(s, followlinks=True):

            for f in skipped:
                if f in dirnames:
                    dirnames.remove(f)
                if f in filenames:
                    filenames.remove(f)
            dirnames.sort()
            filenames.sort()
            ms = os.path.join(dirpath, "MANIFEST")
            print(" GEN %s" % ms)
            with open(ms, "w") as m:
                for f in filenames:
                    print(f, file=m)
                for f in dirnames:
                    print(f, file=m)


if __name__ == '__main__':
    pass