• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3import sys, os, re, difflib, unicodedata, errno, cgi, itertools
4from itertools import *
5
# Marker characters that prefix diff lines: ZipDiffer tags lines coming from
# the i-th input file with the i-th symbol.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors used by DiffColorizer, indexed in step with diff_symbols.
diff_colors = ['red', 'green', 'blue']
8
def codepoints(s):
	"""Return an iterator over the integer code point of each character of s."""
	return (ord(character) for character in s)
11
class ColorFormatter:
	"""Namespace of output formatters used to highlight text.

	Every nested formatter class offers the same four static methods:
	start_color (c), end_color (), escape (s) and newline ().
	"""

	class Null:
		"""Formatter that adds no markup: colors are dropped, text is untouched."""
		@staticmethod
		def start_color (c): return ''
		@staticmethod
		def end_color (): return ''
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class ANSI:
		"""Formatter that emits ANSI terminal escape sequences."""
		@staticmethod
		def start_color (c):
			# Raises KeyError for any color other than the three listed here.
			return {
				'red': '\033[41;37;1m',
				'green': '\033[42;37;1m',
				'blue': '\033[44;37;1m',
			}[c]
		@staticmethod
		def end_color ():
			return '\033[m'
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class HTML:
		"""Formatter that emits HTML <span> markup."""
		@staticmethod
		def start_color (c):
			return '<span style="background:%s">' % c
		@staticmethod
		def end_color ():
			return '</span>'
		@staticmethod
		def escape (s):
			# cgi.escape () was removed in Python 3.8.  html.escape () with
			# quote=False escapes the same three characters (&, < and >).
			import html
			return html.escape (s, quote=False)
		@staticmethod
		def newline (): return '<br/>\n'

	@staticmethod
	def Auto (argv = None, out = sys.stdout):
		"""Pick a formatter class from command-line style flags.

		Recognized flags are "--format", "--format=ansi", "--format=html"
		and "--no-format".  Each recognized flag is removed from argv (first
		occurrence only), so the caller's list is modified in place.  When
		several flags are present the one checked last wins.  Without any
		flag the ANSI formatter is returned.

		argv: mutable list of arguments; None is treated as an empty list.
		out:  accepted for interface compatibility; not used by this method.

		Returns one of ColorFormatter.ANSI, ColorFormatter.HTML or
		ColorFormatter.Null (the class itself, not an instance).
		"""
		if argv is None:
			argv = []
		# Checked in this order; a later match overrides an earlier one.
		options = (
			("--format", ColorFormatter.ANSI),
			("--format=ansi", ColorFormatter.ANSI),
			("--format=html", ColorFormatter.HTML),
			("--no-format", ColorFormatter.Null),
		)
		chosen = ColorFormatter.ANSI
		for flag, formatter in options:
			if flag in argv:
				argv.remove (flag)
				chosen = formatter
		return chosen
68
69
class DiffColorizer:
	"""Highlights the token-level differences between paired diff lines.

	The formatter passed to the constructor must provide start_color (c),
	end_color (), escape (s) and newline ().
	"""

	# A (possibly empty) run of word characters followed by at most one
	# non-word character.  colorize_lines () uses it to cut a line into
	# tokens.  Both letter cases must be listed, otherwise identifiers get
	# split at every uppercase letter.
	diff_regex = re.compile (r'([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

	def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
		"""Store the formatter plus the per-side colors and marker symbols."""
		self.formatter = formatter
		self.colors = colors
		self.symbols = symbols

	def colorize_lines (self, lines):
		"""Return the two given lines with their differing tokens colored.

		lines: a pair [first, second]; a falsy entry (None or '') is
		       compared as an empty line.

		Returns a list with one output string per non-empty side, each
		prefixed with that side's symbol and terminated by the formatter's
		newline.
		"""
		lines = (l if l else '' for l in lines)
		# Put each word and each separator on its own line so that difflib
		# compares the two inputs token by token.
		ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
		oo = ["",""]           # output accumulated for each side
		st = [False, False]    # whether a color span is currently open on each side
		for l in difflib.Differ().compare (*ss):
			if l[0] == '?':
				# Intraline hint lines from difflib are not needed here.
				continue
			if l[0] == ' ':
				# Token common to both sides: close open spans, then copy it to both.
				for i in range(2):
					if st[i]:
						oo[i] += self.formatter.end_color ()
						st[i] = False
				oo = [o + self.formatter.escape (l[2:]) for o in oo]
				continue
			if l[0] in self.symbols:
				# Token present on one side only ('-' or '+' from difflib).
				i = self.symbols.index (l[0])
				if not st[i]:
					oo[i] += self.formatter.start_color (self.colors[i])
					st[i] = True
				oo[i] += self.formatter.escape (l[2:])
				continue
		for i in range(2):
			if st[i]:
				oo[i] += self.formatter.end_color ()
				st[i] = False
		# Drop the newlines that were inserted to separate the tokens.
		oo = [o.replace ('\n', '') for o in oo]
		return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

	def colorize_diff (self, f):
		"""Yield the lines of diff f with changed lines colorized.

		Lines whose first character is not one of self.symbols are passed
		through (escaped, with '\\n' replaced by the formatter's newline).
		Marked lines are buffered per side and handed to colorize_lines ()
		once both sides are present or a side would be overwritten.
		"""
		lines = [None, None]
		for l in f:
			if l[0] not in self.symbols:
				yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
				continue
			i = self.symbols.index (l[0])
			if lines[i]:
				# This side is already buffered: flush before replacing it.
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
			lines[i] = l[1:]
			if (all (lines)):
				# Both sides are available: flush the pair.
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
		if (any (lines)):
			# Flush whatever is left at end of input.
			for line in self.colorize_lines (lines):
				yield line
130
131
class ZipDiffer:
	"""Compares several line sources by walking them in lockstep."""

	@staticmethod
	def diff_files(files, symbols=diff_symbols):
		"""Write a line-by-line comparison of files to standard output.

		Rows where every source has the same line are written once with a
		leading space.  Otherwise each truthy line is written prefixed with
		the symbol at its source's position in symbols.  A broken pipe is
		ignored; any other IOError terminates the program with a message.
		"""
		sources = tuple(files)  # the argument may be a one-shot generator
		try:
			for row in itertools.zip_longest(*sources):
				if all(row[0] == other for other in row[1:]):
					sys.stdout.writelines([" ", row[0]])
				else:
					for position, text in enumerate(row):
						if text:
							sys.stdout.writelines([symbols[position], text])
		except IOError as err:
			if err.errno != errno.EPIPE:
				sys.exit("%s: %s: %s" % (sys.argv[0], err.filename, err.strerror))
149
150
class DiffFilters:
	"""Generators that filter the lines of a diff."""

	@staticmethod
	def filter_failures(f):
		"""Yield only the lines of f that belong to test cases that did not pass."""
		for _key, group in DiffHelpers.separate_test_cases(f):
			case_lines = list(group)
			if DiffHelpers.test_passed(case_lines):
				continue
			yield from case_lines
159
class Stat:
	"""Running tally: number of tests added and the sum of their freq values."""

	def __init__(self):
		self.count = self.freq = 0

	def add(self, test):
		"""Count one more test and accumulate its freq attribute."""
		self.count = self.count + 1
		self.freq = self.freq + test.freq
169
class Stats:
	"""Pass/fail tallies for a set of tests, with simple statistics on them."""

	def __init__(self):
		self.passed, self.failed, self.total = Stat(), Stat(), Stat()

	def add(self, test):
		"""Record test in the total and in the passed or failed tally."""
		self.total.add(test)
		bucket = self.passed if test.passed else self.failed
		bucket.add(test)

	def mean(self):
		"""Return the fraction of recorded tests that passed."""
		return float(self.passed.count) / self.total.count

	def variance(self):
		"""Return the pass fraction multiplied by the fail fraction."""
		pass_ratio = float(self.passed.count) / self.total.count
		fail_ratio = float(self.failed.count) / self.total.count
		return pass_ratio * fail_ratio

	def stddev(self):
		"""Return the square root of variance()."""
		return self.variance() ** .5

	def zscore(self, population):
		"""Calculate the standard score.

		population is the Stats for the population; self is the Stats for
		the sample.  A larger absolute value means the sample is less likely
		to be random; anything outside of -3..+3 is very unlikely to be
		random.
		See: https://en.wikipedia.org/wiki/Standard_score
		"""
		difference = self.mean() - population.mean()
		return difference / population.stddev()
203
204
205
206
class DiffSinks:
	"""Consumers that read a whole diff and report on it."""

	@staticmethod
	def print_stat (f):
		"""Print how many test cases in diff f passed and how many failed.

		Test cases are delimited by DiffHelpers.separate_test_cases () and
		judged by DiffHelpers.test_passed ().  An input without any test
		case is reported as 0 out of 0 with a 0% failure rate.
		"""
		passed = 0
		failed = 0
		# XXX port to Stats, but that would really slow us down here
		for key, lines in DiffHelpers.separate_test_cases (f):
			if DiffHelpers.test_passed (lines):
				passed += 1
			else:
				failed += 1
		total = passed + failed
		# Guard against ZeroDivisionError when the input held no test cases.
		failed_percent = 100. * failed / total if total else 0.
		print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, failed_percent))
221
222
class Test:
	"""One test case parsed from the diff lines that share its identifier.

	Attributes set by the constructor:
	  freq        always 1
	  passed      False as soon as any line does not start with a space
	  identifier  text between the first character and the first colon of
	              the first line that yields a non-empty one
	  text        payload of a line written as (...)
	  unicodes    payload of a line written as <...>, parsed by Unicode.parse ()
	  glyphs      payload of a line written as [...]
	"""

	def __init__ (self, lines):
		self.freq = 1
		self.passed = True
		self.identifier = None
		self.text = None
		self.unicodes = None
		self.glyphs = None
		for l in lines:
			symbol = l[0]
			if symbol != ' ':
				self.passed = False
			# i ends up as the index of the opening bracket.
			i = 1
			if ':' in l:
				i = l.index (':')
				if not self.identifier:
					self.identifier = l[1:i]
				i = i + 2 # Skip colon and space
			# j ends up as the index of the closing bracket, skipping a
			# trailing newline when there is one.
			j = -1
			if l[j] == '\n':
				j -= 1
			brackets = l[i] + l[j]
			# Slice up to the closing bracket itself.  A fixed -2 would cut
			# off the last payload character of a line without a newline.
			l = l[i+1:j]
			if brackets == '()':
				self.text = l
			elif brackets == '<>':
				self.unicodes = Unicode.parse (l)
			elif brackets == '[]':
				# XXX we don't handle failed tests here
				self.glyphs = l
254
255
class DiffHelpers:
	"""Shared helpers for splitting a diff into test cases and judging them."""

	@staticmethod
	def separate_test_cases(f):
		'''Group consecutive lines of f by their identifier.

		   The identifier of a line is the text between its first character
		   and its first colon.  A line with no colon after its first
		   character is its own key.  Returns the itertools.groupby iterator
		   of (key, lines) pairs.'''

		def group_key(line):
			if ':' not in line[1:]:
				return line
			return line[1:line.index(':')]

		return itertools.groupby(f, key=group_key)

	@staticmethod
	def test_passed(lines):
		"""Return True when the lines of one test case count as a pass.

		A case passes when every line starts with a space, or when a '+'
		line contains one of the marker substrings listed below.
		"""
		lines = list(lines)
		# XXX This is a hack, but does the job for now.
		forgiving_markers = (
			"space+0|space+0",
			"uni25CC",
			"dottedcircle",
			"glyph0",
			"gid0",
			"notdef",
		)
		for marker in forgiving_markers:
			if any(marker in l for l in lines if l[0] == '+'):
				return True
		return all(l[0] == ' ' for l in lines)
281
282
class FilterHelpers:
	"""Adapters that turn a line filter into a function printing its output."""

	@staticmethod
	def filter_printer_function(filter_callback):
		"""Return a function that prints each item filter_callback yields, one per line."""
		def printer(f):
			for item in filter_callback(f):
				print(item)
		return printer

	@staticmethod
	def filter_printer_function_no_newline(filter_callback):
		"""Return a function that writes each item filter_callback yields as is."""
		def printer(f):
			for item in filter_callback(f):
				sys.stdout.write(item)
		return printer
298
299
class Ngram:
	"""Factory for sliding-window n-gram generators."""

	@staticmethod
	def generator(n):
		"""Return a generator function yielding every run of n consecutive items.

		The returned function takes an iterable and yields tuples of length
		n.  The window size is also exposed as its attribute n.
		"""

		def gen(f):
			window = []
			for item in f:
				window.append(item)
				if len(window) != n:
					continue
				yield tuple(window)
				# Slide the window forward by one item.
				del window[0]

		gen.n = n
		return gen
315
316
class UtilMains:
	"""Ready-made main routines driven by sys.argv.

	Each routine exits with a usage message when "--help" is among the
	arguments, ignores a broken pipe, and exits with an error message on any
	other IOError.
	"""

	@staticmethod
	def process_multiple_files(callback, mnemonic = "FILE"):
		"""Call callback on the opened file for each argument ('-' if none)."""

		if "--help" in sys.argv:
			sys.exit("Usage: %s %s..." % (sys.argv[0], mnemonic))

		try:
			paths = sys.argv[1:] or ['-']
			for path in paths:
				callback(FileHelpers.open_file_or_stdin(path))
		except IOError as err:
			if err.errno != errno.EPIPE:
				sys.exit("%s: %s: %s" % (sys.argv[0], err.filename, err.strerror))

	@staticmethod
	def process_multiple_args(callback, mnemonic):
		"""Call callback on each command-line argument; at least one is required."""

		no_arguments = len(sys.argv) == 1
		if no_arguments or "--help" in sys.argv:
			sys.exit("Usage: %s %s..." % (sys.argv[0], mnemonic))

		try:
			for argument in sys.argv[1:]:
				callback(argument)
		except IOError as err:
			if err.errno != errno.EPIPE:
				sys.exit("%s: %s: %s" % (sys.argv[0], err.filename, err.strerror))

	@staticmethod
	def filter_multiple_strings_or_stdin(callback, mnemonic,
					      separator = " ",
					      concat_separator = False):
		"""Print callback applied to the arguments, or to each line of stdin.

		Without arguments, every line of standard input (minus its trailing
		newline) is converted and printed on its own line.  With arguments,
		the converted arguments are printed on one line joined by separator.
		If concat_separator is given, the arguments are first joined with
		it into a single string.
		"""

		if "--help" in sys.argv:
			sys.exit("""Usage:
  %s %s...
or:
  %s
When called with no arguments, input is read from standard input.
""" % (sys.argv[0], mnemonic, sys.argv[0]))

		try:
			if len(sys.argv) != 1:
				args = sys.argv[1:]
				if concat_separator != False:
					args = [concat_separator.join(args)]
				print(separator.join(callback(x) for x in args))
			else:
				# readline () returns '' only at end of input.
				for line in iter(sys.stdin.readline, ''):
					if line.endswith('\n'):
						line = line[:-1]
					print(callback(line))
		except IOError as err:
			if err.errno != errno.EPIPE:
				sys.exit("%s: %s: %s" % (sys.argv[0], err.filename, err.strerror))
376
377
class Unicode:
	"""Conversions between text, hexadecimal code point lists and character names."""

	# Abbreviations substituted for the full character names by pretty_name().
	shorthands = {
		"ZERO WIDTH NON-JOINER": "ZWNJ",
		"ZERO WIDTH JOINER": "ZWJ",
		"NARROW NO-BREAK SPACE": "NNBSP",
		"COMBINING GRAPHEME JOINER": "CGJ",
		"LEFT-TO-RIGHT MARK": "LRM",
		"RIGHT-TO-LEFT MARK": "RLM",
		"LEFT-TO-RIGHT EMBEDDING": "LRE",
		"RIGHT-TO-LEFT EMBEDDING": "RLE",
		"POP DIRECTIONAL FORMATTING": "PDF",
		"LEFT-TO-RIGHT OVERRIDE": "LRO",
		"RIGHT-TO-LEFT OVERRIDE": "RLO",
	}

	@staticmethod
	def decode(s):
		"""Return the characters of s as comma-separated "U+XXXX" values."""
		hex_points = ["U+%04X" % cp for cp in codepoints(s)]
		return ','.join(hex_points)

	@staticmethod
	def parse(s):
		"""Return the list of integers for the hexadecimal numbers found in s.

		"0x" prefixes and the usual decoration characters (U, +, <, >,
		commas, ...) are treated as separators.
		"""
		cleaned = re.sub(r"0[xX]", " ", s)
		cleaned = re.sub(r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", cleaned)
		return [int(token, 16) for token in cleaned.split()]

	@staticmethod
	def encode(s):
		"""Return the string made of the code points that parse() finds in s."""
		return ''.join(map(chr, Unicode.parse(s)))

	@staticmethod
	def pretty_name(u):
		"""Return a shortened Unicode name for character u, or "XXX" if it has none."""
		try:
			name = unicodedata.name(u)
		except ValueError:
			return "XXX"
		# Applied in order; each one strips a prefix of the official name.
		rewrites = (
			(".* LETTER ", ""),
			(".* VOWEL SIGN (.*)", r"\1-MATRA"),
			(".* SIGN ", ""),
			(".* COMBINING ", ""),
		)
		for pattern, replacement in rewrites:
			name = re.sub(pattern, replacement, name)
		if re.match(".* VIRAMA", name):
			name = "HALANT"
		return Unicode.shorthands.get(name, name)

	@staticmethod
	def pretty_names(s):
		"""Return the pretty names of the hexadecimal code points in s, joined by " + "."""
		cleaned = re.sub(r"[<+>\\uU]", " ", s)
		cleaned = re.sub(r"0[xX]", " ", cleaned)
		chars = [chr(int(token, 16)) for token in re.split('[, \n]', cleaned) if token]
		return ' + '.join(map(Unicode.pretty_name, chars))
430
431
class FileHelpers:
	"""Small helpers for opening input files."""

	@staticmethod
	def open_file_or_stdin(f):
		"""Return sys.stdin when f is '-', otherwise the file f opened for reading."""
		return sys.stdin if f == '-' else open(f)
439
440
class Manifest:
	"""Reads and generates MANIFEST files, which list the entries of a directory."""

	@staticmethod
	def read (s, strict = True):
		"""Yield the normalized paths of all files reachable from s.

		If s is a file, its normalized path is yielded.  If s is a
		directory, the entries named in its MANIFEST file (one per line,
		blank lines ignored) are read recursively.

		strict: when true, a missing path or missing MANIFEST terminates the
		        program with an error message; when false, it is skipped.
		        The setting also applies to nested entries.
		"""

		if not os.path.exists (s):
			if strict:
				sys.exit ("%s: %s does not exist" % (sys.argv[0], s))
			return

		s = os.path.normpath (s)

		if not os.path.isdir (s):
			yield s
			return

		manifest = os.path.join (s, "MANIFEST")
		try:
			# The with-block closes the file as soon as it has been read.
			with open (manifest) as m:
				items = [x.strip () for x in m]
		except IOError:
			if strict:
				sys.exit ("%s: %s does not exist" % (sys.argv[0], manifest))
			return

		for f in items:
			if not f:
				# A blank entry would resolve to s itself and recurse forever.
				continue
			# Pass strict along so that nested lookups follow the same policy.
			for p in Manifest.read (os.path.join (s, f), strict):
				yield p

	@staticmethod
	def update_recursive (s):
		"""Write a MANIFEST file into s and into every directory below it.

		Each MANIFEST lists the sorted file names followed by the sorted
		directory names.  Entries named MANIFEST, README, LICENSE, COPYING,
		AUTHORS, SOURCES or ChangeLog are left out, and directories with
		those names are not descended into.  A "GEN" line is printed for
		each file written.
		"""

		skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

		# os.walk () already visits every subdirectory, so no explicit
		# recursion is needed.  Recursing as well would regenerate nested
		# manifests over and over.
		for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

			for f in skipped:
				# Removing from dirnames in place also stops os.walk from descending.
				if f in dirnames:
					dirnames.remove (f)
				if f in filenames:
					filenames.remove (f)
			dirnames.sort ()
			filenames.sort ()
			ms = os.path.join (dirpath, "MANIFEST")
			print ("  GEN    %s" % ms)
			with open (ms, "w") as m:
				for f in filenames:
					print (f, file=m)
				for f in dirnames:
					print (f, file=m)
489
# Running this file directly performs no action.
if __name__ == '__main__':
	pass
492