1"""Generate 10,000 unique examples for the Levenshtein short-circuit tests.""" 2 3import argparse 4from functools import lru_cache 5import json 6import os.path 7from random import choices, randrange 8 9 10# This should be in sync with Lib/traceback.py. It's not importing those values 11# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree 12# build of Python. 13_MOVE_COST = 2 14_CASE_COST = 1 15 16 17def _substitution_cost(ch_a, ch_b): 18 if ch_a == ch_b: 19 return 0 20 if ch_a.lower() == ch_b.lower(): 21 return _CASE_COST 22 return _MOVE_COST 23 24 25@lru_cache(None) 26def levenshtein(a, b): 27 if not a or not b: 28 return (len(a) + len(b)) * _MOVE_COST 29 option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1]) 30 option2 = levenshtein(a[:-1], b) + _MOVE_COST 31 option3 = levenshtein(a, b[:-1]) + _MOVE_COST 32 return min(option1, option2, option3) 33 34 35def main(): 36 parser = argparse.ArgumentParser(description=__doc__) 37 parser.add_argument('output_path', metavar='FILE', type=str) 38 parser.add_argument('--overwrite', dest='overwrite', action='store_const', 39 const=True, default=False, 40 help='overwrite an existing test file') 41 42 args = parser.parse_args() 43 output_path = os.path.realpath(args.output_path) 44 if not args.overwrite and os.path.isfile(output_path): 45 print(f"{output_path} already exists, skipping regeneration.") 46 print( 47 "To force, add --overwrite to the invocation of this tool or" 48 " delete the existing file." 49 ) 50 return 51 52 examples = set() 53 # Create a lot of non-empty examples, which should end up with a Gauss-like 54 # distribution for even costs (moves) and odd costs (case substitutions). 55 while len(examples) < 9990: 56 a = ''.join(choices("abcABC", k=randrange(1, 10))) 57 b = ''.join(choices("abcABC", k=randrange(1, 10))) 58 expected = levenshtein(a, b) 59 examples.add((a, b, expected)) 60 # Create one empty case each for strings between 0 and 9 in length. 61 for i in range(10): 62 b = ''.join(choices("abcABC", k=i)) 63 expected = levenshtein("", b) 64 examples.add(("", b, expected)) 65 with open(output_path, "w") as f: 66 json.dump(sorted(examples), f, indent=2) 67 68 69if __name__ == "__main__": 70 main() 71