• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
2
3import argparse
4from functools import lru_cache
5import json
6import os.path
7from random import choices, randrange
8
9
10# This should be in sync with Lib/traceback.py.  It's not importing those values
11# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree
12# build of Python.
13_MOVE_COST = 2
14_CASE_COST = 1
15
16
17def _substitution_cost(ch_a, ch_b):
18    if ch_a == ch_b:
19        return 0
20    if ch_a.lower() == ch_b.lower():
21        return _CASE_COST
22    return _MOVE_COST
23
24
25@lru_cache(None)
26def levenshtein(a, b):
27    if not a or not b:
28        return (len(a) + len(b)) * _MOVE_COST
29    option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
30    option2 = levenshtein(a[:-1], b) + _MOVE_COST
31    option3 = levenshtein(a, b[:-1]) + _MOVE_COST
32    return min(option1, option2, option3)
33
34
35def main():
36    parser = argparse.ArgumentParser(description=__doc__)
37    parser.add_argument('output_path', metavar='FILE', type=str)
38    parser.add_argument('--overwrite', dest='overwrite', action='store_const',
39                        const=True, default=False,
40                        help='overwrite an existing test file')
41
42    args = parser.parse_args()
43    output_path = os.path.realpath(args.output_path)
44    if not args.overwrite and os.path.isfile(output_path):
45        print(f"{output_path} already exists, skipping regeneration.")
46        print(
47            "To force, add --overwrite to the invocation of this tool or"
48            " delete the existing file."
49        )
50        return
51
52    examples = set()
53    # Create a lot of non-empty examples, which should end up with a Gauss-like
54    # distribution for even costs (moves) and odd costs (case substitutions).
55    while len(examples) < 9990:
56        a = ''.join(choices("abcABC", k=randrange(1, 10)))
57        b = ''.join(choices("abcABC", k=randrange(1, 10)))
58        expected = levenshtein(a, b)
59        examples.add((a, b, expected))
60    # Create one empty case each for strings between 0 and 9 in length.
61    for i in range(10):
62        b = ''.join(choices("abcABC", k=i))
63        expected = levenshtein("", b)
64        examples.add(("", b, expected))
65    with open(output_path, "w") as f:
66        json.dump(sorted(examples), f, indent=2)
67
68
69if __name__ == "__main__":
70    main()
71