• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright (C) 1998, 1999 Tom Tromey
3# Copyright (C) 2001 Red Hat Software
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2, or (at your option)
8# any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, see <http://www.gnu.org/licenses/>.
17
"""
gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.
See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html

Usage:
    gen-casefold-txt.py UNICODE-VERSION CaseFolding.txt

I consider the output of this program to be unrestricted.
Use it as you will.
"""
25
26import sys
27import argparse
28
29
def _parse_casefold_line(line):
    """Turn one line of CaseFolding.txt into a ``(source, folded)`` pair.

    Args:
        line: A raw line of the form ``<code>; <status>; <mapping>; # <name>``
            where ``<code>`` and each item of ``<mapping>`` are hexadecimal
            code points.

    Returns:
        A ``(source, folded)`` tuple of strings, or ``None`` when the line
        is blank, holds only a comment, or has status ``S`` or ``T``.

    Raises:
        SystemExit: If the entry has fewer than three ``;``-separated fields.
        ValueError: If a code point field is not valid hexadecimal.
    """
    # Everything from the first "#" onwards is a comment.
    content = line.split("#", 1)[0].strip()
    if not content:
        return None

    # Only the first three fields matter; the trailing ";" of a regular
    # entry yields an empty fourth piece that is discarded here.
    fields = [field.strip() for field in content.split(";", 3)[:3]]
    if len(fields) != 3:
        raise SystemExit(
            "Entry for %s has wrong number of fields (%d)"
            % (fields[0], len(fields))
        )
    code, status, mapping = fields

    # Skip simple and Turkic mappings.  Compare against the exact status
    # values: a substring test (``status in "ST"``) would also silently
    # drop entries whose status field is empty.
    if status in ("S", "T"):
        return None

    source = chr(int(code, 16))
    folded = "".join(chr(int(value, 16)) for value in mapping.split())
    return source, folded


def main(argv):
    """Print casefolding test cases generated from a CaseFolding.txt file.

    A fixed header (including one hand-crafted test) is written to standard
    output, followed by one ``<character>TAB<folded string>`` line for every
    entry of the data file whose status is neither ``S`` nor ``T``.

    Args:
        argv: The full argument vector, program name first.  The two
            positional arguments are the Unicode version (only echoed in
            the header) and the path to CaseFolding.txt.

    Returns:
        None.

    Raises:
        SystemExit: On invalid command-line arguments (raised by argparse)
            or when a data entry has the wrong number of fields.
    """
    parser = argparse.ArgumentParser(
        description="Generate test cases for casefolding from Unicode data"
    )
    parser.add_argument("UNICODE-VERSION")
    parser.add_argument("CaseFolding.txt")
    args = parser.parse_args(argv[1:])
    # The destination names are not valid identifiers, hence getattr().
    version = getattr(args, "UNICODE-VERSION")
    filename = getattr(args, "CaseFolding.txt")

    print(
        """\
# Test cases generated from Unicode {} data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#""".format(
            version
        )
    )

    with open(filename, encoding="utf-8") as fileobj:
        for line in fileobj:
            pair = _parse_casefold_line(line)
            if pair is None:
                continue
            print("{}\t{}".format(*pair))
81
if __name__ == "__main__":
    # Executed as a script: pass the whole argument vector to main() and
    # use whatever it returns as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
84