#!/usr/bin/env python3
# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""
gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.
See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
Usage:
    gen-casefold-txt.py UNICODE-VERSION CaseFolding.txt
    I consider the output of this program to be unrestricted.
    Use it as you will.
"""

import sys
import argparse


# Header printed before the generated test cases; "{}" receives the Unicode
# version given on the command line.
_HEADER = """\
# Test cases generated from Unicode {} data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#"""

# Status codes of the mappings that are left out of the output
# (simple and Turkic mappings).
_SKIPPED_STATUSES = ("S", "T")


def _parse_entry(line):
    """Turn one line of the CaseFolding table into a test case.

    Args:
        line: A raw line read from the table, possibly carrying a
            ``#`` comment.

    Returns:
        A ``(code, values)`` pair of strings, where ``code`` is the source
        character and ``values`` is the concatenation of the characters it
        maps to, or ``None`` when the line is blank, holds only a comment,
        or describes a simple ("S") or Turkic ("T") mapping.

    Raises:
        SystemExit: If the line has fewer than three ``;``-separated fields.
        ValueError: If a code point is not valid hexadecimal, or is outside
            the range accepted by ``chr``.
    """
    # strip comments and skip empty lines
    content = line.split("#", 1)[0].strip()
    if not content:
        return None

    # Only the first three fields (code; status; mapping) are used.
    fields = [f.strip() for f in content.split(";", 3)[:3]]
    if len(fields) != 3:
        raise SystemExit(
            "Entry for %s has wrong number of fields (%d)"
            % (fields[0], len(fields))
        )

    code_hex, status, mapping = fields
    # Compare against the exact status codes: a substring test such as
    # `status in "ST"` would also match an empty status field.
    if status in _SKIPPED_STATUSES:
        return None

    code = chr(int(code_hex, 16))
    values = "".join(chr(int(v, 16)) for v in mapping.split())
    return code, values


def main(argv):
    """Print casefolding test cases built from a CaseFolding table.

    Writes a fixed header followed by one ``<char>\\t<folded>`` line per
    retained table entry to standard output.

    Args:
        argv: The full argument vector, program name first.  It must be
            followed by the Unicode version string and the path to the
            CaseFolding.txt file, which is read as UTF-8.

    Returns:
        None, so ``sys.exit(main(sys.argv))`` exits with status 0.

    Raises:
        SystemExit: On invalid command-line arguments (raised by argparse)
            or when a table entry has fewer than three fields.
        OSError: If the CaseFolding file cannot be opened.
    """
    parser = argparse.ArgumentParser(
        description="Generate test cases for casefolding from Unicode data"
    )
    parser.add_argument("UNICODE-VERSION")
    parser.add_argument("CaseFolding.txt")
    args = parser.parse_args(argv[1:])
    # The destination names are not valid identifiers, hence getattr().
    version = getattr(args, "UNICODE-VERSION")
    filename = getattr(args, "CaseFolding.txt")

    print(_HEADER.format(version))

    with open(filename, encoding="utf-8") as fileobj:
        for line in fileobj:
            entry = _parse_entry(line)
            if entry is None:
                continue
            code, values = entry
            print("{}\t{}".format(code, values))


if __name__ == "__main__":
    sys.exit(main(sys.argv))