#! /usr/bin/python

# PCRE2 UNICODE PROPERTY SUPPORT
# ------------------------------

# This script generates the pcre2_ucp.h file from Unicode data files. This
# header uses enumerations to give names to Unicode property types and script
# names.

# This script was created in December 2021 as part of the Unicode data
# generation refactoring.


# Import common data lists and functions

from GenerateCommon import (
    bidi_classes,
    bool_properties,
    bool_props_list_item_size,
    break_properties,
    category_names,
    general_category_names,
    script_list_item_size,
    script_names,
    open_output,
)

# Open the output file (no return on failure). This call also writes standard
# header boilerplate.

f = open_output("pcre2_ucp.h")

# Everything from here on writes to f. The try/finally guarantees the file is
# closed even if a write fails or one of the data lists is malformed (for
# example a name/comment list with an odd number of entries).
#
# NOTE(review): the spacing inside the emitted string literals is reproduced
# exactly as it appeared in the reviewed text - confirm it against the
# checked-in pcre2_ucp.h before regenerating.

try:
    # Output this file's heading text

    f.write("""\
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.

IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
an update. */
\n""")

    # General categories: one enumerator per list entry.

    f.write("/* These are the general character categories. */\n\nenum {\n")
    for name in general_category_names:
        f.write(" ucp_%s,\n" % name)
    f.write("};\n\n")

    # Particular categories: the list is consumed two entries at a time, the
    # first becoming the enumerator name and the second its trailing comment.

    f.write("/* These are the particular character categories. */\n\nenum {\n")
    for idx in range(0, len(category_names), 2):
        f.write(" ucp_%s, /* %s */\n" % (category_names[idx], category_names[idx + 1]))
    f.write("};\n\n")

    # Boolean properties, followed by a count enumerator that must stay last.

    f.write("/* These are Boolean properties. */\n\nenum {\n")
    for name in bool_properties:
        f.write(" ucp_%s,\n" % name)

    f.write(" /* This must be last */\n")
    f.write(" ucp_Bprop_Count\n};\n\n")

    f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
    f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size)

    # Bidi classes: (name, comment) pairs again. The padding lines the comments
    # up for names of up to 4 characters; longer names simply get no padding.

    f.write("/* These are the bidi class values. */\n\nenum {\n")
    for idx in range(0, len(bidi_classes), 2):
        pad = ' ' * (4 - len(bidi_classes[idx]))
        f.write(" ucp_bidi%s,%s /* %s */\n" % (bidi_classes[idx], pad, bidi_classes[idx + 1]))
    f.write("};\n\n")

    # Grapheme break properties: same pair layout, padded for names of up to
    # 21 characters.

    f.write("/* These are grapheme break properties. The Extended Pictographic "
            "property\ncomes from the emoji-data.txt file. */\n\nenum {\n")
    for idx in range(0, len(break_properties), 2):
        pad = ' ' * (21 - len(break_properties[idx]))
        f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[idx], pad, break_properties[idx + 1]))
    f.write("};\n\n")

    # Scripts: a second section comment is emitted immediately before the
    # "Unknown" entry, so entries from "Unknown" onwards fall under it.

    f.write("/* These are the script identifications. */\n\nenum {\n /* Scripts which has characters in other scripts. */\n")
    for name in script_names:
        if name == "Unknown":
            f.write("\n /* Scripts which has no characters in other scripts. */\n")
        f.write(" ucp_%s,\n" % name)
    f.write("\n")

    f.write(" /* This must be last */\n")
    f.write(" ucp_Script_Count\n};\n\n")

    f.write("/* Size of entries in ucd_script_sets[] */\n\n")
    f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size)

    f.write("#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n")
    f.write("/* End of pcre2_ucp.h */\n")
finally:
    f.close()

# End