1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4 * Copyright (c) 1999-2016, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 *
7 * Generator for source/i18n/collunsafe.h
8 * see Makefile
9 */
10
11 #include <stdio.h>
12 #include "unicode/uversion.h"
13 #include "unicode/uniset.h"
14 #include "collationroot.h"
15 #include "collationtailoring.h"
16
17 using icu::CollationCacheEntry;
18 using icu::CollationRoot;
19 using icu::UnicodeSet;
20
/**
 * Selects the form in which the unsafe-backward set is written to the generated header.
 * Each macro is a 0/1 switch; set exactly one of them to 1.
 */
#define SERIALIZE 1 ///< Default: emit the output of UnicodeSet::serialize() (consumed through a new internal constructor).
#define RANGES 0 ///< Enumerate the set's ranges. Works, but not as fast; no support in collationdatareader.cpp.
#define PATTERN 0 ///< Emit a UnicodeSet pattern. Depends on #11891 and is probably slower; no support in collationdatareader.cpp.
27
main(int argc,const char * argv[])28 int main(int argc, const char *argv[]) {
29 UErrorCode errorCode = U_ZERO_ERROR;
30
31 // Get the unsafeBackwardsSet
32 const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
33 if(U_FAILURE(errorCode)) {
34 fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
35 return 1;
36 }
37 const UVersionInfo &version = rootEntry->tailoring->version;
38 const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
39 char verString[20];
40 u_versionToString(version, verString);
41 fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
42 int32_t rangeCount = unsafeBackwardSet->getRangeCount();
43
44 #if SERIALIZE
45 fprintf(stderr, ".. serializing\n");
46 // UnicodeSet serialization
47
48 UErrorCode preflightCode = U_ZERO_ERROR;
49 // preflight
50 int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode);
51 if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
52 fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
53 return 1;
54 }
55 uint16_t *serializedData = new uint16_t[serializedCount];
56 // serialize
57 unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
58 if(U_FAILURE(errorCode)) {
59 delete [] serializedData;
60 fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
61 return 1;
62 }
63 #endif
64
65 #if PATTERN
66 fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
67 // attempt to use pattern
68
69 UnicodeString pattern;
70 UnicodeSet set(*unsafeBackwardSet);
71 set.compact();
72 set.toPattern(pattern, false);
73
74 if(U_SUCCESS(errorCode)) {
75 // This fails (bug# ?) - which is why this method was abandoned.
76
77 // UnicodeSet usA(pattern, errorCode);
78 // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
79 // return 1;
80 }
81
82
83 const char16_t *buf = pattern.getBuffer();
84 int32_t needed = pattern.length();
85
86 // print
87 {
88 char buf2[2048];
89 int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
90 buf2[len2]=0;
91 fprintf(stderr,"===\n%s\n===\n", buf2);
92 }
93
94 const UnicodeString unsafeBackwardPattern(false, buf, needed);
95 if(U_SUCCESS(errorCode)) {
96 //UnicodeSet us(unsafeBackwardPattern, errorCode);
97 // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
98 } else {
99 fprintf(stderr, "Uset OK - \n");
100 }
101 #endif
102
103
104 // Generate the output file.
105
106 printf("// collunsafe.h\n");
107 printf("// %s\n", U_COPYRIGHT_STRING);
108 printf("\n");
109 printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
110 printf("// Machine generated, do not edit.\n");
111 printf("\n");
112 printf("#ifndef COLLUNSAFE_H\n"
113 "#define COLLUNSAFE_H\n"
114 "\n"
115 "#include \"unicode/utypes.h\"\n"
116 "\n"
117 "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
118 printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
119
120
121
122 #if PATTERN
123 printf("#define COLLUNSAFE_PATTERN 1\n");
124 printf("static const int32_t collunsafe_len = %d;\n", needed);
125 printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n");
126 for(int i=0;i<needed;i++) {
127 if( (i>0) && (i%8 == 0) ) {
128 printf(" // %d\n", i);
129 }
130 printf("0x%04X", buf[i]); // TODO check
131 if(i != (needed-1)) {
132 printf(", ");
133 }
134 }
135 printf(" //%d\n};\n", (needed-1));
136 #endif
137
138 #if RANGE
139 fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
140 printf("#define COLLUNSAFE_RANGE 1\n");
141 printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
142 printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
143 for(int32_t i=0;i<rangeCount;i++) {
144 printf(" 0x%04X, 0x%04X, // %d\n",
145 unsafeBackwardSet->getRangeStart(i),
146 unsafeBackwardSet->getRangeEnd(i),
147 i);
148 }
149 printf("};\n");
150 #endif
151
152 #if SERIALIZE
153 printf("#define COLLUNSAFE_SERIALIZE 1\n");
154 printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
155 printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
156 for(int32_t i=0;i<serializedCount;i++) {
157 if( (i>0) && (i%8 == 0) ) {
158 printf(" // %d\n", i);
159 }
160 printf("0x%04X", serializedData[i]); // TODO check
161 if(i != (serializedCount-1)) {
162 printf(", ");
163 }
164 }
165 printf("};\n");
166 #endif
167
168 printf("#endif\n");
169 fflush(stderr);
170 fflush(stdout);
171 return(U_SUCCESS(errorCode)?0:1);
172 }
173