• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  * Copyright (c) 1999-2016, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  *
7  * Generator for source/i18n/collunsafe.h
8  * see Makefile
9  */
10 
11 #include <stdio.h>
12 #include "unicode/uversion.h"
13 #include "unicode/uniset.h"
14 #include "collationroot.h"
15 #include "collationtailoring.h"
16 
17 using icu::CollationCacheEntry;
18 using icu::CollationRoot;
19 using icu::UnicodeSet;
20 
21 /**
22  * Define the type of generator to use. Choose one by setting its macro to 1; each macro is tested with #if in main().
23  */
24 #define SERIALIZE 1   //< Default: use UnicodeSet.serialize() and a new internal c'tor
25 #define RANGES 0      //< Enumerate ranges (works, but not as fast; no support in collationdatareader.cpp)
26 #define PATTERN 0     //< Generate a UnicodeSet pattern (depends on #11891 and is probably slower; no support in collationdatareader.cpp)
27 
main(int argc,const char * argv[])28 int main(int argc, const char *argv[]) {
29     UErrorCode errorCode = U_ZERO_ERROR;
30 
31     // Get the unsafeBackwardsSet
32     const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
33     if(U_FAILURE(errorCode)) {
34       fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
35       return 1;
36     }
37     const UVersionInfo &version = rootEntry->tailoring->version;
38     const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
39     char verString[20];
40     u_versionToString(version, verString);
41     fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
42     int32_t rangeCount = unsafeBackwardSet->getRangeCount();
43 
44 #if SERIALIZE
45     fprintf(stderr, ".. serializing\n");
46     // UnicodeSet serialization
47 
48     UErrorCode preflightCode = U_ZERO_ERROR;
49     // preflight
50     int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode);
51     if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
52       fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
53       return 1;
54     }
55     uint16_t *serializedData = new uint16_t[serializedCount];
56     // serialize
57     unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
58     if(U_FAILURE(errorCode)) {
59       delete [] serializedData;
60       fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
61       return 1;
62     }
63 #endif
64 
65 #if PATTERN
66     fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
67     // attempt to use pattern
68 
69     UnicodeString pattern;
70     UnicodeSet set(*unsafeBackwardSet);
71     set.compact();
72     set.toPattern(pattern, false);
73 
74     if(U_SUCCESS(errorCode)) {
75       // This fails (bug# ?) - which is why this method was abandoned.
76 
77       // UnicodeSet usA(pattern, errorCode);
78       // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
79       // return 1;
80     }
81 
82 
83     const char16_t *buf = pattern.getBuffer();
84     int32_t needed = pattern.length();
85 
86     // print
87     {
88       char buf2[2048];
89       int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
90       buf2[len2]=0;
91       fprintf(stderr,"===\n%s\n===\n", buf2);
92     }
93 
94     const UnicodeString unsafeBackwardPattern(false, buf, needed);
95   if(U_SUCCESS(errorCode)) {
96     //UnicodeSet us(unsafeBackwardPattern, errorCode);
97     //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
98   } else {
99     fprintf(stderr, "Uset OK - \n");
100   }
101 #endif
102 
103 
104   // Generate the output file.
105 
106   printf("// collunsafe.h\n");
107   printf("// %s\n", U_COPYRIGHT_STRING);
108   printf("\n");
109   printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
110   printf("// Machine generated, do not edit.\n");
111   printf("\n");
112   printf("#ifndef COLLUNSAFE_H\n"
113          "#define COLLUNSAFE_H\n"
114          "\n"
115          "#include \"unicode/utypes.h\"\n"
116          "\n"
117          "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
118   printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
119 
120 
121 
122 #if PATTERN
123   printf("#define COLLUNSAFE_PATTERN 1\n");
124   printf("static const int32_t collunsafe_len = %d;\n", needed);
125   printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n");
126   for(int i=0;i<needed;i++) {
127     if( (i>0) && (i%8 == 0) ) {
128       printf(" // %d\n", i);
129     }
130     printf("0x%04X", buf[i]); // TODO check
131     if(i != (needed-1)) {
132       printf(", ");
133     }
134     }
135   printf(" //%d\n};\n", (needed-1));
136 #endif
137 
138 #if RANGE
139     fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
140     printf("#define COLLUNSAFE_RANGE 1\n");
141     printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
142     printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
143     for(int32_t i=0;i<rangeCount;i++) {
144       printf(" 0x%04X, 0x%04X, // %d\n",
145              unsafeBackwardSet->getRangeStart(i),
146              unsafeBackwardSet->getRangeEnd(i),
147              i);
148     }
149     printf("};\n");
150 #endif
151 
152 #if SERIALIZE
153     printf("#define COLLUNSAFE_SERIALIZE 1\n");
154     printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
155     printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
156     for(int32_t i=0;i<serializedCount;i++) {
157       if( (i>0) && (i%8 == 0) ) {
158         printf(" // %d\n", i);
159       }
160       printf("0x%04X", serializedData[i]); // TODO check
161       if(i != (serializedCount-1)) {
162         printf(", ");
163       }
164     }
165     printf("};\n");
166 #endif
167 
168     printf("#endif\n");
169     fflush(stderr);
170     fflush(stdout);
171     return(U_SUCCESS(errorCode)?0:1);
172 }
173