1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include <iostream>
5 #include <vector>
6 #include "toolutil.h"
7 #include "uoptions.h"
8 #include "cmemory.h"
9 #include "charstr.h"
10 #include "cstring.h"
11 #include "unicode/uchar.h"
12 #include "unicode/errorcode.h"
13 #include "unicode/uniset.h"
14 #include "unicode/putil.h"
15 #include "unicode/umutablecptrie.h"
16 #include "writesrc.h"
17
18 U_NAMESPACE_USE
19
20 /*
21 * Global - verbosity
22 */
23 UBool VERBOSE = FALSE;
24 UBool QUIET = FALSE;
25
26 UBool haveCopyright = TRUE;
27 UCPTrieType trieType = UCPTRIE_TYPE_SMALL;
28 const char* destdir = "";
29
handleError(ErrorCode & status,const char * context)30 void handleError(ErrorCode& status, const char* context) {
31 if (status.isFailure()) {
32 std::cerr << "Error: " << context << ": " << status.errorName() << std::endl;
33 exit(status.reset());
34 }
35 }
36
37 class PropertyValueNameGetter : public ValueNameGetter {
38 public:
PropertyValueNameGetter(UProperty prop)39 PropertyValueNameGetter(UProperty prop) : property(prop) {}
40 ~PropertyValueNameGetter() override;
getName(uint32_t value)41 const char *getName(uint32_t value) override {
42 return u_getPropertyValueName(property, value, U_SHORT_PROPERTY_NAME);
43 }
44
45 private:
46 UProperty property;
47 };
48
~PropertyValueNameGetter()49 PropertyValueNameGetter::~PropertyValueNameGetter() {}
50
dumpBinaryProperty(UProperty uproperty,FILE * f)51 void dumpBinaryProperty(UProperty uproperty, FILE* f) {
52 IcuToolErrorCode status("icuexportdata: dumpBinaryProperty");
53 const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
54 const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
55 const USet* uset = u_getBinaryPropertySet(uproperty, status);
56 handleError(status, fullPropName);
57
58 fputs("[[binary_property]]\n", f);
59 fprintf(f, "long_name = \"%s\"\n", fullPropName);
60 if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
61 usrc_writeUnicodeSet(f, uset, UPRV_TARGET_SYNTAX_TOML);
62 }
63
dumpEnumeratedProperty(UProperty uproperty,FILE * f)64 void dumpEnumeratedProperty(UProperty uproperty, FILE* f) {
65 IcuToolErrorCode status("icuexportdata: dumpEnumeratedProperty");
66 const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
67 const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
68 const UCPMap* umap = u_getIntPropertyMap(uproperty, status);
69 handleError(status, fullPropName);
70
71 fputs("[[enum_property]]\n", f);
72 fprintf(f, "long_name = \"%s\"\n", fullPropName);
73 if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
74 PropertyValueNameGetter valueNameGetter(uproperty);
75 usrc_writeUCPMap(f, umap, &valueNameGetter, UPRV_TARGET_SYNTAX_TOML);
76 fputs("\n", f);
77
78 U_ASSERT(u_getIntPropertyMinValue(uproperty) >= 0);
79 int32_t maxValue = u_getIntPropertyMaxValue(uproperty);
80 U_ASSERT(maxValue >= 0);
81 UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32;
82 if (maxValue <= 0xff) {
83 width = UCPTRIE_VALUE_BITS_8;
84 } else if (maxValue <= 0xffff) {
85 width = UCPTRIE_VALUE_BITS_16;
86 }
87 LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(umap, status));
88 LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
89 builder.getAlias(),
90 trieType,
91 width,
92 status));
93 handleError(status, fullPropName);
94
95 fputs("[enum_property.code_point_trie]\n", f);
96 usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
97 }
98
prepareOutputFile(const char * basename)99 FILE* prepareOutputFile(const char* basename) {
100 IcuToolErrorCode status("icuexportdata");
101 CharString outFileName;
102 if (destdir != nullptr && *destdir != 0) {
103 outFileName.append(destdir, status).ensureEndsWithFileSeparator(status);
104 }
105 outFileName.append(basename, status);
106 outFileName.append(".toml", status);
107 handleError(status, basename);
108
109 FILE* f = fopen(outFileName.data(), "w");
110 if (f == nullptr) {
111 std::cerr << "Unable to open file: " << outFileName.data() << std::endl;
112 exit(U_FILE_ACCESS_ERROR);
113 }
114 if (!QUIET) {
115 std::cout << "Writing to: " << outFileName.data() << std::endl;
116 }
117
118 if (haveCopyright) {
119 usrc_writeCopyrightHeader(f, "#", 2021);
120 }
121 usrc_writeFileNameGeneratedBy(f, "#", basename, "icuexportdata.cpp");
122
123 return f;
124 }
125
126 enum {
127 OPT_HELP_H,
128 OPT_HELP_QUESTION_MARK,
129 OPT_MODE,
130 OPT_TRIE_TYPE,
131 OPT_VERSION,
132 OPT_DESTDIR,
133 OPT_ALL,
134 OPT_INDEX,
135 OPT_COPYRIGHT,
136 OPT_VERBOSE,
137 OPT_QUIET,
138
139 OPT_COUNT
140 };
141
142 #define UOPTION_MODE UOPTION_DEF("mode", 'm', UOPT_REQUIRES_ARG)
143 #define UOPTION_TRIE_TYPE UOPTION_DEF("trie-type", '\1', UOPT_REQUIRES_ARG)
144 #define UOPTION_ALL UOPTION_DEF("all", '\1', UOPT_NO_ARG)
145 #define UOPTION_INDEX UOPTION_DEF("index", '\1', UOPT_NO_ARG)
146
147 static UOption options[]={
148 UOPTION_HELP_H,
149 UOPTION_HELP_QUESTION_MARK,
150 UOPTION_MODE,
151 UOPTION_TRIE_TYPE,
152 UOPTION_VERSION,
153 UOPTION_DESTDIR,
154 UOPTION_ALL,
155 UOPTION_INDEX,
156 UOPTION_COPYRIGHT,
157 UOPTION_VERBOSE,
158 UOPTION_QUIET,
159 };
160
main(int argc,char * argv[])161 int main(int argc, char* argv[]) {
162 U_MAIN_INIT_ARGS(argc, argv);
163
164 /* preset then read command line options */
165 options[OPT_DESTDIR].value=u_getDataDirectory();
166 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
167
168 if(options[OPT_VERSION].doesOccur) {
169 printf("icuexportdata version %s, ICU tool to dump data files for external consumers\n",
170 U_ICU_DATA_VERSION);
171 printf("%s\n", U_COPYRIGHT_STRING);
172 exit(0);
173 }
174
175 /* error handling, printing usage message */
176 if(argc<0) {
177 fprintf(stderr,
178 "error in command line argument \"%s\"\n",
179 argv[-argc]);
180 } else if(argc<2) {
181 argc=-1;
182 }
183
184 /* get the options values */
185 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
186 destdir = options[OPT_DESTDIR].value;
187 VERBOSE = options[OPT_VERBOSE].doesOccur;
188 QUIET = options[OPT_QUIET].doesOccur;
189
190 // Load list of Unicode properties
191 std::vector<const char*> propNames;
192 for (int i=1; i<argc; i++) {
193 propNames.push_back(argv[i]);
194 }
195 if (options[OPT_ALL].doesOccur) {
196 for (int i=UCHAR_BINARY_START; i<UCHAR_INT_LIMIT; i++) {
197 if (i == UCHAR_BINARY_LIMIT) {
198 i = UCHAR_INT_START;
199 }
200 UProperty uprop = static_cast<UProperty>(i);
201 const char* propName = u_getPropertyName(uprop, U_SHORT_PROPERTY_NAME);
202 if (propName == NULL) {
203 propName = u_getPropertyName(uprop, U_LONG_PROPERTY_NAME);
204 if (propName != NULL && VERBOSE) {
205 std::cerr << "Note: falling back to long name for: " << propName << std::endl;
206 }
207 }
208 if (propName != NULL) {
209 propNames.push_back(propName);
210 }
211 }
212 }
213
214 if (propNames.empty()
215 || options[OPT_HELP_H].doesOccur
216 || options[OPT_HELP_QUESTION_MARK].doesOccur
217 || !options[OPT_MODE].doesOccur) {
218 FILE *stdfile=argc<0 ? stderr : stdout;
219 fprintf(stdfile,
220 "usage: %s -m uprops [-options] [--all | properties...]\n"
221 "\tdump Unicode property data to .toml files\n"
222 "options:\n"
223 "\t-h or -? or --help this usage text\n"
224 "\t-V or --version show a version message\n"
225 "\t-m or --mode mode: currently only 'uprops', but more may be added\n"
226 "\t --trie-type set the trie type (small or fast, default small)\n"
227 "\t-d or --destdir destination directory, followed by the path\n"
228 "\t --all write out all properties known to icuexportdata\n"
229 "\t --index write an _index.toml summarizing all data exported\n"
230 "\t-c or --copyright include a copyright notice\n"
231 "\t-v or --verbose Turn on verbose output\n"
232 "\t-q or --quiet do not display warnings and progress\n",
233 argv[0]);
234 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
235 }
236
237 const char* mode = options[OPT_MODE].value;
238 if (uprv_strcmp(mode, "uprops") != 0) {
239 fprintf(stderr, "Invalid option for --mode (must be uprops)\n");
240 return U_ILLEGAL_ARGUMENT_ERROR;
241 }
242
243 if (options[OPT_TRIE_TYPE].doesOccur) {
244 if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) {
245 trieType = UCPTRIE_TYPE_FAST;
246 } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) {
247 trieType = UCPTRIE_TYPE_SMALL;
248 } else {
249 fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n");
250 return U_ILLEGAL_ARGUMENT_ERROR;
251 }
252 }
253
254 for (const char* propName : propNames) {
255 UProperty propEnum = u_getPropertyEnum(propName);
256 if (propEnum == UCHAR_INVALID_CODE) {
257 std::cerr << "Error: Invalid property alias: " << propName << std::endl;
258 return U_ILLEGAL_ARGUMENT_ERROR;
259 }
260
261 FILE* f = prepareOutputFile(propName);
262
263 UVersionInfo versionInfo;
264 u_getUnicodeVersion(versionInfo);
265 char uvbuf[U_MAX_VERSION_STRING_LENGTH];
266 u_versionToString(versionInfo, uvbuf);
267 fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n",
268 U_ICU_VERSION,
269 uvbuf);
270
271 if (propEnum < UCHAR_BINARY_LIMIT) {
272 dumpBinaryProperty(propEnum, f);
273 } else if (UCHAR_INT_START <= propEnum && propEnum <= UCHAR_INT_LIMIT) {
274 dumpEnumeratedProperty(propEnum, f);
275 } else {
276 std::cerr << "Don't know how to write property: " << propEnum << std::endl;
277 return U_INTERNAL_PROGRAM_ERROR;
278 }
279
280 fclose(f);
281 }
282
283 if (options[OPT_INDEX].doesOccur) {
284 FILE* f = prepareOutputFile("_index");
285 fprintf(f, "index = [\n");
286 for (const char* propName : propNames) {
287 // At this point, propName is a valid property name, so it should be alphanum ASCII
288 fprintf(f, " { filename=\"%s.toml\" },\n", propName);
289 }
290 fprintf(f, "]\n");
291 fclose(f);
292 }
293
294 return 0;
295 }
296