• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include <iostream>
5 #include <vector>
6 #include "toolutil.h"
7 #include "uoptions.h"
8 #include "cmemory.h"
9 #include "charstr.h"
10 #include "cstring.h"
11 #include "unicode/uchar.h"
12 #include "unicode/errorcode.h"
13 #include "unicode/uniset.h"
14 #include "unicode/putil.h"
15 #include "unicode/umutablecptrie.h"
16 #include "writesrc.h"
17 
18 U_NAMESPACE_USE
19 
/*
 * Global - verbosity
 */
// Set in main() from --verbose; when true, main() notes each property for which
// it had to fall back to the long property name.
UBool VERBOSE = FALSE;
// Set in main() from --quiet; when true, prepareOutputFile() does not print
// its "Writing to:" progress line.
UBool QUIET = FALSE;

// Whether prepareOutputFile() emits a copyright header. main() overwrites this
// with the presence of --copyright, so the TRUE initializer only applies before
// option parsing.
UBool haveCopyright = TRUE;
// Trie type passed to umutablecptrie_buildImmutable() in dumpEnumeratedProperty();
// main() changes it when --trie-type is given.
UCPTrieType trieType = UCPTRIE_TYPE_SMALL;
// Output directory used by prepareOutputFile(); an empty string means the file
// name is used without a directory prefix. main() sets it from --destdir.
const char* destdir = "";
29 
handleError(ErrorCode & status,const char * context)30 void handleError(ErrorCode& status, const char* context) {
31     if (status.isFailure()) {
32         std::cerr << "Error: " << context << ": " << status.errorName() << std::endl;
33         exit(status.reset());
34     }
35 }
36 
37 class PropertyValueNameGetter : public ValueNameGetter {
38 public:
PropertyValueNameGetter(UProperty prop)39     PropertyValueNameGetter(UProperty prop) : property(prop) {}
40     ~PropertyValueNameGetter() override;
getName(uint32_t value)41     const char *getName(uint32_t value) override {
42         return u_getPropertyValueName(property, value, U_SHORT_PROPERTY_NAME);
43     }
44 
45 private:
46     UProperty property;
47 };
48 
~PropertyValueNameGetter()49 PropertyValueNameGetter::~PropertyValueNameGetter() {}
50 
dumpBinaryProperty(UProperty uproperty,FILE * f)51 void dumpBinaryProperty(UProperty uproperty, FILE* f) {
52     IcuToolErrorCode status("icuexportdata: dumpBinaryProperty");
53     const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
54     const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
55     const USet* uset = u_getBinaryPropertySet(uproperty, status);
56     handleError(status, fullPropName);
57 
58     fputs("[[binary_property]]\n", f);
59     fprintf(f, "long_name = \"%s\"\n", fullPropName);
60     if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
61     usrc_writeUnicodeSet(f, uset, UPRV_TARGET_SYNTAX_TOML);
62 }
63 
dumpEnumeratedProperty(UProperty uproperty,FILE * f)64 void dumpEnumeratedProperty(UProperty uproperty, FILE* f) {
65     IcuToolErrorCode status("icuexportdata: dumpEnumeratedProperty");
66     const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
67     const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
68     const UCPMap* umap = u_getIntPropertyMap(uproperty, status);
69     handleError(status, fullPropName);
70 
71     fputs("[[enum_property]]\n", f);
72     fprintf(f, "long_name = \"%s\"\n", fullPropName);
73     if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
74     PropertyValueNameGetter valueNameGetter(uproperty);
75     usrc_writeUCPMap(f, umap, &valueNameGetter, UPRV_TARGET_SYNTAX_TOML);
76     fputs("\n", f);
77 
78     U_ASSERT(u_getIntPropertyMinValue(uproperty) >= 0);
79     int32_t maxValue = u_getIntPropertyMaxValue(uproperty);
80     U_ASSERT(maxValue >= 0);
81     UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32;
82     if (maxValue <= 0xff) {
83         width = UCPTRIE_VALUE_BITS_8;
84     } else if (maxValue <= 0xffff) {
85         width = UCPTRIE_VALUE_BITS_16;
86     }
87     LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(umap, status));
88     LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
89         builder.getAlias(),
90         trieType,
91         width,
92         status));
93     handleError(status, fullPropName);
94 
95     fputs("[enum_property.code_point_trie]\n", f);
96     usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
97 }
98 
prepareOutputFile(const char * basename)99 FILE* prepareOutputFile(const char* basename) {
100     IcuToolErrorCode status("icuexportdata");
101     CharString outFileName;
102     if (destdir != nullptr && *destdir != 0) {
103         outFileName.append(destdir, status).ensureEndsWithFileSeparator(status);
104     }
105     outFileName.append(basename, status);
106     outFileName.append(".toml", status);
107     handleError(status, basename);
108 
109     FILE* f = fopen(outFileName.data(), "w");
110     if (f == nullptr) {
111         std::cerr << "Unable to open file: " << outFileName.data() << std::endl;
112         exit(U_FILE_ACCESS_ERROR);
113     }
114     if (!QUIET) {
115         std::cout << "Writing to: " << outFileName.data() << std::endl;
116     }
117 
118     if (haveCopyright) {
119         usrc_writeCopyrightHeader(f, "#", 2021);
120     }
121     usrc_writeFileNameGeneratedBy(f, "#", basename, "icuexportdata.cpp");
122 
123     return f;
124 }
125 
// Indexes into options[] below. main() reads options[OPT_xxx], so the order of
// these enumerators must stay identical to the order of the entries in options[].
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_MODE,
    OPT_TRIE_TYPE,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_ALL,
    OPT_INDEX,
    OPT_COPYRIGHT,
    OPT_VERBOSE,
    OPT_QUIET,

    // Number of options; keep last.
    OPT_COUNT
};

// Tool-specific option definitions. The usage text in main() lists --trie-type,
// --all and --index without a short form; presumably '\1' is the placeholder
// for "no single-letter alias" (confirm against uoptions.h).
#define UOPTION_MODE UOPTION_DEF("mode", 'm', UOPT_REQUIRES_ARG)
#define UOPTION_TRIE_TYPE UOPTION_DEF("trie-type", '\1', UOPT_REQUIRES_ARG)
#define UOPTION_ALL UOPTION_DEF("all", '\1', UOPT_NO_ARG)
#define UOPTION_INDEX UOPTION_DEF("index", '\1', UOPT_NO_ARG)

// Option table handed to u_parseArgs() in main(); entries are in the same order
// as the OPT_xxx enumerators above.
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_MODE,
    UOPTION_TRIE_TYPE,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_ALL,
    UOPTION_INDEX,
    UOPTION_COPYRIGHT,
    UOPTION_VERBOSE,
    UOPTION_QUIET,
};
160 
main(int argc,char * argv[])161 int main(int argc, char* argv[]) {
162     U_MAIN_INIT_ARGS(argc, argv);
163 
164     /* preset then read command line options */
165     options[OPT_DESTDIR].value=u_getDataDirectory();
166     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
167 
168     if(options[OPT_VERSION].doesOccur) {
169         printf("icuexportdata version %s, ICU tool to dump data files for external consumers\n",
170                U_ICU_DATA_VERSION);
171         printf("%s\n", U_COPYRIGHT_STRING);
172         exit(0);
173     }
174 
175     /* error handling, printing usage message */
176     if(argc<0) {
177         fprintf(stderr,
178             "error in command line argument \"%s\"\n",
179             argv[-argc]);
180     } else if(argc<2) {
181         argc=-1;
182     }
183 
184     /* get the options values */
185     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
186     destdir = options[OPT_DESTDIR].value;
187     VERBOSE = options[OPT_VERBOSE].doesOccur;
188     QUIET = options[OPT_QUIET].doesOccur;
189 
190     // Load list of Unicode properties
191     std::vector<const char*> propNames;
192     for (int i=1; i<argc; i++) {
193         propNames.push_back(argv[i]);
194     }
195     if (options[OPT_ALL].doesOccur) {
196         for (int i=UCHAR_BINARY_START; i<UCHAR_INT_LIMIT; i++) {
197             if (i == UCHAR_BINARY_LIMIT) {
198                 i = UCHAR_INT_START;
199             }
200             UProperty uprop = static_cast<UProperty>(i);
201             const char* propName = u_getPropertyName(uprop, U_SHORT_PROPERTY_NAME);
202             if (propName == NULL) {
203                 propName = u_getPropertyName(uprop, U_LONG_PROPERTY_NAME);
204                 if (propName != NULL && VERBOSE) {
205                     std::cerr << "Note: falling back to long name for: " << propName << std::endl;
206                 }
207             }
208             if (propName != NULL) {
209                 propNames.push_back(propName);
210             }
211         }
212     }
213 
214     if (propNames.empty()
215             || options[OPT_HELP_H].doesOccur
216             || options[OPT_HELP_QUESTION_MARK].doesOccur
217             || !options[OPT_MODE].doesOccur) {
218         FILE *stdfile=argc<0 ? stderr : stdout;
219         fprintf(stdfile,
220             "usage: %s -m uprops [-options] [--all | properties...]\n"
221             "\tdump Unicode property data to .toml files\n"
222             "options:\n"
223             "\t-h or -? or --help  this usage text\n"
224             "\t-V or --version     show a version message\n"
225             "\t-m or --mode        mode: currently only 'uprops', but more may be added\n"
226             "\t      --trie-type   set the trie type (small or fast, default small)\n"
227             "\t-d or --destdir     destination directory, followed by the path\n"
228             "\t      --all         write out all properties known to icuexportdata\n"
229             "\t      --index       write an _index.toml summarizing all data exported\n"
230             "\t-c or --copyright   include a copyright notice\n"
231             "\t-v or --verbose     Turn on verbose output\n"
232             "\t-q or --quiet       do not display warnings and progress\n",
233             argv[0]);
234         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
235     }
236 
237     const char* mode = options[OPT_MODE].value;
238     if (uprv_strcmp(mode, "uprops") != 0) {
239         fprintf(stderr, "Invalid option for --mode (must be uprops)\n");
240         return U_ILLEGAL_ARGUMENT_ERROR;
241     }
242 
243     if (options[OPT_TRIE_TYPE].doesOccur) {
244         if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) {
245             trieType = UCPTRIE_TYPE_FAST;
246         } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) {
247             trieType = UCPTRIE_TYPE_SMALL;
248         } else {
249             fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n");
250             return U_ILLEGAL_ARGUMENT_ERROR;
251         }
252     }
253 
254     for (const char* propName : propNames) {
255         UProperty propEnum = u_getPropertyEnum(propName);
256         if (propEnum == UCHAR_INVALID_CODE) {
257             std::cerr << "Error: Invalid property alias: " << propName << std::endl;
258             return U_ILLEGAL_ARGUMENT_ERROR;
259         }
260 
261         FILE* f = prepareOutputFile(propName);
262 
263         UVersionInfo versionInfo;
264         u_getUnicodeVersion(versionInfo);
265         char uvbuf[U_MAX_VERSION_STRING_LENGTH];
266         u_versionToString(versionInfo, uvbuf);
267         fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n",
268             U_ICU_VERSION,
269             uvbuf);
270 
271         if (propEnum < UCHAR_BINARY_LIMIT) {
272             dumpBinaryProperty(propEnum, f);
273         } else if (UCHAR_INT_START <= propEnum && propEnum <= UCHAR_INT_LIMIT) {
274             dumpEnumeratedProperty(propEnum, f);
275         } else {
276             std::cerr << "Don't know how to write property: " << propEnum << std::endl;
277             return U_INTERNAL_PROGRAM_ERROR;
278         }
279 
280         fclose(f);
281     }
282 
283     if (options[OPT_INDEX].doesOccur) {
284         FILE* f = prepareOutputFile("_index");
285         fprintf(f, "index = [\n");
286         for (const char* propName : propNames) {
287             // At this point, propName is a valid property name, so it should be alphanum ASCII
288             fprintf(f, "  { filename=\"%s.toml\" },\n", propName);
289         }
290         fprintf(f, "]\n");
291         fclose(f);
292     }
293 
294     return 0;
295 }
296