1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 // Copyright (C) 2008-2012 IBM Corporation and Others. All Rights Reserved.
4 #include <ostream>
5 #include "xmlout.h"
6 #include <stdio.h>
7 #include <stdlib.h>
8
9 #include "uoptions.h"
10 #include "unicode/putil.h"
11 #include "unicode/ucol.h"
12 #include "unicode/ucal.h"
13 #include "unicode/uchar.h"
14 #include "unicode/ures.h"
15 #include "unicode/udat.h"
16 #include "unicode/ustring.h"
17 #if (U_ICU_VERSION_MAJOR_NUM > 2) || ((U_ICU_VERSION_MAJOR_NUM>1)&&(U_ICU_VERSION_MINOR_NUM>5))
18 #include "unicode/uclean.h"
19 #endif
20
21
/* Program name; assigned from argv[0] in main() and printed by usageAndDie(). */
static char *progName;

/*
 * Command-line options handed to u_parseArgs() in main().
 * The trailing number on each line is that option's index in this array.
 */
static UOption options[]={
    UOPTION_HELP_H,             /* 0 */
    UOPTION_HELP_QUESTION_MARK, /* 1 */
    UOPTION_VERBOSE,            /* 2 */
    UOPTION_ICUDATADIR,         /* 3 */
    UOPTION_DESTDIR,            /* 4 */
    UOPTION_COPYRIGHT,          /* 5 */
};
31
u_errorNameShort(UErrorCode code)32 const char *u_errorNameShort(UErrorCode code) {
33 switch(code) {
34 case U_ZERO_ERROR: return "ok";
35 case U_MISSING_RESOURCE_ERROR: return "missing";
36 default: return u_errorName(code);
37 }
38 }
39
usageAndDie(int retCode)40 void usageAndDie(int retCode) {
41 printf("Usage: %s [-v] [-options] -o output-file dictionary-file\n", progName);
42 printf("\tRead in word list and write out compact trie dictionary\n"
43 "options:\n"
44 "\t-h or -? or --help this usage text\n"
45 "\t-V or --version show a version message\n"
46 "\t-c or --copyright include a copyright notice\n"
47 "\t-v or --verbose turn on verbose output\n"
48 "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
49 "\t followed by path, defaults to %s\n"
50 "\t-d or --destdir destination directory, followed by the path\n",
51 u_getDataDirectory());
52 exit (retCode);
53 }
54
55 /*U_CAPI void U_EXPORT2*/
_versionFromUString(UVersionInfo versionArray,const UChar * versionString)56 static void _versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
57 if(versionArray==NULL) {
58 return;
59 }
60
61 if(versionString!=NULL) {
62 char verchars[U_MAX_VERSION_LENGTH+1];
63 u_UCharsToChars(versionString, verchars, U_MAX_VERSION_LENGTH);
64 u_versionFromString(versionArray, verchars);
65 }
66 }
67
68 /*U_CAPI void U_EXPORT2*/
_getCLDRVersionDirect(UVersionInfo versionArray,UErrorCode * status)69 static void _getCLDRVersionDirect(UVersionInfo versionArray, UErrorCode *status) {
70 UResourceBundle *resindx;
71 resindx = ures_openDirect(NULL, "supplementalData", status);
72 if(!U_FAILURE(*status)) {
73 // fprintf(stderr, "Err: could not open res_index, %s\n", u_errorName(status));
74 // fflush(stderr);
75 // } else {
76 const UChar *cldrver;
77 int32_t len;
78 cldrver = ures_getStringByKey(resindx, "cldrVersion", &len, status);
79 if(!U_FAILURE(*status)) {
80 // fprintf(stderr, "ERR: could not load CLDRVersion key: %s\n", u_errorName(*status));
81 // fflush(stderr);
82 // } else {
83 // UVersionInfo cldrVersion;
84 _versionFromUString(versionArray, cldrver);
85 // strcpy(tmp, "type=\"cldr\" version=\"");
86 // u_versionToString(cldrVersion, tmp+strlen(tmp));
87 // strcat(tmp, "\"");
88 // XMLElement icuData(xf, "feature", tmp, TRUE);
89 }
90 ures_close(resindx);
91 }
92 }
93
94 /*U_CAPI void U_EXPORT2*/
_getCLDRVersionOld(UVersionInfo versionArray,UErrorCode * status)95 static void _getCLDRVersionOld(UVersionInfo versionArray, UErrorCode *status) {
96 UResourceBundle *resindx;
97 resindx = ures_openDirect(NULL, "res_index", status);
98 if(!U_FAILURE(*status)) {
99 // fprintf(stderr, "Err: could not open res_index, %s\n", u_errorName(status));
100 // fflush(stderr);
101 // } else {
102 const UChar *cldrver;
103 int32_t len;
104 cldrver = ures_getStringByKey(resindx, "CLDRVersion", &len, status);
105 if(!U_FAILURE(*status)) {
106 // fprintf(stderr, "ERR: could not load CLDRVersion key: %s\n", u_errorName(*status));
107 // fflush(stderr);
108 // } else {
109 // UVersionInfo cldrVersion;
110 _versionFromUString(versionArray, cldrver);
111 // strcpy(tmp, "type=\"cldr\" version=\"");
112 // u_versionToString(cldrVersion, tmp+strlen(tmp));
113 // strcat(tmp, "\"");
114 // XMLElement icuData(xf, "feature", tmp, TRUE);
115 }
116 ures_close(resindx);
117 }
118 }
119
could_open(const char * locale,char * comments)120 int could_open(const char *locale, char *comments) {
121 char tmp[200];
122 UResourceBundle *rb = NULL;
123 UErrorCode status = U_ZERO_ERROR;
124 rb = ures_open(NULL, locale, &status);
125 if(U_FAILURE(status)) {
126 sprintf(tmp, " open:%s", u_errorName(status));
127 strcat(comments, tmp);
128 return 0;
129 } else {
130 ures_close(rb);
131 sprintf(tmp, " open:%s", u_errorNameShort(status));
132 strcat(comments, tmp);
133 return 1;
134 }
135 }
col_could_open(const char * locale,char * comments)136 int col_could_open(const char *locale, char *comments) {
137 char tmp[200];
138 UCollator *rb = NULL;
139 UErrorCode status = U_ZERO_ERROR;
140 rb = ucol_open(locale, &status);
141 if(U_FAILURE(status)) {
142 sprintf(tmp, " open:%s", u_errorName(status));
143 /*strcat(comments, tmp); */
144 return 0;
145 } else {
146 ucol_close(rb);
147 sprintf(tmp, " open:%s", u_errorNameShort(status));
148 /* strcat(comments, tmp); */
149 return 1;
150 }
151 }
152
UDateFormatSymbolType_name(UDateFormatSymbolType i)153 const char *UDateFormatSymbolType_name(UDateFormatSymbolType i) {
154 switch(i) {
155 case UDAT_ERAS: return "UDAT_ERAS"; break;
156 /** The month names, for example February */
157 case UDAT_MONTHS: return "UDAT_MONTHS"; break;
158 /** The short month names, for example Feb. */
159 case UDAT_SHORT_MONTHS: return "UDAT_SHORT_MONTHS"; break;
160 /** The weekday names, for example Monday */
161 case UDAT_WEEKDAYS: return "UDAT_WEEKDAYS"; break;
162 /** The short weekday names, for example Mon. */
163 case UDAT_SHORT_WEEKDAYS: return "UDAT_SHORT_WEEKDAYS"; break;
164 /** The AM/PM names, for example AM */
165 case UDAT_AM_PMS: return "UDAT_AM_PMS"; break;
166 /** The localized characters */
167 case UDAT_LOCALIZED_CHARS: return "UDAT_LOCALIZED_CHARS"; break;
168 /** The long era names, for example Anno Domini */
169 #if U_ICU_VERSION_MAJOR_NUM>3 || U_ICU_VERSION_MAJOR_NUM>3
170 case UDAT_ERA_NAMES: return "UDAT_ERA_NAMES"; break;
171 #endif
172 #if U_ICU_VERSION_MAJOR_NUM>3 || U_ICU_VERSION_MAJOR_NUM>3
173 /** The narrow month names, for example F */
174 case UDAT_NARROW_MONTHS: return "UDAT_NARROW_MONTHS"; break;
175 /** The narrow weekday names, for example N */
176 case UDAT_NARROW_WEEKDAYS: return "UDAT_NARROW_WEEKDAYS"; break;
177 /** Standalone context versions of months */
178 case UDAT_STANDALONE_MONTHS: return "UDAT_STANDALONE_MONTHS"; break;
179 case UDAT_STANDALONE_SHORT_MONTHS: return "UDAT_STANDALONE_SHORT_MONTHS"; break;
180 case UDAT_STANDALONE_NARROW_MONTHS: return "UDAT_STANDALONE_NARROW_MONTHS"; break;
181 /** Standalone context versions of weekdays */
182 case UDAT_STANDALONE_WEEKDAYS: return "UDAT_STANDALONE_WEEKDAYS"; break;
183 case UDAT_STANDALONE_SHORT_WEEKDAYS: return "UDAT_STANDALONE_SHORT_WEEKDAYS"; break;
184 case UDAT_STANDALONE_NARROW_WEEKDAYS: return "UDAT_STANDALONE_NARROW_WEEKDAYS"; break;
185 #endif
186 #if U_ICU_VERSION_MAJOR_NUM>3 || U_ICU_VERSION_MAJOR_NUM>4
187 /** The quarters, for example 1st Quarter */
188 case UDAT_QUARTERS: return "UDAT_QUARTERS"; break;
189 /** The short quarter names, for example Q1 */
190 case UDAT_SHORT_QUARTERS: return "UDAT_SHORT_QUARTERS"; break;
191 /** Standalone context versions of quarters */
192 case UDAT_STANDALONE_QUARTERS: return "UDAT_STANDALONE_QUARTERS"; break;
193 case UDAT_STANDALONE_SHORT_QUARTERS: return "UDAT_STANDALONE_SHORT_QUARTERS"; break;
194 #endif
195 }
196 return "<Unknown>";
197 }
198
199
/*
 * Symbol types that initroot() and date() iterate over.
 * The commented-out entries are symbol types that are currently not scanned.
 */
UDateFormatSymbolType scanArray[] = {
    UDAT_ERAS,
    /** The month names, for example February */
    UDAT_MONTHS,
    /** The short month names, for example Feb. */
    UDAT_SHORT_MONTHS,
    /** The weekday names, for example Monday */
    UDAT_WEEKDAYS,
    /** The short weekday names, for example Mon. */
    UDAT_SHORT_WEEKDAYS,
    /** The AM/PM names, for example AM */
    // UDAT_AM_PMS,
    /** The localized characters */
    // UDAT_LOCALIZED_CHARS,
    /** The long era names, for example Anno Domini */
    // UDAT_ERA_NAMES,
    /** The narrow month names, for example F */
    // UDAT_NARROW_MONTHS,
};

/* Per-scanArray-entry first symbol index to query; allocated and filled by
   initroot() (1 for the weekday types, 0 otherwise) and read by date(). */
int *starts = NULL;

/* Root-locale symbol strings, indexed [scanArray index][symbol index];
   allocated and filled by initroot(). */
UChar ***rootdata = NULL;
223
initroot(UErrorCode * status)224 void initroot(UErrorCode *status) {
225 UDateFormat *fmt;
226 fmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, "root", NULL, -1,NULL,0, status);
227 rootdata = (UChar***)malloc((sizeof(scanArray)/sizeof(scanArray[0]))*sizeof(rootdata[0]));
228 starts = (int*)malloc((sizeof(scanArray)/sizeof(scanArray[0]))*sizeof(starts[0]));
229 for(int i=0;U_SUCCESS(*status)&&i<sizeof(scanArray)/sizeof(scanArray[0]);i++) {
230 int thisCount = udat_countSymbols(fmt, scanArray[i]);
231 rootdata[i]=0;
232 rootdata[i]=(UChar**)malloc(thisCount*sizeof(rootdata[i][0]));
233 switch(scanArray[i]) {
234 case UDAT_WEEKDAYS:
235 case UDAT_SHORT_WEEKDAYS:
236 starts[i]=1;
237 break;
238 default:
239 starts[i]=0;
240 }
241 for(int j=starts[i];U_SUCCESS(*status)&&j<thisCount;j++) {
242 rootdata[i][j]=(UChar*)malloc(1024);
243 int sz =
244 udat_getSymbols(fmt,
245 scanArray[i],
246 j,
247 rootdata[i][j],
248 1024,
249 status);
250 }
251 }
252 }
253
254 /* Format the date */
255 static void
date(const UChar * tz,UDateFormatStyle style,char * format,const char * locale,char * comments,UErrorCode * status)256 date(const UChar *tz,
257 UDateFormatStyle style,
258 char *format,
259 const char *locale, char *comments,
260 UErrorCode *status)
261 {
262 UChar *s = 0;
263 int32_t len = 0;
264 UDateFormat *fmt;
265 UChar uFormat[100];
266 char tmp[200];
267
268 int tc=0; // total count
269 int tf=0; // total found
270 int tl = 0;
271
272 fmt = udat_open(style, style, locale, tz, -1,NULL,0, status);
273 if ( format != NULL ) {
274 u_charsToUChars(format,uFormat,strlen(format)),
275 udat_applyPattern(fmt,FALSE,uFormat,strlen(format));
276 }
277 len = udat_format(fmt, ucal_getNow(), 0, len, 0, status);
278 if(*status == U_BUFFER_OVERFLOW_ERROR) {
279 *status = U_ZERO_ERROR;
280 s = (UChar*) malloc(sizeof(UChar) * (len+1));
281 if(s == 0) goto finish;
282 udat_format(fmt, ucal_getNow(), s, len + 1, 0, status);
283 if(U_FAILURE(*status)) goto finish;
284 }
285
286 /* print the date string */
287 //uprint(s, stdout, status);
288
289 /* print a trailing newline */
290 //printf("\n");
291 /* count bits */
292 UChar outbuf[1024];
293 for(int i=0;U_SUCCESS(*status)&&i<sizeof(scanArray)/sizeof(scanArray[0]);i++) {
294 int thisCount = udat_countSymbols(fmt, scanArray[i]);
295 tc += thisCount;
296 for(int j=starts[i];U_SUCCESS(*status)&&j<thisCount;j++) {
297 *status = U_ZERO_ERROR;
298 int sz =
299 udat_getSymbols(fmt,
300 scanArray[i],
301 j,
302 outbuf,
303 1024,
304 status);
305 if(U_SUCCESS(*status)) { tf++; tl += u_strlen(outbuf); }
306 //if(!u_strcmp(outbuf,rootdata[i][j])) {
307 if(*status != U_ZERO_ERROR) {
308 #if 0
309 fprintf(stderr, "<!-- %s: err: data %s:%d:%d is missing: %X... -->\n", locale, UDateFormatSymbolType_name(scanArray[i]), i, j, outbuf[0]);
310 #endif
311 sprintf(tmp, " missing: %s#%d-%s ", UDateFormatSymbolType_name(scanArray[i]), j, u_errorNameShort(*status));
312 *status = U_MISSING_RESOURCE_ERROR;
313 strcat(comments, tmp);
314 }
315 }
316 }
317
318 finish:
319 sprintf(tmp, " syms:%d/%d#%d:%s", tf, tc, tl, u_errorNameShort(*status));
320 strcat(comments,tmp);
321
322 udat_close(fmt);
323 free(s);
324 }
325
writeOkComments(XMLFile & xf,int ok,const char * comments,const char * locale)326 static void writeOkComments(XMLFile &xf, int ok, const char *comments, const char *locale) {
327 char tmp[2000];
328 tmp[0]=0;
329 if(ok) {
330 if(!comments||!*comments) {
331 strcpy(tmp,locale);
332 strcat(tmp, " ");
333 } else {
334 sprintf(tmp, "%s <!-- %s -->", locale, comments);
335 }
336 } else if(comments&&*comments) {
337 sprintf(tmp, "<!-- !! %s: %s -->", locale, comments);
338 }
339 if(tmp&&*tmp) {
340 xf.writeln(tmp);
341 }
342 }
343
344
could_fmt_dow(const char * locale,char * comments)345 int could_fmt_dow(const char *locale, char *comments) {
346 char tmp[200];
347 // UResourceBundle *rb = NULL;
348 UErrorCode status = U_ZERO_ERROR;
349
350 date(NULL,
351 UDAT_LONG,
352 NULL,
353 locale, comments,
354 &status);
355
356 if(U_FAILURE(status) || status != U_ZERO_ERROR) {
357
358 sprintf(tmp, " fmt:%s", u_errorNameShort(status));
359 strcat(comments, tmp);
360 return 0;
361 } else {
362 sprintf(tmp, " fmt:%s", u_errorNameShort(status));
363 strcat(comments, tmp);
364 return 1;
365 }
366 }
367
probeCapability(XMLFile & xf,const char * locale)368 void probeCapability(XMLFile& xf, const char *locale) {
369 char comments[1000];
370 int ok=1;
371 int rc =0;
372
373 //fprintf(stderr, "PROBE: %s\n", locale);
374
375 comments[0]=0;
376
377 if(!could_open(locale, comments)) {
378 ok = 0;
379 }
380
381 #if (U_ICU_VERSION_MAJOR_NUM > 2) || ((U_ICU_VERSION_MAJOR_NUM>1)&&(U_ICU_VERSION_MINOR_NUM>2))
382 if(!could_fmt_dow(locale, comments)) {
383 ok = 0;
384 }
385 #endif
386
387 writeOkComments(xf,ok, comments,locale);
388 }
389
probeColCapability(XMLFile & xf,const char * locale)390 void probeColCapability(XMLFile& xf, const char *locale) {
391 char comments[1000];
392 int ok=1;
393 int rc =0;
394 UErrorCode status = U_ZERO_ERROR;
395
396 //fprintf(stderr, "PROBE: %s\n", locale);
397
398 comments[0]=0;
399
400 if(!col_could_open(locale, comments)) {
401 ok = 0;
402 }
403
404 /*
405 if(!col_could_fmt_dow(locale, comments)) {
406 ok = 0;
407 }
408 */
409 writeOkComments(xf,ok, comments,locale);
410 }
411
main(int argc,char ** argv)412 int main (int argc, char ** argv) {
413 U_MAIN_INIT_ARGS(argc, argv);
414 progName = argv[0];
415 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
416 // const char *loc;
417
418 {
419 UErrorCode status = U_ZERO_ERROR;
420 #if (U_ICU_VERSION_MAJOR_NUM > 2) || ((U_ICU_VERSION_MAJOR_NUM>1)&&(U_ICU_VERSION_MINOR_NUM>5))
421 u_init(&status);
422 #else
423 ures_open(NULL, "en_US", &status);
424 #endif
425 fprintf(stderr, " Init: %s\n", u_errorName(status));
426
427 }
428
429 {
430 UErrorCode is = U_ZERO_ERROR;
431 #if (U_ICU_VERSION_MAJOR_NUM > 2) || ((U_ICU_VERSION_MAJOR_NUM>1)&&(U_ICU_VERSION_MINOR_NUM>2))
432 initroot(&is);
433 fprintf(stderr, "Init: %s\n", u_errorNameShort(is));
434 #endif
435 }
436
437 if(argc<0) {
438 // Unrecognized option
439 fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
440 usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
441 }
442
443 if(options[0].doesOccur || options[1].doesOccur) {
444 // -? or -h for help.
445 usageAndDie(0);
446 }
447
448
449
450 {
451 char tmp[200];
452 XMLFile xf(stdout);
453 {
454 xf.writeln("<!DOCTYPE icuInfo SYSTEM \"http://icu-project.org/dtd/icumeta.dtd\">");
455 XMLElement icuInfo(xf, "icuInfo");
456 XMLElement icuProducts(xf, "icuProducts");
457 XMLElement icuProduct(xf, "icuProduct", "type=\"icu4c\"");
458 XMLElement releases(xf, "releases");
459 sprintf(tmp, "version=\"%s\"", U_ICU_VERSION);
460 XMLElement release(xf, "release", tmp);
461
462 XMLElement capabilities(xf, "capabilities");
463 {
464 sprintf(tmp, "type=\"unicode\" version=\"%s\"",
465 U_UNICODE_VERSION);
466 XMLElement icuData(xf, "feature", tmp, TRUE);
467 }
468 {
469 UCollator *col;
470 char ucavers[200];
471 UVersionInfo vers;
472 UErrorCode status = U_ZERO_ERROR;
473 col = ucol_open("root", &status);
474 #if (U_ICU_VERSION_MAJOR_NUM>2) || ((U_ICU_VERSION_MAJOR_NUM>1)&&(U_ICU_VERSION_MINOR_NUM>7))
475 ucol_getUCAVersion(col, vers);
476 u_versionToString(vers, ucavers);
477 #else
478 strcpy(ucavers, "???");
479 #endif
480 sprintf(tmp, "type=\"uca\" version=\"%s\"",
481 ucavers);
482 XMLElement icuData(xf, "feature", tmp, TRUE);
483 ucol_close(col);
484 }
485 #if (U_ICU_VERSION_MAJOR_NUM>3) || ((U_ICU_VERSION_MAJOR_NUM > 2) && (U_ICU_VERSION_MINOR_NUM >7))
486 {
487 const char *tzvers;
488 UErrorCode status = U_ZERO_ERROR;
489 tzvers = ucal_getTZDataVersion(&status);
490 sprintf(tmp, "type=\"tz\" version=\"%s\"",
491 tzvers);
492 XMLElement icuData(xf, "feature", tmp, TRUE);
493 }
494 #endif
495 {
496 UErrorCode status = U_ZERO_ERROR;
497 UVersionInfo cldrVersion;
498 _getCLDRVersionDirect(cldrVersion, &status);
499 if(U_FAILURE(status)) {
500 UErrorCode subStatus = U_ZERO_ERROR;
501 _getCLDRVersionOld(cldrVersion, &subStatus);
502 if(U_SUCCESS(subStatus)) {
503 status = subStatus;
504 }
505 }
506 if(U_FAILURE(status)) {
507 fprintf(stderr, "Err: could not get CLDR Version, %s\n", u_errorName(status));
508 fflush(stderr);
509 } else {
510 strcpy(tmp, "type=\"cldr\" version=\"");
511 u_versionToString(cldrVersion, tmp+strlen(tmp));
512 strcat(tmp, "\"");
513 XMLElement icuData(xf, "feature", tmp, TRUE);
514 }
515 }
516 if(1) {
517 int n = uloc_countAvailable();
518 sprintf(tmp, "type=\"formatting\" total=\"%d\" version=\"%s\"",
519 n,
520 "???");
521 XMLElement icuData(xf, "feature", tmp);
522
523 // probeCapability(xf, "root");
524 for(int j=0;j<n;j++) {
525 probeCapability(xf, uloc_getAvailable(j));
526 }
527
528 }
529 if(1) {
530 int n = ucol_countAvailable();
531 sprintf(tmp, "type=\"collation\" total=\"%d\" version=\"%s\"",
532 n,
533 "???");
534 XMLElement icuData(xf, "feature", tmp);
535
536 // probeCapability(xf, "root");
537 for(int j=0;j<n;j++) {
538 probeColCapability(xf, ucol_getAvailable(j));
539 }
540
541 }
542
543 }
544 }
545
546
547 return 0;
548 }
549