1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: derb.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2000sep6
16 * created by: Vladimir Weinstein as an ICU workshop example
17 * maintained by: Yves Arrouye <yves@realnames.com>
18 */
19
20 #include "unicode/stringpiece.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/unistr.h"
23 #include "unicode/ustring.h"
24 #include "unicode/putil.h"
25 #include "unicode/ustdio.h"
26
27 #include "charstr.h"
28 #include "uresimp.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "uoptions.h"
32 #include "toolutil.h"
33 #include "ustrfmt.h"
34
35 #if !UCONFIG_NO_FORMATTING
36
37 #define DERB_VERSION "1.1"
38
39 #define DERB_DEFAULT_TRUNC 80
40
41 static const int32_t indentsize = 4;
42 static int32_t truncsize = DERB_DEFAULT_TRUNC;
43 static UBool opt_truncate = FALSE;
44
45 static const char *getEncodingName(const char *encoding);
46 static void reportError(const char *pname, UErrorCode *status, const char *when);
47 static UChar *quotedString(const UChar *string);
48 static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status);
49 static void printString(UFILE *out, const UChar *str, int32_t len);
50 static void printCString(UFILE *out, const char *str, int32_t len);
51 static void printIndent(UFILE *out, int32_t indent);
52 static void printHex(UFILE *out, uint8_t what);
53
54 static UOption options[]={
55 UOPTION_HELP_H,
56 UOPTION_HELP_QUESTION_MARK,
57 /* 2 */ UOPTION_ENCODING,
58 /* 3 */ { "to-stdout", NULL, NULL, NULL, 'c', UOPT_NO_ARG, 0 } ,
59 /* 4 */ { "truncate", NULL, NULL, NULL, 't', UOPT_OPTIONAL_ARG, 0 },
60 /* 5 */ UOPTION_VERBOSE,
61 /* 6 */ UOPTION_DESTDIR,
62 /* 7 */ UOPTION_SOURCEDIR,
63 /* 8 */ { "bom", NULL, NULL, NULL, 0, UOPT_NO_ARG, 0 },
64 /* 9 */ UOPTION_ICUDATADIR,
65 /* 10 */ UOPTION_VERSION,
66 /* 11 */ { "suppressAliases", NULL, NULL, NULL, 'A', UOPT_NO_ARG, 0 },
67 };
68
69 static UBool verbose = FALSE;
70 static UBool suppressAliases = FALSE;
71 static UFILE *ustderr = NULL;
72
73 extern int
main(int argc,char * argv[])74 main(int argc, char* argv[]) {
75 const char *encoding = NULL;
76 const char *outputDir = NULL; /* NULL = no output directory, use current */
77 const char *inputDir = ".";
78 int tostdout = 0;
79 int prbom = 0;
80
81 const char *pname;
82
83 UResourceBundle *bundle = NULL;
84 int32_t i = 0;
85
86 const char* arg;
87
88 /* Get the name of tool. */
89 pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
90 #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
91 if (!pname) {
92 pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
93 }
94 #endif
95 if (!pname) {
96 pname = *argv;
97 } else {
98 ++pname;
99 }
100
101 /* error handling, printing usage message */
102 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
103
104 /* error handling, printing usage message */
105 if(argc<0) {
106 fprintf(stderr,
107 "%s: error in command line argument \"%s\"\n", pname,
108 argv[-argc]);
109 }
110 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
111 fprintf(argc < 0 ? stderr : stdout,
112 "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
113 " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
114 " [ -t, --truncate [ size ] ]\n"
115 " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
116 " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
117 " [ -A, --suppressAliases]\n"
118 " bundle ...\n", argc < 0 ? 'u' : 'U',
119 pname);
120 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
121 }
122
123 if(options[10].doesOccur) {
124 fprintf(stderr,
125 "%s version %s (ICU version %s).\n"
126 "%s\n",
127 pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
128 return U_ZERO_ERROR;
129 }
130 if(options[2].doesOccur) {
131 encoding = options[2].value;
132 }
133
134 if (options[3].doesOccur) {
135 if(options[2].doesOccur) {
136 fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
137 return 3;
138 }
139 tostdout = 1;
140 }
141
142 if(options[4].doesOccur) {
143 opt_truncate = TRUE;
144 if(options[4].value != NULL) {
145 truncsize = atoi(options[4].value); /* user defined printable size */
146 } else {
147 truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
148 }
149 } else {
150 opt_truncate = FALSE;
151 }
152
153 if(options[5].doesOccur) {
154 verbose = TRUE;
155 }
156
157 if (options[6].doesOccur) {
158 outputDir = options[6].value;
159 }
160
161 if(options[7].doesOccur) {
162 inputDir = options[7].value; /* we'll use users resources */
163 }
164
165 if (options[8].doesOccur) {
166 prbom = 1;
167 }
168
169 if (options[9].doesOccur) {
170 u_setDataDirectory(options[9].value);
171 }
172
173 if (options[11].doesOccur) {
174 suppressAliases = TRUE;
175 }
176
177 fflush(stderr); // use ustderr now.
178 ustderr = u_finit(stderr, NULL, NULL);
179
180 for (i = 1; i < argc; ++i) {
181 static const UChar sp[] = { 0x0020 }; /* " " */
182
183 arg = getLongPathname(argv[i]);
184
185 if (verbose) {
186 u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
187 }
188
189 icu::CharString locale;
190 UErrorCode status = U_ZERO_ERROR;
191 {
192 const char *p = findBasename(arg);
193 const char *q = uprv_strrchr(p, '.');
194 if (q == NULL) {
195 locale.append(p, status);
196 } else {
197 locale.append(p, (int32_t)(q - p), status);
198 }
199 }
200 if (U_FAILURE(status)) {
201 return status;
202 }
203
204 icu::CharString infile;
205 const char *thename = NULL;
206 UBool fromICUData = !uprv_strcmp(inputDir, "-");
207 if (!fromICUData) {
208 UBool absfilename = *arg == U_FILE_SEP_CHAR;
209 #if U_PLATFORM_HAS_WIN32_API
210 if (!absfilename) {
211 absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
212 && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
213 }
214 #endif
215 if (absfilename) {
216 thename = arg;
217 } else {
218 const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
219 #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
220 if (q == NULL) {
221 q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
222 }
223 #endif
224 infile.append(inputDir, status);
225 if(q != NULL) {
226 infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
227 }
228 if (U_FAILURE(status)) {
229 return status;
230 }
231 thename = infile.data();
232 }
233 }
234 if (thename) {
235 bundle = ures_openDirect(thename, locale.data(), &status);
236 } else {
237 bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
238 }
239 if (U_SUCCESS(status)) {
240 UFILE *out = NULL;
241
242 const char *filename = 0;
243 const char *ext = 0;
244
245 if (locale.isEmpty() || !tostdout) {
246 filename = findBasename(arg);
247 ext = uprv_strrchr(filename, '.');
248 if (!ext) {
249 ext = uprv_strchr(filename, 0);
250 }
251 }
252
253 if (tostdout) {
254 out = u_get_stdout();
255 } else {
256 icu::CharString thefile;
257 if (outputDir) {
258 thefile.append(outputDir, status);
259 }
260 thefile.appendPathPart(filename, status);
261 if (*ext) {
262 thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
263 }
264 thefile.append(".txt", status);
265 if (U_FAILURE(status)) {
266 return status;
267 }
268
269 out = u_fopen(thefile.data(), "w", NULL, encoding);
270 if (!out) {
271 u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
272 u_fclose(ustderr);
273 return 4;
274 }
275 }
276
277 // now, set the callback.
278 ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
279 if (U_FAILURE(status)) {
280 u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
281 u_fclose(ustderr);
282 if(!tostdout) {
283 u_fclose(out);
284 }
285 return 3;
286 }
287
288 if (prbom) { /* XXX: Should be done only for UTFs */
289 u_fputc(0xFEFF, out);
290 }
291 u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
292 u_fprintf(out, "// This file was dumped by derb(8) from ");
293 if (thename) {
294 u_fprintf(out, "%s", thename);
295 } else if (fromICUData) {
296 u_fprintf(out, "the ICU internal %s locale", locale.data());
297 }
298
299 u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");
300
301 if (!locale.isEmpty()) {
302 u_fprintf(out, "%s", locale.data());
303 } else {
304 u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp);
305 }
306 printOutBundle(out, bundle, 0, pname, &status);
307
308 if (!tostdout) {
309 u_fclose(out);
310 }
311 }
312 else {
313 reportError(pname, &status, "opening resource file");
314 }
315
316 ures_close(bundle);
317 }
318
319 return 0;
320 }
321
/**
 * Returns a newly allocated, NUL-terminated copy of a string in which every
 * line feed is replaced by the two characters "\n" and every double quote is
 * preceded by a backslash.
 *
 * @param string NUL-terminated source string; NULL is tolerated.
 * @return the escaped copy, to be released with uprv_free(), or NULL when
 *         string is NULL or the allocation fails.
 */
static UChar *quotedString(const UChar *string) {
    const UChar *sp;
    UChar *newstr, *np;
    int32_t alen;

    if (string == NULL) {
        return NULL;
    }

    /* Each escaped character needs one extra code unit in the result. */
    alen = u_strlen(string);
    for (sp = string; *sp; ++sp) {
        switch (*sp) {
        case '\n':
        case 0x0022:
            ++alen;
            break;
        default:
            break;
        }
    }

    newstr = (UChar *) uprv_malloc((1 + alen) * U_SIZEOF_UCHAR);
    if (newstr == NULL) {
        return NULL;
    }

    for (sp = string, np = newstr; *sp; ++sp) {
        switch (*sp) {
        case '\n':
            *np++ = 0x005C; /* backslash */
            *np++ = 0x006E; /* 'n' */
            break;

        case 0x0022:
            *np++ = 0x005C; /* backslash, then the quote itself */
            U_FALLTHROUGH;
        default:
            *np++ = *sp;
            break;
        }
    }
    *np = 0;

    return newstr;
}
357
358
printString(UFILE * out,const UChar * str,int32_t len)359 static void printString(UFILE *out, const UChar *str, int32_t len) {
360 u_file_write(str, len, out);
361 }
362
printCString(UFILE * out,const char * str,int32_t len)363 static void printCString(UFILE *out, const char *str, int32_t len) {
364 if(len==-1) {
365 u_fprintf(out, "%s", str);
366 } else {
367 u_fprintf(out, "%.*s", len, str);
368 }
369 }
370
printIndent(UFILE * out,int32_t indent)371 static void printIndent(UFILE *out, int32_t indent) {
372 icu::UnicodeString inchar(indent, 0x20, indent);
373 printString(out, inchar.getBuffer(), indent);
374 }
375
printHex(UFILE * out,uint8_t what)376 static void printHex(UFILE *out, uint8_t what) {
377 static const char map[] = "0123456789ABCDEF";
378 UChar hex[2];
379
380 hex[0] = map[what >> 4];
381 hex[1] = map[what & 0xf];
382
383 printString(out, hex, 2);
384 }
385
printOutAlias(UFILE * out,UResourceBundle * parent,Resource r,const char * key,int32_t indent,const char * pname,UErrorCode * status)386 static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) {
387 static const UChar cr[] = { 0xA }; // LF
388 int32_t len = 0;
389 const UChar* thestr = res_getAlias(&(parent->fResData), r, &len);
390 UChar *string = quotedString(thestr);
391 if(opt_truncate && len > truncsize) {
392 char msg[128];
393 printIndent(out, indent);
394 sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
395 (long)len, (long)truncsize/2);
396 printCString(out, msg, -1);
397 len = truncsize;
398 }
399 if(U_SUCCESS(*status)) {
400 static const UChar openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */
401 static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */
402 printIndent(out, indent);
403 if(key != NULL) {
404 printCString(out, key, -1);
405 }
406 printString(out, openStr, UPRV_LENGTHOF(openStr));
407 printString(out, string, len);
408 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
409 if(verbose) {
410 printCString(out, " // ALIAS", -1);
411 }
412 printString(out, cr, UPRV_LENGTHOF(cr));
413 } else {
414 reportError(pname, status, "getting binary value");
415 }
416 uprv_free(string);
417 }
418
printOutBundle(UFILE * out,UResourceBundle * resource,int32_t indent,const char * pname,UErrorCode * status)419 static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status)
420 {
421 static const UChar cr[] = { 0xA }; // LF
422
423 /* int32_t noOfElements = ures_getSize(resource);*/
424 int32_t i = 0;
425 const char *key = ures_getKey(resource);
426
427 switch(ures_getType(resource)) {
428 case URES_STRING :
429 {
430 int32_t len=0;
431 const UChar* thestr = ures_getString(resource, &len, status);
432 UChar *string = quotedString(thestr);
433
434 /* TODO: String truncation */
435 if(opt_truncate && len > truncsize) {
436 char msg[128];
437 printIndent(out, indent);
438 sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
439 (long)len, (long)(truncsize/2));
440 printCString(out, msg, -1);
441 len = truncsize/2;
442 }
443 printIndent(out, indent);
444 if(key != NULL) {
445 static const UChar openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */
446 static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */
447 printCString(out, key, (int32_t)uprv_strlen(key));
448 printString(out, openStr, UPRV_LENGTHOF(openStr));
449 printString(out, string, len);
450 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
451 } else {
452 static const UChar openStr[] = { 0x0022 }; /* "\"" */
453 static const UChar closeStr[] = { 0x0022, 0x002C }; /* "\"," */
454
455 printString(out, openStr, UPRV_LENGTHOF(openStr));
456 printString(out, string, (int32_t)(u_strlen(string)));
457 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
458 }
459
460 if(verbose) {
461 printCString(out, "// STRING", -1);
462 }
463 printString(out, cr, UPRV_LENGTHOF(cr));
464
465 uprv_free(string);
466 }
467 break;
468
469 case URES_INT :
470 {
471 static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */
472 static const UChar closeStr[] = { 0x0020, 0x007D }; /* " }" */
473 UChar num[20];
474
475 printIndent(out, indent);
476 if(key != NULL) {
477 printCString(out, key, -1);
478 }
479 printString(out, openStr, UPRV_LENGTHOF(openStr));
480 uprv_itou(num, 20, ures_getInt(resource, status), 10, 0);
481 printString(out, num, u_strlen(num));
482 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
483
484 if(verbose) {
485 printCString(out, "// INT", -1);
486 }
487 printString(out, cr, UPRV_LENGTHOF(cr));
488 break;
489 }
490 case URES_BINARY :
491 {
492 int32_t len = 0;
493 const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status);
494 if(opt_truncate && len > truncsize) {
495 char msg[128];
496 printIndent(out, indent);
497 sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
498 (long)len, (long)(truncsize/2));
499 printCString(out, msg, -1);
500 len = truncsize;
501 }
502 if(U_SUCCESS(*status)) {
503 static const UChar openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */
504 static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
505 printIndent(out, indent);
506 if(key != NULL) {
507 printCString(out, key, -1);
508 }
509 printString(out, openStr, UPRV_LENGTHOF(openStr));
510 for(i = 0; i<len; i++) {
511 printHex(out, *data++);
512 }
513 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
514 if(verbose) {
515 printCString(out, " // BINARY", -1);
516 }
517 printString(out, cr, UPRV_LENGTHOF(cr));
518 } else {
519 reportError(pname, status, "getting binary value");
520 }
521 }
522 break;
523 case URES_INT_VECTOR :
524 {
525 int32_t len = 0;
526 const int32_t *data = ures_getIntVector(resource, &len, status);
527 if(U_SUCCESS(*status)) {
528 static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */
529 static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
530 UChar num[20];
531
532 printIndent(out, indent);
533 if(key != NULL) {
534 printCString(out, key, -1);
535 }
536 printString(out, openStr, UPRV_LENGTHOF(openStr));
537 for(i = 0; i < len - 1; i++) {
538 int32_t numLen = uprv_itou(num, 20, data[i], 10, 0);
539 num[numLen++] = 0x002C; /* ',' */
540 num[numLen++] = 0x0020; /* ' ' */
541 num[numLen] = 0;
542 printString(out, num, u_strlen(num));
543 }
544 if(len > 0) {
545 uprv_itou(num, 20, data[len - 1], 10, 0);
546 printString(out, num, u_strlen(num));
547 }
548 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
549 if(verbose) {
550 printCString(out, "// INTVECTOR", -1);
551 }
552 printString(out, cr, UPRV_LENGTHOF(cr));
553 } else {
554 reportError(pname, status, "getting int vector");
555 }
556 }
557 break;
558 case URES_TABLE :
559 case URES_ARRAY :
560 {
561 static const UChar openStr[] = { 0x007B }; /* "{" */
562 static const UChar closeStr[] = { 0x007D, '\n' }; /* "}\n" */
563
564 UResourceBundle *t = NULL;
565 ures_resetIterator(resource);
566 printIndent(out, indent);
567 if(key != NULL) {
568 printCString(out, key, -1);
569 }
570 printString(out, openStr, UPRV_LENGTHOF(openStr));
571 if(verbose) {
572 if(ures_getType(resource) == URES_TABLE) {
573 printCString(out, "// TABLE", -1);
574 } else {
575 printCString(out, "// ARRAY", -1);
576 }
577 }
578 printString(out, cr, UPRV_LENGTHOF(cr));
579
580 if(suppressAliases == FALSE) {
581 while(U_SUCCESS(*status) && ures_hasNext(resource)) {
582 t = ures_getNextResource(resource, t, status);
583 if(U_SUCCESS(*status)) {
584 printOutBundle(out, t, indent+indentsize, pname, status);
585 } else {
586 reportError(pname, status, "While processing table");
587 *status = U_ZERO_ERROR;
588 }
589 }
590 } else { /* we have to use low level access to do this */
591 Resource r;
592 int32_t resSize = ures_getSize(resource);
593 UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE);
594 for(i = 0; i < resSize; i++) {
595 /* need to know if it's an alias */
596 if(isTable) {
597 r = res_getTableItemByIndex(&resource->fResData, resource->fRes, i, &key);
598 } else {
599 r = res_getArrayItem(&resource->fResData, resource->fRes, i);
600 }
601 if(U_SUCCESS(*status)) {
602 if(res_getPublicType(r) == URES_ALIAS) {
603 printOutAlias(out, resource, r, key, indent+indentsize, pname, status);
604 } else {
605 t = ures_getByIndex(resource, i, t, status);
606 printOutBundle(out, t, indent+indentsize, pname, status);
607 }
608 } else {
609 reportError(pname, status, "While processing table");
610 *status = U_ZERO_ERROR;
611 }
612 }
613 }
614
615 printIndent(out, indent);
616 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
617 ures_close(t);
618 }
619 break;
620 default:
621 break;
622 }
623
624 }
625
getEncodingName(const char * encoding)626 static const char *getEncodingName(const char *encoding) {
627 UErrorCode err;
628 const char *enc;
629
630 err = U_ZERO_ERROR;
631 if (!(enc = ucnv_getStandardName(encoding, "MIME", &err))) {
632 err = U_ZERO_ERROR;
633 if (!(enc = ucnv_getStandardName(encoding, "IANA", &err))) {
634 // do nothing
635 }
636 }
637
638 return enc;
639 }
640
reportError(const char * pname,UErrorCode * status,const char * when)641 static void reportError(const char *pname, UErrorCode *status, const char *when) {
642 u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status));
643 }
644
645 #else
/* Stub used when ICU is built with formatting disabled: it does nothing and exits with status 3. */
extern int
main(int argc, char* argv[]) {
    /* Changing stdio.h ustdio.h requires that formatting not be disabled. */
    (void)argc;
    (void)argv;
    return 3;
}
651 #endif /* !UCONFIG_NO_FORMATTING */
652
653 /*
654 * Local Variables:
655 * indent-tabs-mode: nil
656 * End:
657 */
658