// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 1999-2016, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: derb.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2000sep6 * created by: Vladimir Weinstein as an ICU workshop example * maintained by: Yves Arrouye <yves@realnames.com> */ #include "unicode/stringpiece.h" #include "unicode/ucnv.h" #include "unicode/unistr.h" #include "unicode/ustring.h" #include "unicode/putil.h" #include "unicode/ustdio.h" #include "charstr.h" #include "uresimp.h" #include "cmemory.h" #include "cstring.h" #include "uoptions.h" #include "toolutil.h" #include "ustrfmt.h" #if !UCONFIG_NO_FORMATTING #define DERB_VERSION "1.1" #define DERB_DEFAULT_TRUNC 80 static const int32_t indentsize = 4; static int32_t truncsize = DERB_DEFAULT_TRUNC; static UBool opt_truncate = FALSE; static const char *getEncodingName(const char *encoding); static void reportError(const char *pname, UErrorCode *status, const char *when); static UChar *quotedString(const UChar *string); static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status); static void printString(UFILE *out, const UChar *str, int32_t len); static void printCString(UFILE *out, const char *str, int32_t len); static void printIndent(UFILE *out, int32_t indent); static void printHex(UFILE *out, uint8_t what); static UOption options[]={ UOPTION_HELP_H, UOPTION_HELP_QUESTION_MARK, /* 2 */ UOPTION_ENCODING, /* 3 */ { "to-stdout", NULL, NULL, NULL, 'c', UOPT_NO_ARG, 0 } , /* 4 */ { "truncate", NULL, NULL, NULL, 't', UOPT_OPTIONAL_ARG, 0 }, /* 5 */ UOPTION_VERBOSE, /* 6 */ UOPTION_DESTDIR, /* 7 */ UOPTION_SOURCEDIR, /* 8 */ { "bom", NULL, NULL, NULL, 0, UOPT_NO_ARG, 0 }, /* 9 */ UOPTION_ICUDATADIR, /* 10 */ UOPTION_VERSION, /* 11 */ { "suppressAliases", NULL, NULL, NULL, 'A', UOPT_NO_ARG, 0 }, }; static UBool verbose = FALSE; static UBool suppressAliases = FALSE; static UFILE *ustderr = NULL; extern int main(int argc, char* argv[]) { const char *encoding = NULL; const char *outputDir = NULL; /* NULL = no output directory, use current */ const char *inputDir = "."; int tostdout = 0; int prbom = 0; const char *pname; UResourceBundle *bundle = NULL; int32_t i = 0; const char* arg; /* Get the name of tool. */ pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (!pname) { pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR); } #endif if (!pname) { pname = *argv; } else { ++pname; } /* error handling, printing usage message */ argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "%s: error in command line argument \"%s\"\n", pname, argv[-argc]); } if(argc<0 || options[0].doesOccur || options[1].doesOccur) { fprintf(argc < 0 ? stderr : stdout, "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n" " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n" " [ -t, --truncate [ size ] ]\n" " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n" " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n" " [ -A, --suppressAliases]\n" " bundle ...\n", argc < 0 ? 'u' : 'U', pname); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[10].doesOccur) { fprintf(stderr, "%s version %s (ICU version %s).\n" "%s\n", pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); return U_ZERO_ERROR; } if(options[2].doesOccur) { encoding = options[2].value; } if (options[3].doesOccur) { if(options[2].doesOccur) { fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname); return 3; } tostdout = 1; } if(options[4].doesOccur) { opt_truncate = TRUE; if(options[4].value != NULL) { truncsize = atoi(options[4].value); /* user defined printable size */ } else { truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */ } } else { opt_truncate = FALSE; } if(options[5].doesOccur) { verbose = TRUE; } if (options[6].doesOccur) { outputDir = options[6].value; } if(options[7].doesOccur) { inputDir = options[7].value; /* we'll use users resources */ } if (options[8].doesOccur) { prbom = 1; } if (options[9].doesOccur) { u_setDataDirectory(options[9].value); } if (options[11].doesOccur) { suppressAliases = TRUE; } fflush(stderr); // use ustderr now. ustderr = u_finit(stderr, NULL, NULL); for (i = 1; i < argc; ++i) { static const UChar sp[] = { 0x0020 }; /* " " */ arg = getLongPathname(argv[i]); if (verbose) { u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]); } icu::CharString locale; UErrorCode status = U_ZERO_ERROR; { const char *p = findBasename(arg); const char *q = uprv_strrchr(p, '.'); if (q == NULL) { locale.append(p, status); } else { locale.append(p, (int32_t)(q - p), status); } } if (U_FAILURE(status)) { return status; } icu::CharString infile; const char *thename = NULL; UBool fromICUData = !uprv_strcmp(inputDir, "-"); if (!fromICUData) { UBool absfilename = *arg == U_FILE_SEP_CHAR; #if U_PLATFORM_HAS_WIN32_API if (!absfilename) { absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0]) && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR); } #endif if (absfilename) { thename = arg; } else { const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR); #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR if (q == NULL) { q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); } #endif infile.append(inputDir, status); if(q != NULL) { infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status); } if (U_FAILURE(status)) { return status; } thename = infile.data(); } } if (thename) { bundle = ures_openDirect(thename, locale.data(), &status); } else { bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status); } if (U_SUCCESS(status)) { UFILE *out = NULL; const char *filename = 0; const char *ext = 0; if (locale.isEmpty() || !tostdout) { filename = findBasename(arg); ext = uprv_strrchr(filename, '.'); if (!ext) { ext = uprv_strchr(filename, 0); } } if (tostdout) { out = u_get_stdout(); } else { icu::CharString thefile; if (outputDir) { thefile.append(outputDir, status); } thefile.appendPathPart(filename, status); if (*ext) { thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext)); } thefile.append(".txt", status); if (U_FAILURE(status)) { return status; } out = u_fopen(thefile.data(), "w", NULL, encoding); if (!out) { u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data()); u_fclose(ustderr); return 4; } } // now, set the callback. ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status); if (U_FAILURE(status)) { u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname); u_fclose(ustderr); if(!tostdout) { u_fclose(out); } return 3; } if (prbom) { /* XXX: Should be done only for UTFs */ u_fputc(0xFEFF, out); } u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName())); u_fprintf(out, "// This file was dumped by derb(8) from "); if (thename) { u_fprintf(out, "%s", thename); } else if (fromICUData) { u_fprintf(out, "the ICU internal %s locale", locale.data()); } u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n"); if (!locale.isEmpty()) { u_fprintf(out, "%s", locale.data()); } else { u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp); } printOutBundle(out, bundle, 0, pname, &status); if (!tostdout) { u_fclose(out); } } else { reportError(pname, &status, "opening resource file"); } ures_close(bundle); } return 0; } static UChar *quotedString(const UChar *string) { int len = u_strlen(string); int alen = len; const UChar *sp; UChar *newstr, *np; for (sp = string; *sp; ++sp) { switch (*sp) { case '\n': case 0x0022: ++alen; break; } } newstr = (UChar *) uprv_malloc((1 + alen) * U_SIZEOF_UCHAR); for (sp = string, np = newstr; *sp; ++sp) { switch (*sp) { case '\n': *np++ = 0x005C; *np++ = 0x006E; break; case 0x0022: *np++ = 0x005C; U_FALLTHROUGH; default: *np++ = *sp; break; } } *np = 0; return newstr; } static void printString(UFILE *out, const UChar *str, int32_t len) { u_file_write(str, len, out); } static void printCString(UFILE *out, const char *str, int32_t len) { if(len==-1) { u_fprintf(out, "%s", str); } else { u_fprintf(out, "%.*s", len, str); } } static void printIndent(UFILE *out, int32_t indent) { icu::UnicodeString inchar(indent, 0x20, indent); printString(out, inchar.getBuffer(), indent); } static void printHex(UFILE *out, uint8_t what) { static const char map[] = "0123456789ABCDEF"; UChar hex[2]; hex[0] = map[what >> 4]; hex[1] = map[what & 0xf]; printString(out, hex, 2); } static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) { static const UChar cr[] = { 0xA }; // LF int32_t len = 0; const UChar* thestr = res_getAlias(&(parent->fResData), r, &len); UChar *string = quotedString(thestr); if(opt_truncate && len > truncsize) { char msg[128]; printIndent(out, indent); sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n", (long)len, (long)truncsize/2); printCString(out, msg, -1); len = truncsize; } if(U_SUCCESS(*status)) { static const UChar openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */ static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */ printIndent(out, indent); if(key != NULL) { printCString(out, key, -1); } printString(out, openStr, UPRV_LENGTHOF(openStr)); printString(out, string, len); printString(out, closeStr, UPRV_LENGTHOF(closeStr)); if(verbose) { printCString(out, " // ALIAS", -1); } printString(out, cr, UPRV_LENGTHOF(cr)); } else { reportError(pname, status, "getting binary value"); } uprv_free(string); } static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status) { static const UChar cr[] = { 0xA }; // LF /* int32_t noOfElements = ures_getSize(resource);*/ int32_t i = 0; const char *key = ures_getKey(resource); switch(ures_getType(resource)) { case URES_STRING : { int32_t len=0; const UChar* thestr = ures_getString(resource, &len, status); UChar *string = quotedString(thestr); /* TODO: String truncation */ if(opt_truncate && len > truncsize) { char msg[128]; printIndent(out, indent); sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n", (long)len, (long)(truncsize/2)); printCString(out, msg, -1); len = truncsize/2; } printIndent(out, indent); if(key != NULL) { static const UChar openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */ static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */ printCString(out, key, (int32_t)uprv_strlen(key)); printString(out, openStr, UPRV_LENGTHOF(openStr)); printString(out, string, len); printString(out, closeStr, UPRV_LENGTHOF(closeStr)); } else { static const UChar openStr[] = { 0x0022 }; /* "\"" */ static const UChar closeStr[] = { 0x0022, 0x002C }; /* "\"," */ printString(out, openStr, UPRV_LENGTHOF(openStr)); printString(out, string, (int32_t)(u_strlen(string))); printString(out, closeStr, UPRV_LENGTHOF(closeStr)); } if(verbose) { printCString(out, "// STRING", -1); } printString(out, cr, UPRV_LENGTHOF(cr)); uprv_free(string); } break; case URES_INT : { static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */ static const UChar closeStr[] = { 0x0020, 0x007D }; /* " }" */ UChar num[20]; printIndent(out, indent); if(key != NULL) { printCString(out, key, -1); } printString(out, openStr, UPRV_LENGTHOF(openStr)); uprv_itou(num, 20, ures_getInt(resource, status), 10, 0); printString(out, num, u_strlen(num)); printString(out, closeStr, UPRV_LENGTHOF(closeStr)); if(verbose) { printCString(out, "// INT", -1); } printString(out, cr, UPRV_LENGTHOF(cr)); break; } case URES_BINARY : { int32_t len = 0; const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status); if(opt_truncate && len > truncsize) { char msg[128]; printIndent(out, indent); sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n", (long)len, (long)(truncsize/2)); printCString(out, msg, -1); len = truncsize; } if(U_SUCCESS(*status)) { static const UChar openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */ static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */ printIndent(out, indent); if(key != NULL) { printCString(out, key, -1); } printString(out, openStr, UPRV_LENGTHOF(openStr)); for(i = 0; i<len; i++) { printHex(out, *data++); } printString(out, closeStr, UPRV_LENGTHOF(closeStr)); if(verbose) { printCString(out, " // BINARY", -1); } printString(out, cr, UPRV_LENGTHOF(cr)); } else { reportError(pname, status, "getting binary value"); } } break; case URES_INT_VECTOR : { int32_t len = 0; const int32_t *data = ures_getIntVector(resource, &len, status); if(U_SUCCESS(*status)) { static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */ static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */ UChar num[20]; printIndent(out, indent); if(key != NULL) { printCString(out, key, -1); } printString(out, openStr, UPRV_LENGTHOF(openStr)); for(i = 0; i < len - 1; i++) { int32_t numLen = uprv_itou(num, 20, data[i], 10, 0); num[numLen++] = 0x002C; /* ',' */ num[numLen++] = 0x0020; /* ' ' */ num[numLen] = 0; printString(out, num, u_strlen(num)); } if(len > 0) { uprv_itou(num, 20, data[len - 1], 10, 0); printString(out, num, u_strlen(num)); } printString(out, closeStr, UPRV_LENGTHOF(closeStr)); if(verbose) { printCString(out, "// INTVECTOR", -1); } printString(out, cr, UPRV_LENGTHOF(cr)); } else { reportError(pname, status, "getting int vector"); } } break; case URES_TABLE : case URES_ARRAY : { static const UChar openStr[] = { 0x007B }; /* "{" */ static const UChar closeStr[] = { 0x007D, '\n' }; /* "}\n" */ UResourceBundle *t = NULL; ures_resetIterator(resource); printIndent(out, indent); if(key != NULL) { printCString(out, key, -1); } printString(out, openStr, UPRV_LENGTHOF(openStr)); if(verbose) { if(ures_getType(resource) == URES_TABLE) { printCString(out, "// TABLE", -1); } else { printCString(out, "// ARRAY", -1); } } printString(out, cr, UPRV_LENGTHOF(cr)); if(suppressAliases == FALSE) { while(U_SUCCESS(*status) && ures_hasNext(resource)) { t = ures_getNextResource(resource, t, status); if(U_SUCCESS(*status)) { printOutBundle(out, t, indent+indentsize, pname, status); } else { reportError(pname, status, "While processing table"); *status = U_ZERO_ERROR; } } } else { /* we have to use low level access to do this */ Resource r; int32_t resSize = ures_getSize(resource); UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE); for(i = 0; i < resSize; i++) { /* need to know if it's an alias */ if(isTable) { r = res_getTableItemByIndex(&resource->fResData, resource->fRes, i, &key); } else { r = res_getArrayItem(&resource->fResData, resource->fRes, i); } if(U_SUCCESS(*status)) { if(res_getPublicType(r) == URES_ALIAS) { printOutAlias(out, resource, r, key, indent+indentsize, pname, status); } else { t = ures_getByIndex(resource, i, t, status); printOutBundle(out, t, indent+indentsize, pname, status); } } else { reportError(pname, status, "While processing table"); *status = U_ZERO_ERROR; } } } printIndent(out, indent); printString(out, closeStr, UPRV_LENGTHOF(closeStr)); ures_close(t); } break; default: break; } } static const char *getEncodingName(const char *encoding) { UErrorCode err; const char *enc; err = U_ZERO_ERROR; if (!(enc = ucnv_getStandardName(encoding, "MIME", &err))) { err = U_ZERO_ERROR; if (!(enc = ucnv_getStandardName(encoding, "IANA", &err))) { // do nothing } } return enc; } static void reportError(const char *pname, UErrorCode *status, const char *when) { u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status)); } #else extern int main(int argc, char* argv[]) { /* Changing stdio.h ustdio.h requires that formatting not be disabled. */ return 3; } #endif /* !UCONFIG_NO_FORMATTING */ /* * Local Variables: * indent-tabs-mode: nil * End: */