// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2005-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  icupkg.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005jul29
*   created by: Markus W. Scherer
*
*   This tool operates on ICU data (.dat package) files.
*   It takes one as input, or creates an empty one, and can remove, add, and
*   extract data pieces according to command-line options.
*   At the same time, it swaps each piece to a consistent set of platform
*   properties as desired.
*   Useful as an install-time tool for shipping only one flavor of ICU data
*   and preparing data files for the target platform.
*   Also for customizing ICU data (pruning, augmenting, replacing) and for
*   taking it apart.
*   Subsumes functionality and implementation code from
*   gencmn, decmn, and icuswap tools.
*   Will not work with data DLLs (shared libraries).
*/
31 
32 #include "unicode/utypes.h"
33 #include "unicode/putil.h"
34 #include "cstring.h"
35 #include "toolutil.h"
36 #include "uoptions.h"
37 #include "uparse.h"
38 #include "filestrm.h"
39 #include "package.h"
40 #include "pkg_icu.h"
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 
46 U_NAMESPACE_USE
47 
48 // TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching?
49 
50 // general definitions ----------------------------------------------------- ***
51 
52 // main() ------------------------------------------------------------------ ***
53 
54 static void
printUsage(const char * pname,UBool isHelp)55 printUsage(const char *pname, UBool isHelp) {
56     FILE *where=isHelp ? stdout : stderr;
57 
58     fprintf(where,
59             "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n"
60             "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n"
61             "\t[-s path] [-d path] [-w] [-m mode]\n"
62             "\t[--ignore-deps]\n"
63             "\t[--auto_toc_prefix] [--auto_toc_prefix_with_type] [--toc_prefix]\n"
64             "\tinfilename [outfilename]\n",
65             isHelp ? 'U' : 'u', pname);
66     if(isHelp) {
67         fprintf(where,
68             "\n"
69             "Read the input ICU .dat package file, modify it according to the options,\n"
70             "swap it to the desired platform properties (charset & endianness),\n"
71             "and optionally write the resulting ICU .dat package to the output file.\n"
72             "Items are removed, then added, then extracted and listed.\n"
73             "An ICU .dat package is written if items are removed or added,\n"
74             "or if the input and output filenames differ,\n"
75             "or if the --writepkg (-w) option is set.\n");
76         fprintf(where,
77             "\n"
78             "If the input filename is \"new\" then an empty package is created.\n"
79             "If the output filename is missing, then it is automatically generated\n"
80             "from the input filename: If the input filename ends with an l, b, or e\n"
81             "matching its platform properties, then the output filename will\n"
82             "contain the letter from the -t (--type) option.\n");
83         fprintf(where,
84             "\n"
85             "This tool can also be used to just swap a single ICU data file, replacing the\n"
86             "former icuswap tool. For this mode, provide the infilename (and optional\n"
87             "outfilename) for a non-package ICU data file.\n"
88             "Allowed options include -t, -w, -s and -d.\n"
89             "The filenames can be absolute, or relative to the source/dest dir paths.\n"
90             "Other options are not allowed in this mode.\n");
91         fprintf(where,
92             "\n"
93             "Options:\n"
94             "\t(Only the last occurrence of an option is used.)\n"
95             "\n"
96             "\t-h or -? or --help    print this message and exit\n");
97         fprintf(where,
98             "\n"
99             "\t-tl or --type l   output for little-endian/ASCII charset family\n"
100             "\t-tb or --type b   output for big-endian/ASCII charset family\n"
101             "\t-te or --type e   output for big-endian/EBCDIC charset family\n"
102             "\t                  The output type defaults to the input type.\n"
103             "\n"
104             "\t-c or --copyright include the ICU copyright notice\n"
105             "\t-C comment or --comment comment   include a comment string\n");
106         fprintf(where,
107             "\n"
108             "\t-a list or --add list      add items to the package\n"
109             "\t-r list or --remove list   remove items from the package\n"
110             "\t-x list or --extract list  extract items from the package\n"
111             "\tThe list can be a single item's filename,\n"
112             "\tor a .txt filename with a list of item filenames,\n"
113             "\tor an ICU .dat package filename.\n");
114         fprintf(where,
115             "\n"
116             "\t-w or --writepkg  write the output package even if no items are removed\n"
117             "\t                  or added (e.g., for only swapping the data)\n");
118         fprintf(where,
119             "\n"
120             "\t-m mode or --matchmode mode  set the matching mode for item names with\n"
121             "\t                             wildcards\n"
122             "\t        noslash: the '*' wildcard does not match the '/' tree separator\n");
123         fprintf(where,
124             "\n"
125             "\t--ignore-deps     Do not fail if not all resource dependencies are met. Use this\n"
126             "\t                  option if the missing resources come from another source.");
127         fprintf(where,
128             "\n"
129             "\tIn the .dat package, the Table of Contents (ToC) contains an entry\n"
130             "\tfor each item of the form prefix/tree/itemname .\n"
131             "\tThe prefix normally matches the package basename, and icupkg checks that,\n"
132             "\tbut this is not necessary when ICU need not find and load the package by filename.\n"
133             "\tICU package names end with the platform type letter, and thus differ\n"
134             "\tbetween platform types. This is not required for user data packages.\n");
135         fprintf(where,
136             "\n"
137             "\t--auto_toc_prefix            automatic ToC entries prefix\n"
138             "\t                             Uses the prefix of the first entry of the\n"
139             "\t                             input package, rather than its basename.\n"
140             "\t                             Requires a non-empty input package.\n"
141             "\t--auto_toc_prefix_with_type  auto_toc_prefix + adjust platform type\n"
142             "\t                             Same as auto_toc_prefix but also checks that\n"
143             "\t                             the prefix ends with the input platform\n"
144             "\t                             type letter, and modifies it to the output\n"
145             "\t                             platform type letter.\n"
146             "\t                At most one of the auto_toc_prefix options\n"
147             "\t                can be used at a time.\n"
148             "\t--toc_prefix prefix          ToC prefix to be used in the output package\n"
149             "\t                             Overrides the package basename\n"
150             "\t                             and --auto_toc_prefix.\n"
151             "\t                             Cannot be combined with --auto_toc_prefix_with_type.\n");
152         /*
153          * Usage text columns, starting after the initial TAB.
154          *      1         2         3         4         5         6         7         8
155          *     901234567890123456789012345678901234567890123456789012345678901234567890
156          */
157         fprintf(where,
158             "\n"
159             "\tList file syntax: Items are listed on one or more lines and separated\n"
160             "\tby whitespace (space+tab).\n"
161             "\tComments begin with # and are ignored. Empty lines are ignored.\n"
162             "\tLines where the first non-whitespace character is one of %s\n"
163             "\tare also ignored, to reserve for future syntax.\n",
164             U_PKG_RESERVED_CHARS);
165         fprintf(where,
166             "\tItems for removal or extraction may contain a single '*' wildcard\n"
167             "\tcharacter. The '*' matches zero or more characters.\n"
168             "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n"
169             "\tdoes not match '/'.\n");
170         fprintf(where,
171             "\n"
172             "\tItems must be listed relative to the package, and the --sourcedir or\n"
173             "\tthe --destdir path will be prepended.\n"
174             "\tThe paths are only prepended to item filenames while adding or\n"
175             "\textracting items, not to ICU .dat package or list filenames.\n"
176             "\t\n"
177             "\tPaths may contain '/' instead of the platform's\n"
178             "\tfile separator character, and are converted as appropriate.\n");
179         fprintf(where,
180             "\n"
181             "\t-s path or --sourcedir path  directory for the --add items\n"
182             "\t-d path or --destdir path    directory for the --extract items\n"
183             "\n"
184             "\t-l or --list                 list the package items\n"
185             "\t                             (after modifying the package)\n"
186             "\t                             to stdout or to output list file\n"
187             "\t-o path or --outlist path    path/filename for the --list output\n");
188     }
189 }
190 
191 static UOption options[]={
192     UOPTION_HELP_H,
193     UOPTION_HELP_QUESTION_MARK,
194     UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG),
195 
196     UOPTION_COPYRIGHT,
197     UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG),
198 
199     UOPTION_SOURCEDIR,
200     UOPTION_DESTDIR,
201 
202     UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG),
203 
204     UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG),
205 
206     UOPTION_DEF("ignore-deps", '\1', UOPT_NO_ARG),
207 
208     UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG),
209     UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG),
210     UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG),
211 
212     UOPTION_DEF("list", 'l', UOPT_NO_ARG),
213     UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG),
214 
215     UOPTION_DEF("auto_toc_prefix", '\1', UOPT_NO_ARG),
216     UOPTION_DEF("auto_toc_prefix_with_type", '\1', UOPT_NO_ARG),
217     UOPTION_DEF("toc_prefix", '\1', UOPT_REQUIRES_ARG)
218 };
219 
/*
 * Indexes into the options[] table, one per command-line option.
 * The enumerators must be listed in the same order as the options[] entries;
 * OPT_COUNT is the total number of options.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_OUT_TYPE,

    OPT_COPYRIGHT,
    OPT_COMMENT,

    OPT_SOURCEDIR,
    OPT_DESTDIR,

    OPT_WRITEPKG,

    OPT_MATCHMODE,

    OPT_IGNORE_DEPS,

    OPT_ADD_LIST,
    OPT_REMOVE_LIST,
    OPT_EXTRACT_LIST,

    OPT_LIST_ITEMS,
    OPT_LIST_FILE,

    OPT_AUTO_TOC_PREFIX,
    OPT_AUTO_TOC_PREFIX_WITH_TYPE,
    OPT_TOC_PREFIX,

    OPT_COUNT
};
250 
251 static UBool
isPackageName(const char * filename)252 isPackageName(const char *filename) {
253     int32_t len;
254 
255     len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */
256     return (UBool)(len>0 && 0==strcmp(filename+len, ".dat"));
257 }
/*
 * This definition is required for MinGW, which incorrectly globs the
 * command-line arguments: when \* is used, it is turned into a list of files
 * instead of being passed on as a literal "*".
 * Setting _CRT_glob to 0 is meant to turn that expansion off.
 */
int _CRT_glob = 0;
263 
264 extern int
main(int argc,char * argv[])265 main(int argc, char *argv[]) {
266     const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment;
267     char outType;
268     UBool isHelp, isModified, isPackage;
269     int result = 0;
270 
271     Package *pkg, *listPkg, *addListPkg;
272 
273     U_MAIN_INIT_ARGS(argc, argv);
274 
275     /* get the program basename */
276     pname=findBasename(argv[0]);
277 
278     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
279     isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
280     if(isHelp) {
281         printUsage(pname, true);
282         return U_ZERO_ERROR;
283     }
284 
285     pkg=new Package;
286     if(pkg==nullptr) {
287         fprintf(stderr, "icupkg: not enough memory\n");
288         return U_MEMORY_ALLOCATION_ERROR;
289     }
290     isModified=false;
291 
292     int autoPrefix=0;
293     if(options[OPT_AUTO_TOC_PREFIX].doesOccur) {
294         pkg->setAutoPrefix();
295         ++autoPrefix;
296     }
297     if(options[OPT_AUTO_TOC_PREFIX_WITH_TYPE].doesOccur) {
298         if(options[OPT_TOC_PREFIX].doesOccur) {
299             fprintf(stderr, "icupkg: --auto_toc_prefix_with_type and also --toc_prefix\n");
300             printUsage(pname, false);
301             return U_ILLEGAL_ARGUMENT_ERROR;
302         }
303         pkg->setAutoPrefixWithType();
304         ++autoPrefix;
305     }
306     if(argc<2 || 3<argc || autoPrefix>1) {
307         printUsage(pname, false);
308         return U_ILLEGAL_ARGUMENT_ERROR;
309     }
310 
311     if(options[OPT_SOURCEDIR].doesOccur) {
312         sourcePath=options[OPT_SOURCEDIR].value;
313     } else {
314         // work relative to the current working directory
315         sourcePath=nullptr;
316     }
317     if(options[OPT_DESTDIR].doesOccur) {
318         destPath=options[OPT_DESTDIR].value;
319     } else {
320         // work relative to the current working directory
321         destPath=nullptr;
322     }
323 
324     if(0==strcmp(argv[1], "new")) {
325         if(autoPrefix) {
326             fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but no input package\n");
327             printUsage(pname, false);
328             return U_ILLEGAL_ARGUMENT_ERROR;
329         }
330         inFilename=nullptr;
331         isPackage=true;
332     } else {
333         inFilename=argv[1];
334         if(isPackageName(inFilename)) {
335             pkg->readPackage(inFilename);
336             isPackage=true;
337         } else {
338             /* swap a single file (icuswap replacement) rather than work on a package */
339             pkg->addFile(sourcePath, inFilename);
340             isPackage=false;
341         }
342     }
343 
344     if(argc>=3) {
345         outFilename=argv[2];
346         if(0!=strcmp(argv[1], argv[2])) {
347             isModified=true;
348         }
349     } else if(isPackage) {
350         outFilename=nullptr;
351     } else /* !isPackage */ {
352         outFilename=inFilename;
353         isModified=(UBool)(sourcePath!=destPath);
354     }
355 
356     /* parse the output type option */
357     if(options[OPT_OUT_TYPE].doesOccur) {
358         const char *type=options[OPT_OUT_TYPE].value;
359         if(type[0]==0 || type[1]!=0) {
360             /* the type must be exactly one letter */
361             printUsage(pname, false);
362             return U_ILLEGAL_ARGUMENT_ERROR;
363         }
364         outType=type[0];
365         switch(outType) {
366         case 'l':
367         case 'b':
368         case 'e':
369             break;
370         default:
371             printUsage(pname, false);
372             return U_ILLEGAL_ARGUMENT_ERROR;
373         }
374 
375         /*
376          * Set the isModified flag if the output type differs from the
377          * input package type.
378          * If we swap a single file, just assume that we are modifying it.
379          * The Package class does not give us access to the item and its type.
380          */
381         isModified|=(UBool)(!isPackage || outType!=pkg->getInType());
382     } else if(isPackage) {
383         outType=pkg->getInType(); // default to input type
384     } else /* !isPackage: swap single file */ {
385         outType=0; /* tells extractItem() to not swap */
386     }
387 
388     if(options[OPT_WRITEPKG].doesOccur) {
389         isModified=true;
390     }
391 
392     if(!isPackage) {
393         /*
394          * icuswap tool replacement: Only swap a single file.
395          * Check that irrelevant options are not set.
396          */
397         if( options[OPT_COMMENT].doesOccur ||
398             options[OPT_COPYRIGHT].doesOccur ||
399             options[OPT_MATCHMODE].doesOccur ||
400             options[OPT_REMOVE_LIST].doesOccur ||
401             options[OPT_ADD_LIST].doesOccur ||
402             options[OPT_EXTRACT_LIST].doesOccur ||
403             options[OPT_LIST_ITEMS].doesOccur
404         ) {
405             printUsage(pname, false);
406             return U_ILLEGAL_ARGUMENT_ERROR;
407         }
408         if(isModified) {
409             pkg->extractItem(destPath, outFilename, 0, outType);
410         }
411 
412         delete pkg;
413         return result;
414     }
415 
416     /* Work with a package. */
417 
418     if(options[OPT_COMMENT].doesOccur) {
419         outComment=options[OPT_COMMENT].value;
420     } else if(options[OPT_COPYRIGHT].doesOccur) {
421         outComment=U_COPYRIGHT_STRING;
422     } else {
423         outComment=nullptr;
424     }
425 
426     if(options[OPT_MATCHMODE].doesOccur) {
427         if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) {
428             pkg->setMatchMode(Package::MATCH_NOSLASH);
429         } else {
430             printUsage(pname, false);
431             return U_ILLEGAL_ARGUMENT_ERROR;
432         }
433     }
434 
435     /* remove items */
436     if(options[OPT_REMOVE_LIST].doesOccur) {
437         listPkg=new Package();
438         if(listPkg==nullptr) {
439             fprintf(stderr, "icupkg: not enough memory\n");
440             exit(U_MEMORY_ALLOCATION_ERROR);
441         }
442         if(readList(nullptr, options[OPT_REMOVE_LIST].value, false, listPkg)) {
443             pkg->removeItems(*listPkg);
444             delete listPkg;
445             isModified=true;
446         } else {
447             printUsage(pname, false);
448             return U_ILLEGAL_ARGUMENT_ERROR;
449         }
450     }
451 
452     /*
453      * add items
454      * use a separate Package so that its memory and items stay around
455      * as long as the main Package
456      */
457     addListPkg=nullptr;
458     if(options[OPT_ADD_LIST].doesOccur) {
459         addListPkg=new Package();
460         if(addListPkg==nullptr) {
461             fprintf(stderr, "icupkg: not enough memory\n");
462             exit(U_MEMORY_ALLOCATION_ERROR);
463         }
464         if(readList(sourcePath, options[OPT_ADD_LIST].value, true, addListPkg)) {
465             pkg->addItems(*addListPkg);
466             // delete addListPkg; deferred until after writePackage()
467             isModified=true;
468         } else {
469             printUsage(pname, false);
470             return U_ILLEGAL_ARGUMENT_ERROR;
471         }
472     }
473 
474     /* extract items */
475     if(options[OPT_EXTRACT_LIST].doesOccur) {
476         listPkg=new Package();
477         if(listPkg==nullptr) {
478             fprintf(stderr, "icupkg: not enough memory\n");
479             exit(U_MEMORY_ALLOCATION_ERROR);
480         }
481         if(readList(nullptr, options[OPT_EXTRACT_LIST].value, false, listPkg)) {
482             pkg->extractItems(destPath, *listPkg, outType);
483             delete listPkg;
484         } else {
485             printUsage(pname, false);
486             return U_ILLEGAL_ARGUMENT_ERROR;
487         }
488     }
489 
490     /* list items */
491     if(options[OPT_LIST_ITEMS].doesOccur) {
492         int32_t i;
493         if (options[OPT_LIST_FILE].doesOccur) {
494             FileStream *out;
495             out = T_FileStream_open(options[OPT_LIST_FILE].value, "w");
496             if (out != nullptr) {
497                 for(i=0; i<pkg->getItemCount(); ++i) {
498                     T_FileStream_writeLine(out, pkg->getItem(i)->name);
499                     T_FileStream_writeLine(out, "\n");
500                 }
501                 T_FileStream_close(out);
502             } else {
503                 return U_ILLEGAL_ARGUMENT_ERROR;
504             }
505         } else {
506             for(i=0; i<pkg->getItemCount(); ++i) {
507                 fprintf(stdout, "%s\n", pkg->getItem(i)->name);
508             }
509         }
510     }
511 
512     /* check dependencies between items */
513     if(!options[OPT_IGNORE_DEPS].doesOccur && !pkg->checkDependencies()) {
514         /* some dependencies are not fulfilled */
515         return U_MISSING_RESOURCE_ERROR;
516     }
517 
518     /* write the output .dat package if there are any modifications */
519     if(isModified) {
520         char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary
521 
522         if(outFilename==nullptr || outFilename[0]==0) {
523             if(inFilename==nullptr || inFilename[0]==0) {
524                 fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n");
525                 exit(U_ILLEGAL_ARGUMENT_ERROR);
526             }
527 
528             /*
529              * auto-generate a filename:
530              * copy the inFilename,
531              * and if the last basename character matches the input file's type,
532              * then replace it with the output file's type
533              */
534             char suffix[6]="?.dat";
535             char *s;
536 
537             suffix[0]=pkg->getInType();
538             strcpy(outFilenameBuffer, inFilename);
539             s=strchr(outFilenameBuffer, 0);
540             if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) {
541                 *(s-5)=outType;
542             }
543             outFilename=outFilenameBuffer;
544         }
545         if(options[OPT_TOC_PREFIX].doesOccur) {
546             pkg->setPrefix(options[OPT_TOC_PREFIX].value);
547         }
548         result = writePackageDatFile(outFilename, outComment, nullptr, nullptr, pkg, outType);
549     }
550 
551     delete addListPkg;
552     delete pkg;
553     return result;
554 }
555 
556 /*
557  * Hey, Emacs, please set the following:
558  *
559  * Local Variables:
560  * indent-tabs-mode: nil
561  * End:
562  *
563  */
564