1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: icupkg.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005jul29
16 * created by: Markus W. Scherer
17 *
18 * This tool operates on ICU data (.dat package) files.
19 * It takes one as input, or creates an empty one, and can remove, add, and
20 * extract data pieces according to command-line options.
21 * At the same time, it swaps each piece to a consistent set of platform
22 * properties as desired.
23 * Useful as an install-time tool for shipping only one flavor of ICU data
24 * and preparing data files for the target platform.
25 * Also for customizing ICU data (pruning, augmenting, replacing) and for
26 * taking it apart.
27 * Subsumes functionality and implementation code from
28 * gencmn, decmn, and icuswap tools.
29 * Will not work with data DLLs (shared libraries).
30 */
31
32 #include "unicode/utypes.h"
33 #include "unicode/putil.h"
34 #include "cstring.h"
35 #include "toolutil.h"
36 #include "uoptions.h"
37 #include "uparse.h"
38 #include "filestrm.h"
39 #include "package.h"
40 #include "pkg_icu.h"
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45
46 U_NAMESPACE_USE
47
48 // TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching?
49
50 // general definitions ----------------------------------------------------- ***
51
52 // main() ------------------------------------------------------------------ ***
53
54 static void
printUsage(const char * pname,UBool isHelp)55 printUsage(const char *pname, UBool isHelp) {
56 FILE *where=isHelp ? stdout : stderr;
57
58 fprintf(where,
59 "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n"
60 "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n"
61 "\t[-s path] [-d path] [-w] [-m mode]\n"
62 "\t[--ignore-deps]\n"
63 "\t[--auto_toc_prefix] [--auto_toc_prefix_with_type] [--toc_prefix]\n"
64 "\tinfilename [outfilename]\n",
65 isHelp ? 'U' : 'u', pname);
66 if(isHelp) {
67 fprintf(where,
68 "\n"
69 "Read the input ICU .dat package file, modify it according to the options,\n"
70 "swap it to the desired platform properties (charset & endianness),\n"
71 "and optionally write the resulting ICU .dat package to the output file.\n"
72 "Items are removed, then added, then extracted and listed.\n"
73 "An ICU .dat package is written if items are removed or added,\n"
74 "or if the input and output filenames differ,\n"
75 "or if the --writepkg (-w) option is set.\n");
76 fprintf(where,
77 "\n"
78 "If the input filename is \"new\" then an empty package is created.\n"
79 "If the output filename is missing, then it is automatically generated\n"
80 "from the input filename: If the input filename ends with an l, b, or e\n"
81 "matching its platform properties, then the output filename will\n"
82 "contain the letter from the -t (--type) option.\n");
83 fprintf(where,
84 "\n"
85 "This tool can also be used to just swap a single ICU data file, replacing the\n"
86 "former icuswap tool. For this mode, provide the infilename (and optional\n"
87 "outfilename) for a non-package ICU data file.\n"
88 "Allowed options include -t, -w, -s and -d.\n"
89 "The filenames can be absolute, or relative to the source/dest dir paths.\n"
90 "Other options are not allowed in this mode.\n");
91 fprintf(where,
92 "\n"
93 "Options:\n"
94 "\t(Only the last occurrence of an option is used.)\n"
95 "\n"
96 "\t-h or -? or --help print this message and exit\n");
97 fprintf(where,
98 "\n"
99 "\t-tl or --type l output for little-endian/ASCII charset family\n"
100 "\t-tb or --type b output for big-endian/ASCII charset family\n"
101 "\t-te or --type e output for big-endian/EBCDIC charset family\n"
102 "\t The output type defaults to the input type.\n"
103 "\n"
104 "\t-c or --copyright include the ICU copyright notice\n"
105 "\t-C comment or --comment comment include a comment string\n");
106 fprintf(where,
107 "\n"
108 "\t-a list or --add list add items to the package\n"
109 "\t-r list or --remove list remove items from the package\n"
110 "\t-x list or --extract list extract items from the package\n"
111 "\tThe list can be a single item's filename,\n"
112 "\tor a .txt filename with a list of item filenames,\n"
113 "\tor an ICU .dat package filename.\n");
114 fprintf(where,
115 "\n"
116 "\t-w or --writepkg write the output package even if no items are removed\n"
117 "\t or added (e.g., for only swapping the data)\n");
118 fprintf(where,
119 "\n"
120 "\t-m mode or --matchmode mode set the matching mode for item names with\n"
121 "\t wildcards\n"
122 "\t noslash: the '*' wildcard does not match the '/' tree separator\n");
123 fprintf(where,
124 "\n"
125 "\t--ignore-deps Do not fail if not all resource dependencies are met. Use this\n"
126 "\t option if the missing resources come from another source.");
127 fprintf(where,
128 "\n"
129 "\tIn the .dat package, the Table of Contents (ToC) contains an entry\n"
130 "\tfor each item of the form prefix/tree/itemname .\n"
131 "\tThe prefix normally matches the package basename, and icupkg checks that,\n"
132 "\tbut this is not necessary when ICU need not find and load the package by filename.\n"
133 "\tICU package names end with the platform type letter, and thus differ\n"
134 "\tbetween platform types. This is not required for user data packages.\n");
135 fprintf(where,
136 "\n"
137 "\t--auto_toc_prefix automatic ToC entries prefix\n"
138 "\t Uses the prefix of the first entry of the\n"
139 "\t input package, rather than its basename.\n"
140 "\t Requires a non-empty input package.\n"
141 "\t--auto_toc_prefix_with_type auto_toc_prefix + adjust platform type\n"
142 "\t Same as auto_toc_prefix but also checks that\n"
143 "\t the prefix ends with the input platform\n"
144 "\t type letter, and modifies it to the output\n"
145 "\t platform type letter.\n"
146 "\t At most one of the auto_toc_prefix options\n"
147 "\t can be used at a time.\n"
148 "\t--toc_prefix prefix ToC prefix to be used in the output package\n"
149 "\t Overrides the package basename\n"
150 "\t and --auto_toc_prefix.\n"
151 "\t Cannot be combined with --auto_toc_prefix_with_type.\n");
152 /*
153 * Usage text columns, starting after the initial TAB.
154 * 1 2 3 4 5 6 7 8
155 * 901234567890123456789012345678901234567890123456789012345678901234567890
156 */
157 fprintf(where,
158 "\n"
159 "\tList file syntax: Items are listed on one or more lines and separated\n"
160 "\tby whitespace (space+tab).\n"
161 "\tComments begin with # and are ignored. Empty lines are ignored.\n"
162 "\tLines where the first non-whitespace character is one of %s\n"
163 "\tare also ignored, to reserve for future syntax.\n",
164 U_PKG_RESERVED_CHARS);
165 fprintf(where,
166 "\tItems for removal or extraction may contain a single '*' wildcard\n"
167 "\tcharacter. The '*' matches zero or more characters.\n"
168 "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n"
169 "\tdoes not match '/'.\n");
170 fprintf(where,
171 "\n"
172 "\tItems must be listed relative to the package, and the --sourcedir or\n"
173 "\tthe --destdir path will be prepended.\n"
174 "\tThe paths are only prepended to item filenames while adding or\n"
175 "\textracting items, not to ICU .dat package or list filenames.\n"
176 "\t\n"
177 "\tPaths may contain '/' instead of the platform's\n"
178 "\tfile separator character, and are converted as appropriate.\n");
179 fprintf(where,
180 "\n"
181 "\t-s path or --sourcedir path directory for the --add items\n"
182 "\t-d path or --destdir path directory for the --extract items\n"
183 "\n"
184 "\t-l or --list list the package items\n"
185 "\t (after modifying the package)\n"
186 "\t to stdout or to output list file\n"
187 "\t-o path or --outlist path path/filename for the --list output\n");
188 }
189 }
190
191 static UOption options[]={
192 UOPTION_HELP_H,
193 UOPTION_HELP_QUESTION_MARK,
194 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG),
195
196 UOPTION_COPYRIGHT,
197 UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG),
198
199 UOPTION_SOURCEDIR,
200 UOPTION_DESTDIR,
201
202 UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG),
203
204 UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG),
205
206 UOPTION_DEF("ignore-deps", '\1', UOPT_NO_ARG),
207
208 UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG),
209 UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG),
210 UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG),
211
212 UOPTION_DEF("list", 'l', UOPT_NO_ARG),
213 UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG),
214
215 UOPTION_DEF("auto_toc_prefix", '\1', UOPT_NO_ARG),
216 UOPTION_DEF("auto_toc_prefix_with_type", '\1', UOPT_NO_ARG),
217 UOPTION_DEF("toc_prefix", '\1', UOPT_REQUIRES_ARG)
218 };
219
220 enum {
221 OPT_HELP_H,
222 OPT_HELP_QUESTION_MARK,
223 OPT_OUT_TYPE,
224
225 OPT_COPYRIGHT,
226 OPT_COMMENT,
227
228 OPT_SOURCEDIR,
229 OPT_DESTDIR,
230
231 OPT_WRITEPKG,
232
233 OPT_MATCHMODE,
234
235 OPT_IGNORE_DEPS,
236
237 OPT_ADD_LIST,
238 OPT_REMOVE_LIST,
239 OPT_EXTRACT_LIST,
240
241 OPT_LIST_ITEMS,
242 OPT_LIST_FILE,
243
244 OPT_AUTO_TOC_PREFIX,
245 OPT_AUTO_TOC_PREFIX_WITH_TYPE,
246 OPT_TOC_PREFIX,
247
248 OPT_COUNT
249 };
250
251 static UBool
isPackageName(const char * filename)252 isPackageName(const char *filename) {
253 int32_t len;
254
255 len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */
256 return (UBool)(len>0 && 0==strcmp(filename+len, ".dat"));
257 }
/*
 * This definition is required by MinGW because it incorrectly globs the
 * command-line arguments: when \* is used, it turns into a list of files
 * instead of a literal "*".
 * (Presumably the value 0 switches that globbing off - confirm against the
 * MinGW runtime documentation.)
 */
int _CRT_glob = 0;
263
264 extern int
main(int argc,char * argv[])265 main(int argc, char *argv[]) {
266 const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment;
267 char outType;
268 UBool isHelp, isModified, isPackage;
269 int result = 0;
270
271 Package *pkg, *listPkg, *addListPkg;
272
273 U_MAIN_INIT_ARGS(argc, argv);
274
275 /* get the program basename */
276 pname=findBasename(argv[0]);
277
278 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
279 isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
280 if(isHelp) {
281 printUsage(pname, true);
282 return U_ZERO_ERROR;
283 }
284
285 pkg=new Package;
286 if(pkg==nullptr) {
287 fprintf(stderr, "icupkg: not enough memory\n");
288 return U_MEMORY_ALLOCATION_ERROR;
289 }
290 isModified=false;
291
292 int autoPrefix=0;
293 if(options[OPT_AUTO_TOC_PREFIX].doesOccur) {
294 pkg->setAutoPrefix();
295 ++autoPrefix;
296 }
297 if(options[OPT_AUTO_TOC_PREFIX_WITH_TYPE].doesOccur) {
298 if(options[OPT_TOC_PREFIX].doesOccur) {
299 fprintf(stderr, "icupkg: --auto_toc_prefix_with_type and also --toc_prefix\n");
300 printUsage(pname, false);
301 return U_ILLEGAL_ARGUMENT_ERROR;
302 }
303 pkg->setAutoPrefixWithType();
304 ++autoPrefix;
305 }
306 if(argc<2 || 3<argc || autoPrefix>1) {
307 printUsage(pname, false);
308 return U_ILLEGAL_ARGUMENT_ERROR;
309 }
310
311 if(options[OPT_SOURCEDIR].doesOccur) {
312 sourcePath=options[OPT_SOURCEDIR].value;
313 } else {
314 // work relative to the current working directory
315 sourcePath=nullptr;
316 }
317 if(options[OPT_DESTDIR].doesOccur) {
318 destPath=options[OPT_DESTDIR].value;
319 } else {
320 // work relative to the current working directory
321 destPath=nullptr;
322 }
323
324 if(0==strcmp(argv[1], "new")) {
325 if(autoPrefix) {
326 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but no input package\n");
327 printUsage(pname, false);
328 return U_ILLEGAL_ARGUMENT_ERROR;
329 }
330 inFilename=nullptr;
331 isPackage=true;
332 } else {
333 inFilename=argv[1];
334 if(isPackageName(inFilename)) {
335 pkg->readPackage(inFilename);
336 isPackage=true;
337 } else {
338 /* swap a single file (icuswap replacement) rather than work on a package */
339 pkg->addFile(sourcePath, inFilename);
340 isPackage=false;
341 }
342 }
343
344 if(argc>=3) {
345 outFilename=argv[2];
346 if(0!=strcmp(argv[1], argv[2])) {
347 isModified=true;
348 }
349 } else if(isPackage) {
350 outFilename=nullptr;
351 } else /* !isPackage */ {
352 outFilename=inFilename;
353 isModified=(UBool)(sourcePath!=destPath);
354 }
355
356 /* parse the output type option */
357 if(options[OPT_OUT_TYPE].doesOccur) {
358 const char *type=options[OPT_OUT_TYPE].value;
359 if(type[0]==0 || type[1]!=0) {
360 /* the type must be exactly one letter */
361 printUsage(pname, false);
362 return U_ILLEGAL_ARGUMENT_ERROR;
363 }
364 outType=type[0];
365 switch(outType) {
366 case 'l':
367 case 'b':
368 case 'e':
369 break;
370 default:
371 printUsage(pname, false);
372 return U_ILLEGAL_ARGUMENT_ERROR;
373 }
374
375 /*
376 * Set the isModified flag if the output type differs from the
377 * input package type.
378 * If we swap a single file, just assume that we are modifying it.
379 * The Package class does not give us access to the item and its type.
380 */
381 isModified|=(UBool)(!isPackage || outType!=pkg->getInType());
382 } else if(isPackage) {
383 outType=pkg->getInType(); // default to input type
384 } else /* !isPackage: swap single file */ {
385 outType=0; /* tells extractItem() to not swap */
386 }
387
388 if(options[OPT_WRITEPKG].doesOccur) {
389 isModified=true;
390 }
391
392 if(!isPackage) {
393 /*
394 * icuswap tool replacement: Only swap a single file.
395 * Check that irrelevant options are not set.
396 */
397 if( options[OPT_COMMENT].doesOccur ||
398 options[OPT_COPYRIGHT].doesOccur ||
399 options[OPT_MATCHMODE].doesOccur ||
400 options[OPT_REMOVE_LIST].doesOccur ||
401 options[OPT_ADD_LIST].doesOccur ||
402 options[OPT_EXTRACT_LIST].doesOccur ||
403 options[OPT_LIST_ITEMS].doesOccur
404 ) {
405 printUsage(pname, false);
406 return U_ILLEGAL_ARGUMENT_ERROR;
407 }
408 if(isModified) {
409 pkg->extractItem(destPath, outFilename, 0, outType);
410 }
411
412 delete pkg;
413 return result;
414 }
415
416 /* Work with a package. */
417
418 if(options[OPT_COMMENT].doesOccur) {
419 outComment=options[OPT_COMMENT].value;
420 } else if(options[OPT_COPYRIGHT].doesOccur) {
421 outComment=U_COPYRIGHT_STRING;
422 } else {
423 outComment=nullptr;
424 }
425
426 if(options[OPT_MATCHMODE].doesOccur) {
427 if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) {
428 pkg->setMatchMode(Package::MATCH_NOSLASH);
429 } else {
430 printUsage(pname, false);
431 return U_ILLEGAL_ARGUMENT_ERROR;
432 }
433 }
434
435 /* remove items */
436 if(options[OPT_REMOVE_LIST].doesOccur) {
437 listPkg=new Package();
438 if(listPkg==nullptr) {
439 fprintf(stderr, "icupkg: not enough memory\n");
440 exit(U_MEMORY_ALLOCATION_ERROR);
441 }
442 if(readList(nullptr, options[OPT_REMOVE_LIST].value, false, listPkg)) {
443 pkg->removeItems(*listPkg);
444 delete listPkg;
445 isModified=true;
446 } else {
447 printUsage(pname, false);
448 return U_ILLEGAL_ARGUMENT_ERROR;
449 }
450 }
451
452 /*
453 * add items
454 * use a separate Package so that its memory and items stay around
455 * as long as the main Package
456 */
457 addListPkg=nullptr;
458 if(options[OPT_ADD_LIST].doesOccur) {
459 addListPkg=new Package();
460 if(addListPkg==nullptr) {
461 fprintf(stderr, "icupkg: not enough memory\n");
462 exit(U_MEMORY_ALLOCATION_ERROR);
463 }
464 if(readList(sourcePath, options[OPT_ADD_LIST].value, true, addListPkg)) {
465 pkg->addItems(*addListPkg);
466 // delete addListPkg; deferred until after writePackage()
467 isModified=true;
468 } else {
469 printUsage(pname, false);
470 return U_ILLEGAL_ARGUMENT_ERROR;
471 }
472 }
473
474 /* extract items */
475 if(options[OPT_EXTRACT_LIST].doesOccur) {
476 listPkg=new Package();
477 if(listPkg==nullptr) {
478 fprintf(stderr, "icupkg: not enough memory\n");
479 exit(U_MEMORY_ALLOCATION_ERROR);
480 }
481 if(readList(nullptr, options[OPT_EXTRACT_LIST].value, false, listPkg)) {
482 pkg->extractItems(destPath, *listPkg, outType);
483 delete listPkg;
484 } else {
485 printUsage(pname, false);
486 return U_ILLEGAL_ARGUMENT_ERROR;
487 }
488 }
489
490 /* list items */
491 if(options[OPT_LIST_ITEMS].doesOccur) {
492 int32_t i;
493 if (options[OPT_LIST_FILE].doesOccur) {
494 FileStream *out;
495 out = T_FileStream_open(options[OPT_LIST_FILE].value, "w");
496 if (out != nullptr) {
497 for(i=0; i<pkg->getItemCount(); ++i) {
498 T_FileStream_writeLine(out, pkg->getItem(i)->name);
499 T_FileStream_writeLine(out, "\n");
500 }
501 T_FileStream_close(out);
502 } else {
503 return U_ILLEGAL_ARGUMENT_ERROR;
504 }
505 } else {
506 for(i=0; i<pkg->getItemCount(); ++i) {
507 fprintf(stdout, "%s\n", pkg->getItem(i)->name);
508 }
509 }
510 }
511
512 /* check dependencies between items */
513 if(!options[OPT_IGNORE_DEPS].doesOccur && !pkg->checkDependencies()) {
514 /* some dependencies are not fulfilled */
515 return U_MISSING_RESOURCE_ERROR;
516 }
517
518 /* write the output .dat package if there are any modifications */
519 if(isModified) {
520 char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary
521
522 if(outFilename==nullptr || outFilename[0]==0) {
523 if(inFilename==nullptr || inFilename[0]==0) {
524 fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n");
525 exit(U_ILLEGAL_ARGUMENT_ERROR);
526 }
527
528 /*
529 * auto-generate a filename:
530 * copy the inFilename,
531 * and if the last basename character matches the input file's type,
532 * then replace it with the output file's type
533 */
534 char suffix[6]="?.dat";
535 char *s;
536
537 suffix[0]=pkg->getInType();
538 strcpy(outFilenameBuffer, inFilename);
539 s=strchr(outFilenameBuffer, 0);
540 if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) {
541 *(s-5)=outType;
542 }
543 outFilename=outFilenameBuffer;
544 }
545 if(options[OPT_TOC_PREFIX].doesOccur) {
546 pkg->setPrefix(options[OPT_TOC_PREFIX].value);
547 }
548 result = writePackageDatFile(outFilename, outComment, nullptr, nullptr, pkg, outType);
549 }
550
551 delete addListPkg;
552 delete pkg;
553 return result;
554 }
555
556 /*
557 * Hey, Emacs, please set the following:
558 *
559 * Local Variables:
560 * indent-tabs-mode: nil
561 * End:
562 *
563 */
564