1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: icuswap.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003aug08
16 * created by: Markus W. Scherer
17 *
18 * This tool takes an ICU data file and "swaps" it, that is, changes its
19 * platform properties between big-/little-endianness and ASCII/EBCDIC charset
20 * families.
21 * The modified data file is written to a new file.
22 * Useful as an install-time tool for shipping only one flavor of ICU data
23 * and preparing data files for the target platform.
24 * Will not work with data DLLs (shared libraries).
25 */
26
27 #include "unicode/utypes.h"
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "uinvchar.h"
33 #include "uarrsort.h"
34 #include "ucmndata.h"
35 #include "udataswp.h"
36 #include "swapimpl.h"
37 #include "toolutil.h"
38 #include "uoptions.h"
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 /* definitions */
45
46 #define DEFAULT_PADDING_LENGTH 15
47
/*
 * Command-line options understood by this tool.
 * main() indexes this array with the OPT_ enum constants,
 * so the entries here must stay in the same order as that enum.
 */
static UOption options[]={
    UOPTION_HELP_H,             /* -h / --help */
    UOPTION_HELP_QUESTION_MARK, /* -? */
    UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG) /* -t<x> / --type=<x>: output platform type */
};
53
/* Indexes into options[]; keep in the same order as the array entries. */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_OUT_TYPE
};
59
/**
 * Determine the size of an open, seekable file and rewind it.
 *
 * @param f an open stream; its position is reset to the beginning
 * @return the file size in bytes, or -1 if the stream cannot be
 *         seeked/queried or the size does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(f==NULL || fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);

    /* always try to rewind, even if ftell() failed */
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }

    /* ftell() returns -1 on error; do not silently truncate sizes above 2GB */
    if(size<0 || size>INT32_MAX) {
        return -1;
    }
    return (int32_t)size;
}
69
/**
 * Swap an ICU .dat package, including swapping of enclosed items.
 *
 * Forward declaration; the definition is at the end of this file.
 *
 * @param inFilename  input file name; the input package name is extracted from it
 * @param outFilename output file name; the output package name is extracted from it
 * @param ds          swapper describing the input and output platform properties
 * @param inData      the package data, starting with its data header
 * @param length      number of bytes at inData, or a negative value for preflighting
 * @param outData     output buffer; may be the same pointer as inData
 * @param pErrorCode  in/out ICU error code
 * @return the number of bytes of the swapped package, or 0 on failure
 */
U_CFUNC int32_t U_CALLCONV
udata_swapPackage(const char *inFilename, const char *outFilename,
                  const UDataSwapper *ds,
                  const void *inData, int32_t length, void *outData,
                  UErrorCode *pErrorCode);
78
79 U_CDECL_BEGIN
80 static void U_CALLCONV
printError(void * context,const char * fmt,va_list args)81 printError(void *context, const char *fmt, va_list args) {
82 vfprintf((FILE *)context, fmt, args);
83 }
84 U_CDECL_END
85
86 static int
printUsage(const char * pname,UBool ishelp)87 printUsage(const char *pname, UBool ishelp) {
88 fprintf(stderr,
89 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
90 ishelp ? 'U' : 'u', pname);
91 if(ishelp) {
92 fprintf(stderr,
93 "\nOptions: -h, -?, --help print this message and exit\n"
94 " Read the input file, swap its platform properties according\n"
95 " to the -t or --type option, and write the result to the output file.\n"
96 " -tl change to little-endian/ASCII charset family\n"
97 " -tb change to big-endian/ASCII charset family\n"
98 " -te change to big-endian/EBCDIC charset family\n");
99 }
100
101 return !ishelp;
102 }
103
104 extern int
main(int argc,char * argv[])105 main(int argc, char *argv[]) {
106 FILE *in, *out;
107 const char *pname;
108 char *data;
109 int32_t length;
110 UBool ishelp;
111 int rc;
112
113 UDataSwapper *ds;
114 const UDataInfo *pInfo;
115 UErrorCode errorCode;
116 uint8_t outCharset;
117 UBool outIsBigEndian;
118
119 U_MAIN_INIT_ARGS(argc, argv);
120
121 fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
122
123 /* get the program basename */
124 pname=strrchr(argv[0], U_FILE_SEP_CHAR);
125 if(pname==NULL) {
126 pname=strrchr(argv[0], '/');
127 }
128 if(pname!=NULL) {
129 ++pname;
130 } else {
131 pname=argv[0];
132 }
133
134 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
135 ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
136 if(ishelp || argc!=3) {
137 return printUsage(pname, ishelp);
138 }
139
140 /* parse the output type option */
141 data=(char *)options[OPT_OUT_TYPE].value;
142 if(data[0]==0 || data[1]!=0) {
143 /* the type must be exactly one letter */
144 return printUsage(pname, FALSE);
145 }
146 switch(data[0]) {
147 case 'l':
148 outIsBigEndian=FALSE;
149 outCharset=U_ASCII_FAMILY;
150 break;
151 case 'b':
152 outIsBigEndian=TRUE;
153 outCharset=U_ASCII_FAMILY;
154 break;
155 case 'e':
156 outIsBigEndian=TRUE;
157 outCharset=U_EBCDIC_FAMILY;
158 break;
159 default:
160 return printUsage(pname, FALSE);
161 }
162
163 in=out=NULL;
164 data=NULL;
165
166 /* open the input file, get its length, allocate memory for it, read the file */
167 in=fopen(argv[1], "rb");
168 if(in==NULL) {
169 fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
170 rc=2;
171 goto done;
172 }
173
174 length=fileSize(in);
175 if(length<DEFAULT_PADDING_LENGTH) {
176 fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
177 rc=2;
178 goto done;
179 }
180
181 /*
182 * +15: udata_swapPackage() may need to add a few padding bytes to the
183 * last item if charset swapping is done,
184 * because the last item may be resorted into the middle and then needs
185 * additional padding bytes
186 */
187 data=(char *)malloc(length+DEFAULT_PADDING_LENGTH);
188 if(data==NULL) {
189 fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
190 rc=2;
191 goto done;
192 }
193
194 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
195 uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH);
196
197 if(length!=(int32_t)fread(data, 1, length, in)) {
198 fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
199 rc=3;
200 goto done;
201 }
202
203 fclose(in);
204 in=NULL;
205
206 /* swap the data in-place */
207 errorCode=U_ZERO_ERROR;
208 ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
209 if(U_FAILURE(errorCode)) {
210 fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
211 pname, argv[1], u_errorName(errorCode));
212 rc=4;
213 goto done;
214 }
215
216 ds->printError=printError;
217 ds->printErrorContext=stderr;
218
219 /* speculative cast, protected by the following length check */
220 pInfo=(const UDataInfo *)((const char *)data+4);
221
222 if( length>=20 &&
223 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
224 pInfo->dataFormat[1]==0x6d &&
225 pInfo->dataFormat[2]==0x6e &&
226 pInfo->dataFormat[3]==0x44
227 ) {
228 /*
229 * swap the .dat package
230 * udata_swapPackage() needs to rename ToC name entries from the old package
231 * name to the new one.
232 * We pass it the filenames, and udata_swapPackage() will extract the
233 * package names.
234 */
235 length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
236 udata_closeSwapper(ds);
237 if(U_FAILURE(errorCode)) {
238 fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
239 pname, argv[1], u_errorName(errorCode));
240 rc=4;
241 goto done;
242 }
243 } else {
244 /* swap the data, which is not a .dat package */
245 length=udata_swap(ds, data, length, data, &errorCode);
246 udata_closeSwapper(ds);
247 if(U_FAILURE(errorCode)) {
248 fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
249 pname, argv[1], u_errorName(errorCode));
250 rc=4;
251 goto done;
252 }
253 }
254
255 out=fopen(argv[2], "wb");
256 if(out==NULL) {
257 fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
258 rc=5;
259 goto done;
260 }
261
262 if(length!=(int32_t)fwrite(data, 1, length, out)) {
263 fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
264 rc=6;
265 goto done;
266 }
267
268 fclose(out);
269 out=NULL;
270
271 /* all done */
272 rc=0;
273
274 done:
275 if(in!=NULL) {
276 fclose(in);
277 }
278 if(out!=NULL) {
279 fclose(out);
280 }
281 if(data!=NULL) {
282 free(data);
283 }
284 return rc;
285 }
286
287 /* swap .dat package files -------------------------------------------------- */
288
289 static int32_t
extractPackageName(const UDataSwapper * ds,const char * filename,char pkg[],int32_t capacity,UErrorCode * pErrorCode)290 extractPackageName(const UDataSwapper *ds, const char *filename,
291 char pkg[], int32_t capacity,
292 UErrorCode *pErrorCode) {
293 const char *basename;
294 int32_t len;
295
296 if(U_FAILURE(*pErrorCode)) {
297 return 0;
298 }
299
300 basename=findBasename(filename);
301 len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
302
303 if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
304 udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
305 basename);
306 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
307 return 0;
308 }
309
310 if(len>=capacity) {
311 udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
312 (long)capacity);
313 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
314 return 0;
315 }
316
317 uprv_memcpy(pkg, basename, len);
318 pkg[len]=0;
319 return len;
320 }
321
/*
 * Working copy of one package table-of-contents entry,
 * with all values in platform byte order.
 * Offsets are relative to the start of the package body,
 * that is, to the item count that follows the data header.
 */
struct ToCEntry {
    uint32_t nameOffset;    /* offset of the item name string */
    uint32_t inOffset;      /* offset of the item data in the input */
    uint32_t outOffset;     /* offset of the item data in the output */
    uint32_t length;        /* number of bytes of item data */
};
325
326 U_CDECL_BEGIN
327 static int32_t U_CALLCONV
compareToCEntries(const void * context,const void * left,const void * right)328 compareToCEntries(const void *context, const void *left, const void *right) {
329 const char *chars=(const char *)context;
330 return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset,
331 chars+((const ToCEntry *)right)->nameOffset);
332 }
333 U_CDECL_END
334
335 U_CFUNC int32_t U_CALLCONV
udata_swapPackage(const char * inFilename,const char * outFilename,const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)336 udata_swapPackage(const char *inFilename, const char *outFilename,
337 const UDataSwapper *ds,
338 const void *inData, int32_t length, void *outData,
339 UErrorCode *pErrorCode) {
340 const UDataInfo *pInfo;
341 int32_t headerSize;
342
343 const uint8_t *inBytes;
344 uint8_t *outBytes;
345
346 uint32_t itemCount, offset, i;
347 int32_t itemLength;
348
349 const UDataOffsetTOCEntry *inEntries;
350 UDataOffsetTOCEntry *outEntries;
351
352 ToCEntry *table;
353
354 char inPkgName[32], outPkgName[32];
355 int32_t inPkgNameLength, outPkgNameLength;
356
357 /* udata_swapDataHeader checks the arguments */
358 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
359 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
360 return 0;
361 }
362
363 /* check data format and format version */
364 pInfo=(const UDataInfo *)((const char *)inData+4);
365 if(!(
366 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
367 pInfo->dataFormat[1]==0x6d &&
368 pInfo->dataFormat[2]==0x6e &&
369 pInfo->dataFormat[3]==0x44 &&
370 pInfo->formatVersion[0]==1
371 )) {
372 udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
373 pInfo->dataFormat[0], pInfo->dataFormat[1],
374 pInfo->dataFormat[2], pInfo->dataFormat[3],
375 pInfo->formatVersion[0]);
376 *pErrorCode=U_UNSUPPORTED_ERROR;
377 return 0;
378 }
379
380 /*
381 * We need to change the ToC name entries so that they have the correct
382 * package name prefix.
383 * Extract the package names from the in/out filenames.
384 */
385 inPkgNameLength=extractPackageName(
386 ds, inFilename,
387 inPkgName, (int32_t)sizeof(inPkgName),
388 pErrorCode);
389 outPkgNameLength=extractPackageName(
390 ds, outFilename,
391 outPkgName, (int32_t)sizeof(outPkgName),
392 pErrorCode);
393 if(U_FAILURE(*pErrorCode)) {
394 return 0;
395 }
396
397 /*
398 * It is possible to work with inPkgNameLength!=outPkgNameLength,
399 * but then the length of the data file would change more significantly,
400 * which we are not currently prepared for.
401 */
402 if(inPkgNameLength!=outPkgNameLength) {
403 udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
404 inPkgName, outPkgName);
405 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
406 return 0;
407 }
408
409 inBytes=(const uint8_t *)inData+headerSize;
410 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
411
412 if(length<0) {
413 /* preflighting */
414 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
415 if(itemCount==0) {
416 /* no items: count only the item count and return */
417 return headerSize+4;
418 }
419
420 /* read the last item's offset and preflight it */
421 offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
422 itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);
423
424 if(U_SUCCESS(*pErrorCode)) {
425 return headerSize+offset+(uint32_t)itemLength;
426 } else {
427 return 0;
428 }
429 } else {
430 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
431 length-=headerSize;
432 if(length<4) {
433 /* itemCount does not fit */
434 offset=0xffffffff;
435 itemCount=0; /* make compilers happy */
436 } else {
437 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
438 if(itemCount==0) {
439 offset=4;
440 } else if((uint32_t)length<(4+8*itemCount)) {
441 /* ToC table does not fit */
442 offset=0xffffffff;
443 } else {
444 /* offset of the last item plus at least 20 bytes for its header */
445 offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset);
446 }
447 }
448 if((uint32_t)length<offset) {
449 udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
450 length);
451 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
452 return 0;
453 }
454
455 outBytes=(uint8_t *)outData+headerSize;
456
457 /* swap the item count */
458 ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);
459
460 if(itemCount==0) {
461 /* no items: just return now */
462 return headerSize+4;
463 }
464
465 /* swap the item name strings */
466 offset=4+8*itemCount;
467 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
468 udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
469 if(U_FAILURE(*pErrorCode)) {
470 udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
471 return 0;
472 }
473 /* keep offset and itemLength in case we allocate and copy the strings below */
474
475 /* swap the package names into the output charset */
476 if(ds->outCharset!=U_CHARSET_FAMILY) {
477 UDataSwapper *ds2;
478 ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
479 ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
480 ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
481 udata_closeSwapper(ds2);
482 if(U_FAILURE(*pErrorCode)) {
483 udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
484 }
485 }
486
487 /* change the prefix of each ToC entry name from the old to the new package name */
488 {
489 char *entryName;
490
491 for(i=0; i<itemCount; ++i) {
492 entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset);
493
494 if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
495 uprv_memcpy(entryName, outPkgName, inPkgNameLength);
496 } else {
497 udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
498 (long)i);
499 *pErrorCode=U_INVALID_FORMAT_ERROR;
500 return 0;
501 }
502 }
503 }
504
505 /*
506 * Allocate the ToC table and, if necessary, a temporary buffer for
507 * pseudo-in-place swapping.
508 *
509 * We cannot swap in-place because:
510 *
511 * 1. If the swapping of an item fails mid-way, then in-place swapping
512 * has destroyed its data.
513 * Out-of-place swapping allows us to then copy its original data.
514 *
515 * 2. If swapping changes the charset family, then we must resort
516 * not only the ToC table but also the data items themselves.
517 * This requires a permutation and is best done with separate in/out
518 * buffers.
519 *
520 * We swapped the strings above to avoid the malloc below if string swapping fails.
521 */
522 if(inData==outData) {
523 /* +15: prepare for extra padding of a newly-last item */
524 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH);
525 if(table!=NULL) {
526 outBytes=(uint8_t *)(table+itemCount);
527
528 /* copy the item count and the swapped strings */
529 uprv_memcpy(outBytes, inBytes, 4);
530 uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
531 }
532 } else {
533 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry));
534 }
535 if(table==NULL) {
536 udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n",
537 inData==outData ?
538 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH :
539 itemCount*sizeof(ToCEntry));
540 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
541 return 0;
542 }
543 outEntries=(UDataOffsetTOCEntry *)(outBytes+4);
544
545 /* read the ToC table */
546 for(i=0; i<itemCount; ++i) {
547 table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
548 table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
549 if(i>0) {
550 table[i-1].length=table[i].inOffset-table[i-1].inOffset;
551 }
552 }
553 table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;
554
555 if(ds->inCharset==ds->outCharset) {
556 /* no charset swapping, no resorting: keep item offsets the same */
557 for(i=0; i<itemCount; ++i) {
558 table[i].outOffset=table[i].inOffset;
559 }
560 } else {
561 /* charset swapping: resort items by their swapped names */
562
563 /*
564 * Before the actual sorting, we need to make sure that each item
565 * has a length that is a multiple of 16 bytes so that all items
566 * are 16-aligned.
567 * Only the old last item may be missing up to 15 padding bytes.
568 * Add padding bytes for it.
569 * Since the icuswap main() function has already allocated enough
570 * input buffer space and set the last 15 bytes there to 0xaa,
571 * we only need to increase the total data length and the length
572 * of the last item here.
573 */
574 if((length&0xf)!=0) {
575 int32_t delta=16-(length&0xf);
576 length+=delta;
577 table[itemCount-1].length+=(uint32_t)delta;
578 }
579
580 /* Save the offset before we sort the TOC. */
581 offset=table[0].inOffset;
582 /* sort the TOC entries */
583 uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
584 compareToCEntries, outBytes, FALSE, pErrorCode);
585
586 /*
587 * Note: Before sorting, the inOffset values were in order.
588 * Now the outOffset values are in order.
589 */
590
591 /* assign outOffset values */
592 for(i=0; i<itemCount; ++i) {
593 table[i].outOffset=offset;
594 offset+=table[i].length;
595 }
596 }
597
598 /* write the output ToC table */
599 for(i=0; i<itemCount; ++i) {
600 ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
601 ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
602 }
603
604 /* swap each data item */
605 for(i=0; i<itemCount; ++i) {
606 /* first copy the item bytes to make sure that unreachable bytes are copied */
607 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
608
609 /* swap the item */
610 udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
611 outBytes+table[i].outOffset, pErrorCode);
612
613 if(U_FAILURE(*pErrorCode)) {
614 if(ds->outCharset==U_CHARSET_FAMILY) {
615 udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
616 " at inOffset 0x%x length 0x%x - %s\n"
617 " the data item will be copied, not swapped\n\n",
618 (char *)outBytes+table[i].nameOffset,
619 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
620 } else {
621 udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
622 " at inOffset 0x%x length 0x%x - %s\n"
623 " the data item will be copied, not swapped\n\n",
624 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
625 }
626 /* reset the error code, copy the data item, and continue */
627 *pErrorCode=U_ZERO_ERROR;
628 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
629 }
630 }
631
632 if(inData==outData) {
633 /* copy the data from the temporary buffer to the in-place buffer */
634 uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
635 }
636 uprv_free(table);
637
638 return headerSize+length;
639 }
640 }
641
642 /*
643 * Hey, Emacs, please set the following:
644 *
645 * Local Variables:
646 * indent-tabs-mode: nil
647 * End:
648 *
649 */
650