1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: icuswap.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003aug08
14 * created by: Markus W. Scherer
15 *
16 * This tool takes an ICU data file and "swaps" it, that is, changes its
17 * platform properties between big-/little-endianness and ASCII/EBCDIC charset
18 * families.
19 * The modified data file is written to a new file.
20 * Useful as an install-time tool for shipping only one flavor of ICU data
21 * and preparing data files for the target platform.
22 * Will not work with data DLLs (shared libraries).
23 */
24
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/udata.h"
28 #include "cmemory.h"
29 #include "cstring.h"
30 #include "uinvchar.h"
31 #include "uarrsort.h"
32 #include "ucmndata.h"
33 #include "udataswp.h"
34 #include "swapimpl.h"
35 #include "toolutil.h"
36 #include "uoptions.h"
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 /* definitions */
43
44 #define DEFAULT_PADDING_LENGTH 15
45
46 static UOption options[]={
47 UOPTION_HELP_H,
48 UOPTION_HELP_QUESTION_MARK,
49 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
50 };
51
52 enum {
53 OPT_HELP_H,
54 OPT_HELP_QUESTION_MARK,
55 OPT_OUT_TYPE
56 };
57
/**
 * Determine the size of an open file and rewind it to its beginning.
 *
 * @param f an open, seekable stream
 * @return the file size in bytes, or -1 if the size cannot be determined
 *         (seek/tell failure) or does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);

    /* always try to rewind so that the caller can read from the start */
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }

    /* ftell() reports failure as -1; reject sizes that would be truncated by the cast */
    if(size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
67
68 /**
69 * Swap an ICU .dat package, including swapping of enclosed items.
70 */
71 U_CFUNC int32_t U_CALLCONV
72 udata_swapPackage(const char *inFilename, const char *outFilename,
73 const UDataSwapper *ds,
74 const void *inData, int32_t length, void *outData,
75 UErrorCode *pErrorCode);
76
77 U_CDECL_BEGIN
78 static void U_CALLCONV
printError(void * context,const char * fmt,va_list args)79 printError(void *context, const char *fmt, va_list args) {
80 vfprintf((FILE *)context, fmt, args);
81 }
82 U_CDECL_END
83
84 static int
printUsage(const char * pname,UBool ishelp)85 printUsage(const char *pname, UBool ishelp) {
86 fprintf(stderr,
87 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
88 ishelp ? 'U' : 'u', pname);
89 if(ishelp) {
90 fprintf(stderr,
91 "\nOptions: -h, -?, --help print this message and exit\n"
92 " Read the input file, swap its platform properties according\n"
93 " to the -t or --type option, and write the result to the output file.\n"
94 " -tl change to little-endian/ASCII charset family\n"
95 " -tb change to big-endian/ASCII charset family\n"
96 " -te change to big-endian/EBCDIC charset family\n");
97 }
98
99 return !ishelp;
100 }
101
102 extern int
main(int argc,char * argv[])103 main(int argc, char *argv[]) {
104 FILE *in, *out;
105 const char *pname;
106 char *data;
107 int32_t length;
108 UBool ishelp;
109 int rc;
110
111 UDataSwapper *ds;
112 const UDataInfo *pInfo;
113 UErrorCode errorCode;
114 uint8_t outCharset;
115 UBool outIsBigEndian;
116
117 U_MAIN_INIT_ARGS(argc, argv);
118
119 fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
120
121 /* get the program basename */
122 pname=strrchr(argv[0], U_FILE_SEP_CHAR);
123 if(pname==NULL) {
124 pname=strrchr(argv[0], '/');
125 }
126 if(pname!=NULL) {
127 ++pname;
128 } else {
129 pname=argv[0];
130 }
131
132 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
133 ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
134 if(ishelp || argc!=3) {
135 return printUsage(pname, ishelp);
136 }
137
138 /* parse the output type option */
139 data=(char *)options[OPT_OUT_TYPE].value;
140 if(data[0]==0 || data[1]!=0) {
141 /* the type must be exactly one letter */
142 return printUsage(pname, FALSE);
143 }
144 switch(data[0]) {
145 case 'l':
146 outIsBigEndian=FALSE;
147 outCharset=U_ASCII_FAMILY;
148 break;
149 case 'b':
150 outIsBigEndian=TRUE;
151 outCharset=U_ASCII_FAMILY;
152 break;
153 case 'e':
154 outIsBigEndian=TRUE;
155 outCharset=U_EBCDIC_FAMILY;
156 break;
157 default:
158 return printUsage(pname, FALSE);
159 }
160
161 in=out=NULL;
162 data=NULL;
163
164 /* open the input file, get its length, allocate memory for it, read the file */
165 in=fopen(argv[1], "rb");
166 if(in==NULL) {
167 fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
168 rc=2;
169 goto done;
170 }
171
172 length=fileSize(in);
173 if(length<DEFAULT_PADDING_LENGTH) {
174 fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
175 rc=2;
176 goto done;
177 }
178
179 /*
180 * +15: udata_swapPackage() may need to add a few padding bytes to the
181 * last item if charset swapping is done,
182 * because the last item may be resorted into the middle and then needs
183 * additional padding bytes
184 */
185 data=(char *)malloc(length+DEFAULT_PADDING_LENGTH);
186 if(data==NULL) {
187 fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
188 rc=2;
189 goto done;
190 }
191
192 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
193 uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH);
194
195 if(length!=(int32_t)fread(data, 1, length, in)) {
196 fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
197 rc=3;
198 goto done;
199 }
200
201 fclose(in);
202 in=NULL;
203
204 /* swap the data in-place */
205 errorCode=U_ZERO_ERROR;
206 ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
207 if(U_FAILURE(errorCode)) {
208 fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
209 pname, argv[1], u_errorName(errorCode));
210 rc=4;
211 goto done;
212 }
213
214 ds->printError=printError;
215 ds->printErrorContext=stderr;
216
217 /* speculative cast, protected by the following length check */
218 pInfo=(const UDataInfo *)((const char *)data+4);
219
220 if( length>=20 &&
221 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
222 pInfo->dataFormat[1]==0x6d &&
223 pInfo->dataFormat[2]==0x6e &&
224 pInfo->dataFormat[3]==0x44
225 ) {
226 /*
227 * swap the .dat package
228 * udata_swapPackage() needs to rename ToC name entries from the old package
229 * name to the new one.
230 * We pass it the filenames, and udata_swapPackage() will extract the
231 * package names.
232 */
233 length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
234 udata_closeSwapper(ds);
235 if(U_FAILURE(errorCode)) {
236 fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
237 pname, argv[1], u_errorName(errorCode));
238 rc=4;
239 goto done;
240 }
241 } else {
242 /* swap the data, which is not a .dat package */
243 length=udata_swap(ds, data, length, data, &errorCode);
244 udata_closeSwapper(ds);
245 if(U_FAILURE(errorCode)) {
246 fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
247 pname, argv[1], u_errorName(errorCode));
248 rc=4;
249 goto done;
250 }
251 }
252
253 out=fopen(argv[2], "wb");
254 if(out==NULL) {
255 fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
256 rc=5;
257 goto done;
258 }
259
260 if(length!=(int32_t)fwrite(data, 1, length, out)) {
261 fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
262 rc=6;
263 goto done;
264 }
265
266 fclose(out);
267 out=NULL;
268
269 /* all done */
270 rc=0;
271
272 done:
273 if(in!=NULL) {
274 fclose(in);
275 }
276 if(out!=NULL) {
277 fclose(out);
278 }
279 if(data!=NULL) {
280 free(data);
281 }
282 return rc;
283 }
284
285 /* swap .dat package files -------------------------------------------------- */
286
287 static int32_t
extractPackageName(const UDataSwapper * ds,const char * filename,char pkg[],int32_t capacity,UErrorCode * pErrorCode)288 extractPackageName(const UDataSwapper *ds, const char *filename,
289 char pkg[], int32_t capacity,
290 UErrorCode *pErrorCode) {
291 const char *basename;
292 int32_t len;
293
294 if(U_FAILURE(*pErrorCode)) {
295 return 0;
296 }
297
298 basename=findBasename(filename);
299 len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
300
301 if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
302 udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
303 basename);
304 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
305 return 0;
306 }
307
308 if(len>=capacity) {
309 udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
310 (long)capacity);
311 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
312 return 0;
313 }
314
315 uprv_memcpy(pkg, basename, len);
316 pkg[len]=0;
317 return len;
318 }
319
/*
 * Working copy of one package ToC entry, used by udata_swapPackage().
 * All offsets are relative to the start of the package data after the header.
 */
struct ToCEntry {
    uint32_t nameOffset;    /* offset of the item name string */
    uint32_t inOffset;      /* offset of the item data in the input */
    uint32_t outOffset;     /* offset of the item data in the output */
    uint32_t length;        /* number of bytes occupied by the item */
};
323
324 U_CDECL_BEGIN
325 static int32_t U_CALLCONV
compareToCEntries(const void * context,const void * left,const void * right)326 compareToCEntries(const void *context, const void *left, const void *right) {
327 const char *chars=(const char *)context;
328 return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset,
329 chars+((const ToCEntry *)right)->nameOffset);
330 }
331 U_CDECL_END
332
333 U_CFUNC int32_t U_CALLCONV
udata_swapPackage(const char * inFilename,const char * outFilename,const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)334 udata_swapPackage(const char *inFilename, const char *outFilename,
335 const UDataSwapper *ds,
336 const void *inData, int32_t length, void *outData,
337 UErrorCode *pErrorCode) {
338 const UDataInfo *pInfo;
339 int32_t headerSize;
340
341 const uint8_t *inBytes;
342 uint8_t *outBytes;
343
344 uint32_t itemCount, offset, i;
345 int32_t itemLength;
346
347 const UDataOffsetTOCEntry *inEntries;
348 UDataOffsetTOCEntry *outEntries;
349
350 ToCEntry *table;
351
352 char inPkgName[32], outPkgName[32];
353 int32_t inPkgNameLength, outPkgNameLength;
354
355 /* udata_swapDataHeader checks the arguments */
356 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
357 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
358 return 0;
359 }
360
361 /* check data format and format version */
362 pInfo=(const UDataInfo *)((const char *)inData+4);
363 if(!(
364 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
365 pInfo->dataFormat[1]==0x6d &&
366 pInfo->dataFormat[2]==0x6e &&
367 pInfo->dataFormat[3]==0x44 &&
368 pInfo->formatVersion[0]==1
369 )) {
370 udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
371 pInfo->dataFormat[0], pInfo->dataFormat[1],
372 pInfo->dataFormat[2], pInfo->dataFormat[3],
373 pInfo->formatVersion[0]);
374 *pErrorCode=U_UNSUPPORTED_ERROR;
375 return 0;
376 }
377
378 /*
379 * We need to change the ToC name entries so that they have the correct
380 * package name prefix.
381 * Extract the package names from the in/out filenames.
382 */
383 inPkgNameLength=extractPackageName(
384 ds, inFilename,
385 inPkgName, (int32_t)sizeof(inPkgName),
386 pErrorCode);
387 outPkgNameLength=extractPackageName(
388 ds, outFilename,
389 outPkgName, (int32_t)sizeof(outPkgName),
390 pErrorCode);
391 if(U_FAILURE(*pErrorCode)) {
392 return 0;
393 }
394
395 /*
396 * It is possible to work with inPkgNameLength!=outPkgNameLength,
397 * but then the length of the data file would change more significantly,
398 * which we are not currently prepared for.
399 */
400 if(inPkgNameLength!=outPkgNameLength) {
401 udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
402 inPkgName, outPkgName);
403 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
404 return 0;
405 }
406
407 inBytes=(const uint8_t *)inData+headerSize;
408 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
409
410 if(length<0) {
411 /* preflighting */
412 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
413 if(itemCount==0) {
414 /* no items: count only the item count and return */
415 return headerSize+4;
416 }
417
418 /* read the last item's offset and preflight it */
419 offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
420 itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);
421
422 if(U_SUCCESS(*pErrorCode)) {
423 return headerSize+offset+(uint32_t)itemLength;
424 } else {
425 return 0;
426 }
427 } else {
428 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
429 length-=headerSize;
430 if(length<4) {
431 /* itemCount does not fit */
432 offset=0xffffffff;
433 itemCount=0; /* make compilers happy */
434 } else {
435 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
436 if(itemCount==0) {
437 offset=4;
438 } else if((uint32_t)length<(4+8*itemCount)) {
439 /* ToC table does not fit */
440 offset=0xffffffff;
441 } else {
442 /* offset of the last item plus at least 20 bytes for its header */
443 offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset);
444 }
445 }
446 if((uint32_t)length<offset) {
447 udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
448 length);
449 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
450 return 0;
451 }
452
453 outBytes=(uint8_t *)outData+headerSize;
454
455 /* swap the item count */
456 ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);
457
458 if(itemCount==0) {
459 /* no items: just return now */
460 return headerSize+4;
461 }
462
463 /* swap the item name strings */
464 offset=4+8*itemCount;
465 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
466 udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
467 if(U_FAILURE(*pErrorCode)) {
468 udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
469 return 0;
470 }
471 /* keep offset and itemLength in case we allocate and copy the strings below */
472
473 /* swap the package names into the output charset */
474 if(ds->outCharset!=U_CHARSET_FAMILY) {
475 UDataSwapper *ds2;
476 ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
477 ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
478 ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
479 udata_closeSwapper(ds2);
480 if(U_FAILURE(*pErrorCode)) {
481 udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
482 }
483 }
484
485 /* change the prefix of each ToC entry name from the old to the new package name */
486 {
487 char *entryName;
488
489 for(i=0; i<itemCount; ++i) {
490 entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset);
491
492 if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
493 uprv_memcpy(entryName, outPkgName, inPkgNameLength);
494 } else {
495 udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
496 (long)i);
497 *pErrorCode=U_INVALID_FORMAT_ERROR;
498 return 0;
499 }
500 }
501 }
502
503 /*
504 * Allocate the ToC table and, if necessary, a temporary buffer for
505 * pseudo-in-place swapping.
506 *
507 * We cannot swap in-place because:
508 *
509 * 1. If the swapping of an item fails mid-way, then in-place swapping
510 * has destroyed its data.
511 * Out-of-place swapping allows us to then copy its original data.
512 *
513 * 2. If swapping changes the charset family, then we must resort
514 * not only the ToC table but also the data items themselves.
515 * This requires a permutation and is best done with separate in/out
516 * buffers.
517 *
518 * We swapped the strings above to avoid the malloc below if string swapping fails.
519 */
520 if(inData==outData) {
521 /* +15: prepare for extra padding of a newly-last item */
522 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH);
523 if(table!=NULL) {
524 outBytes=(uint8_t *)(table+itemCount);
525
526 /* copy the item count and the swapped strings */
527 uprv_memcpy(outBytes, inBytes, 4);
528 uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
529 }
530 } else {
531 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry));
532 }
533 if(table==NULL) {
534 udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n",
535 inData==outData ?
536 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH :
537 itemCount*sizeof(ToCEntry));
538 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
539 return 0;
540 }
541 outEntries=(UDataOffsetTOCEntry *)(outBytes+4);
542
543 /* read the ToC table */
544 for(i=0; i<itemCount; ++i) {
545 table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
546 table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
547 if(i>0) {
548 table[i-1].length=table[i].inOffset-table[i-1].inOffset;
549 }
550 }
551 table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;
552
553 if(ds->inCharset==ds->outCharset) {
554 /* no charset swapping, no resorting: keep item offsets the same */
555 for(i=0; i<itemCount; ++i) {
556 table[i].outOffset=table[i].inOffset;
557 }
558 } else {
559 /* charset swapping: resort items by their swapped names */
560
561 /*
562 * Before the actual sorting, we need to make sure that each item
563 * has a length that is a multiple of 16 bytes so that all items
564 * are 16-aligned.
565 * Only the old last item may be missing up to 15 padding bytes.
566 * Add padding bytes for it.
567 * Since the icuswap main() function has already allocated enough
568 * input buffer space and set the last 15 bytes there to 0xaa,
569 * we only need to increase the total data length and the length
570 * of the last item here.
571 */
572 if((length&0xf)!=0) {
573 int32_t delta=16-(length&0xf);
574 length+=delta;
575 table[itemCount-1].length+=(uint32_t)delta;
576 }
577
578 /* Save the offset before we sort the TOC. */
579 offset=table[0].inOffset;
580 /* sort the TOC entries */
581 uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
582 compareToCEntries, outBytes, FALSE, pErrorCode);
583
584 /*
585 * Note: Before sorting, the inOffset values were in order.
586 * Now the outOffset values are in order.
587 */
588
589 /* assign outOffset values */
590 for(i=0; i<itemCount; ++i) {
591 table[i].outOffset=offset;
592 offset+=table[i].length;
593 }
594 }
595
596 /* write the output ToC table */
597 for(i=0; i<itemCount; ++i) {
598 ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
599 ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
600 }
601
602 /* swap each data item */
603 for(i=0; i<itemCount; ++i) {
604 /* first copy the item bytes to make sure that unreachable bytes are copied */
605 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
606
607 /* swap the item */
608 udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
609 outBytes+table[i].outOffset, pErrorCode);
610
611 if(U_FAILURE(*pErrorCode)) {
612 if(ds->outCharset==U_CHARSET_FAMILY) {
613 udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
614 " at inOffset 0x%x length 0x%x - %s\n"
615 " the data item will be copied, not swapped\n\n",
616 (char *)outBytes+table[i].nameOffset,
617 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
618 } else {
619 udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
620 " at inOffset 0x%x length 0x%x - %s\n"
621 " the data item will be copied, not swapped\n\n",
622 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
623 }
624 /* reset the error code, copy the data item, and continue */
625 *pErrorCode=U_ZERO_ERROR;
626 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
627 }
628 }
629
630 if(inData==outData) {
631 /* copy the data from the temporary buffer to the in-place buffer */
632 uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
633 }
634 uprv_free(table);
635
636 return headerSize+length;
637 }
638 }
639
640 /*
641 * Hey, Emacs, please set the following:
642 *
643 * Local Variables:
644 * indent-tabs-mode: nil
645 * End:
646 *
647 */
648