1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: icuswap.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003aug08
14 * created by: Markus W. Scherer
15 *
16 * This tool takes an ICU data file and "swaps" it, that is, changes its
17 * platform properties between big-/little-endianness and ASCII/EBCDIC charset
18 * families.
19 * The modified data file is written to a new file.
20 * Useful as an install-time tool for shipping only one flavor of ICU data
21 * and preparing data files for the target platform.
22 * Will not work with data DLLs (shared libraries).
23 */
24
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/udata.h"
28 #include "cmemory.h"
29 #include "cstring.h"
30 #include "uinvchar.h"
31 #include "uarrsort.h"
32 #include "ucmndata.h"
33 #include "udataswp.h"
34 #include "swapimpl.h"
35 #include "toolutil.h"
36 #include "uoptions.h"
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 /* definitions */
43
44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
45 #define DEFAULT_PADDING_LENGTH 15
46
47 static UOption options[]={
48 UOPTION_HELP_H,
49 UOPTION_HELP_QUESTION_MARK,
50 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
51 };
52
53 enum {
54 OPT_HELP_H,
55 OPT_HELP_QUESTION_MARK,
56 OPT_OUT_TYPE
57 };
58
/**
 * Determine the size of an open file and rewind it to its beginning.
 *
 * @param f an open, seekable stream
 * @return the file size in bytes, or -1 if seeking/telling fails or if the
 *         size does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);

    /* always try to rewind, even if ftell() failed */
    if(fseek(f, 0, SEEK_SET)!=0 || size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
68
69 /**
70 * Swap an ICU .dat package, including swapping of enclosed items.
71 */
72 U_CFUNC int32_t U_CALLCONV
73 udata_swapPackage(const char *inFilename, const char *outFilename,
74 const UDataSwapper *ds,
75 const void *inData, int32_t length, void *outData,
76 UErrorCode *pErrorCode);
77
78 U_CDECL_BEGIN
79 static void U_CALLCONV
printError(void * context,const char * fmt,va_list args)80 printError(void *context, const char *fmt, va_list args) {
81 vfprintf((FILE *)context, fmt, args);
82 }
83 U_CDECL_END
84
85 static int
printUsage(const char * pname,UBool ishelp)86 printUsage(const char *pname, UBool ishelp) {
87 fprintf(stderr,
88 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
89 ishelp ? 'U' : 'u', pname);
90 if(ishelp) {
91 fprintf(stderr,
92 "\nOptions: -h, -?, --help print this message and exit\n"
93 " Read the input file, swap its platform properties according\n"
94 " to the -t or --type option, and write the result to the output file.\n"
95 " -tl change to little-endian/ASCII charset family\n"
96 " -tb change to big-endian/ASCII charset family\n"
97 " -te change to big-endian/EBCDIC charset family\n");
98 }
99
100 return !ishelp;
101 }
102
103 extern int
main(int argc,char * argv[])104 main(int argc, char *argv[]) {
105 FILE *in, *out;
106 const char *pname;
107 char *data;
108 int32_t length;
109 UBool ishelp;
110 int rc;
111
112 UDataSwapper *ds;
113 const UDataInfo *pInfo;
114 UErrorCode errorCode;
115 uint8_t outCharset;
116 UBool outIsBigEndian;
117
118 U_MAIN_INIT_ARGS(argc, argv);
119
120 fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
121
122 /* get the program basename */
123 pname=strrchr(argv[0], U_FILE_SEP_CHAR);
124 if(pname==NULL) {
125 pname=strrchr(argv[0], '/');
126 }
127 if(pname!=NULL) {
128 ++pname;
129 } else {
130 pname=argv[0];
131 }
132
133 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
134 ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
135 if(ishelp || argc!=3) {
136 return printUsage(pname, ishelp);
137 }
138
139 /* parse the output type option */
140 data=(char *)options[OPT_OUT_TYPE].value;
141 if(data[0]==0 || data[1]!=0) {
142 /* the type must be exactly one letter */
143 return printUsage(pname, FALSE);
144 }
145 switch(data[0]) {
146 case 'l':
147 outIsBigEndian=FALSE;
148 outCharset=U_ASCII_FAMILY;
149 break;
150 case 'b':
151 outIsBigEndian=TRUE;
152 outCharset=U_ASCII_FAMILY;
153 break;
154 case 'e':
155 outIsBigEndian=TRUE;
156 outCharset=U_EBCDIC_FAMILY;
157 break;
158 default:
159 return printUsage(pname, FALSE);
160 }
161
162 in=out=NULL;
163 data=NULL;
164
165 /* open the input file, get its length, allocate memory for it, read the file */
166 in=fopen(argv[1], "rb");
167 if(in==NULL) {
168 fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
169 rc=2;
170 goto done;
171 }
172
173 length=fileSize(in);
174 if(length<DEFAULT_PADDING_LENGTH) {
175 fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
176 rc=2;
177 goto done;
178 }
179
180 /*
181 * +15: udata_swapPackage() may need to add a few padding bytes to the
182 * last item if charset swapping is done,
183 * because the last item may be resorted into the middle and then needs
184 * additional padding bytes
185 */
186 data=(char *)malloc(length+DEFAULT_PADDING_LENGTH);
187 if(data==NULL) {
188 fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
189 rc=2;
190 goto done;
191 }
192
193 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
194 uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH);
195
196 if(length!=(int32_t)fread(data, 1, length, in)) {
197 fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
198 rc=3;
199 goto done;
200 }
201
202 fclose(in);
203 in=NULL;
204
205 /* swap the data in-place */
206 errorCode=U_ZERO_ERROR;
207 ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
208 if(U_FAILURE(errorCode)) {
209 fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
210 pname, argv[1], u_errorName(errorCode));
211 rc=4;
212 goto done;
213 }
214
215 ds->printError=printError;
216 ds->printErrorContext=stderr;
217
218 /* speculative cast, protected by the following length check */
219 pInfo=(const UDataInfo *)((const char *)data+4);
220
221 if( length>=20 &&
222 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
223 pInfo->dataFormat[1]==0x6d &&
224 pInfo->dataFormat[2]==0x6e &&
225 pInfo->dataFormat[3]==0x44
226 ) {
227 /*
228 * swap the .dat package
229 * udata_swapPackage() needs to rename ToC name entries from the old package
230 * name to the new one.
231 * We pass it the filenames, and udata_swapPackage() will extract the
232 * package names.
233 */
234 length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
235 udata_closeSwapper(ds);
236 if(U_FAILURE(errorCode)) {
237 fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
238 pname, argv[1], u_errorName(errorCode));
239 rc=4;
240 goto done;
241 }
242 } else {
243 /* swap the data, which is not a .dat package */
244 length=udata_swap(ds, data, length, data, &errorCode);
245 udata_closeSwapper(ds);
246 if(U_FAILURE(errorCode)) {
247 fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
248 pname, argv[1], u_errorName(errorCode));
249 rc=4;
250 goto done;
251 }
252 }
253
254 out=fopen(argv[2], "wb");
255 if(out==NULL) {
256 fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
257 rc=5;
258 goto done;
259 }
260
261 if(length!=(int32_t)fwrite(data, 1, length, out)) {
262 fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
263 rc=6;
264 goto done;
265 }
266
267 fclose(out);
268 out=NULL;
269
270 /* all done */
271 rc=0;
272
273 done:
274 if(in!=NULL) {
275 fclose(in);
276 }
277 if(out!=NULL) {
278 fclose(out);
279 }
280 if(data!=NULL) {
281 free(data);
282 }
283 return rc;
284 }
285
286 /* swap .dat package files -------------------------------------------------- */
287
288 static int32_t
extractPackageName(const UDataSwapper * ds,const char * filename,char pkg[],int32_t capacity,UErrorCode * pErrorCode)289 extractPackageName(const UDataSwapper *ds, const char *filename,
290 char pkg[], int32_t capacity,
291 UErrorCode *pErrorCode) {
292 const char *basename;
293 int32_t len;
294
295 if(U_FAILURE(*pErrorCode)) {
296 return 0;
297 }
298
299 basename=findBasename(filename);
300 len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
301
302 if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
303 udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
304 basename);
305 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
306 return 0;
307 }
308
309 if(len>=capacity) {
310 udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
311 (long)capacity);
312 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
313 return 0;
314 }
315
316 uprv_memcpy(pkg, basename, len);
317 pkg[len]=0;
318 return len;
319 }
320
/*
 * Working copy of one package table-of-contents entry, in platform byte
 * order. All offsets are counted from the first byte after the data header.
 */
struct ToCEntry {
    uint32_t nameOffset;    /* offset of the item name string */
    uint32_t inOffset;      /* offset of the item in the input package */
    uint32_t outOffset;     /* offset of the item in the output package */
    uint32_t length;        /* number of bytes that the item occupies */
};
324
325 U_CDECL_BEGIN
326 static int32_t U_CALLCONV
compareToCEntries(const void * context,const void * left,const void * right)327 compareToCEntries(const void *context, const void *left, const void *right) {
328 const char *chars=(const char *)context;
329 return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset,
330 chars+((const ToCEntry *)right)->nameOffset);
331 }
332 U_CDECL_END
333
334 U_CFUNC int32_t U_CALLCONV
udata_swapPackage(const char * inFilename,const char * outFilename,const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)335 udata_swapPackage(const char *inFilename, const char *outFilename,
336 const UDataSwapper *ds,
337 const void *inData, int32_t length, void *outData,
338 UErrorCode *pErrorCode) {
339 const UDataInfo *pInfo;
340 int32_t headerSize;
341
342 const uint8_t *inBytes;
343 uint8_t *outBytes;
344
345 uint32_t itemCount, offset, i;
346 int32_t itemLength;
347
348 const UDataOffsetTOCEntry *inEntries;
349 UDataOffsetTOCEntry *outEntries;
350
351 ToCEntry *table;
352
353 char inPkgName[32], outPkgName[32];
354 int32_t inPkgNameLength, outPkgNameLength;
355
356 /* udata_swapDataHeader checks the arguments */
357 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
358 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
359 return 0;
360 }
361
362 /* check data format and format version */
363 pInfo=(const UDataInfo *)((const char *)inData+4);
364 if(!(
365 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
366 pInfo->dataFormat[1]==0x6d &&
367 pInfo->dataFormat[2]==0x6e &&
368 pInfo->dataFormat[3]==0x44 &&
369 pInfo->formatVersion[0]==1
370 )) {
371 udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
372 pInfo->dataFormat[0], pInfo->dataFormat[1],
373 pInfo->dataFormat[2], pInfo->dataFormat[3],
374 pInfo->formatVersion[0]);
375 *pErrorCode=U_UNSUPPORTED_ERROR;
376 return 0;
377 }
378
379 /*
380 * We need to change the ToC name entries so that they have the correct
381 * package name prefix.
382 * Extract the package names from the in/out filenames.
383 */
384 inPkgNameLength=extractPackageName(
385 ds, inFilename,
386 inPkgName, (int32_t)sizeof(inPkgName),
387 pErrorCode);
388 outPkgNameLength=extractPackageName(
389 ds, outFilename,
390 outPkgName, (int32_t)sizeof(outPkgName),
391 pErrorCode);
392 if(U_FAILURE(*pErrorCode)) {
393 return 0;
394 }
395
396 /*
397 * It is possible to work with inPkgNameLength!=outPkgNameLength,
398 * but then the length of the data file would change more significantly,
399 * which we are not currently prepared for.
400 */
401 if(inPkgNameLength!=outPkgNameLength) {
402 udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
403 inPkgName, outPkgName);
404 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
405 return 0;
406 }
407
408 inBytes=(const uint8_t *)inData+headerSize;
409 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
410
411 if(length<0) {
412 /* preflighting */
413 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
414 if(itemCount==0) {
415 /* no items: count only the item count and return */
416 return headerSize+4;
417 }
418
419 /* read the last item's offset and preflight it */
420 offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
421 itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);
422
423 if(U_SUCCESS(*pErrorCode)) {
424 return headerSize+offset+(uint32_t)itemLength;
425 } else {
426 return 0;
427 }
428 } else {
429 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
430 length-=headerSize;
431 if(length<4) {
432 /* itemCount does not fit */
433 offset=0xffffffff;
434 itemCount=0; /* make compilers happy */
435 } else {
436 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
437 if(itemCount==0) {
438 offset=4;
439 } else if((uint32_t)length<(4+8*itemCount)) {
440 /* ToC table does not fit */
441 offset=0xffffffff;
442 } else {
443 /* offset of the last item plus at least 20 bytes for its header */
444 offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset);
445 }
446 }
447 if((uint32_t)length<offset) {
448 udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
449 length);
450 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
451 return 0;
452 }
453
454 outBytes=(uint8_t *)outData+headerSize;
455
456 /* swap the item count */
457 ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);
458
459 if(itemCount==0) {
460 /* no items: just return now */
461 return headerSize+4;
462 }
463
464 /* swap the item name strings */
465 offset=4+8*itemCount;
466 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
467 udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
468 if(U_FAILURE(*pErrorCode)) {
469 udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
470 return 0;
471 }
472 /* keep offset and itemLength in case we allocate and copy the strings below */
473
474 /* swap the package names into the output charset */
475 if(ds->outCharset!=U_CHARSET_FAMILY) {
476 UDataSwapper *ds2;
477 ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
478 ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
479 ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
480 udata_closeSwapper(ds2);
481 if(U_FAILURE(*pErrorCode)) {
482 udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
483 }
484 }
485
486 /* change the prefix of each ToC entry name from the old to the new package name */
487 {
488 char *entryName;
489
490 for(i=0; i<itemCount; ++i) {
491 entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset);
492
493 if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
494 uprv_memcpy(entryName, outPkgName, inPkgNameLength);
495 } else {
496 udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
497 (long)i);
498 *pErrorCode=U_INVALID_FORMAT_ERROR;
499 return 0;
500 }
501 }
502 }
503
504 /*
505 * Allocate the ToC table and, if necessary, a temporary buffer for
506 * pseudo-in-place swapping.
507 *
508 * We cannot swap in-place because:
509 *
510 * 1. If the swapping of an item fails mid-way, then in-place swapping
511 * has destroyed its data.
512 * Out-of-place swapping allows us to then copy its original data.
513 *
514 * 2. If swapping changes the charset family, then we must resort
515 * not only the ToC table but also the data items themselves.
516 * This requires a permutation and is best done with separate in/out
517 * buffers.
518 *
519 * We swapped the strings above to avoid the malloc below if string swapping fails.
520 */
521 if(inData==outData) {
522 /* +15: prepare for extra padding of a newly-last item */
523 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH);
524 if(table!=NULL) {
525 outBytes=(uint8_t *)(table+itemCount);
526
527 /* copy the item count and the swapped strings */
528 uprv_memcpy(outBytes, inBytes, 4);
529 uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
530 }
531 } else {
532 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry));
533 }
534 if(table==NULL) {
535 udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n",
536 inData==outData ?
537 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH :
538 itemCount*sizeof(ToCEntry));
539 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
540 return 0;
541 }
542 outEntries=(UDataOffsetTOCEntry *)(outBytes+4);
543
544 /* read the ToC table */
545 for(i=0; i<itemCount; ++i) {
546 table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
547 table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
548 if(i>0) {
549 table[i-1].length=table[i].inOffset-table[i-1].inOffset;
550 }
551 }
552 table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;
553
554 if(ds->inCharset==ds->outCharset) {
555 /* no charset swapping, no resorting: keep item offsets the same */
556 for(i=0; i<itemCount; ++i) {
557 table[i].outOffset=table[i].inOffset;
558 }
559 } else {
560 /* charset swapping: resort items by their swapped names */
561
562 /*
563 * Before the actual sorting, we need to make sure that each item
564 * has a length that is a multiple of 16 bytes so that all items
565 * are 16-aligned.
566 * Only the old last item may be missing up to 15 padding bytes.
567 * Add padding bytes for it.
568 * Since the icuswap main() function has already allocated enough
569 * input buffer space and set the last 15 bytes there to 0xaa,
570 * we only need to increase the total data length and the length
571 * of the last item here.
572 */
573 if((length&0xf)!=0) {
574 int32_t delta=16-(length&0xf);
575 length+=delta;
576 table[itemCount-1].length+=(uint32_t)delta;
577 }
578
579 /* Save the offset before we sort the TOC. */
580 offset=table[0].inOffset;
581 /* sort the TOC entries */
582 uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
583 compareToCEntries, outBytes, FALSE, pErrorCode);
584
585 /*
586 * Note: Before sorting, the inOffset values were in order.
587 * Now the outOffset values are in order.
588 */
589
590 /* assign outOffset values */
591 for(i=0; i<itemCount; ++i) {
592 table[i].outOffset=offset;
593 offset+=table[i].length;
594 }
595 }
596
597 /* write the output ToC table */
598 for(i=0; i<itemCount; ++i) {
599 ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
600 ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
601 }
602
603 /* swap each data item */
604 for(i=0; i<itemCount; ++i) {
605 /* first copy the item bytes to make sure that unreachable bytes are copied */
606 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
607
608 /* swap the item */
609 udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
610 outBytes+table[i].outOffset, pErrorCode);
611
612 if(U_FAILURE(*pErrorCode)) {
613 if(ds->outCharset==U_CHARSET_FAMILY) {
614 udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
615 " at inOffset 0x%x length 0x%x - %s\n"
616 " the data item will be copied, not swapped\n\n",
617 (char *)outBytes+table[i].nameOffset,
618 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
619 } else {
620 udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
621 " at inOffset 0x%x length 0x%x - %s\n"
622 " the data item will be copied, not swapped\n\n",
623 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
624 }
625 /* reset the error code, copy the data item, and continue */
626 *pErrorCode=U_ZERO_ERROR;
627 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
628 }
629 }
630
631 if(inData==outData) {
632 /* copy the data from the temporary buffer to the in-place buffer */
633 uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
634 }
635 uprv_free(table);
636
637 return headerSize+length;
638 }
639 }
640
641 /*
642 * Hey, Emacs, please set the following:
643 *
644 * Local Variables:
645 * indent-tabs-mode: nil
646 * End:
647 *
648 */
649