1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2009, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: package.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2005aug25
14 * created by: Markus W. Scherer
15 *
16 * Read, modify, and write ICU .dat data package files.
17 * This is an integral part of the icupkg tool, moved to the toolutil library
18 * because parts of tool implementations tend to be later shared by
19 * other tools.
20 * Subsumes functionality and implementation code from
21 * gencmn, decmn, and icuswap tools.
22 */
23
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
27 #include "cstring.h"
28 #include "uarrsort.h"
29 #include "ucmndata.h"
30 #include "udataswp.h"
31 #include "swapimpl.h"
32 #include "toolutil.h"
33 #include "package.h"
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 // general definitions ----------------------------------------------------- ***
40
/* Number of elements of a statically sized array, as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(*(array)))
42
/*
 * UDataInfo cf. udata.h
 * This is the info block that the Package constructor copies into the
 * header of a new, empty package. The platform-dependent fields describe
 * the machine that this tool is compiled for.
 */
static const UDataInfo dataInfo={
    (uint16_t)sizeof(UDataInfo),    /* size of this structure */
    0,                              /* reserved (cf. udata.h) */

    U_IS_BIG_ENDIAN,                /* isBigEndian: local platform endianness */
    U_CHARSET_FAMILY,               /* charsetFamily: local platform charset family */
    (uint8_t)sizeof(UChar),         /* sizeofUChar; getDataInfo() requires 2 */
    0,                              /* reserved (cf. udata.h) */

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
57
58 U_CDECL_BEGIN
59 static void U_CALLCONV
printPackageError(void * context,const char * fmt,va_list args)60 printPackageError(void *context, const char *fmt, va_list args) {
61 vfprintf((FILE *)context, fmt, args);
62 }
63 U_CDECL_END
64
/* Returns x with its two bytes exchanged. */
static uint16_t
readSwapUInt16(uint16_t x) {
    const uint16_t highByte=(uint16_t)(x>>8);
    const uint16_t lowByte=(uint16_t)(x&0xff);
    return (uint16_t)((lowByte<<8)|highByte);
}
69
70 // platform types ---------------------------------------------------------- ***
71
/*
 * Platform type letters, indexed by the TYPE_ constants below.
 * A type index is computed as 2*charsetFamily+isBigEndian (see makeTypeEnum()).
 * writePackage() opens swappers for TYPE_L (little-endian/ASCII),
 * TYPE_B (big-endian/ASCII) and TYPE_E (big-endian/EBCDIC);
 * TYPE_LE is treated as bogus there and has the placeholder letter '?'.
 */
static const char *types="lb?e";

/* Type indexes; TYPE_COUNT is the number of entries in types. */
enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
75
76 static inline int32_t
makeTypeEnum(uint8_t charset,UBool isBigEndian)77 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
78 return 2*(int32_t)charset+isBigEndian;
79 }
80
81 static inline int32_t
makeTypeEnum(char type)82 makeTypeEnum(char type) {
83 return
84 type == 'l' ? TYPE_L :
85 type == 'b' ? TYPE_B :
86 type == 'e' ? TYPE_E :
87 -1;
88 }
89
90 static inline char
makeTypeLetter(uint8_t charset,UBool isBigEndian)91 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
92 return types[makeTypeEnum(charset, isBigEndian)];
93 }
94
95 static inline char
makeTypeLetter(int32_t typeEnum)96 makeTypeLetter(int32_t typeEnum) {
97 return types[typeEnum];
98 }
99
100 static void
makeTypeProps(char type,uint8_t & charset,UBool & isBigEndian)101 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
102 int32_t typeEnum=makeTypeEnum(type);
103 charset=(uint8_t)(typeEnum>>1);
104 isBigEndian=(UBool)(typeEnum&1);
105 }
106
107 U_CFUNC const UDataInfo *
getDataInfo(const uint8_t * data,int32_t length,int32_t & infoLength,int32_t & headerLength,UErrorCode * pErrorCode)108 getDataInfo(const uint8_t *data, int32_t length,
109 int32_t &infoLength, int32_t &headerLength,
110 UErrorCode *pErrorCode) {
111 const DataHeader *pHeader;
112 const UDataInfo *pInfo;
113
114 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
115 return NULL;
116 }
117 if( data==NULL ||
118 (length>=0 && length<(int32_t)sizeof(DataHeader))
119 ) {
120 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
121 return NULL;
122 }
123
124 pHeader=(const DataHeader *)data;
125 pInfo=&pHeader->info;
126 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
127 pHeader->dataHeader.magic1!=0xda ||
128 pHeader->dataHeader.magic2!=0x27 ||
129 pInfo->sizeofUChar!=2
130 ) {
131 *pErrorCode=U_UNSUPPORTED_ERROR;
132 return NULL;
133 }
134
135 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
136 headerLength=pHeader->dataHeader.headerSize;
137 infoLength=pInfo->size;
138 } else {
139 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
140 infoLength=readSwapUInt16(pInfo->size);
141 }
142
143 if( headerLength<(int32_t)sizeof(DataHeader) ||
144 infoLength<(int32_t)sizeof(UDataInfo) ||
145 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
146 (length>=0 && length<headerLength)
147 ) {
148 *pErrorCode=U_UNSUPPORTED_ERROR;
149 return NULL;
150 }
151
152 return pInfo;
153 }
154
155 static int32_t
getTypeEnumForInputData(const uint8_t * data,int32_t length,UErrorCode * pErrorCode)156 getTypeEnumForInputData(const uint8_t *data, int32_t length,
157 UErrorCode *pErrorCode) {
158 const UDataInfo *pInfo;
159 int32_t infoLength, headerLength;
160
161 /* getDataInfo() checks for illegal arguments */
162 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
163 if(pInfo==NULL) {
164 return -1;
165 }
166
167 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
168 }
169
170 // file handling ----------------------------------------------------------- ***
171
172 static void
extractPackageName(const char * filename,char pkg[],int32_t capacity)173 extractPackageName(const char *filename,
174 char pkg[], int32_t capacity) {
175 const char *basename;
176 int32_t len;
177
178 basename=findBasename(filename);
179 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
180
181 if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
182 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
183 basename);
184 exit(U_ILLEGAL_ARGUMENT_ERROR);
185 }
186
187 if(len>=capacity) {
188 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
189 basename, (long)capacity);
190 exit(U_ILLEGAL_ARGUMENT_ERROR);
191 }
192
193 memcpy(pkg, basename, len);
194 pkg[len]=0;
195 }
196
/*
 * Returns the length of the file in bytes and rewinds it to its beginning.
 * Returns -1 if seeking fails, if the position cannot be determined,
 * or if the length does not fit into an int32_t
 * (callers in this file treat any length <=0 as an error).
 */
static int32_t
getFileLength(FILE *f) {
    long end=-1;

    if(fseek(f, 0, SEEK_END)==0) {
        end=ftell(f);
    }

    // always try to rewind so that the caller can read from the start
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }

    if(end<0 || end>0x7fffffffL) {
        return -1;
    }
    return (int32_t)end;
}
206
/*
 * Replace tree separators and alternate file separators in s, in place,
 * with the normal file separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
        ++s;
    }
}
#endif
224
/*
 * Replace normal and alternate file separators in s, in place,
 * with the tree separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
        ++s;
    }
}
#endif
242
243 /*
244 * Prepend the path (if any) to the name and run the name through treeToName().
245 */
246 static void
makeFullFilename(const char * path,const char * name,char * filename,int32_t capacity)247 makeFullFilename(const char *path, const char *name,
248 char *filename, int32_t capacity) {
249 char *s;
250
251 // prepend the path unless NULL or empty
252 if(path!=NULL && path[0]!=0) {
253 if((int32_t)(strlen(path)+1)>=capacity) {
254 fprintf(stderr, "pathname too long: \"%s\"\n", path);
255 exit(U_BUFFER_OVERFLOW_ERROR);
256 }
257 strcpy(filename, path);
258
259 // make sure the path ends with a file separator
260 s=strchr(filename, 0);
261 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
262 *s++=U_FILE_SEP_CHAR;
263 }
264 } else {
265 s=filename;
266 }
267
268 // turn the name into a filename, turn tree separators into file separators
269 if((int32_t)((s-filename)+strlen(name))>=capacity) {
270 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
271 exit(U_BUFFER_OVERFLOW_ERROR);
272 }
273 strcpy(s, name);
274 treeToPath(s);
275 }
276
277 static void
makeFullFilenameAndDirs(const char * path,const char * name,char * filename,int32_t capacity)278 makeFullFilenameAndDirs(const char *path, const char *name,
279 char *filename, int32_t capacity) {
280 char *sep;
281 UErrorCode errorCode;
282
283 makeFullFilename(path, name, filename, capacity);
284
285 // make tree directories
286 errorCode=U_ZERO_ERROR;
287 sep=strchr(filename, 0)-strlen(name);
288 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
289 if(sep!=filename) {
290 *sep=0; // truncate temporarily
291 uprv_mkdir(filename, &errorCode);
292 if(U_FAILURE(errorCode)) {
293 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
294 exit(U_FILE_ACCESS_ERROR);
295 }
296 }
297 *sep++=U_FILE_SEP_CHAR; // restore file separator character
298 }
299 }
300
301 static uint8_t *
readFile(const char * path,const char * name,int32_t & length,char & type)302 readFile(const char *path, const char *name, int32_t &length, char &type) {
303 char filename[1024];
304 FILE *file;
305 uint8_t *data;
306 UErrorCode errorCode;
307 int32_t fileLength, typeEnum;
308
309 makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
310
311 /* open the input file, get its length, allocate memory for it, read the file */
312 file=fopen(filename, "rb");
313 if(file==NULL) {
314 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
315 exit(U_FILE_ACCESS_ERROR);
316 }
317
318 /* get the file length */
319 fileLength=getFileLength(file);
320 if(ferror(file) || fileLength<=0) {
321 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
322 fclose(file);
323 exit(U_FILE_ACCESS_ERROR);
324 }
325
326 /* allocate the buffer, pad to multiple of 16 */
327 length=(fileLength+0xf)&~0xf;
328 data=(uint8_t *)malloc(length);
329 if(data==NULL) {
330 fclose(file);
331 exit(U_MEMORY_ALLOCATION_ERROR);
332 }
333
334 /* read the file */
335 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
336 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
337 fclose(file);
338 free(data);
339 exit(U_FILE_ACCESS_ERROR);
340 }
341
342 /* pad the file to a multiple of 16 using the usual padding byte */
343 if(fileLength<length) {
344 memset(data+fileLength, 0xaa, length-fileLength);
345 }
346
347 fclose(file);
348
349 // minimum check for ICU-format data
350 errorCode=U_ZERO_ERROR;
351 typeEnum=getTypeEnumForInputData(data, length, &errorCode);
352 if(typeEnum<0 || U_FAILURE(errorCode)) {
353 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
354 free(data);
355 exit(U_INVALID_FORMAT_ERROR);
356 }
357 type=makeTypeLetter(typeEnum);
358
359 return data;
360 }
361
362 // .dat package file representation ---------------------------------------- ***
363
364 U_CDECL_BEGIN
365
366 static int32_t U_CALLCONV
compareItems(const void *,const void * left,const void * right)367 compareItems(const void * /*context*/, const void *left, const void *right) {
368 U_NAMESPACE_USE
369
370 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
371 }
372
373 U_CDECL_END
374
375 U_NAMESPACE_BEGIN
376
Package()377 Package::Package() {
378 inPkgName[0]=0;
379 inData=NULL;
380 inLength=0;
381 inCharset=U_CHARSET_FAMILY;
382 inIsBigEndian=U_IS_BIG_ENDIAN;
383
384 itemCount=0;
385 inStringTop=outStringTop=0;
386
387 matchMode=0;
388 findPrefix=findSuffix=NULL;
389 findPrefixLength=findSuffixLength=0;
390 findNextIndex=-1;
391
392 // create a header for an empty package
393 DataHeader *pHeader;
394 pHeader=(DataHeader *)header;
395 pHeader->dataHeader.magic1=0xda;
396 pHeader->dataHeader.magic2=0x27;
397 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
398 headerLength=(int32_t)(4+sizeof(dataInfo));
399 if(headerLength&0xf) {
400 /* NUL-pad the header to a multiple of 16 */
401 int32_t length=(headerLength+0xf)&~0xf;
402 memset(header+headerLength, 0, length-headerLength);
403 headerLength=length;
404 }
405 pHeader->dataHeader.headerSize=(uint16_t)headerLength;
406 }
407
~Package()408 Package::~Package() {
409 int32_t idx;
410
411 free(inData);
412
413 for(idx=0; idx<itemCount; ++idx) {
414 if(items[idx].isDataOwned) {
415 free(items[idx].data);
416 }
417 }
418 }
419
420 void
readPackage(const char * filename)421 Package::readPackage(const char *filename) {
422 UDataSwapper *ds;
423 const UDataInfo *pInfo;
424 UErrorCode errorCode;
425
426 const uint8_t *inBytes;
427
428 int32_t length, offset, i;
429 int32_t itemLength, typeEnum;
430 char type;
431
432 const UDataOffsetTOCEntry *inEntries;
433
434 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
435
436 /* read the file */
437 inData=readFile(NULL, filename, inLength, type);
438 length=inLength;
439
440 /*
441 * swap the header - even if the swapping itself is a no-op
442 * because it tells us the header length
443 */
444 errorCode=U_ZERO_ERROR;
445 makeTypeProps(type, inCharset, inIsBigEndian);
446 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
447 if(U_FAILURE(errorCode)) {
448 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
449 filename, u_errorName(errorCode));
450 exit(errorCode);
451 }
452
453 ds->printError=printPackageError;
454 ds->printErrorContext=stderr;
455
456 headerLength=sizeof(header);
457 if(length<headerLength) {
458 headerLength=length;
459 }
460 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
461 if(U_FAILURE(errorCode)) {
462 exit(errorCode);
463 }
464
465 /* check data format and format version */
466 pInfo=(const UDataInfo *)((const char *)inData+4);
467 if(!(
468 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
469 pInfo->dataFormat[1]==0x6d &&
470 pInfo->dataFormat[2]==0x6e &&
471 pInfo->dataFormat[3]==0x44 &&
472 pInfo->formatVersion[0]==1
473 )) {
474 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
475 pInfo->dataFormat[0], pInfo->dataFormat[1],
476 pInfo->dataFormat[2], pInfo->dataFormat[3],
477 pInfo->formatVersion[0]);
478 exit(U_UNSUPPORTED_ERROR);
479 }
480 inIsBigEndian=(UBool)pInfo->isBigEndian;
481 inCharset=pInfo->charsetFamily;
482
483 inBytes=(const uint8_t *)inData+headerLength;
484 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
485
486 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
487 length-=headerLength;
488 if(length<4) {
489 /* itemCount does not fit */
490 offset=0x7fffffff;
491 } else {
492 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
493 if(itemCount==0) {
494 offset=4;
495 } else if(length<(4+8*itemCount)) {
496 /* ToC table does not fit */
497 offset=0x7fffffff;
498 } else {
499 /* offset of the last item plus at least 20 bytes for its header */
500 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
501 }
502 }
503 if(length<offset) {
504 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
505 (long)length);
506 exit(U_INDEX_OUTOFBOUNDS_ERROR);
507 }
508 /* do not modify the package length variable until the last item's length is set */
509
510 if(itemCount>0) {
511 char prefix[MAX_PKG_NAME_LENGTH+4];
512 char *s, *inItemStrings;
513 int32_t inPkgNameLength, prefixLength, stringsOffset;
514
515 if(itemCount>MAX_FILE_COUNT) {
516 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
517 exit(U_BUFFER_OVERFLOW_ERROR);
518 }
519
520 /* swap the item name strings */
521 stringsOffset=4+8*itemCount;
522 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
523
524 // don't include padding bytes at the end of the item names
525 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
526 --itemLength;
527 }
528
529 if((inStringTop+itemLength)>STRING_STORE_SIZE) {
530 fprintf(stderr, "icupkg: total length of item name strings too long\n");
531 exit(U_BUFFER_OVERFLOW_ERROR);
532 }
533
534 inItemStrings=inStrings+inStringTop;
535 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
536 if(U_FAILURE(errorCode)) {
537 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
538 exit(U_INVALID_FORMAT_ERROR);
539 }
540 inStringTop+=itemLength;
541
542 // reset the Item entries
543 memset(items, 0, itemCount*sizeof(Item));
544
545 inPkgNameLength=strlen(inPkgName);
546 memcpy(prefix, inPkgName, inPkgNameLength);
547 prefixLength=inPkgNameLength;
548
549 /*
550 * Get the common prefix of the items.
551 * New-style ICU .dat packages use tree separators ('/') between package names,
552 * tree names, and item names,
553 * while old-style ICU .dat packages (before multi-tree support)
554 * use an underscore ('_') between package and item names.
555 */
556 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
557 s=inItemStrings+offset;
558 if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
559 0==memcmp(s, inPkgName, inPkgNameLength) &&
560 s[inPkgNameLength]=='_'
561 ) {
562 // old-style .dat package
563 prefix[prefixLength++]='_';
564 } else {
565 // new-style .dat package
566 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
567 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
568 // then the test in the loop below will fail
569 }
570 prefix[prefixLength]=0;
571
572 /* read the ToC table */
573 for(i=0; i<itemCount; ++i) {
574 // skip the package part of the item name, error if it does not match the actual package name
575 // or if nothing follows the package name
576 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
577 s=inItemStrings+offset;
578 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
579 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
580 s, prefix);
581 exit(U_UNSUPPORTED_ERROR);
582 }
583 items[i].name=s+prefixLength;
584
585 // set the item's data
586 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
587 if(i>0) {
588 items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
589
590 // set the previous item's platform type
591 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
592 if(typeEnum<0 || U_FAILURE(errorCode)) {
593 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
594 exit(U_INVALID_FORMAT_ERROR);
595 }
596 items[i-1].type=makeTypeLetter(typeEnum);
597 }
598 items[i].isDataOwned=FALSE;
599 }
600 // set the last item's length
601 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
602
603 // set the last item's platform type
604 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
605 if(typeEnum<0 || U_FAILURE(errorCode)) {
606 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
607 exit(U_INVALID_FORMAT_ERROR);
608 }
609 items[itemCount-1].type=makeTypeLetter(typeEnum);
610
611 if(type!=U_ICUDATA_TYPE_LETTER[0]) {
612 // sort the item names for the local charset
613 sortItems();
614 }
615 }
616
617 udata_closeSwapper(ds);
618 }
619
620 char
getInType()621 Package::getInType() {
622 return makeTypeLetter(inCharset, inIsBigEndian);
623 }
624
625 void
writePackage(const char * filename,char outType,const char * comment)626 Package::writePackage(const char *filename, char outType, const char *comment) {
627 char prefix[MAX_PKG_NAME_LENGTH+4];
628 UDataOffsetTOCEntry entry;
629 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
630 FILE *file;
631 Item *pItem;
632 char *name;
633 UErrorCode errorCode;
634 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
635 uint8_t outCharset;
636 UBool outIsBigEndian;
637
638 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
639
640 // if there is an explicit comment, then use it, else use what's in the current header
641 if(comment!=NULL) {
642 /* get the header size minus the current comment */
643 DataHeader *pHeader;
644 int32_t length;
645
646 pHeader=(DataHeader *)header;
647 headerLength=4+pHeader->info.size;
648 length=(int32_t)strlen(comment);
649 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
650 fprintf(stderr, "icupkg: comment too long\n");
651 exit(U_BUFFER_OVERFLOW_ERROR);
652 }
653 memcpy(header+headerLength, comment, length+1);
654 headerLength+=length;
655 if(headerLength&0xf) {
656 /* NUL-pad the header to a multiple of 16 */
657 length=(headerLength+0xf)&~0xf;
658 memset(header+headerLength, 0, length-headerLength);
659 headerLength=length;
660 }
661 pHeader->dataHeader.headerSize=(uint16_t)headerLength;
662 }
663
664 makeTypeProps(outType, outCharset, outIsBigEndian);
665
666 // open (TYPE_COUNT-2) swappers
667 // one is a no-op for local type==outType
668 // one type (TYPE_LE) is bogus
669 errorCode=U_ZERO_ERROR;
670 i=makeTypeEnum(outType);
671 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
672 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
673 ds[TYPE_LE]=NULL;
674 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
675 if(U_FAILURE(errorCode)) {
676 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
677 exit(errorCode);
678 }
679 for(i=0; i<TYPE_COUNT; ++i) {
680 if(ds[i]!=NULL) {
681 ds[i]->printError=printPackageError;
682 ds[i]->printErrorContext=stderr;
683 }
684 }
685
686 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
687
688 // create the file and write its contents
689 file=fopen(filename, "wb");
690 if(file==NULL) {
691 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
692 exit(U_FILE_ACCESS_ERROR);
693 }
694
695 // swap and write the header
696 if(dsLocalToOut!=NULL) {
697 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
698 if(U_FAILURE(errorCode)) {
699 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
700 exit(errorCode);
701 }
702 }
703 length=(int32_t)fwrite(header, 1, headerLength, file);
704 if(length!=headerLength) {
705 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
706 exit(U_FILE_ACCESS_ERROR);
707 }
708
709 // prepare and swap the package name with a tree separator
710 // for prepending to item names
711 strcat(prefix, U_TREE_ENTRY_SEP_STRING);
712 prefixLength=(int32_t)strlen(prefix);
713 if(dsLocalToOut!=NULL) {
714 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
715 if(U_FAILURE(errorCode)) {
716 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
717 exit(errorCode);
718 }
719
720 // swap and sort the item names (sorting needs to be done in the output charset)
721 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
722 if(U_FAILURE(errorCode)) {
723 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
724 exit(errorCode);
725 }
726 sortItems();
727 }
728
729 // create the output item names in sorted order, with the package name prepended to each
730 for(i=0; i<itemCount; ++i) {
731 length=(int32_t)strlen(items[i].name);
732 name=allocString(FALSE, length+prefixLength);
733 memcpy(name, prefix, prefixLength);
734 memcpy(name+prefixLength, items[i].name, length+1);
735 items[i].name=name;
736 }
737
738 // calculate offsets for item names and items, pad to 16-align items
739 // align only the first item; each item's length is a multiple of 16
740 basenameOffset=4+8*itemCount;
741 offset=basenameOffset+outStringTop;
742 if((length=(offset&15))!=0) {
743 length=16-length;
744 memset(allocString(FALSE, length-1), 0xaa, length);
745 offset+=length;
746 }
747
748 // write the table of contents
749 // first the itemCount
750 outInt32=itemCount;
751 if(dsLocalToOut!=NULL) {
752 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
753 if(U_FAILURE(errorCode)) {
754 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
755 exit(errorCode);
756 }
757 }
758 length=(int32_t)fwrite(&outInt32, 1, 4, file);
759 if(length!=4) {
760 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
761 exit(U_FILE_ACCESS_ERROR);
762 }
763
764 // then write the item entries (and collect the maxItemLength)
765 maxItemLength=0;
766 for(i=0; i<itemCount; ++i) {
767 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
768 entry.dataOffset=(uint32_t)offset;
769 if(dsLocalToOut!=NULL) {
770 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
771 if(U_FAILURE(errorCode)) {
772 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
773 exit(errorCode);
774 }
775 }
776 length=(int32_t)fwrite(&entry, 1, 8, file);
777 if(length!=8) {
778 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
779 exit(U_FILE_ACCESS_ERROR);
780 }
781
782 length=items[i].length;
783 if(length>maxItemLength) {
784 maxItemLength=length;
785 }
786 offset+=length;
787 }
788
789 // write the item names
790 length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
791 if(length!=outStringTop) {
792 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
793 exit(U_FILE_ACCESS_ERROR);
794 }
795
796 // write the items
797 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
798 int32_t type=makeTypeEnum(pItem->type);
799 if(ds[type]!=NULL) {
800 // swap each item from its platform properties to the desired ones
801 udata_swap(
802 ds[type],
803 pItem->data, pItem->length, pItem->data,
804 &errorCode);
805 if(U_FAILURE(errorCode)) {
806 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
807 exit(errorCode);
808 }
809 }
810 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
811 if(length!=pItem->length) {
812 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
813 exit(U_FILE_ACCESS_ERROR);
814 }
815 }
816
817 if(ferror(file)) {
818 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
819 exit(U_FILE_ACCESS_ERROR);
820 }
821
822 fclose(file);
823 for(i=0; i<TYPE_COUNT; ++i) {
824 udata_closeSwapper(ds[i]);
825 }
826 }
827
828 int32_t
findItem(const char * name,int32_t length) const829 Package::findItem(const char *name, int32_t length) const {
830 int32_t i, start, limit;
831 int result;
832
833 /* do a binary search for the string */
834 start=0;
835 limit=itemCount;
836 while(start<limit) {
837 i=(start+limit)/2;
838 if(length>=0) {
839 result=strncmp(name, items[i].name, length);
840 } else {
841 result=strcmp(name, items[i].name);
842 }
843
844 if(result==0) {
845 /* found */
846 if(length>=0) {
847 /*
848 * if we compared just prefixes, then we may need to back up
849 * to the first item with this prefix
850 */
851 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
852 --i;
853 }
854 }
855 return i;
856 } else if(result<0) {
857 limit=i;
858 } else /* result>0 */ {
859 start=i+1;
860 }
861 }
862
863 return ~start; /* not found, return binary-not of the insertion point */
864 }
865
866 void
findItems(const char * pattern)867 Package::findItems(const char *pattern) {
868 const char *wild;
869
870 if(pattern==NULL || *pattern==0) {
871 findNextIndex=-1;
872 return;
873 }
874
875 findPrefix=pattern;
876 findSuffix=NULL;
877 findSuffixLength=0;
878
879 wild=strchr(pattern, '*');
880 if(wild==NULL) {
881 // no wildcard
882 findPrefixLength=(int32_t)strlen(pattern);
883 } else {
884 // one wildcard
885 findPrefixLength=(int32_t)(wild-pattern);
886 findSuffix=wild+1;
887 findSuffixLength=(int32_t)strlen(findSuffix);
888 if(NULL!=strchr(findSuffix, '*')) {
889 // two or more wildcards
890 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
891 exit(U_PARSE_ERROR);
892 }
893 }
894
895 if(findPrefixLength==0) {
896 findNextIndex=0;
897 } else {
898 findNextIndex=findItem(findPrefix, findPrefixLength);
899 }
900 }
901
902 int32_t
findNextItem()903 Package::findNextItem() {
904 const char *name, *middle, *treeSep;
905 int32_t idx, nameLength, middleLength;
906
907 if(findNextIndex<0) {
908 return -1;
909 }
910
911 while(findNextIndex<itemCount) {
912 idx=findNextIndex++;
913 name=items[idx].name;
914 nameLength=(int32_t)strlen(name);
915 if(nameLength<(findPrefixLength+findSuffixLength)) {
916 // item name too short for prefix & suffix
917 continue;
918 }
919 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
920 // left the range of names with this prefix
921 break;
922 }
923 middle=name+findPrefixLength;
924 middleLength=nameLength-findPrefixLength-findSuffixLength;
925 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
926 // suffix does not match
927 continue;
928 }
929 // prefix & suffix match
930
931 if(matchMode&MATCH_NOSLASH) {
932 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
933 if(treeSep!=NULL && (treeSep-middle)<middleLength) {
934 // the middle (matching the * wildcard) contains a tree separator /
935 continue;
936 }
937 }
938
939 // found a matching item
940 return idx;
941 }
942
943 // no more items
944 findNextIndex=-1;
945 return -1;
946 }
947
948 void
setMatchMode(uint32_t mode)949 Package::setMatchMode(uint32_t mode) {
950 matchMode=mode;
951 }
952
953 void
addItem(const char * name)954 Package::addItem(const char *name) {
955 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
956 }
957
958 void
addItem(const char * name,uint8_t * data,int32_t length,UBool isDataOwned,char type)959 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
960 int32_t idx;
961
962 idx=findItem(name);
963 if(idx<0) {
964 // new item, make space at the insertion point
965 if(itemCount>=MAX_FILE_COUNT) {
966 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
967 exit(U_BUFFER_OVERFLOW_ERROR);
968 }
969 // move the following items down
970 idx=~idx;
971 if(idx<itemCount) {
972 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
973 }
974 ++itemCount;
975
976 // reset this Item entry
977 memset(items+idx, 0, sizeof(Item));
978
979 // copy the item's name
980 items[idx].name=allocString(TRUE, strlen(name));
981 strcpy(items[idx].name, name);
982 pathToTree(items[idx].name);
983 } else {
984 // same-name item found, replace it
985 if(items[idx].isDataOwned) {
986 free(items[idx].data);
987 }
988
989 // keep the item's name since it is the same
990 }
991
992 // set the item's data
993 items[idx].data=data;
994 items[idx].length=length;
995 items[idx].isDataOwned=isDataOwned;
996 items[idx].type=type;
997 }
998
999 void
addFile(const char * filesPath,const char * name)1000 Package::addFile(const char *filesPath, const char *name) {
1001 uint8_t *data;
1002 int32_t length;
1003 char type;
1004
1005 data=readFile(filesPath, name, length, type);
1006 // readFile() exits the tool if it fails
1007 addItem(name, data, length, TRUE, type);
1008 }
1009
1010 void
addItems(const Package & listPkg)1011 Package::addItems(const Package &listPkg) {
1012 const Item *pItem;
1013 int32_t i;
1014
1015 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1016 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1017 }
1018 }
1019
1020 void
removeItem(int32_t idx)1021 Package::removeItem(int32_t idx) {
1022 if(idx>=0) {
1023 // remove the item
1024 if(items[idx].isDataOwned) {
1025 free(items[idx].data);
1026 }
1027
1028 // move the following items up
1029 if((idx+1)<itemCount) {
1030 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1031 }
1032 --itemCount;
1033
1034 if(idx<=findNextIndex) {
1035 --findNextIndex;
1036 }
1037 }
1038 }
1039
1040 void
removeItems(const char * pattern)1041 Package::removeItems(const char *pattern) {
1042 int32_t idx;
1043
1044 findItems(pattern);
1045 while((idx=findNextItem())>=0) {
1046 removeItem(idx);
1047 }
1048 }
1049
1050 void
removeItems(const Package & listPkg)1051 Package::removeItems(const Package &listPkg) {
1052 const Item *pItem;
1053 int32_t i;
1054
1055 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1056 removeItems(pItem->name);
1057 }
1058 }
1059
1060 void
extractItem(const char * filesPath,const char * outName,int32_t idx,char outType)1061 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1062 char filename[1024];
1063 UDataSwapper *ds;
1064 FILE *file;
1065 Item *pItem;
1066 int32_t fileLength;
1067 uint8_t itemCharset, outCharset;
1068 UBool itemIsBigEndian, outIsBigEndian;
1069
1070 if(idx<0 || itemCount<=idx) {
1071 return;
1072 }
1073 pItem=items+idx;
1074
1075 // swap the data to the outType
1076 // outType==0: don't swap
1077 if(outType!=0 && pItem->type!=outType) {
1078 // open the swapper
1079 UErrorCode errorCode=U_ZERO_ERROR;
1080 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1081 makeTypeProps(outType, outCharset, outIsBigEndian);
1082 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1083 if(U_FAILURE(errorCode)) {
1084 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1085 (long)idx, u_errorName(errorCode));
1086 exit(errorCode);
1087 }
1088
1089 ds->printError=printPackageError;
1090 ds->printErrorContext=stderr;
1091
1092 // swap the item from its platform properties to the desired ones
1093 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1094 if(U_FAILURE(errorCode)) {
1095 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1096 exit(errorCode);
1097 }
1098 udata_closeSwapper(ds);
1099 }
1100
1101 // create the file and write its contents
1102 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1103 file=fopen(filename, "wb");
1104 if(file==NULL) {
1105 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1106 exit(U_FILE_ACCESS_ERROR);
1107 }
1108 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1109
1110 if(ferror(file) || fileLength!=pItem->length) {
1111 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1112 exit(U_FILE_ACCESS_ERROR);
1113 }
1114 fclose(file);
1115 }
1116
1117 void
extractItem(const char * filesPath,int32_t idx,char outType)1118 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1119 extractItem(filesPath, items[idx].name, idx, outType);
1120 }
1121
1122 void
extractItems(const char * filesPath,const char * pattern,char outType)1123 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1124 int32_t idx;
1125
1126 findItems(pattern);
1127 while((idx=findNextItem())>=0) {
1128 extractItem(filesPath, idx, outType);
1129 }
1130 }
1131
1132 void
extractItems(const char * filesPath,const Package & listPkg,char outType)1133 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1134 const Item *pItem;
1135 int32_t i;
1136
1137 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1138 extractItems(filesPath, pItem->name, outType);
1139 }
1140 }
1141
1142 int32_t
getItemCount() const1143 Package::getItemCount() const {
1144 return itemCount;
1145 }
1146
1147 const Item *
getItem(int32_t idx) const1148 Package::getItem(int32_t idx) const {
1149 if (0 <= idx && idx < itemCount) {
1150 return &items[idx];
1151 }
1152 return NULL;
1153 }
1154
1155 void
checkDependency(void * context,const char * itemName,const char * targetName)1156 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1157 // check dependency: make sure the target item is in the package
1158 Package *me=(Package *)context;
1159 if(me->findItem(targetName)<0) {
1160 me->isMissingItems=TRUE;
1161 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1162 }
1163 }
1164
1165 UBool
checkDependencies()1166 Package::checkDependencies() {
1167 isMissingItems=FALSE;
1168 enumDependencies(this, checkDependency);
1169 return (UBool)!isMissingItems;
1170 }
1171
1172 void
enumDependencies(void * context,CheckDependency check)1173 Package::enumDependencies(void *context, CheckDependency check) {
1174 int32_t i;
1175
1176 for(i=0; i<itemCount; ++i) {
1177 enumDependencies(items+i, context, check);
1178 }
1179 }
1180
1181 char *
allocString(UBool in,int32_t length)1182 Package::allocString(UBool in, int32_t length) {
1183 char *p;
1184 int32_t top;
1185
1186 if(in) {
1187 top=inStringTop;
1188 p=inStrings+top;
1189 } else {
1190 top=outStringTop;
1191 p=outStrings+top;
1192 }
1193 top+=length+1;
1194
1195 if(top>STRING_STORE_SIZE) {
1196 fprintf(stderr, "icupkg: string storage overflow\n");
1197 exit(U_BUFFER_OVERFLOW_ERROR);
1198 }
1199 if(in) {
1200 inStringTop=top;
1201 } else {
1202 outStringTop=top;
1203 }
1204 return p;
1205 }
1206
1207 void
sortItems()1208 Package::sortItems() {
1209 UErrorCode errorCode=U_ZERO_ERROR;
1210 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1211 if(U_FAILURE(errorCode)) {
1212 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1213 exit(errorCode);
1214 }
1215 }
1216
1217 U_NAMESPACE_END
1218