• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1999-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  package.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2005aug25
14 *   created by: Markus W. Scherer
15 *
16 *   Read, modify, and write ICU .dat data package files.
17 *   This is an integral part of the icupkg tool, moved to the toolutil library
18 *   because parts of tool implementations tend to be later shared by
19 *   other tools.
20 *   Subsumes functionality and implementation code from
21 *   gencmn, decmn, and icuswap tools.
22 */
23 
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
27 #include "cstring.h"
28 #include "uarrsort.h"
29 #include "ucmndata.h"
30 #include "udataswp.h"
31 #include "swapimpl.h"
32 #include "toolutil.h"
33 #include "package.h"
34 #include "cmemory.h"
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 
/*
 * Number of entries by which the item array is enlarged each time it has to grow.
 * NOTE(review): the code that uses this constant is not part of this excerpt.
 */
static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */

// general definitions ----------------------------------------------------- ***

/* Number of elements of a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/*
 * UDataInfo cf. udata.h
 * Template for the header of a package: Package::Package() copies it into
 * the header buffer that describes an empty package.
 */
static const UDataInfo dataInfo={
    (uint16_t)sizeof(UDataInfo),  /* size of this structure */
    0,                            /* presumably the reserved word, see udata.h */

    U_IS_BIG_ENDIAN,              /* endianness of the local platform */
    U_CHARSET_FAMILY,             /* charset family of the local platform */
    (uint8_t)sizeof(UChar),       /* sizeofUChar; getDataInfo() only accepts 2 */
    0,                            /* presumably the reserved byte, see udata.h */

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
61 
62 U_CDECL_BEGIN
63 static void U_CALLCONV
printPackageError(void * context,const char * fmt,va_list args)64 printPackageError(void *context, const char *fmt, va_list args) {
65     vfprintf((FILE *)context, fmt, args);
66 }
67 U_CDECL_END
68 
/*
 * Return x with its two bytes exchanged (16-bit endianness swap).
 */
static uint16_t
readSwapUInt16(uint16_t x) {
    const unsigned int lowByteUp=(unsigned int)x<<8;
    const unsigned int highByteDown=(unsigned int)x>>8;
    return (uint16_t)(lowByteUp|highByteDown);
}
73 
74 // platform types ---------------------------------------------------------- ***
75 
/*
 * Platform type letters, indexed by the type enum values below:
 * 'l' for TYPE_L, 'b' for TYPE_B, '?' for TYPE_LE and 'e' for TYPE_E.
 */
static const char *types="lb?e";

/*
 * Platform type enum. makeTypeEnum(charset, isBigEndian) computes these values
 * as 2*charset+isBigEndian. makeTypeEnum(char) has no letter that maps to
 * TYPE_LE. TYPE_COUNT is the number of enum values.
 */
enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
79 
80 static inline int32_t
makeTypeEnum(uint8_t charset,UBool isBigEndian)81 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
82     return 2*(int32_t)charset+isBigEndian;
83 }
84 
85 static inline int32_t
makeTypeEnum(char type)86 makeTypeEnum(char type) {
87     return
88         type == 'l' ? TYPE_L :
89         type == 'b' ? TYPE_B :
90         type == 'e' ? TYPE_E :
91                -1;
92 }
93 
94 static inline char
makeTypeLetter(uint8_t charset,UBool isBigEndian)95 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
96     return types[makeTypeEnum(charset, isBigEndian)];
97 }
98 
99 static inline char
makeTypeLetter(int32_t typeEnum)100 makeTypeLetter(int32_t typeEnum) {
101     return types[typeEnum];
102 }
103 
104 static void
makeTypeProps(char type,uint8_t & charset,UBool & isBigEndian)105 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
106     int32_t typeEnum=makeTypeEnum(type);
107     charset=(uint8_t)(typeEnum>>1);
108     isBigEndian=(UBool)(typeEnum&1);
109 }
110 
111 U_CFUNC const UDataInfo *
getDataInfo(const uint8_t * data,int32_t length,int32_t & infoLength,int32_t & headerLength,UErrorCode * pErrorCode)112 getDataInfo(const uint8_t *data, int32_t length,
113             int32_t &infoLength, int32_t &headerLength,
114             UErrorCode *pErrorCode) {
115     const DataHeader *pHeader;
116     const UDataInfo *pInfo;
117 
118     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
119         return NULL;
120     }
121     if( data==NULL ||
122         (length>=0 && length<(int32_t)sizeof(DataHeader))
123     ) {
124         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
125         return NULL;
126     }
127 
128     pHeader=(const DataHeader *)data;
129     pInfo=&pHeader->info;
130     if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
131         pHeader->dataHeader.magic1!=0xda ||
132         pHeader->dataHeader.magic2!=0x27 ||
133         pInfo->sizeofUChar!=2
134     ) {
135         *pErrorCode=U_UNSUPPORTED_ERROR;
136         return NULL;
137     }
138 
139     if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
140         headerLength=pHeader->dataHeader.headerSize;
141         infoLength=pInfo->size;
142     } else {
143         headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
144         infoLength=readSwapUInt16(pInfo->size);
145     }
146 
147     if( headerLength<(int32_t)sizeof(DataHeader) ||
148         infoLength<(int32_t)sizeof(UDataInfo) ||
149         headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
150         (length>=0 && length<headerLength)
151     ) {
152         *pErrorCode=U_UNSUPPORTED_ERROR;
153         return NULL;
154     }
155 
156     return pInfo;
157 }
158 
159 static int32_t
getTypeEnumForInputData(const uint8_t * data,int32_t length,UErrorCode * pErrorCode)160 getTypeEnumForInputData(const uint8_t *data, int32_t length,
161                         UErrorCode *pErrorCode) {
162     const UDataInfo *pInfo;
163     int32_t infoLength, headerLength;
164 
165     /* getDataInfo() checks for illegal arguments */
166     pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
167     if(pInfo==NULL) {
168         return -1;
169     }
170 
171     return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
172 }
173 
174 // file handling ----------------------------------------------------------- ***
175 
176 static void
extractPackageName(const char * filename,char pkg[],int32_t capacity)177 extractPackageName(const char *filename,
178                    char pkg[], int32_t capacity) {
179     const char *basename;
180     int32_t len;
181 
182     basename=findBasename(filename);
183     len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
184 
185     if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
186         fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
187                          basename);
188         exit(U_ILLEGAL_ARGUMENT_ERROR);
189     }
190 
191     if(len>=capacity) {
192         fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
193                          basename, (long)capacity);
194         exit(U_ILLEGAL_ARGUMENT_ERROR);
195     }
196 
197     memcpy(pkg, basename, len);
198     pkg[len]=0;
199 }
200 
/*
 * Return the length of an open file in bytes and rewind it to the start.
 *
 * @param f an open, seekable stream
 * @return the file length, or -1 if seeking/telling fails or if the length
 *         does not fit into a positive int32_t
 */
static int32_t
getFileLength(FILE *f) {
    long length;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    length=ftell(f);
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    // ftell() reports failure as -1; larger files cannot be represented in int32_t
    if(length<0 || length>0x7fffffffL) {
        return -1;
    }
    return (int32_t)length;
}
210 
/*
 * Replace, in place, every tree separator and every alternate file separator
 * in s with the normal file separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    char *cursor=s;

    while(*cursor!=0) {
        const char c=*cursor;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *cursor=U_FILE_SEP_CHAR;
        }
        ++cursor;
    }
}
#endif
228 
/*
 * Replace, in place, every file separator and every alternate file separator
 * in s with the tree separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    char *cursor=s;

    while(*cursor!=0) {
        const char c=*cursor;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *cursor=U_TREE_ENTRY_SEP_CHAR;
        }
        ++cursor;
    }
}
#endif
246 
247 /*
248  * Prepend the path (if any) to the name and run the name through treeToName().
249  */
250 static void
makeFullFilename(const char * path,const char * name,char * filename,int32_t capacity)251 makeFullFilename(const char *path, const char *name,
252                  char *filename, int32_t capacity) {
253     char *s;
254 
255     // prepend the path unless NULL or empty
256     if(path!=NULL && path[0]!=0) {
257         if((int32_t)(strlen(path)+1)>=capacity) {
258             fprintf(stderr, "pathname too long: \"%s\"\n", path);
259             exit(U_BUFFER_OVERFLOW_ERROR);
260         }
261         strcpy(filename, path);
262 
263         // make sure the path ends with a file separator
264         s=strchr(filename, 0);
265         if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
266             *s++=U_FILE_SEP_CHAR;
267         }
268     } else {
269         s=filename;
270     }
271 
272     // turn the name into a filename, turn tree separators into file separators
273     if((int32_t)((s-filename)+strlen(name))>=capacity) {
274         fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
275         exit(U_BUFFER_OVERFLOW_ERROR);
276     }
277     strcpy(s, name);
278     treeToPath(s);
279 }
280 
281 static void
makeFullFilenameAndDirs(const char * path,const char * name,char * filename,int32_t capacity)282 makeFullFilenameAndDirs(const char *path, const char *name,
283                         char *filename, int32_t capacity) {
284     char *sep;
285     UErrorCode errorCode;
286 
287     makeFullFilename(path, name, filename, capacity);
288 
289     // make tree directories
290     errorCode=U_ZERO_ERROR;
291     sep=strchr(filename, 0)-strlen(name);
292     while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
293         if(sep!=filename) {
294             *sep=0;                 // truncate temporarily
295             uprv_mkdir(filename, &errorCode);
296             if(U_FAILURE(errorCode)) {
297                 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
298                 exit(U_FILE_ACCESS_ERROR);
299             }
300         }
301         *sep++=U_FILE_SEP_CHAR; // restore file separator character
302     }
303 }
304 
305 static uint8_t *
readFile(const char * path,const char * name,int32_t & length,char & type)306 readFile(const char *path, const char *name, int32_t &length, char &type) {
307     char filename[1024];
308     FILE *file;
309     uint8_t *data;
310     UErrorCode errorCode;
311     int32_t fileLength, typeEnum;
312 
313     makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
314 
315     /* open the input file, get its length, allocate memory for it, read the file */
316     file=fopen(filename, "rb");
317     if(file==NULL) {
318         fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
319         exit(U_FILE_ACCESS_ERROR);
320     }
321 
322     /* get the file length */
323     fileLength=getFileLength(file);
324     if(ferror(file) || fileLength<=0) {
325         fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
326         fclose(file);
327         exit(U_FILE_ACCESS_ERROR);
328     }
329 
330     /* allocate the buffer, pad to multiple of 16 */
331     length=(fileLength+0xf)&~0xf;
332     data=(uint8_t *)uprv_malloc(length);
333     if(data==NULL) {
334         fclose(file);
335         fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
336         exit(U_MEMORY_ALLOCATION_ERROR);
337     }
338 
339     /* read the file */
340     if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
341         fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
342         fclose(file);
343         free(data);
344         exit(U_FILE_ACCESS_ERROR);
345     }
346 
347     /* pad the file to a multiple of 16 using the usual padding byte */
348     if(fileLength<length) {
349         memset(data+fileLength, 0xaa, length-fileLength);
350     }
351 
352     fclose(file);
353 
354     // minimum check for ICU-format data
355     errorCode=U_ZERO_ERROR;
356     typeEnum=getTypeEnumForInputData(data, length, &errorCode);
357     if(typeEnum<0 || U_FAILURE(errorCode)) {
358         fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
359         free(data);
360 #if !UCONFIG_NO_LEGACY_CONVERSION
361         exit(U_INVALID_FORMAT_ERROR);
362 #else
363         fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
364         exit(0);
365 #endif
366     }
367     type=makeTypeLetter(typeEnum);
368 
369     return data;
370 }
371 
372 // .dat package file representation ---------------------------------------- ***
373 
374 U_CDECL_BEGIN
375 
376 static int32_t U_CALLCONV
compareItems(const void *,const void * left,const void * right)377 compareItems(const void * /*context*/, const void *left, const void *right) {
378     U_NAMESPACE_USE
379 
380     return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
381 }
382 
383 U_CDECL_END
384 
385 U_NAMESPACE_BEGIN
386 
Package()387 Package::Package() {
388     inPkgName[0]=0;
389     inData=NULL;
390     inLength=0;
391     inCharset=U_CHARSET_FAMILY;
392     inIsBigEndian=U_IS_BIG_ENDIAN;
393 
394     itemCount=0;
395     itemMax=0;
396     items=NULL;
397 
398     inStringTop=outStringTop=0;
399 
400     matchMode=0;
401     findPrefix=findSuffix=NULL;
402     findPrefixLength=findSuffixLength=0;
403     findNextIndex=-1;
404 
405     // create a header for an empty package
406     DataHeader *pHeader;
407     pHeader=(DataHeader *)header;
408     pHeader->dataHeader.magic1=0xda;
409     pHeader->dataHeader.magic2=0x27;
410     memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
411     headerLength=(int32_t)(4+sizeof(dataInfo));
412     if(headerLength&0xf) {
413         /* NUL-pad the header to a multiple of 16 */
414         int32_t length=(headerLength+0xf)&~0xf;
415         memset(header+headerLength, 0, length-headerLength);
416         headerLength=length;
417     }
418     pHeader->dataHeader.headerSize=(uint16_t)headerLength;
419 }
420 
~Package()421 Package::~Package() {
422     int32_t idx;
423 
424     free(inData);
425 
426     for(idx=0; idx<itemCount; ++idx) {
427         if(items[idx].isDataOwned) {
428             free(items[idx].data);
429         }
430     }
431 
432     uprv_free((void*)items);
433 }
434 
435 void
readPackage(const char * filename)436 Package::readPackage(const char *filename) {
437     UDataSwapper *ds;
438     const UDataInfo *pInfo;
439     UErrorCode errorCode;
440 
441     const uint8_t *inBytes;
442 
443     int32_t length, offset, i;
444     int32_t itemLength, typeEnum;
445     char type;
446 
447     const UDataOffsetTOCEntry *inEntries;
448 
449     extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
450 
451     /* read the file */
452     inData=readFile(NULL, filename, inLength, type);
453     length=inLength;
454 
455     /*
456      * swap the header - even if the swapping itself is a no-op
457      * because it tells us the header length
458      */
459     errorCode=U_ZERO_ERROR;
460     makeTypeProps(type, inCharset, inIsBigEndian);
461     ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
462     if(U_FAILURE(errorCode)) {
463         fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
464                 filename, u_errorName(errorCode));
465         exit(errorCode);
466     }
467 
468     ds->printError=printPackageError;
469     ds->printErrorContext=stderr;
470 
471     headerLength=sizeof(header);
472     if(length<headerLength) {
473         headerLength=length;
474     }
475     headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
476     if(U_FAILURE(errorCode)) {
477         exit(errorCode);
478     }
479 
480     /* check data format and format version */
481     pInfo=(const UDataInfo *)((const char *)inData+4);
482     if(!(
483         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
484         pInfo->dataFormat[1]==0x6d &&
485         pInfo->dataFormat[2]==0x6e &&
486         pInfo->dataFormat[3]==0x44 &&
487         pInfo->formatVersion[0]==1
488     )) {
489         fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
490                 pInfo->dataFormat[0], pInfo->dataFormat[1],
491                 pInfo->dataFormat[2], pInfo->dataFormat[3],
492                 pInfo->formatVersion[0]);
493         exit(U_UNSUPPORTED_ERROR);
494     }
495     inIsBigEndian=(UBool)pInfo->isBigEndian;
496     inCharset=pInfo->charsetFamily;
497 
498     inBytes=(const uint8_t *)inData+headerLength;
499     inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
500 
501     /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
502     length-=headerLength;
503     if(length<4) {
504         /* itemCount does not fit */
505         offset=0x7fffffff;
506     } else {
507         itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
508         setItemCapacity(itemCount); /* resize so there's space */
509         if(itemCount==0) {
510             offset=4;
511         } else if(length<(4+8*itemCount)) {
512             /* ToC table does not fit */
513             offset=0x7fffffff;
514         } else {
515             /* offset of the last item plus at least 20 bytes for its header */
516             offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
517         }
518     }
519     if(length<offset) {
520         fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
521                         (long)length);
522         exit(U_INDEX_OUTOFBOUNDS_ERROR);
523     }
524     /* do not modify the package length variable until the last item's length is set */
525 
526     if(itemCount>0) {
527         char prefix[MAX_PKG_NAME_LENGTH+4];
528         char *s, *inItemStrings;
529         int32_t inPkgNameLength, prefixLength, stringsOffset;
530 
531         if(itemCount>itemMax) {
532             fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
533             exit(U_BUFFER_OVERFLOW_ERROR);
534         }
535 
536         /* swap the item name strings */
537         stringsOffset=4+8*itemCount;
538         itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
539 
540         // don't include padding bytes at the end of the item names
541         while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
542             --itemLength;
543         }
544 
545         if((inStringTop+itemLength)>STRING_STORE_SIZE) {
546             fprintf(stderr, "icupkg: total length of item name strings too long\n");
547             exit(U_BUFFER_OVERFLOW_ERROR);
548         }
549 
550         inItemStrings=inStrings+inStringTop;
551         ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
552         if(U_FAILURE(errorCode)) {
553             fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
554             exit(U_INVALID_FORMAT_ERROR);
555         }
556         inStringTop+=itemLength;
557 
558         // reset the Item entries
559         memset(items, 0, itemCount*sizeof(Item));
560 
561         inPkgNameLength=strlen(inPkgName);
562         memcpy(prefix, inPkgName, inPkgNameLength);
563         prefixLength=inPkgNameLength;
564 
565         /*
566          * Get the common prefix of the items.
567          * New-style ICU .dat packages use tree separators ('/') between package names,
568          * tree names, and item names,
569          * while old-style ICU .dat packages (before multi-tree support)
570          * use an underscore ('_') between package and item names.
571          */
572         offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
573         s=inItemStrings+offset;
574         if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
575             0==memcmp(s, inPkgName, inPkgNameLength) &&
576             s[inPkgNameLength]=='_'
577         ) {
578             // old-style .dat package
579             prefix[prefixLength++]='_';
580         } else {
581             // new-style .dat package
582             prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
583             // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
584             // then the test in the loop below will fail
585         }
586         prefix[prefixLength]=0;
587 
588         /* read the ToC table */
589         for(i=0; i<itemCount; ++i) {
590             // skip the package part of the item name, error if it does not match the actual package name
591             // or if nothing follows the package name
592             offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
593             s=inItemStrings+offset;
594             if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
595                 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
596                         s, prefix);
597                 exit(U_UNSUPPORTED_ERROR);
598             }
599             items[i].name=s+prefixLength;
600 
601             // set the item's data
602             items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
603             if(i>0) {
604                 items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
605 
606                 // set the previous item's platform type
607                 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
608                 if(typeEnum<0 || U_FAILURE(errorCode)) {
609                     fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
610                     exit(U_INVALID_FORMAT_ERROR);
611                 }
612                 items[i-1].type=makeTypeLetter(typeEnum);
613             }
614             items[i].isDataOwned=FALSE;
615         }
616         // set the last item's length
617         items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
618 
619         // set the last item's platform type
620         typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
621         if(typeEnum<0 || U_FAILURE(errorCode)) {
622             fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
623             exit(U_INVALID_FORMAT_ERROR);
624         }
625         items[itemCount-1].type=makeTypeLetter(typeEnum);
626 
627         if(type!=U_ICUDATA_TYPE_LETTER[0]) {
628             // sort the item names for the local charset
629             sortItems();
630         }
631     }
632 
633     udata_closeSwapper(ds);
634 }
635 
636 char
getInType()637 Package::getInType() {
638     return makeTypeLetter(inCharset, inIsBigEndian);
639 }
640 
641 void
writePackage(const char * filename,char outType,const char * comment)642 Package::writePackage(const char *filename, char outType, const char *comment) {
643     char prefix[MAX_PKG_NAME_LENGTH+4];
644     UDataOffsetTOCEntry entry;
645     UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
646     FILE *file;
647     Item *pItem;
648     char *name;
649     UErrorCode errorCode;
650     int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
651     uint8_t outCharset;
652     UBool outIsBigEndian;
653 
654     extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
655 
656     // if there is an explicit comment, then use it, else use what's in the current header
657     if(comment!=NULL) {
658         /* get the header size minus the current comment */
659         DataHeader *pHeader;
660         int32_t length;
661 
662         pHeader=(DataHeader *)header;
663         headerLength=4+pHeader->info.size;
664         length=(int32_t)strlen(comment);
665         if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
666             fprintf(stderr, "icupkg: comment too long\n");
667             exit(U_BUFFER_OVERFLOW_ERROR);
668         }
669         memcpy(header+headerLength, comment, length+1);
670         headerLength+=length;
671         if(headerLength&0xf) {
672             /* NUL-pad the header to a multiple of 16 */
673             length=(headerLength+0xf)&~0xf;
674             memset(header+headerLength, 0, length-headerLength);
675             headerLength=length;
676         }
677         pHeader->dataHeader.headerSize=(uint16_t)headerLength;
678     }
679 
680     makeTypeProps(outType, outCharset, outIsBigEndian);
681 
682     // open (TYPE_COUNT-2) swappers
683     // one is a no-op for local type==outType
684     // one type (TYPE_LE) is bogus
685     errorCode=U_ZERO_ERROR;
686     i=makeTypeEnum(outType);
687     ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
688     ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
689     ds[TYPE_LE]=NULL;
690     ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
691     if(U_FAILURE(errorCode)) {
692         fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
693         exit(errorCode);
694     }
695     for(i=0; i<TYPE_COUNT; ++i) {
696         if(ds[i]!=NULL) {
697             ds[i]->printError=printPackageError;
698             ds[i]->printErrorContext=stderr;
699         }
700     }
701 
702     dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
703 
704     // create the file and write its contents
705     file=fopen(filename, "wb");
706     if(file==NULL) {
707         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
708         exit(U_FILE_ACCESS_ERROR);
709     }
710 
711     // swap and write the header
712     if(dsLocalToOut!=NULL) {
713         udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
714         if(U_FAILURE(errorCode)) {
715             fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
716             exit(errorCode);
717         }
718     }
719     length=(int32_t)fwrite(header, 1, headerLength, file);
720     if(length!=headerLength) {
721         fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
722         exit(U_FILE_ACCESS_ERROR);
723     }
724 
725     // prepare and swap the package name with a tree separator
726     // for prepending to item names
727     strcat(prefix, U_TREE_ENTRY_SEP_STRING);
728     prefixLength=(int32_t)strlen(prefix);
729     if(dsLocalToOut!=NULL) {
730         dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
731         if(U_FAILURE(errorCode)) {
732             fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
733             exit(errorCode);
734         }
735 
736         // swap and sort the item names (sorting needs to be done in the output charset)
737         dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
738         if(U_FAILURE(errorCode)) {
739             fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
740             exit(errorCode);
741         }
742         sortItems();
743     }
744 
745     // create the output item names in sorted order, with the package name prepended to each
746     for(i=0; i<itemCount; ++i) {
747         length=(int32_t)strlen(items[i].name);
748         name=allocString(FALSE, length+prefixLength);
749         memcpy(name, prefix, prefixLength);
750         memcpy(name+prefixLength, items[i].name, length+1);
751         items[i].name=name;
752     }
753 
754     // calculate offsets for item names and items, pad to 16-align items
755     // align only the first item; each item's length is a multiple of 16
756     basenameOffset=4+8*itemCount;
757     offset=basenameOffset+outStringTop;
758     if((length=(offset&15))!=0) {
759         length=16-length;
760         memset(allocString(FALSE, length-1), 0xaa, length);
761         offset+=length;
762     }
763 
764     // write the table of contents
765     // first the itemCount
766     outInt32=itemCount;
767     if(dsLocalToOut!=NULL) {
768         dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
769         if(U_FAILURE(errorCode)) {
770             fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
771             exit(errorCode);
772         }
773     }
774     length=(int32_t)fwrite(&outInt32, 1, 4, file);
775     if(length!=4) {
776         fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
777         exit(U_FILE_ACCESS_ERROR);
778     }
779 
780     // then write the item entries (and collect the maxItemLength)
781     maxItemLength=0;
782     for(i=0; i<itemCount; ++i) {
783         entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
784         entry.dataOffset=(uint32_t)offset;
785         if(dsLocalToOut!=NULL) {
786             dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
787             if(U_FAILURE(errorCode)) {
788                 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
789                 exit(errorCode);
790             }
791         }
792         length=(int32_t)fwrite(&entry, 1, 8, file);
793         if(length!=8) {
794             fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
795             exit(U_FILE_ACCESS_ERROR);
796         }
797 
798         length=items[i].length;
799         if(length>maxItemLength) {
800             maxItemLength=length;
801         }
802         offset+=length;
803     }
804 
805     // write the item names
806     length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
807     if(length!=outStringTop) {
808         fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
809         exit(U_FILE_ACCESS_ERROR);
810     }
811 
812     // write the items
813     for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
814         int32_t type=makeTypeEnum(pItem->type);
815         if(ds[type]!=NULL) {
816             // swap each item from its platform properties to the desired ones
817             udata_swap(
818                 ds[type],
819                 pItem->data, pItem->length, pItem->data,
820                 &errorCode);
821             if(U_FAILURE(errorCode)) {
822                 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
823                 exit(errorCode);
824             }
825         }
826         length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
827         if(length!=pItem->length) {
828             fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
829             exit(U_FILE_ACCESS_ERROR);
830         }
831     }
832 
833     if(ferror(file)) {
834         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
835         exit(U_FILE_ACCESS_ERROR);
836     }
837 
838     fclose(file);
839     for(i=0; i<TYPE_COUNT; ++i) {
840         udata_closeSwapper(ds[i]);
841     }
842 }
843 
844 int32_t
findItem(const char * name,int32_t length) const845 Package::findItem(const char *name, int32_t length) const {
846     int32_t i, start, limit;
847     int result;
848 
849     /* do a binary search for the string */
850     start=0;
851     limit=itemCount;
852     while(start<limit) {
853         i=(start+limit)/2;
854         if(length>=0) {
855             result=strncmp(name, items[i].name, length);
856         } else {
857             result=strcmp(name, items[i].name);
858         }
859 
860         if(result==0) {
861             /* found */
862             if(length>=0) {
863                 /*
864                  * if we compared just prefixes, then we may need to back up
865                  * to the first item with this prefix
866                  */
867                 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
868                     --i;
869                 }
870             }
871             return i;
872         } else if(result<0) {
873             limit=i;
874         } else /* result>0 */ {
875             start=i+1;
876         }
877     }
878 
879     return ~start; /* not found, return binary-not of the insertion point */
880 }
881 
882 void
findItems(const char * pattern)883 Package::findItems(const char *pattern) {
884     const char *wild;
885 
886     if(pattern==NULL || *pattern==0) {
887         findNextIndex=-1;
888         return;
889     }
890 
891     findPrefix=pattern;
892     findSuffix=NULL;
893     findSuffixLength=0;
894 
895     wild=strchr(pattern, '*');
896     if(wild==NULL) {
897         // no wildcard
898         findPrefixLength=(int32_t)strlen(pattern);
899     } else {
900         // one wildcard
901         findPrefixLength=(int32_t)(wild-pattern);
902         findSuffix=wild+1;
903         findSuffixLength=(int32_t)strlen(findSuffix);
904         if(NULL!=strchr(findSuffix, '*')) {
905             // two or more wildcards
906             fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
907             exit(U_PARSE_ERROR);
908         }
909     }
910 
911     if(findPrefixLength==0) {
912         findNextIndex=0;
913     } else {
914         findNextIndex=findItem(findPrefix, findPrefixLength);
915     }
916 }
917 
918 int32_t
findNextItem()919 Package::findNextItem() {
920     const char *name, *middle, *treeSep;
921     int32_t idx, nameLength, middleLength;
922 
923     if(findNextIndex<0) {
924         return -1;
925     }
926 
927     while(findNextIndex<itemCount) {
928         idx=findNextIndex++;
929         name=items[idx].name;
930         nameLength=(int32_t)strlen(name);
931         if(nameLength<(findPrefixLength+findSuffixLength)) {
932             // item name too short for prefix & suffix
933             continue;
934         }
935         if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
936             // left the range of names with this prefix
937             break;
938         }
939         middle=name+findPrefixLength;
940         middleLength=nameLength-findPrefixLength-findSuffixLength;
941         if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
942             // suffix does not match
943             continue;
944         }
945         // prefix & suffix match
946 
947         if(matchMode&MATCH_NOSLASH) {
948             treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
949             if(treeSep!=NULL && (treeSep-middle)<middleLength) {
950                 // the middle (matching the * wildcard) contains a tree separator /
951                 continue;
952             }
953         }
954 
955         // found a matching item
956         return idx;
957     }
958 
959     // no more items
960     findNextIndex=-1;
961     return -1;
962 }
963 
964 void
setMatchMode(uint32_t mode)965 Package::setMatchMode(uint32_t mode) {
966     matchMode=mode;
967 }
968 
969 void
addItem(const char * name)970 Package::addItem(const char *name) {
971     addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
972 }
973 
974 void
addItem(const char * name,uint8_t * data,int32_t length,UBool isDataOwned,char type)975 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
976     int32_t idx;
977 
978     idx=findItem(name);
979     if(idx<0) {
980         // new item, make space at the insertion point
981         ensureItemCapacity();
982         // move the following items down
983         idx=~idx;
984         if(idx<itemCount) {
985             memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
986         }
987         ++itemCount;
988 
989         // reset this Item entry
990         memset(items+idx, 0, sizeof(Item));
991 
992         // copy the item's name
993         items[idx].name=allocString(TRUE, strlen(name));
994         strcpy(items[idx].name, name);
995         pathToTree(items[idx].name);
996     } else {
997         // same-name item found, replace it
998         if(items[idx].isDataOwned) {
999             free(items[idx].data);
1000         }
1001 
1002         // keep the item's name since it is the same
1003     }
1004 
1005     // set the item's data
1006     items[idx].data=data;
1007     items[idx].length=length;
1008     items[idx].isDataOwned=isDataOwned;
1009     items[idx].type=type;
1010 }
1011 
1012 void
addFile(const char * filesPath,const char * name)1013 Package::addFile(const char *filesPath, const char *name) {
1014     uint8_t *data;
1015     int32_t length;
1016     char type;
1017 
1018     data=readFile(filesPath, name, length, type);
1019     // readFile() exits the tool if it fails
1020     addItem(name, data, length, TRUE, type);
1021 }
1022 
1023 void
addItems(const Package & listPkg)1024 Package::addItems(const Package &listPkg) {
1025     const Item *pItem;
1026     int32_t i;
1027 
1028     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1029         addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1030     }
1031 }
1032 
1033 void
removeItem(int32_t idx)1034 Package::removeItem(int32_t idx) {
1035     if(idx>=0) {
1036         // remove the item
1037         if(items[idx].isDataOwned) {
1038             free(items[idx].data);
1039         }
1040 
1041         // move the following items up
1042         if((idx+1)<itemCount) {
1043             memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1044         }
1045         --itemCount;
1046 
1047         if(idx<=findNextIndex) {
1048             --findNextIndex;
1049         }
1050     }
1051 }
1052 
1053 void
removeItems(const char * pattern)1054 Package::removeItems(const char *pattern) {
1055     int32_t idx;
1056 
1057     findItems(pattern);
1058     while((idx=findNextItem())>=0) {
1059         removeItem(idx);
1060     }
1061 }
1062 
1063 void
removeItems(const Package & listPkg)1064 Package::removeItems(const Package &listPkg) {
1065     const Item *pItem;
1066     int32_t i;
1067 
1068     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1069         removeItems(pItem->name);
1070     }
1071 }
1072 
1073 void
extractItem(const char * filesPath,const char * outName,int32_t idx,char outType)1074 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1075     char filename[1024];
1076     UDataSwapper *ds;
1077     FILE *file;
1078     Item *pItem;
1079     int32_t fileLength;
1080     uint8_t itemCharset, outCharset;
1081     UBool itemIsBigEndian, outIsBigEndian;
1082 
1083     if(idx<0 || itemCount<=idx) {
1084         return;
1085     }
1086     pItem=items+idx;
1087 
1088     // swap the data to the outType
1089     // outType==0: don't swap
1090     if(outType!=0 && pItem->type!=outType) {
1091         // open the swapper
1092         UErrorCode errorCode=U_ZERO_ERROR;
1093         makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1094         makeTypeProps(outType, outCharset, outIsBigEndian);
1095         ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1096         if(U_FAILURE(errorCode)) {
1097             fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1098                     (long)idx, u_errorName(errorCode));
1099             exit(errorCode);
1100         }
1101 
1102         ds->printError=printPackageError;
1103         ds->printErrorContext=stderr;
1104 
1105         // swap the item from its platform properties to the desired ones
1106         udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1107         if(U_FAILURE(errorCode)) {
1108             fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1109             exit(errorCode);
1110         }
1111         udata_closeSwapper(ds);
1112         pItem->type=outType;
1113     }
1114 
1115     // create the file and write its contents
1116     makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1117     file=fopen(filename, "wb");
1118     if(file==NULL) {
1119         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1120         exit(U_FILE_ACCESS_ERROR);
1121     }
1122     fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1123 
1124     if(ferror(file) || fileLength!=pItem->length) {
1125         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1126         exit(U_FILE_ACCESS_ERROR);
1127     }
1128     fclose(file);
1129 }
1130 
1131 void
extractItem(const char * filesPath,int32_t idx,char outType)1132 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1133     extractItem(filesPath, items[idx].name, idx, outType);
1134 }
1135 
1136 void
extractItems(const char * filesPath,const char * pattern,char outType)1137 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1138     int32_t idx;
1139 
1140     findItems(pattern);
1141     while((idx=findNextItem())>=0) {
1142         extractItem(filesPath, idx, outType);
1143     }
1144 }
1145 
1146 void
extractItems(const char * filesPath,const Package & listPkg,char outType)1147 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1148     const Item *pItem;
1149     int32_t i;
1150 
1151     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1152         extractItems(filesPath, pItem->name, outType);
1153     }
1154 }
1155 
1156 int32_t
getItemCount() const1157 Package::getItemCount() const {
1158     return itemCount;
1159 }
1160 
1161 const Item *
getItem(int32_t idx) const1162 Package::getItem(int32_t idx) const {
1163     if (0 <= idx && idx < itemCount) {
1164         return &items[idx];
1165     }
1166     return NULL;
1167 }
1168 
1169 void
checkDependency(void * context,const char * itemName,const char * targetName)1170 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1171     // check dependency: make sure the target item is in the package
1172     Package *me=(Package *)context;
1173     if(me->findItem(targetName)<0) {
1174         me->isMissingItems=TRUE;
1175         fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1176     }
1177 }
1178 
1179 UBool
checkDependencies()1180 Package::checkDependencies() {
1181     isMissingItems=FALSE;
1182     enumDependencies(this, checkDependency);
1183     return (UBool)!isMissingItems;
1184 }
1185 
1186 void
enumDependencies(void * context,CheckDependency check)1187 Package::enumDependencies(void *context, CheckDependency check) {
1188     int32_t i;
1189 
1190     for(i=0; i<itemCount; ++i) {
1191         enumDependencies(items+i, context, check);
1192     }
1193 }
1194 
1195 char *
allocString(UBool in,int32_t length)1196 Package::allocString(UBool in, int32_t length) {
1197     char *p;
1198     int32_t top;
1199 
1200     if(in) {
1201         top=inStringTop;
1202         p=inStrings+top;
1203     } else {
1204         top=outStringTop;
1205         p=outStrings+top;
1206     }
1207     top+=length+1;
1208 
1209     if(top>STRING_STORE_SIZE) {
1210         fprintf(stderr, "icupkg: string storage overflow\n");
1211         exit(U_BUFFER_OVERFLOW_ERROR);
1212     }
1213     if(in) {
1214         inStringTop=top;
1215     } else {
1216         outStringTop=top;
1217     }
1218     return p;
1219 }
1220 
1221 void
sortItems()1222 Package::sortItems() {
1223     UErrorCode errorCode=U_ZERO_ERROR;
1224     uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1225     if(U_FAILURE(errorCode)) {
1226         fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1227         exit(errorCode);
1228     }
1229 }
1230 
setItemCapacity(int32_t max)1231 void Package::setItemCapacity(int32_t max)
1232 {
1233   if(max<=itemMax) {
1234     return;
1235   }
1236   Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
1237   Item *oldItems = items;
1238   if(newItems == NULL) {
1239     fprintf(stderr, "icupkg: Out of memory trying to allocate %ld bytes for %d items\n", max*sizeof(items[0]), max);
1240     exit(U_MEMORY_ALLOCATION_ERROR);
1241   }
1242   if(items && itemCount>0) {
1243     uprv_memcpy(newItems, items, itemCount*sizeof(items[0]));
1244   }
1245   itemMax = max;
1246   items = newItems;
1247   uprv_free(oldItems);
1248 }
1249 
ensureItemCapacity()1250 void Package::ensureItemCapacity()
1251 {
1252   if((itemCount+1)>itemMax) {
1253     setItemCapacity(itemCount+kItemsChunk);
1254   }
1255 }
1256 
1257 U_NAMESPACE_END
1258