• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*
*   Copyright (C) 1999-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  package.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005aug25
*   created by: Markus W. Scherer
*
*   Read, modify, and write ICU .dat data package files.
*   This is an integral part of the icupkg tool, moved to the toolutil library
*   because parts of tool implementations tend to be later shared by
*   other tools.
*   Subsumes functionality and implementation code from
*   gencmn, decmn, and icuswap tools.
*/
23 
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
27 #include "cstring.h"
28 #include "uarrsort.h"
29 #include "ucmndata.h"
30 #include "udataswp.h"
31 #include "swapimpl.h"
32 #include "toolutil.h"
33 #include "package.h"
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
39 // general definitions ----------------------------------------------------- ***
40 
41 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
42 
43 /* UDataInfo cf. udata.h */
44 static const UDataInfo dataInfo={
45     (uint16_t)sizeof(UDataInfo),
46     0,
47 
48     U_IS_BIG_ENDIAN,
49     U_CHARSET_FAMILY,
50     (uint8_t)sizeof(UChar),
51     0,
52 
53     {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
54     {1, 0, 0, 0},                 /* formatVersion */
55     {3, 0, 0, 0}                  /* dataVersion */
56 };
57 
58 U_CDECL_BEGIN
59 static void U_CALLCONV
printPackageError(void * context,const char * fmt,va_list args)60 printPackageError(void *context, const char *fmt, va_list args) {
61     vfprintf((FILE *)context, fmt, args);
62 }
63 U_CDECL_END
64 
/*
 * Return x with its two bytes exchanged.
 * Used for reading 16-bit header fields of opposite-endian data.
 */
static uint16_t
readSwapUInt16(uint16_t x) {
    /* the cast truncates the bits that x<<8 pushes above bit 15 after integer promotion */
    return (uint16_t)((x<<8)|(x>>8));
}
69 
// platform types ---------------------------------------------------------- ***

/*
 * Platform type letters, indexed by the TYPE_ constants below.
 * A type enum value is 2*charset+isBigEndian (see makeTypeEnum()):
 * 'l' and 'b' are the two endiannesses for charset 0, 'e' is big-endian for
 * charset 1, and '?' is a placeholder for the unsupported TYPE_LE combination.
 */
static const char *types="lb?e";

enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
75 
76 static inline int32_t
makeTypeEnum(uint8_t charset,UBool isBigEndian)77 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
78     return 2*(int32_t)charset+isBigEndian;
79 }
80 
81 static inline int32_t
makeTypeEnum(char type)82 makeTypeEnum(char type) {
83     return
84         type == 'l' ? TYPE_L :
85         type == 'b' ? TYPE_B :
86         type == 'e' ? TYPE_E :
87                -1;
88 }
89 
90 static inline char
makeTypeLetter(uint8_t charset,UBool isBigEndian)91 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
92     return types[makeTypeEnum(charset, isBigEndian)];
93 }
94 
95 static inline char
makeTypeLetter(int32_t typeEnum)96 makeTypeLetter(int32_t typeEnum) {
97     return types[typeEnum];
98 }
99 
100 static void
makeTypeProps(char type,uint8_t & charset,UBool & isBigEndian)101 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
102     int32_t typeEnum=makeTypeEnum(type);
103     charset=(uint8_t)(typeEnum>>1);
104     isBigEndian=(UBool)(typeEnum&1);
105 }
106 
107 U_CFUNC const UDataInfo *
getDataInfo(const uint8_t * data,int32_t length,int32_t & infoLength,int32_t & headerLength,UErrorCode * pErrorCode)108 getDataInfo(const uint8_t *data, int32_t length,
109             int32_t &infoLength, int32_t &headerLength,
110             UErrorCode *pErrorCode) {
111     const DataHeader *pHeader;
112     const UDataInfo *pInfo;
113 
114     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
115         return NULL;
116     }
117     if( data==NULL ||
118         (length>=0 && length<(int32_t)sizeof(DataHeader))
119     ) {
120         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
121         return NULL;
122     }
123 
124     pHeader=(const DataHeader *)data;
125     pInfo=&pHeader->info;
126     if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
127         pHeader->dataHeader.magic1!=0xda ||
128         pHeader->dataHeader.magic2!=0x27 ||
129         pInfo->sizeofUChar!=2
130     ) {
131         *pErrorCode=U_UNSUPPORTED_ERROR;
132         return NULL;
133     }
134 
135     if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
136         headerLength=pHeader->dataHeader.headerSize;
137         infoLength=pInfo->size;
138     } else {
139         headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
140         infoLength=readSwapUInt16(pInfo->size);
141     }
142 
143     if( headerLength<(int32_t)sizeof(DataHeader) ||
144         infoLength<(int32_t)sizeof(UDataInfo) ||
145         headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
146         (length>=0 && length<headerLength)
147     ) {
148         *pErrorCode=U_UNSUPPORTED_ERROR;
149         return NULL;
150     }
151 
152     return pInfo;
153 }
154 
155 static int32_t
getTypeEnumForInputData(const uint8_t * data,int32_t length,UErrorCode * pErrorCode)156 getTypeEnumForInputData(const uint8_t *data, int32_t length,
157                         UErrorCode *pErrorCode) {
158     const UDataInfo *pInfo;
159     int32_t infoLength, headerLength;
160 
161     /* getDataInfo() checks for illegal arguments */
162     pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
163     if(pInfo==NULL) {
164         return -1;
165     }
166 
167     return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
168 }
169 
170 // file handling ----------------------------------------------------------- ***
171 
172 static void
extractPackageName(const char * filename,char pkg[],int32_t capacity)173 extractPackageName(const char *filename,
174                    char pkg[], int32_t capacity) {
175     const char *basename;
176     int32_t len;
177 
178     basename=findBasename(filename);
179     len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
180 
181     if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
182         fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
183                          basename);
184         exit(U_ILLEGAL_ARGUMENT_ERROR);
185     }
186 
187     if(len>=capacity) {
188         fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
189                          basename, (long)capacity);
190         exit(U_ILLEGAL_ARGUMENT_ERROR);
191     }
192 
193     memcpy(pkg, basename, len);
194     pkg[len]=0;
195 }
196 
/*
 * Return the length of an open, seekable file in bytes and rewind it to the start.
 *
 * Returns -1 if seeking or ftell() fails, or if the length does not fit into
 * an int32_t. readFile() treats any result <=0 as an error.
 */
static int32_t
getFileLength(FILE *f) {
    long length;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    length=ftell(f);
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    if(length<0 || length>0x7fffffffL) {
        /* ftell() failed, or the file is too large for a 32-bit length */
        return -1;
    }
    return (int32_t)length;
}
206 
/*
 * Turn tree separators and alternate file separators into normal file separators.
 * Modifies the NUL-terminated string s in place.
 * Expands to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    for(; *s!=0; ++s) {
        if(*s==U_TREE_ENTRY_SEP_CHAR || *s==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
    }
}
#endif
224 
/*
 * Turn file separators into tree separators.
 * Modifies the NUL-terminated string s in place; both the normal and the
 * alternate file separator are replaced.
 * Expands to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    for(; *s!=0; ++s) {
        if(*s==U_FILE_SEP_CHAR || *s==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
    }
}
#endif
242 
243 /*
244  * Prepend the path (if any) to the name and run the name through treeToName().
245  */
246 static void
makeFullFilename(const char * path,const char * name,char * filename,int32_t capacity)247 makeFullFilename(const char *path, const char *name,
248                  char *filename, int32_t capacity) {
249     char *s;
250 
251     // prepend the path unless NULL or empty
252     if(path!=NULL && path[0]!=0) {
253         if((int32_t)(strlen(path)+1)>=capacity) {
254             fprintf(stderr, "pathname too long: \"%s\"\n", path);
255             exit(U_BUFFER_OVERFLOW_ERROR);
256         }
257         strcpy(filename, path);
258 
259         // make sure the path ends with a file separator
260         s=strchr(filename, 0);
261         if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
262             *s++=U_FILE_SEP_CHAR;
263         }
264     } else {
265         s=filename;
266     }
267 
268     // turn the name into a filename, turn tree separators into file separators
269     if((int32_t)((s-filename)+strlen(name))>=capacity) {
270         fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
271         exit(U_BUFFER_OVERFLOW_ERROR);
272     }
273     strcpy(s, name);
274     treeToPath(s);
275 }
276 
277 static void
makeFullFilenameAndDirs(const char * path,const char * name,char * filename,int32_t capacity)278 makeFullFilenameAndDirs(const char *path, const char *name,
279                         char *filename, int32_t capacity) {
280     char *sep;
281     UErrorCode errorCode;
282 
283     makeFullFilename(path, name, filename, capacity);
284 
285     // make tree directories
286     errorCode=U_ZERO_ERROR;
287     sep=strchr(filename, 0)-strlen(name);
288     while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
289         if(sep!=filename) {
290             *sep=0;                 // truncate temporarily
291             uprv_mkdir(filename, &errorCode);
292             if(U_FAILURE(errorCode)) {
293                 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
294                 exit(U_FILE_ACCESS_ERROR);
295             }
296         }
297         *sep++=U_FILE_SEP_CHAR; // restore file separator character
298     }
299 }
300 
301 static uint8_t *
readFile(const char * path,const char * name,int32_t & length,char & type)302 readFile(const char *path, const char *name, int32_t &length, char &type) {
303     char filename[1024];
304     FILE *file;
305     uint8_t *data;
306     UErrorCode errorCode;
307     int32_t fileLength, typeEnum;
308 
309     makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
310 
311     /* open the input file, get its length, allocate memory for it, read the file */
312     file=fopen(filename, "rb");
313     if(file==NULL) {
314         fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
315         exit(U_FILE_ACCESS_ERROR);
316     }
317 
318     /* get the file length */
319     fileLength=getFileLength(file);
320     if(ferror(file) || fileLength<=0) {
321         fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
322         fclose(file);
323         exit(U_FILE_ACCESS_ERROR);
324     }
325 
326     /* allocate the buffer, pad to multiple of 16 */
327     length=(fileLength+0xf)&~0xf;
328     data=(uint8_t *)malloc(length);
329     if(data==NULL) {
330         fclose(file);
331         exit(U_MEMORY_ALLOCATION_ERROR);
332     }
333 
334     /* read the file */
335     if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
336         fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
337         fclose(file);
338         free(data);
339         exit(U_FILE_ACCESS_ERROR);
340     }
341 
342     /* pad the file to a multiple of 16 using the usual padding byte */
343     if(fileLength<length) {
344         memset(data+fileLength, 0xaa, length-fileLength);
345     }
346 
347     fclose(file);
348 
349     // minimum check for ICU-format data
350     errorCode=U_ZERO_ERROR;
351     typeEnum=getTypeEnumForInputData(data, length, &errorCode);
352     if(typeEnum<0 || U_FAILURE(errorCode)) {
353         fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
354         free(data);
355         exit(U_INVALID_FORMAT_ERROR);
356     }
357     type=makeTypeLetter(typeEnum);
358 
359     return data;
360 }
361 
362 // .dat package file representation ---------------------------------------- ***
363 
364 U_CDECL_BEGIN
365 
366 static int32_t U_CALLCONV
compareItems(const void *,const void * left,const void * right)367 compareItems(const void * /*context*/, const void *left, const void *right) {
368     U_NAMESPACE_USE
369 
370     return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
371 }
372 
373 U_CDECL_END
374 
375 U_NAMESPACE_BEGIN
376 
Package()377 Package::Package() {
378     inPkgName[0]=0;
379     inData=NULL;
380     inLength=0;
381     inCharset=U_CHARSET_FAMILY;
382     inIsBigEndian=U_IS_BIG_ENDIAN;
383 
384     itemCount=0;
385     inStringTop=outStringTop=0;
386 
387     matchMode=0;
388     findPrefix=findSuffix=NULL;
389     findPrefixLength=findSuffixLength=0;
390     findNextIndex=-1;
391 
392     // create a header for an empty package
393     DataHeader *pHeader;
394     pHeader=(DataHeader *)header;
395     pHeader->dataHeader.magic1=0xda;
396     pHeader->dataHeader.magic2=0x27;
397     memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
398     headerLength=(int32_t)(4+sizeof(dataInfo));
399     if(headerLength&0xf) {
400         /* NUL-pad the header to a multiple of 16 */
401         int32_t length=(headerLength+0xf)&~0xf;
402         memset(header+headerLength, 0, length-headerLength);
403         headerLength=length;
404     }
405     pHeader->dataHeader.headerSize=(uint16_t)headerLength;
406 }
407 
~Package()408 Package::~Package() {
409     int32_t idx;
410 
411     free(inData);
412 
413     for(idx=0; idx<itemCount; ++idx) {
414         if(items[idx].isDataOwned) {
415             free(items[idx].data);
416         }
417     }
418 }
419 
420 void
readPackage(const char * filename)421 Package::readPackage(const char *filename) {
422     UDataSwapper *ds;
423     const UDataInfo *pInfo;
424     UErrorCode errorCode;
425 
426     const uint8_t *inBytes;
427 
428     int32_t length, offset, i;
429     int32_t itemLength, typeEnum;
430     char type;
431 
432     const UDataOffsetTOCEntry *inEntries;
433 
434     extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
435 
436     /* read the file */
437     inData=readFile(NULL, filename, inLength, type);
438     length=inLength;
439 
440     /*
441      * swap the header - even if the swapping itself is a no-op
442      * because it tells us the header length
443      */
444     errorCode=U_ZERO_ERROR;
445     makeTypeProps(type, inCharset, inIsBigEndian);
446     ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
447     if(U_FAILURE(errorCode)) {
448         fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
449                 filename, u_errorName(errorCode));
450         exit(errorCode);
451     }
452 
453     ds->printError=printPackageError;
454     ds->printErrorContext=stderr;
455 
456     headerLength=sizeof(header);
457     if(length<headerLength) {
458         headerLength=length;
459     }
460     headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
461     if(U_FAILURE(errorCode)) {
462         exit(errorCode);
463     }
464 
465     /* check data format and format version */
466     pInfo=(const UDataInfo *)((const char *)inData+4);
467     if(!(
468         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
469         pInfo->dataFormat[1]==0x6d &&
470         pInfo->dataFormat[2]==0x6e &&
471         pInfo->dataFormat[3]==0x44 &&
472         pInfo->formatVersion[0]==1
473     )) {
474         fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
475                 pInfo->dataFormat[0], pInfo->dataFormat[1],
476                 pInfo->dataFormat[2], pInfo->dataFormat[3],
477                 pInfo->formatVersion[0]);
478         exit(U_UNSUPPORTED_ERROR);
479     }
480     inIsBigEndian=(UBool)pInfo->isBigEndian;
481     inCharset=pInfo->charsetFamily;
482 
483     inBytes=(const uint8_t *)inData+headerLength;
484     inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
485 
486     /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
487     length-=headerLength;
488     if(length<4) {
489         /* itemCount does not fit */
490         offset=0x7fffffff;
491     } else {
492         itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
493         if(itemCount==0) {
494             offset=4;
495         } else if(length<(4+8*itemCount)) {
496             /* ToC table does not fit */
497             offset=0x7fffffff;
498         } else {
499             /* offset of the last item plus at least 20 bytes for its header */
500             offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
501         }
502     }
503     if(length<offset) {
504         fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
505                         (long)length);
506         exit(U_INDEX_OUTOFBOUNDS_ERROR);
507     }
508     /* do not modify the package length variable until the last item's length is set */
509 
510     if(itemCount>0) {
511         char prefix[MAX_PKG_NAME_LENGTH+4];
512         char *s, *inItemStrings;
513         int32_t inPkgNameLength, prefixLength, stringsOffset;
514 
515         if(itemCount>MAX_FILE_COUNT) {
516             fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
517             exit(U_BUFFER_OVERFLOW_ERROR);
518         }
519 
520         /* swap the item name strings */
521         stringsOffset=4+8*itemCount;
522         itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
523 
524         // don't include padding bytes at the end of the item names
525         while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
526             --itemLength;
527         }
528 
529         if((inStringTop+itemLength)>STRING_STORE_SIZE) {
530             fprintf(stderr, "icupkg: total length of item name strings too long\n");
531             exit(U_BUFFER_OVERFLOW_ERROR);
532         }
533 
534         inItemStrings=inStrings+inStringTop;
535         ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
536         if(U_FAILURE(errorCode)) {
537             fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
538             exit(U_INVALID_FORMAT_ERROR);
539         }
540         inStringTop+=itemLength;
541 
542         // reset the Item entries
543         memset(items, 0, itemCount*sizeof(Item));
544 
545         inPkgNameLength=strlen(inPkgName);
546         memcpy(prefix, inPkgName, inPkgNameLength);
547         prefixLength=inPkgNameLength;
548 
549         /*
550          * Get the common prefix of the items.
551          * New-style ICU .dat packages use tree separators ('/') between package names,
552          * tree names, and item names,
553          * while old-style ICU .dat packages (before multi-tree support)
554          * use an underscore ('_') between package and item names.
555          */
556         offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
557         s=inItemStrings+offset;
558         if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
559             0==memcmp(s, inPkgName, inPkgNameLength) &&
560             s[inPkgNameLength]=='_'
561         ) {
562             // old-style .dat package
563             prefix[prefixLength++]='_';
564         } else {
565             // new-style .dat package
566             prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
567             // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
568             // then the test in the loop below will fail
569         }
570         prefix[prefixLength]=0;
571 
572         /* read the ToC table */
573         for(i=0; i<itemCount; ++i) {
574             // skip the package part of the item name, error if it does not match the actual package name
575             // or if nothing follows the package name
576             offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
577             s=inItemStrings+offset;
578             if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
579                 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
580                         s, prefix);
581                 exit(U_UNSUPPORTED_ERROR);
582             }
583             items[i].name=s+prefixLength;
584 
585             // set the item's data
586             items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
587             if(i>0) {
588                 items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
589 
590                 // set the previous item's platform type
591                 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
592                 if(typeEnum<0 || U_FAILURE(errorCode)) {
593                     fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
594                     exit(U_INVALID_FORMAT_ERROR);
595                 }
596                 items[i-1].type=makeTypeLetter(typeEnum);
597             }
598             items[i].isDataOwned=FALSE;
599         }
600         // set the last item's length
601         items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
602 
603         // set the last item's platform type
604         typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
605         if(typeEnum<0 || U_FAILURE(errorCode)) {
606             fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
607             exit(U_INVALID_FORMAT_ERROR);
608         }
609         items[itemCount-1].type=makeTypeLetter(typeEnum);
610 
611         if(type!=U_ICUDATA_TYPE_LETTER[0]) {
612             // sort the item names for the local charset
613             sortItems();
614         }
615     }
616 
617     udata_closeSwapper(ds);
618 }
619 
620 char
getInType()621 Package::getInType() {
622     return makeTypeLetter(inCharset, inIsBigEndian);
623 }
624 
625 void
writePackage(const char * filename,char outType,const char * comment)626 Package::writePackage(const char *filename, char outType, const char *comment) {
627     char prefix[MAX_PKG_NAME_LENGTH+4];
628     UDataOffsetTOCEntry entry;
629     UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
630     FILE *file;
631     Item *pItem;
632     char *name;
633     UErrorCode errorCode;
634     int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
635     uint8_t outCharset;
636     UBool outIsBigEndian;
637 
638     extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
639 
640     // if there is an explicit comment, then use it, else use what's in the current header
641     if(comment!=NULL) {
642         /* get the header size minus the current comment */
643         DataHeader *pHeader;
644         int32_t length;
645 
646         pHeader=(DataHeader *)header;
647         headerLength=4+pHeader->info.size;
648         length=(int32_t)strlen(comment);
649         if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
650             fprintf(stderr, "icupkg: comment too long\n");
651             exit(U_BUFFER_OVERFLOW_ERROR);
652         }
653         memcpy(header+headerLength, comment, length+1);
654         headerLength+=length;
655         if(headerLength&0xf) {
656             /* NUL-pad the header to a multiple of 16 */
657             length=(headerLength+0xf)&~0xf;
658             memset(header+headerLength, 0, length-headerLength);
659             headerLength=length;
660         }
661         pHeader->dataHeader.headerSize=(uint16_t)headerLength;
662     }
663 
664     makeTypeProps(outType, outCharset, outIsBigEndian);
665 
666     // open (TYPE_COUNT-2) swappers
667     // one is a no-op for local type==outType
668     // one type (TYPE_LE) is bogus
669     errorCode=U_ZERO_ERROR;
670     i=makeTypeEnum(outType);
671     ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
672     ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
673     ds[TYPE_LE]=NULL;
674     ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
675     if(U_FAILURE(errorCode)) {
676         fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
677         exit(errorCode);
678     }
679     for(i=0; i<TYPE_COUNT; ++i) {
680         if(ds[i]!=NULL) {
681             ds[i]->printError=printPackageError;
682             ds[i]->printErrorContext=stderr;
683         }
684     }
685 
686     dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
687 
688     // create the file and write its contents
689     file=fopen(filename, "wb");
690     if(file==NULL) {
691         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
692         exit(U_FILE_ACCESS_ERROR);
693     }
694 
695     // swap and write the header
696     if(dsLocalToOut!=NULL) {
697         udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
698         if(U_FAILURE(errorCode)) {
699             fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
700             exit(errorCode);
701         }
702     }
703     length=(int32_t)fwrite(header, 1, headerLength, file);
704     if(length!=headerLength) {
705         fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
706         exit(U_FILE_ACCESS_ERROR);
707     }
708 
709     // prepare and swap the package name with a tree separator
710     // for prepending to item names
711     strcat(prefix, U_TREE_ENTRY_SEP_STRING);
712     prefixLength=(int32_t)strlen(prefix);
713     if(dsLocalToOut!=NULL) {
714         dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
715         if(U_FAILURE(errorCode)) {
716             fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
717             exit(errorCode);
718         }
719 
720         // swap and sort the item names (sorting needs to be done in the output charset)
721         dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
722         if(U_FAILURE(errorCode)) {
723             fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
724             exit(errorCode);
725         }
726         sortItems();
727     }
728 
729     // create the output item names in sorted order, with the package name prepended to each
730     for(i=0; i<itemCount; ++i) {
731         length=(int32_t)strlen(items[i].name);
732         name=allocString(FALSE, length+prefixLength);
733         memcpy(name, prefix, prefixLength);
734         memcpy(name+prefixLength, items[i].name, length+1);
735         items[i].name=name;
736     }
737 
738     // calculate offsets for item names and items, pad to 16-align items
739     // align only the first item; each item's length is a multiple of 16
740     basenameOffset=4+8*itemCount;
741     offset=basenameOffset+outStringTop;
742     if((length=(offset&15))!=0) {
743         length=16-length;
744         memset(allocString(FALSE, length-1), 0xaa, length);
745         offset+=length;
746     }
747 
748     // write the table of contents
749     // first the itemCount
750     outInt32=itemCount;
751     if(dsLocalToOut!=NULL) {
752         dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
753         if(U_FAILURE(errorCode)) {
754             fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
755             exit(errorCode);
756         }
757     }
758     length=(int32_t)fwrite(&outInt32, 1, 4, file);
759     if(length!=4) {
760         fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
761         exit(U_FILE_ACCESS_ERROR);
762     }
763 
764     // then write the item entries (and collect the maxItemLength)
765     maxItemLength=0;
766     for(i=0; i<itemCount; ++i) {
767         entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
768         entry.dataOffset=(uint32_t)offset;
769         if(dsLocalToOut!=NULL) {
770             dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
771             if(U_FAILURE(errorCode)) {
772                 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
773                 exit(errorCode);
774             }
775         }
776         length=(int32_t)fwrite(&entry, 1, 8, file);
777         if(length!=8) {
778             fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
779             exit(U_FILE_ACCESS_ERROR);
780         }
781 
782         length=items[i].length;
783         if(length>maxItemLength) {
784             maxItemLength=length;
785         }
786         offset+=length;
787     }
788 
789     // write the item names
790     length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
791     if(length!=outStringTop) {
792         fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
793         exit(U_FILE_ACCESS_ERROR);
794     }
795 
796     // write the items
797     for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
798         int32_t type=makeTypeEnum(pItem->type);
799         if(ds[type]!=NULL) {
800             // swap each item from its platform properties to the desired ones
801             udata_swap(
802                 ds[type],
803                 pItem->data, pItem->length, pItem->data,
804                 &errorCode);
805             if(U_FAILURE(errorCode)) {
806                 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
807                 exit(errorCode);
808             }
809         }
810         length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
811         if(length!=pItem->length) {
812             fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
813             exit(U_FILE_ACCESS_ERROR);
814         }
815     }
816 
817     if(ferror(file)) {
818         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
819         exit(U_FILE_ACCESS_ERROR);
820     }
821 
822     fclose(file);
823     for(i=0; i<TYPE_COUNT; ++i) {
824         udata_closeSwapper(ds[i]);
825     }
826 }
827 
828 int32_t
findItem(const char * name,int32_t length) const829 Package::findItem(const char *name, int32_t length) const {
830     int32_t i, start, limit;
831     int result;
832 
833     /* do a binary search for the string */
834     start=0;
835     limit=itemCount;
836     while(start<limit) {
837         i=(start+limit)/2;
838         if(length>=0) {
839             result=strncmp(name, items[i].name, length);
840         } else {
841             result=strcmp(name, items[i].name);
842         }
843 
844         if(result==0) {
845             /* found */
846             if(length>=0) {
847                 /*
848                  * if we compared just prefixes, then we may need to back up
849                  * to the first item with this prefix
850                  */
851                 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
852                     --i;
853                 }
854             }
855             return i;
856         } else if(result<0) {
857             limit=i;
858         } else /* result>0 */ {
859             start=i+1;
860         }
861     }
862 
863     return ~start; /* not found, return binary-not of the insertion point */
864 }
865 
866 void
findItems(const char * pattern)867 Package::findItems(const char *pattern) {
868     const char *wild;
869 
870     if(pattern==NULL || *pattern==0) {
871         findNextIndex=-1;
872         return;
873     }
874 
875     findPrefix=pattern;
876     findSuffix=NULL;
877     findSuffixLength=0;
878 
879     wild=strchr(pattern, '*');
880     if(wild==NULL) {
881         // no wildcard
882         findPrefixLength=(int32_t)strlen(pattern);
883     } else {
884         // one wildcard
885         findPrefixLength=(int32_t)(wild-pattern);
886         findSuffix=wild+1;
887         findSuffixLength=(int32_t)strlen(findSuffix);
888         if(NULL!=strchr(findSuffix, '*')) {
889             // two or more wildcards
890             fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
891             exit(U_PARSE_ERROR);
892         }
893     }
894 
895     if(findPrefixLength==0) {
896         findNextIndex=0;
897     } else {
898         findNextIndex=findItem(findPrefix, findPrefixLength);
899     }
900 }
901 
902 int32_t
findNextItem()903 Package::findNextItem() {
904     const char *name, *middle, *treeSep;
905     int32_t idx, nameLength, middleLength;
906 
907     if(findNextIndex<0) {
908         return -1;
909     }
910 
911     while(findNextIndex<itemCount) {
912         idx=findNextIndex++;
913         name=items[idx].name;
914         nameLength=(int32_t)strlen(name);
915         if(nameLength<(findPrefixLength+findSuffixLength)) {
916             // item name too short for prefix & suffix
917             continue;
918         }
919         if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
920             // left the range of names with this prefix
921             break;
922         }
923         middle=name+findPrefixLength;
924         middleLength=nameLength-findPrefixLength-findSuffixLength;
925         if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
926             // suffix does not match
927             continue;
928         }
929         // prefix & suffix match
930 
931         if(matchMode&MATCH_NOSLASH) {
932             treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
933             if(treeSep!=NULL && (treeSep-middle)<middleLength) {
934                 // the middle (matching the * wildcard) contains a tree separator /
935                 continue;
936             }
937         }
938 
939         // found a matching item
940         return idx;
941     }
942 
943     // no more items
944     findNextIndex=-1;
945     return -1;
946 }
947 
948 void
setMatchMode(uint32_t mode)949 Package::setMatchMode(uint32_t mode) {
950     matchMode=mode;
951 }
952 
953 void
addItem(const char * name)954 Package::addItem(const char *name) {
955     addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
956 }
957 
958 void
addItem(const char * name,uint8_t * data,int32_t length,UBool isDataOwned,char type)959 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
960     int32_t idx;
961 
962     idx=findItem(name);
963     if(idx<0) {
964         // new item, make space at the insertion point
965         if(itemCount>=MAX_FILE_COUNT) {
966             fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
967             exit(U_BUFFER_OVERFLOW_ERROR);
968         }
969         // move the following items down
970         idx=~idx;
971         if(idx<itemCount) {
972             memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
973         }
974         ++itemCount;
975 
976         // reset this Item entry
977         memset(items+idx, 0, sizeof(Item));
978 
979         // copy the item's name
980         items[idx].name=allocString(TRUE, strlen(name));
981         strcpy(items[idx].name, name);
982         pathToTree(items[idx].name);
983     } else {
984         // same-name item found, replace it
985         if(items[idx].isDataOwned) {
986             free(items[idx].data);
987         }
988 
989         // keep the item's name since it is the same
990     }
991 
992     // set the item's data
993     items[idx].data=data;
994     items[idx].length=length;
995     items[idx].isDataOwned=isDataOwned;
996     items[idx].type=type;
997 }
998 
999 void
addFile(const char * filesPath,const char * name)1000 Package::addFile(const char *filesPath, const char *name) {
1001     uint8_t *data;
1002     int32_t length;
1003     char type;
1004 
1005     data=readFile(filesPath, name, length, type);
1006     // readFile() exits the tool if it fails
1007     addItem(name, data, length, TRUE, type);
1008 }
1009 
1010 void
addItems(const Package & listPkg)1011 Package::addItems(const Package &listPkg) {
1012     const Item *pItem;
1013     int32_t i;
1014 
1015     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1016         addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1017     }
1018 }
1019 
1020 void
removeItem(int32_t idx)1021 Package::removeItem(int32_t idx) {
1022     if(idx>=0) {
1023         // remove the item
1024         if(items[idx].isDataOwned) {
1025             free(items[idx].data);
1026         }
1027 
1028         // move the following items up
1029         if((idx+1)<itemCount) {
1030             memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1031         }
1032         --itemCount;
1033 
1034         if(idx<=findNextIndex) {
1035             --findNextIndex;
1036         }
1037     }
1038 }
1039 
1040 void
removeItems(const char * pattern)1041 Package::removeItems(const char *pattern) {
1042     int32_t idx;
1043 
1044     findItems(pattern);
1045     while((idx=findNextItem())>=0) {
1046         removeItem(idx);
1047     }
1048 }
1049 
1050 void
removeItems(const Package & listPkg)1051 Package::removeItems(const Package &listPkg) {
1052     const Item *pItem;
1053     int32_t i;
1054 
1055     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1056         removeItems(pItem->name);
1057     }
1058 }
1059 
1060 void
extractItem(const char * filesPath,const char * outName,int32_t idx,char outType)1061 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1062     char filename[1024];
1063     UDataSwapper *ds;
1064     FILE *file;
1065     Item *pItem;
1066     int32_t fileLength;
1067     uint8_t itemCharset, outCharset;
1068     UBool itemIsBigEndian, outIsBigEndian;
1069 
1070     if(idx<0 || itemCount<=idx) {
1071         return;
1072     }
1073     pItem=items+idx;
1074 
1075     // swap the data to the outType
1076     // outType==0: don't swap
1077     if(outType!=0 && pItem->type!=outType) {
1078         // open the swapper
1079         UErrorCode errorCode=U_ZERO_ERROR;
1080         makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1081         makeTypeProps(outType, outCharset, outIsBigEndian);
1082         ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1083         if(U_FAILURE(errorCode)) {
1084             fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1085                     (long)idx, u_errorName(errorCode));
1086             exit(errorCode);
1087         }
1088 
1089         ds->printError=printPackageError;
1090         ds->printErrorContext=stderr;
1091 
1092         // swap the item from its platform properties to the desired ones
1093         udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1094         if(U_FAILURE(errorCode)) {
1095             fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1096             exit(errorCode);
1097         }
1098         udata_closeSwapper(ds);
1099     }
1100 
1101     // create the file and write its contents
1102     makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1103     file=fopen(filename, "wb");
1104     if(file==NULL) {
1105         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1106         exit(U_FILE_ACCESS_ERROR);
1107     }
1108     fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1109 
1110     if(ferror(file) || fileLength!=pItem->length) {
1111         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1112         exit(U_FILE_ACCESS_ERROR);
1113     }
1114     fclose(file);
1115 }
1116 
1117 void
extractItem(const char * filesPath,int32_t idx,char outType)1118 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1119     extractItem(filesPath, items[idx].name, idx, outType);
1120 }
1121 
1122 void
extractItems(const char * filesPath,const char * pattern,char outType)1123 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1124     int32_t idx;
1125 
1126     findItems(pattern);
1127     while((idx=findNextItem())>=0) {
1128         extractItem(filesPath, idx, outType);
1129     }
1130 }
1131 
1132 void
extractItems(const char * filesPath,const Package & listPkg,char outType)1133 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1134     const Item *pItem;
1135     int32_t i;
1136 
1137     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1138         extractItems(filesPath, pItem->name, outType);
1139     }
1140 }
1141 
1142 int32_t
getItemCount() const1143 Package::getItemCount() const {
1144     return itemCount;
1145 }
1146 
1147 const Item *
getItem(int32_t idx) const1148 Package::getItem(int32_t idx) const {
1149     if (0 <= idx && idx < itemCount) {
1150         return &items[idx];
1151     }
1152     return NULL;
1153 }
1154 
1155 void
checkDependency(void * context,const char * itemName,const char * targetName)1156 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1157     // check dependency: make sure the target item is in the package
1158     Package *me=(Package *)context;
1159     if(me->findItem(targetName)<0) {
1160         me->isMissingItems=TRUE;
1161         fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1162     }
1163 }
1164 
1165 UBool
checkDependencies()1166 Package::checkDependencies() {
1167     isMissingItems=FALSE;
1168     enumDependencies(this, checkDependency);
1169     return (UBool)!isMissingItems;
1170 }
1171 
1172 void
enumDependencies(void * context,CheckDependency check)1173 Package::enumDependencies(void *context, CheckDependency check) {
1174     int32_t i;
1175 
1176     for(i=0; i<itemCount; ++i) {
1177         enumDependencies(items+i, context, check);
1178     }
1179 }
1180 
1181 char *
allocString(UBool in,int32_t length)1182 Package::allocString(UBool in, int32_t length) {
1183     char *p;
1184     int32_t top;
1185 
1186     if(in) {
1187         top=inStringTop;
1188         p=inStrings+top;
1189     } else {
1190         top=outStringTop;
1191         p=outStrings+top;
1192     }
1193     top+=length+1;
1194 
1195     if(top>STRING_STORE_SIZE) {
1196         fprintf(stderr, "icupkg: string storage overflow\n");
1197         exit(U_BUFFER_OVERFLOW_ERROR);
1198     }
1199     if(in) {
1200         inStringTop=top;
1201     } else {
1202         outStringTop=top;
1203     }
1204     return p;
1205 }
1206 
1207 void
sortItems()1208 Package::sortItems() {
1209     UErrorCode errorCode=U_ZERO_ERROR;
1210     uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1211     if(U_FAILURE(errorCode)) {
1212         fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1213         exit(errorCode);
1214     }
1215 }
1216 
1217 U_NAMESPACE_END
1218