• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *   Copyright (C) 2008, International Business Machines
3  *   Corporation and others.  All Rights Reserved.
4  *******************************************************************************
5  */
6 #include "unicode/utypes.h"
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include "unicode/utypes.h"
11 #include "unicode/putil.h"
12 #include "cmemory.h"
13 #include "cstring.h"
14 #include "filestrm.h"
15 #include "toolutil.h"
16 #include "unicode/uclean.h"
17 #include "unewdata.h"
18 #include "putilimp.h"
19 #include "pkg_gencmn.h"
20 
21 #define STRING_STORE_SIZE 100000
22 #define MAX_FILE_COUNT 2000
23 
24 #define COMMON_DATA_NAME U_ICUDATA_NAME
25 #define DATA_TYPE "dat"
26 
27 /* ICU package data file format (.dat files) ------------------------------- ***
28 
29 Description of the data format after the usual ICU data file header
30 (UDataInfo etc.).
31 
32 Format version 1
33 
34 A .dat package file contains a simple Table of Contents of item names,
35 followed by the items themselves:
36 
37 1. ToC table
38 
39 uint32_t count; - number of items
40 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
41     uint32_t nameOffset; - offset of the item name
42     uint32_t dataOffset; - offset of the item data
43 both are byte offsets from the beginning of the data
44 
45 2. item name strings
46 
47 All item names are stored as char * strings in one block between the ToC table
48 and the data items.
49 
50 3. data items
51 
52 The data items are stored following the item names block.
53 Each data item is 16-aligned.
54 The data items are stored in the sorted order of their names.
55 
Therefore, the end of the name strings block (padded up to the next multiple
of 16) is the offset of the first item. The length of the last item is the
difference between its offset and the .dat file length, and the length of
each earlier item (including any alignment padding) is the difference between
its offset and the offset of the next item.
60 
61 ----------------------------------------------------------------------------- */
62 
/* UDataInfo cf. udata.h */
/*
 * Data header descriptor handed to udata_create() when the binary .dat
 * package is written. The initializer is positional, so the order of the
 * values below must not be changed.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),            /* size of this structure */
    0,

    U_IS_BIG_ENDIAN,              /* byte order of the build platform */
    U_CHARSET_FAMILY,             /* charset family of the build platform */
    sizeof(UChar),                /* size of a UChar on the build platform */
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
77 
78 static uint32_t maxSize;
79 
80 static char stringStore[STRING_STORE_SIZE];
81 static uint32_t stringTop=0, basenameTotal=0;
82 
83 typedef struct {
84     char *pathname, *basename;
85     uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
86 } File;
87 
88 static File files[MAX_FILE_COUNT];
89 static uint32_t fileCount=0;
90 
91 static char *symPrefix = NULL;
92 
93 /* prototypes --------------------------------------------------------------- */
94 
95 static void
96 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
97 
98 static char *
99 allocString(uint32_t length);
100 
101 static int
102 compareFiles(const void *file1, const void *file2);
103 
104 static char *
105 pathToFullPath(const char *path, const char *source);
106 
107 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
108 static void
109 fixDirToTreePath(char *s);
110 /* -------------------------------------------------------------------------- */
111 
112 U_CAPI void U_EXPORT2
createCommonDataFile(const char * destDir,const char * name,const char * entrypointName,const char * type,const char * source,const char * copyRight,const char * dataFile,uint32_t max_size,UBool sourceTOC,UBool verbose,char * gencmnFileName)113 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
114                      const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
115     static char buffer[4096];
116     char line[512];
117     char *s;
118     UErrorCode errorCode=U_ZERO_ERROR;
119     uint32_t i, fileOffset, basenameOffset, length, nread;
120     FileStream *in, *file;
121 
122     maxSize = max_size;
123 
124     if (destDir == NULL) {
125         destDir = u_getDataDirectory();
126     }
127     if (name == NULL) {
128         name = COMMON_DATA_NAME;
129     }
130     if (type == NULL) {
131         type = DATA_TYPE;
132     }
133     if (source == NULL) {
134         source = ".";
135     }
136 
137     if (dataFile == NULL) {
138         in = T_FileStream_stdin();
139     } else {
140         in = T_FileStream_open(dataFile, "r");
141         if(in == NULL) {
142             fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
143             exit(U_FILE_ACCESS_ERROR);
144         }
145     }
146 
147     if (verbose) {
148         if(sourceTOC) {
149             printf("generating %s_%s.c (table of contents source file)\n", name, type);
150         } else {
151             printf("generating %s.%s (common data file with table of contents)\n", name, type);
152         }
153     }
154 
155     /* read the list of files and get their lengths */
156     while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
157         /* remove trailing newline characters */
158         s=line;
159         while(*s!=0) {
160             if(*s=='\r' || *s=='\n') {
161                 *s=0;
162                 break;
163             }
164             ++s;
165         }
166 
167         /* check for comment */
168 
169         if (*line == '#') {
170             continue;
171         }
172 
173         /* add the file */
174 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
175         {
176           char *t;
177           while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
178             *t = U_FILE_SEP_CHAR;
179           }
180         }
181 #endif
182         addFile(getLongPathname(line), name, source, sourceTOC, verbose);
183     }
184 
185     if(in!=T_FileStream_stdin()) {
186         T_FileStream_close(in);
187     }
188 
189     if(fileCount==0) {
190         fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
191         return;
192     }
193 
194     /* sort the files by basename */
195     qsort(files, fileCount, sizeof(File), compareFiles);
196 
197     if(!sourceTOC) {
198         UNewDataMemory *out;
199 
200         /* determine the offsets of all basenames and files in this common one */
201         basenameOffset=4+8*fileCount;
202         fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
203         for(i=0; i<fileCount; ++i) {
204             files[i].fileOffset=fileOffset;
205             fileOffset+=(files[i].fileSize+15)&~0xf;
206             files[i].basenameOffset=basenameOffset;
207             basenameOffset+=files[i].basenameLength;
208         }
209 
210         /* create the output file */
211         out=udata_create(destDir, type, name,
212                          &dataInfo,
213                          copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
214                          &errorCode);
215         if(U_FAILURE(errorCode)) {
216             fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
217                 destDir, name, type,
218                 u_errorName(errorCode));
219             exit(errorCode);
220         }
221 
222         /* write the table of contents */
223         udata_write32(out, fileCount);
224         for(i=0; i<fileCount; ++i) {
225             udata_write32(out, files[i].basenameOffset);
226             udata_write32(out, files[i].fileOffset);
227         }
228 
229         /* write the basenames */
230         for(i=0; i<fileCount; ++i) {
231             udata_writeString(out, files[i].basename, files[i].basenameLength);
232         }
233         length=4+8*fileCount+basenameTotal;
234 
235         /* copy the files */
236         for(i=0; i<fileCount; ++i) {
237             /* pad to 16-align the next file */
238             length&=0xf;
239             if(length!=0) {
240                 udata_writePadding(out, 16-length);
241             }
242 
243             if (verbose) {
244                 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
245             }
246 
247             /* copy the next file */
248             file=T_FileStream_open(files[i].pathname, "rb");
249             if(file==NULL) {
250                 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
251                 exit(U_FILE_ACCESS_ERROR);
252             }
253             for(nread = 0;;) {
254                 length=T_FileStream_read(file, buffer, sizeof(buffer));
255                 if(length <= 0) {
256                     break;
257                 }
258                 nread += length;
259                 udata_writeBlock(out, buffer, length);
260             }
261             T_FileStream_close(file);
262             length=files[i].fileSize;
263 
264             if (nread != files[i].fileSize) {
265               fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
266                 exit(U_FILE_ACCESS_ERROR);
267             }
268         }
269 
270         /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
271         length&=0xf;
272         if(length!=0) {
273             udata_writePadding(out, 16-length);
274         }
275 
276         /* finish */
277         udata_finish(out, &errorCode);
278         if(U_FAILURE(errorCode)) {
279             fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
280             exit(errorCode);
281         }
282     } else {
283         /* write a .c source file with the table of contents */
284         char *filename;
285         FileStream *out;
286 
287         /* create the output filename */
288         filename=s=buffer;
289         uprv_strcpy(filename, destDir);
290         s=filename+uprv_strlen(filename);
291         if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
292             *s++=U_FILE_SEP_CHAR;
293         }
294         uprv_strcpy(s, name);
295         if(*(type)!=0) {
296             s+=uprv_strlen(s);
297             *s++='_';
298             uprv_strcpy(s, type);
299         }
300         s+=uprv_strlen(s);
301         uprv_strcpy(s, ".c");
302 
303         /* open the output file */
304         out=T_FileStream_open(filename, "w");
305         if (gencmnFileName != NULL) {
306             uprv_strcpy(gencmnFileName, filename);
307         }
308         if(out==NULL) {
309             fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
310             exit(U_FILE_ACCESS_ERROR);
311         }
312 
313         /* write the source file */
314         sprintf(buffer,
315             "/*\n"
316             " * ICU common data table of contents for %s.%s ,\n"
317             " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
318             " */\n\n"
319             "#include \"unicode/utypes.h\"\n"
320             "#include \"unicode/udata.h\"\n"
321             "\n"
322             "/* external symbol declarations for data */\n",
323             name, type);
324         T_FileStream_writeLine(out, buffer);
325 
326         sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
327         T_FileStream_writeLine(out, buffer);
328         for(i=1; i<fileCount; ++i) {
329             sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
330             T_FileStream_writeLine(out, buffer);
331         }
332         T_FileStream_writeLine(out, ";\n\n");
333 
334         sprintf(
335             buffer,
336             "U_EXPORT struct {\n"
337             "    uint16_t headerSize;\n"
338             "    uint8_t magic1, magic2;\n"
339             "    UDataInfo info;\n"
340             "    char padding[%lu];\n"
341             "    uint32_t count, reserved;\n"
342             "    struct {\n"
343             "        const char *name;\n"
344             "        const void *data;\n"
345             "    } toc[%lu];\n"
346             "} U_EXPORT2 %s_dat = {\n"
347             "    32, 0xda, 0x27, {\n"
348             "        %lu, 0,\n"
349             "        %u, %u, %u, 0,\n"
350             "        {0x54, 0x6f, 0x43, 0x50},\n"
351             "        {1, 0, 0, 0},\n"
352             "        {0, 0, 0, 0}\n"
353             "    },\n"
354             "    \"\", %lu, 0, {\n",
355             (unsigned long)32-4-sizeof(UDataInfo),
356             (unsigned long)fileCount,
357             entrypointName,
358             (unsigned long)sizeof(UDataInfo),
359             U_IS_BIG_ENDIAN,
360             U_CHARSET_FAMILY,
361             U_SIZEOF_UCHAR,
362             (unsigned long)fileCount
363         );
364         T_FileStream_writeLine(out, buffer);
365 
366         sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
367         T_FileStream_writeLine(out, buffer);
368         for(i=1; i<fileCount; ++i) {
369             sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
370             T_FileStream_writeLine(out, buffer);
371         }
372 
373         T_FileStream_writeLine(out, "\n    }\n};\n");
374         T_FileStream_close(out);
375 
376         uprv_free(symPrefix);
377     }
378 }
379 
380 static void
addFile(const char * filename,const char * name,const char * source,UBool sourceTOC,UBool verbose)381 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
382     char *s;
383     uint32_t length;
384     char *fullPath = NULL;
385 
386     if(fileCount==MAX_FILE_COUNT) {
387         fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
388         exit(U_BUFFER_OVERFLOW_ERROR);
389     }
390 
391     if(!sourceTOC) {
392         FileStream *file;
393 
394         if(uprv_pathIsAbsolute(filename)) {
395             fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
396             exit(U_ILLEGAL_ARGUMENT_ERROR);
397         }
398         fullPath = pathToFullPath(filename, source);
399 
400         /* store the pathname */
401         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
402         s=allocString(length);
403         uprv_strcpy(s, name);
404         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
405         uprv_strcat(s, filename);
406 
407         /* get the basename */
408         fixDirToTreePath(s);
409         files[fileCount].basename=s;
410         files[fileCount].basenameLength=length;
411 
412         files[fileCount].pathname=fullPath;
413 
414         basenameTotal+=length;
415 
416         /* try to open the file */
417         file=T_FileStream_open(fullPath, "rb");
418         if(file==NULL) {
419             fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
420             exit(U_FILE_ACCESS_ERROR);
421         }
422 
423         /* get the file length */
424         length=T_FileStream_size(file);
425         if(T_FileStream_error(file) || length<=20) {
426             fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
427             exit(U_FILE_ACCESS_ERROR);
428         }
429 
430         T_FileStream_close(file);
431 
432         /* do not add files that are longer than maxSize */
433         if(maxSize && length>maxSize) {
434             if (verbose) {
435                 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
436             }
437             return;
438         }
439         files[fileCount].fileSize=length;
440     } else {
441         char *t;
442 
443         /* get and store the basename */
444         /* need to include the package name */
445         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
446         s=allocString(length);
447         uprv_strcpy(s, name);
448         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
449         uprv_strcat(s, filename);
450         fixDirToTreePath(s);
451         files[fileCount].basename=s;
452 
453 
454         /* turn the basename into an entry point name and store in the pathname field */
455         t=files[fileCount].pathname=allocString(length);
456         while(--length>0) {
457             if(*s=='.' || *s=='-' || *s=='/') {
458                 *t='_';
459             } else {
460                 *t=*s;
461             }
462             ++s;
463             ++t;
464         }
465         *t=0;
466     }
467     ++fileCount;
468 }
469 
470 static char *
allocString(uint32_t length)471 allocString(uint32_t length) {
472     uint32_t top=stringTop+length;
473     char *p;
474 
475     if(top>STRING_STORE_SIZE) {
476         fprintf(stderr, "gencmn: out of memory\n");
477         exit(U_MEMORY_ALLOCATION_ERROR);
478     }
479     p=stringStore+stringTop;
480     stringTop=top;
481     return p;
482 }
483 
484 static char *
pathToFullPath(const char * path,const char * source)485 pathToFullPath(const char *path, const char *source) {
486     int32_t length;
487     int32_t newLength;
488     char *fullPath;
489     int32_t n;
490 
491     length = (uint32_t)(uprv_strlen(path) + 1);
492     newLength = (length + 1 + (int32_t)uprv_strlen(source));
493     fullPath = uprv_malloc(newLength);
494     if(source != NULL) {
495         uprv_strcpy(fullPath, source);
496         uprv_strcat(fullPath, U_FILE_SEP_STRING);
497     } else {
498         fullPath[0] = 0;
499     }
500     n = (int32_t)uprv_strlen(fullPath);
501     uprv_strcat(fullPath, path);
502 
503 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
504 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
505     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
506     for(;fullPath[n];n++) {
507         if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
508             fullPath[n] = U_FILE_SEP_CHAR;
509         }
510     }
511 #endif
512 #endif
513 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
514     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
515     for(;fullPath[n];n++) {
516         if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
517             fullPath[n] = U_FILE_SEP_CHAR;
518         }
519     }
520 #endif
521     return fullPath;
522 }
523 
524 static int
compareFiles(const void * file1,const void * file2)525 compareFiles(const void *file1, const void *file2) {
526     /* sort by basename */
527     return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
528 }
529 
/*
 * Rewrite s in place so that every platform file separator (and alternate
 * file separator, where the platform has a distinct one) becomes the tree
 * entry separator. On platforms where all three are the same character this
 * function does nothing.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;

    /* single pass over the string, mapping each separator as it is met */
    for(cursor=s; *cursor!=0; ++cursor) {
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor==U_FILE_SEP_CHAR) {
            *cursor=U_TREE_ENTRY_SEP_CHAR;
            continue;
        }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor==U_FILE_ALT_SEP_CHAR) {
            *cursor=U_TREE_ENTRY_SEP_CHAR;
        }
#endif
    }
#endif
}
547