• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4  *   Copyright (C) 2008-2012, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *******************************************************************************
7  */
8 #include "unicode/utypes.h"
9 
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include "unicode/utypes.h"
13 #include "unicode/putil.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "toolutil.h"
18 #include "unicode/uclean.h"
19 #include "unewdata.h"
20 #include "putilimp.h"
21 #include "pkg_gencmn.h"
22 
/* Capacity in bytes of the static stringStore arena that allocString() hands out. */
#define STRING_STORE_SIZE   200000

/* Defaults applied by createCommonDataFile() when name / type are passed as NULL. */
#define COMMON_DATA_NAME    U_ICUDATA_NAME
#define DATA_TYPE           "dat"
27 
28 /* ICU package data file format (.dat files) ------------------------------- ***
29 
30 Description of the data format after the usual ICU data file header
31 (UDataInfo etc.).
32 
33 Format version 1
34 
35 A .dat package file contains a simple Table of Contents of item names,
36 followed by the items themselves:
37 
38 1. ToC table
39 
40 uint32_t count; - number of items
41 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
42     uint32_t nameOffset; - offset of the item name
43     uint32_t dataOffset; - offset of the item data
44 both are byte offsets from the beginning of the data
45 
46 2. item name strings
47 
48 All item names are stored as char * strings in one block between the ToC table
49 and the data items.
50 
51 3. data items
52 
53 The data items are stored following the item names block.
54 Each data item is 16-aligned.
55 The data items are stored in the sorted order of their names.
56 
Therefore, the end of the name strings block (padded to a 16-byte boundary)
is the offset of the first item, the length of the last item is the
difference between the .dat file length and its offset, and the length of
every other item is the difference between the next item's offset and its own.
61 
62 ----------------------------------------------------------------------------- */
63 
64 /* UDataInfo cf. udata.h */
65 static const UDataInfo dataInfo={
66     sizeof(UDataInfo),
67     0,
68 
69     U_IS_BIG_ENDIAN,
70     U_CHARSET_FAMILY,
71     sizeof(UChar),
72     0,
73 
74     {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
75     {1, 0, 0, 0},                 /* formatVersion */
76     {3, 0, 0, 0}                  /* dataVersion */
77 };
78 
79 static uint32_t maxSize;
80 
81 static char stringStore[STRING_STORE_SIZE];
82 static uint32_t stringTop=0, basenameTotal=0;
83 
/* One item of the package being assembled. */
typedef struct {
    /*
     * For a binary package: heap-allocated path used to open the item.
     * For a source TOC: the C symbol name derived from basename.
     */
    char *pathname;
    /* Package-qualified item name ("<package>/<path>") stored in stringStore. */
    char *basename;
    /* Length of basename including its terminating NUL. */
    uint32_t basenameLength;
    /* Offset of the name inside the generated package. */
    uint32_t basenameOffset;
    /* Size of the item file in bytes. */
    uint32_t fileSize;
    /* Offset of the item data inside the generated package. */
    uint32_t fileOffset;
} File;
88 
89 #define CHUNK_FILE_COUNT 256
90 static File *files = NULL;
91 static uint32_t fileCount=0;
92 static uint32_t fileMax = 0;
93 
94 
95 static char *symPrefix = NULL;
96 
97 #define LINE_BUFFER_SIZE 512
98 /* prototypes --------------------------------------------------------------- */
99 
100 static void
101 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
102 
103 static char *
104 allocString(uint32_t length);
105 
106 static int
107 compareFiles(const void *file1, const void *file2);
108 
109 static char *
110 pathToFullPath(const char *path, const char *source);
111 
112 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
113 static void
114 fixDirToTreePath(char *s);
115 /* -------------------------------------------------------------------------- */
116 
117 U_CAPI void U_EXPORT2
createCommonDataFile(const char * destDir,const char * name,const char * entrypointName,const char * type,const char * source,const char * copyRight,const char * dataFile,uint32_t max_size,UBool sourceTOC,UBool verbose,char * gencmnFileName)118 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
119                      const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
120     static char buffer[4096];
121     char *line;
122     char *linePtr;
123     char *s = NULL;
124     UErrorCode errorCode=U_ZERO_ERROR;
125     uint32_t i, fileOffset, basenameOffset, length, nread;
126     FileStream *in, *file;
127 
128     line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
129     if (line == NULL) {
130         fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
131         exit(U_MEMORY_ALLOCATION_ERROR);
132     }
133 
134     linePtr = line;
135 
136     maxSize = max_size;
137 
138     if (destDir == NULL) {
139         destDir = u_getDataDirectory();
140     }
141     if (name == NULL) {
142         name = COMMON_DATA_NAME;
143     }
144     if (type == NULL) {
145         type = DATA_TYPE;
146     }
147     if (source == NULL) {
148         source = ".";
149     }
150 
151     if (dataFile == NULL) {
152         in = T_FileStream_stdin();
153     } else {
154         in = T_FileStream_open(dataFile, "r");
155         if(in == NULL) {
156             fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
157             exit(U_FILE_ACCESS_ERROR);
158         }
159     }
160 
161     if (verbose) {
162         if(sourceTOC) {
163             printf("generating %s_%s.c (table of contents source file)\n", name, type);
164         } else {
165             printf("generating %s.%s (common data file with table of contents)\n", name, type);
166         }
167     }
168 
169     /* read the list of files and get their lengths */
170     while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
171                                                              LINE_BUFFER_SIZE))!=NULL) {
172         /* remove trailing newline characters and parse space separated items */
173         if (s != NULL && *s != 0) {
174             line=s;
175         } else {
176             s=line;
177         }
178         while(*s!=0) {
179             if(*s==' ') {
180                 *s=0;
181                 ++s;
182                 break;
183             } else if(*s=='\r' || *s=='\n') {
184                 *s=0;
185                 break;
186             }
187             ++s;
188         }
189 
190         /* check for comment */
191 
192         if (*line == '#') {
193             continue;
194         }
195 
196         /* add the file */
197 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
198         {
199           char *t;
200           while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
201             *t = U_FILE_SEP_CHAR;
202           }
203         }
204 #endif
205         addFile(getLongPathname(line), name, source, sourceTOC, verbose);
206     }
207 
208     uprv_free(linePtr);
209 
210     if(in!=T_FileStream_stdin()) {
211         T_FileStream_close(in);
212     }
213 
214     if(fileCount==0) {
215         fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
216         return;
217     }
218 
219     /* sort the files by basename */
220     qsort(files, fileCount, sizeof(File), compareFiles);
221 
222     if(!sourceTOC) {
223         UNewDataMemory *out;
224 
225         /* determine the offsets of all basenames and files in this common one */
226         basenameOffset=4+8*fileCount;
227         fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
228         for(i=0; i<fileCount; ++i) {
229             files[i].fileOffset=fileOffset;
230             fileOffset+=(files[i].fileSize+15)&~0xf;
231             files[i].basenameOffset=basenameOffset;
232             basenameOffset+=files[i].basenameLength;
233         }
234 
235         /* create the output file */
236         out=udata_create(destDir, type, name,
237                          &dataInfo,
238                          copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
239                          &errorCode);
240         if(U_FAILURE(errorCode)) {
241             fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
242                 destDir, name, type,
243                 u_errorName(errorCode));
244             exit(errorCode);
245         }
246 
247         /* write the table of contents */
248         udata_write32(out, fileCount);
249         for(i=0; i<fileCount; ++i) {
250             udata_write32(out, files[i].basenameOffset);
251             udata_write32(out, files[i].fileOffset);
252         }
253 
254         /* write the basenames */
255         for(i=0; i<fileCount; ++i) {
256             udata_writeString(out, files[i].basename, files[i].basenameLength);
257         }
258         length=4+8*fileCount+basenameTotal;
259 
260         /* copy the files */
261         for(i=0; i<fileCount; ++i) {
262             /* pad to 16-align the next file */
263             length&=0xf;
264             if(length!=0) {
265                 udata_writePadding(out, 16-length);
266             }
267 
268             if (verbose) {
269                 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
270             }
271 
272             /* copy the next file */
273             file=T_FileStream_open(files[i].pathname, "rb");
274             if(file==NULL) {
275                 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
276                 exit(U_FILE_ACCESS_ERROR);
277             }
278             for(nread = 0;;) {
279                 length=T_FileStream_read(file, buffer, sizeof(buffer));
280                 if(length <= 0) {
281                     break;
282                 }
283                 nread += length;
284                 udata_writeBlock(out, buffer, length);
285             }
286             T_FileStream_close(file);
287             length=files[i].fileSize;
288 
289             if (nread != files[i].fileSize) {
290               fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
291                 exit(U_FILE_ACCESS_ERROR);
292             }
293         }
294 
295         /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
296         length&=0xf;
297         if(length!=0) {
298             udata_writePadding(out, 16-length);
299         }
300 
301         /* finish */
302         udata_finish(out, &errorCode);
303         if(U_FAILURE(errorCode)) {
304             fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
305             exit(errorCode);
306         }
307     } else {
308         /* write a .c source file with the table of contents */
309         char *filename;
310         FileStream *out;
311 
312         /* create the output filename */
313         filename=s=buffer;
314         uprv_strcpy(filename, destDir);
315         s=filename+uprv_strlen(filename);
316         if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
317             *s++=U_FILE_SEP_CHAR;
318         }
319         uprv_strcpy(s, name);
320         if(*(type)!=0) {
321             s+=uprv_strlen(s);
322             *s++='_';
323             uprv_strcpy(s, type);
324         }
325         s+=uprv_strlen(s);
326         uprv_strcpy(s, ".c");
327 
328         /* open the output file */
329         out=T_FileStream_open(filename, "w");
330         if (gencmnFileName != NULL) {
331             uprv_strcpy(gencmnFileName, filename);
332         }
333         if(out==NULL) {
334             fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
335             exit(U_FILE_ACCESS_ERROR);
336         }
337 
338         /* write the source file */
339         sprintf(buffer,
340             "/*\n"
341             " * ICU common data table of contents for %s.%s\n"
342             " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
343             " */\n\n"
344             "#include \"unicode/utypes.h\"\n"
345             "#include \"unicode/udata.h\"\n"
346             "\n"
347             "/* external symbol declarations for data (%d files) */\n",
348                 name, type, fileCount);
349         T_FileStream_writeLine(out, buffer);
350 
351         sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
352         T_FileStream_writeLine(out, buffer);
353         for(i=1; i<fileCount; ++i) {
354             sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
355             T_FileStream_writeLine(out, buffer);
356         }
357         T_FileStream_writeLine(out, ";\n\n");
358 
359         sprintf(
360             buffer,
361             "U_EXPORT struct {\n"
362             "    uint16_t headerSize;\n"
363             "    uint8_t magic1, magic2;\n"
364             "    UDataInfo info;\n"
365             "    char padding[%lu];\n"
366             "    uint32_t count, reserved;\n"
367             "    struct {\n"
368             "        const char *name;\n"
369             "        const void *data;\n"
370             "    } toc[%lu];\n"
371             "} U_EXPORT2 %s_dat = {\n"
372             "    32, 0xda, 0x27, {\n"
373             "        %lu, 0,\n"
374             "        %u, %u, %u, 0,\n"
375             "        {0x54, 0x6f, 0x43, 0x50},\n"
376             "        {1, 0, 0, 0},\n"
377             "        {0, 0, 0, 0}\n"
378             "    },\n"
379             "    \"\", %lu, 0, {\n",
380             (unsigned long)32-4-sizeof(UDataInfo),
381             (unsigned long)fileCount,
382             entrypointName,
383             (unsigned long)sizeof(UDataInfo),
384             U_IS_BIG_ENDIAN,
385             U_CHARSET_FAMILY,
386             U_SIZEOF_UCHAR,
387             (unsigned long)fileCount
388         );
389         T_FileStream_writeLine(out, buffer);
390 
391         sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
392         T_FileStream_writeLine(out, buffer);
393         for(i=1; i<fileCount; ++i) {
394             sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
395             T_FileStream_writeLine(out, buffer);
396         }
397 
398         T_FileStream_writeLine(out, "\n    }\n};\n");
399         T_FileStream_close(out);
400 
401         uprv_free(symPrefix);
402     }
403 }
404 
405 static void
addFile(const char * filename,const char * name,const char * source,UBool sourceTOC,UBool verbose)406 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
407     char *s;
408     uint32_t length;
409     char *fullPath = NULL;
410 
411     if(fileCount==fileMax) {
412       fileMax += CHUNK_FILE_COUNT;
413       files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
414       if(files==NULL) {
415         fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
416         exit(U_MEMORY_ALLOCATION_ERROR);
417       }
418     }
419 
420     if(!sourceTOC) {
421         FileStream *file;
422 
423         if(uprv_pathIsAbsolute(filename)) {
424             fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
425             exit(U_ILLEGAL_ARGUMENT_ERROR);
426         }
427         fullPath = pathToFullPath(filename, source);
428         /* store the pathname */
429         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
430         s=allocString(length);
431         uprv_strcpy(s, name);
432         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
433         uprv_strcat(s, filename);
434 
435         /* get the basename */
436         fixDirToTreePath(s);
437         files[fileCount].basename=s;
438         files[fileCount].basenameLength=length;
439 
440         files[fileCount].pathname=fullPath;
441 
442         basenameTotal+=length;
443 
444         /* try to open the file */
445         file=T_FileStream_open(fullPath, "rb");
446         if(file==NULL) {
447             fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
448             exit(U_FILE_ACCESS_ERROR);
449         }
450 
451         /* get the file length */
452         length=T_FileStream_size(file);
453         if(T_FileStream_error(file) || length<=20) {
454             fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
455             exit(U_FILE_ACCESS_ERROR);
456         }
457 
458         T_FileStream_close(file);
459 
460         /* do not add files that are longer than maxSize */
461         if(maxSize && length>maxSize) {
462             if (verbose) {
463                 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
464             }
465             return;
466         }
467         files[fileCount].fileSize=length;
468     } else {
469         char *t;
470         /* get and store the basename */
471         /* need to include the package name */
472         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
473         s=allocString(length);
474         uprv_strcpy(s, name);
475         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
476         uprv_strcat(s, filename);
477         fixDirToTreePath(s);
478         files[fileCount].basename=s;
479         /* turn the basename into an entry point name and store in the pathname field */
480         t=files[fileCount].pathname=allocString(length);
481         while(--length>0) {
482             if(*s=='.' || *s=='-' || *s=='/') {
483                 *t='_';
484             } else {
485                 *t=*s;
486             }
487             ++s;
488             ++t;
489         }
490         *t=0;
491     }
492     ++fileCount;
493 }
494 
495 static char *
allocString(uint32_t length)496 allocString(uint32_t length) {
497     uint32_t top=stringTop+length;
498     char *p;
499 
500     if(top>STRING_STORE_SIZE) {
501         fprintf(stderr, "gencmn: out of memory\n");
502         exit(U_MEMORY_ALLOCATION_ERROR);
503     }
504     p=stringStore+stringTop;
505     stringTop=top;
506     return p;
507 }
508 
509 static char *
pathToFullPath(const char * path,const char * source)510 pathToFullPath(const char *path, const char *source) {
511     int32_t length;
512     int32_t newLength;
513     char *fullPath;
514     int32_t n;
515 
516     length = (uint32_t)(uprv_strlen(path) + 1);
517     newLength = (length + 1 + (int32_t)uprv_strlen(source));
518     fullPath = uprv_malloc(newLength);
519     if(source != NULL) {
520         uprv_strcpy(fullPath, source);
521         uprv_strcat(fullPath, U_FILE_SEP_STRING);
522     } else {
523         fullPath[0] = 0;
524     }
525     n = (int32_t)uprv_strlen(fullPath);
526     fullPath[n] = 0;       /* Suppress compiler warning for unused variable n    */
527                            /*  when conditional code below is not compiled.      */
528     uprv_strcat(fullPath, path);
529 
530 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
531 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
532     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
533     for(;fullPath[n];n++) {
534         if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
535             fullPath[n] = U_FILE_SEP_CHAR;
536         }
537     }
538 #endif
539 #endif
540 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
541     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
542     for(;fullPath[n];n++) {
543         if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
544             fullPath[n] = U_FILE_SEP_CHAR;
545         }
546     }
547 #endif
548     return fullPath;
549 }
550 
551 static int
compareFiles(const void * file1,const void * file2)552 compareFiles(const void *file1, const void *file2) {
553     /* sort by basename */
554     return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
555 }
556 
/*
 * Replace, in place, every platform file separator (and alternate file
 * separator, where it differs) in s with the tree entry separator.
 * Compiles to an empty function when the separators are already identical.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *t;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    t = uprv_strchr(s, U_FILE_SEP_CHAR);
    while(t != NULL) {
        *t = U_TREE_ENTRY_SEP_CHAR;
        /* the character at t no longer matches, so searching from t finds the next one */
        t = uprv_strchr(t, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    t = uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(t != NULL) {
        *t = U_TREE_ENTRY_SEP_CHAR;
        t = uprv_strchr(t, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
574