1 /******************************************************************************
2 * Copyright (C) 2008-2010, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include "unicode/utypes.h"
11 #include "unicode/putil.h"
12 #include "cmemory.h"
13 #include "cstring.h"
14 #include "filestrm.h"
15 #include "toolutil.h"
16 #include "unicode/uclean.h"
17 #include "unewdata.h"
18 #include "putilimp.h"
19 #include "pkg_gencmn.h"
20
21 #define STRING_STORE_SIZE 100000
22
23 #define COMMON_DATA_NAME U_ICUDATA_NAME
24 #define DATA_TYPE "dat"
25
26 /* ICU package data file format (.dat files) ------------------------------- ***
27
28 Description of the data format after the usual ICU data file header
29 (UDataInfo etc.).
30
31 Format version 1
32
33 A .dat package file contains a simple Table of Contents of item names,
34 followed by the items themselves:
35
36 1. ToC table
37
38 uint32_t count; - number of items
39 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
40 uint32_t nameOffset; - offset of the item name
41 uint32_t dataOffset; - offset of the item data
42 both are byte offsets from the beginning of the data
43
44 2. item name strings
45
46 All item names are stored as char * strings in one block between the ToC table
47 and the data items.
48
49 3. data items
50
51 The data items are stored following the item names block.
52 Each data item is 16-aligned.
53 The data items are stored in the sorted order of their names.
54
55 Therefore, the end of the name strings block, rounded up to a multiple of 16,
56 is the offset of the first item; the length of the last item is the difference
57 between the .dat file length and its offset; and the length of every other item
58 is the difference between the next item's offset and its own offset.
59
60 ----------------------------------------------------------------------------- */
61
62 /* UDataInfo cf. udata.h */
63 static const UDataInfo dataInfo={
64 sizeof(UDataInfo),
65 0,
66
67 U_IS_BIG_ENDIAN,
68 U_CHARSET_FAMILY,
69 sizeof(UChar),
70 0,
71
72 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
73 {1, 0, 0, 0}, /* formatVersion */
74 {3, 0, 0, 0} /* dataVersion */
75 };
76
77 static uint32_t maxSize;
78
79 static char stringStore[STRING_STORE_SIZE];
80 static uint32_t stringTop=0, basenameTotal=0;
81
82 typedef struct {
83 char *pathname, *basename;
84 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
85 } File;
86
87 #define CHUNK_FILE_COUNT 256
88 static File *files = NULL;
89 static uint32_t fileCount=0;
90 static uint32_t fileMax = 0;
91
92
93 static char *symPrefix = NULL;
94
95 /* prototypes --------------------------------------------------------------- */
96
97 static void
98 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
99
100 static char *
101 allocString(uint32_t length);
102
103 static int
104 compareFiles(const void *file1, const void *file2);
105
106 static char *
107 pathToFullPath(const char *path, const char *source);
108
109 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
110 static void
111 fixDirToTreePath(char *s);
112 /* -------------------------------------------------------------------------- */
113
114 U_CAPI void U_EXPORT2
createCommonDataFile(const char * destDir,const char * name,const char * entrypointName,const char * type,const char * source,const char * copyRight,const char * dataFile,uint32_t max_size,UBool sourceTOC,UBool verbose,char * gencmnFileName)115 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
116 const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
117 static char buffer[4096];
118 char line[512];
119 char *s;
120 UErrorCode errorCode=U_ZERO_ERROR;
121 uint32_t i, fileOffset, basenameOffset, length, nread;
122 FileStream *in, *file;
123
124 maxSize = max_size;
125
126 if (destDir == NULL) {
127 destDir = u_getDataDirectory();
128 }
129 if (name == NULL) {
130 name = COMMON_DATA_NAME;
131 }
132 if (type == NULL) {
133 type = DATA_TYPE;
134 }
135 if (source == NULL) {
136 source = ".";
137 }
138
139 if (dataFile == NULL) {
140 in = T_FileStream_stdin();
141 } else {
142 in = T_FileStream_open(dataFile, "r");
143 if(in == NULL) {
144 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
145 exit(U_FILE_ACCESS_ERROR);
146 }
147 }
148
149 if (verbose) {
150 if(sourceTOC) {
151 printf("generating %s_%s.c (table of contents source file)\n", name, type);
152 } else {
153 printf("generating %s.%s (common data file with table of contents)\n", name, type);
154 }
155 }
156
157 /* read the list of files and get their lengths */
158 while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
159 /* remove trailing newline characters */
160 s=line;
161 while(*s!=0) {
162 if(*s=='\r' || *s=='\n') {
163 *s=0;
164 break;
165 }
166 ++s;
167 }
168
169 /* check for comment */
170
171 if (*line == '#') {
172 continue;
173 }
174
175 /* add the file */
176 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
177 {
178 char *t;
179 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
180 *t = U_FILE_SEP_CHAR;
181 }
182 }
183 #endif
184 addFile(getLongPathname(line), name, source, sourceTOC, verbose);
185 }
186
187 if(in!=T_FileStream_stdin()) {
188 T_FileStream_close(in);
189 }
190
191 if(fileCount==0) {
192 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
193 return;
194 }
195
196 /* sort the files by basename */
197 qsort(files, fileCount, sizeof(File), compareFiles);
198
199 if(!sourceTOC) {
200 UNewDataMemory *out;
201
202 /* determine the offsets of all basenames and files in this common one */
203 basenameOffset=4+8*fileCount;
204 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
205 for(i=0; i<fileCount; ++i) {
206 files[i].fileOffset=fileOffset;
207 fileOffset+=(files[i].fileSize+15)&~0xf;
208 files[i].basenameOffset=basenameOffset;
209 basenameOffset+=files[i].basenameLength;
210 }
211
212 /* create the output file */
213 out=udata_create(destDir, type, name,
214 &dataInfo,
215 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
216 &errorCode);
217 if(U_FAILURE(errorCode)) {
218 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
219 destDir, name, type,
220 u_errorName(errorCode));
221 exit(errorCode);
222 }
223
224 /* write the table of contents */
225 udata_write32(out, fileCount);
226 for(i=0; i<fileCount; ++i) {
227 udata_write32(out, files[i].basenameOffset);
228 udata_write32(out, files[i].fileOffset);
229 }
230
231 /* write the basenames */
232 for(i=0; i<fileCount; ++i) {
233 udata_writeString(out, files[i].basename, files[i].basenameLength);
234 }
235 length=4+8*fileCount+basenameTotal;
236
237 /* copy the files */
238 for(i=0; i<fileCount; ++i) {
239 /* pad to 16-align the next file */
240 length&=0xf;
241 if(length!=0) {
242 udata_writePadding(out, 16-length);
243 }
244
245 if (verbose) {
246 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
247 }
248
249 /* copy the next file */
250 file=T_FileStream_open(files[i].pathname, "rb");
251 if(file==NULL) {
252 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
253 exit(U_FILE_ACCESS_ERROR);
254 }
255 for(nread = 0;;) {
256 length=T_FileStream_read(file, buffer, sizeof(buffer));
257 if(length <= 0) {
258 break;
259 }
260 nread += length;
261 udata_writeBlock(out, buffer, length);
262 }
263 T_FileStream_close(file);
264 length=files[i].fileSize;
265
266 if (nread != files[i].fileSize) {
267 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
268 exit(U_FILE_ACCESS_ERROR);
269 }
270 }
271
272 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
273 length&=0xf;
274 if(length!=0) {
275 udata_writePadding(out, 16-length);
276 }
277
278 /* finish */
279 udata_finish(out, &errorCode);
280 if(U_FAILURE(errorCode)) {
281 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
282 exit(errorCode);
283 }
284 } else {
285 /* write a .c source file with the table of contents */
286 char *filename;
287 FileStream *out;
288
289 /* create the output filename */
290 filename=s=buffer;
291 uprv_strcpy(filename, destDir);
292 s=filename+uprv_strlen(filename);
293 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
294 *s++=U_FILE_SEP_CHAR;
295 }
296 uprv_strcpy(s, name);
297 if(*(type)!=0) {
298 s+=uprv_strlen(s);
299 *s++='_';
300 uprv_strcpy(s, type);
301 }
302 s+=uprv_strlen(s);
303 uprv_strcpy(s, ".c");
304
305 /* open the output file */
306 out=T_FileStream_open(filename, "w");
307 if (gencmnFileName != NULL) {
308 uprv_strcpy(gencmnFileName, filename);
309 }
310 if(out==NULL) {
311 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
312 exit(U_FILE_ACCESS_ERROR);
313 }
314
315 /* write the source file */
316 sprintf(buffer,
317 "/*\n"
318 " * ICU common data table of contents for %s.%s ,\n"
319 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
320 " */\n\n"
321 "#include \"unicode/utypes.h\"\n"
322 "#include \"unicode/udata.h\"\n"
323 "\n"
324 "/* external symbol declarations for data */\n",
325 name, type);
326 T_FileStream_writeLine(out, buffer);
327
328 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
329 T_FileStream_writeLine(out, buffer);
330 for(i=1; i<fileCount; ++i) {
331 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
332 T_FileStream_writeLine(out, buffer);
333 }
334 T_FileStream_writeLine(out, ";\n\n");
335
336 sprintf(
337 buffer,
338 "U_EXPORT struct {\n"
339 " uint16_t headerSize;\n"
340 " uint8_t magic1, magic2;\n"
341 " UDataInfo info;\n"
342 " char padding[%lu];\n"
343 " uint32_t count, reserved;\n"
344 " struct {\n"
345 " const char *name;\n"
346 " const void *data;\n"
347 " } toc[%lu];\n"
348 "} U_EXPORT2 %s_dat = {\n"
349 " 32, 0xda, 0x27, {\n"
350 " %lu, 0,\n"
351 " %u, %u, %u, 0,\n"
352 " {0x54, 0x6f, 0x43, 0x50},\n"
353 " {1, 0, 0, 0},\n"
354 " {0, 0, 0, 0}\n"
355 " },\n"
356 " \"\", %lu, 0, {\n",
357 (unsigned long)32-4-sizeof(UDataInfo),
358 (unsigned long)fileCount,
359 entrypointName,
360 (unsigned long)sizeof(UDataInfo),
361 U_IS_BIG_ENDIAN,
362 U_CHARSET_FAMILY,
363 U_SIZEOF_UCHAR,
364 (unsigned long)fileCount
365 );
366 T_FileStream_writeLine(out, buffer);
367
368 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
369 T_FileStream_writeLine(out, buffer);
370 for(i=1; i<fileCount; ++i) {
371 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
372 T_FileStream_writeLine(out, buffer);
373 }
374
375 T_FileStream_writeLine(out, "\n }\n};\n");
376 T_FileStream_close(out);
377
378 uprv_free(symPrefix);
379 }
380 }
381
382 static void
addFile(const char * filename,const char * name,const char * source,UBool sourceTOC,UBool verbose)383 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
384 char *s;
385 uint32_t length;
386 char *fullPath = NULL;
387
388 if(fileCount==fileMax) {
389 fileMax += CHUNK_FILE_COUNT;
390 files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
391 if(files==NULL) {
392 fprintf(stderr, "pkgdata/gencmn: Could not allocate %ld bytes for %d files\n", (fileMax*sizeof(files[0])), fileCount);
393 exit(U_MEMORY_ALLOCATION_ERROR);
394 }
395 }
396
397 if(!sourceTOC) {
398 FileStream *file;
399
400 if(uprv_pathIsAbsolute(filename)) {
401 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
402 exit(U_ILLEGAL_ARGUMENT_ERROR);
403 }
404 fullPath = pathToFullPath(filename, source);
405
406 /* store the pathname */
407 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
408 s=allocString(length);
409 uprv_strcpy(s, name);
410 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
411 uprv_strcat(s, filename);
412
413 /* get the basename */
414 fixDirToTreePath(s);
415 files[fileCount].basename=s;
416 files[fileCount].basenameLength=length;
417
418 files[fileCount].pathname=fullPath;
419
420 basenameTotal+=length;
421
422 /* try to open the file */
423 file=T_FileStream_open(fullPath, "rb");
424 if(file==NULL) {
425 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
426 exit(U_FILE_ACCESS_ERROR);
427 }
428
429 /* get the file length */
430 length=T_FileStream_size(file);
431 if(T_FileStream_error(file) || length<=20) {
432 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
433 exit(U_FILE_ACCESS_ERROR);
434 }
435
436 T_FileStream_close(file);
437
438 /* do not add files that are longer than maxSize */
439 if(maxSize && length>maxSize) {
440 if (verbose) {
441 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
442 }
443 return;
444 }
445 files[fileCount].fileSize=length;
446 } else {
447 char *t;
448
449 /* get and store the basename */
450 /* need to include the package name */
451 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
452 s=allocString(length);
453 uprv_strcpy(s, name);
454 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
455 uprv_strcat(s, filename);
456 fixDirToTreePath(s);
457 files[fileCount].basename=s;
458
459
460 /* turn the basename into an entry point name and store in the pathname field */
461 t=files[fileCount].pathname=allocString(length);
462 while(--length>0) {
463 if(*s=='.' || *s=='-' || *s=='/') {
464 *t='_';
465 } else {
466 *t=*s;
467 }
468 ++s;
469 ++t;
470 }
471 *t=0;
472 }
473 ++fileCount;
474 }
475
476 static char *
allocString(uint32_t length)477 allocString(uint32_t length) {
478 uint32_t top=stringTop+length;
479 char *p;
480
481 if(top>STRING_STORE_SIZE) {
482 fprintf(stderr, "gencmn: out of memory\n");
483 exit(U_MEMORY_ALLOCATION_ERROR);
484 }
485 p=stringStore+stringTop;
486 stringTop=top;
487 return p;
488 }
489
490 static char *
pathToFullPath(const char * path,const char * source)491 pathToFullPath(const char *path, const char *source) {
492 int32_t length;
493 int32_t newLength;
494 char *fullPath;
495 int32_t n;
496
497 length = (uint32_t)(uprv_strlen(path) + 1);
498 newLength = (length + 1 + (int32_t)uprv_strlen(source));
499 fullPath = uprv_malloc(newLength);
500 if(source != NULL) {
501 uprv_strcpy(fullPath, source);
502 uprv_strcat(fullPath, U_FILE_SEP_STRING);
503 } else {
504 fullPath[0] = 0;
505 }
506 n = (int32_t)uprv_strlen(fullPath);
507 uprv_strcat(fullPath, path);
508
509 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
510 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
511 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
512 for(;fullPath[n];n++) {
513 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
514 fullPath[n] = U_FILE_SEP_CHAR;
515 }
516 }
517 #endif
518 #endif
519 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
520 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
521 for(;fullPath[n];n++) {
522 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
523 fullPath[n] = U_FILE_SEP_CHAR;
524 }
525 }
526 #endif
527 return fullPath;
528 }
529
530 static int
compareFiles(const void * file1,const void * file2)531 compareFiles(const void *file1, const void *file2) {
532 /* sort by basename */
533 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
534 }
535
/*
 * Rewrites s in place so that every U_FILE_SEP_CHAR and every
 * U_FILE_ALT_SEP_CHAR becomes U_TREE_ENTRY_SEP_CHAR. Each pass is compiled
 * in only when the platform macros say the characters differ.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    cursor = uprv_strchr(s, U_FILE_SEP_CHAR);
    while(cursor != NULL) {
        *cursor = U_TREE_ENTRY_SEP_CHAR;
        /* continue searching from the character that was just replaced */
        cursor = uprv_strchr(cursor, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    cursor = uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(cursor != NULL) {
        *cursor = U_TREE_ENTRY_SEP_CHAR;
        cursor = uprv_strchr(cursor, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
553