1 /******************************************************************************
2 * Copyright (C) 2008-2012, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include "unicode/utypes.h"
11 #include "unicode/putil.h"
12 #include "cmemory.h"
13 #include "cstring.h"
14 #include "filestrm.h"
15 #include "toolutil.h"
16 #include "unicode/uclean.h"
17 #include "unewdata.h"
18 #include "putilimp.h"
19 #include "pkg_gencmn.h"
20
21 #define STRING_STORE_SIZE 200000
22
23 #define COMMON_DATA_NAME U_ICUDATA_NAME
24 #define DATA_TYPE "dat"
25
26 /* ICU package data file format (.dat files) ------------------------------- ***
27
28 Description of the data format after the usual ICU data file header
29 (UDataInfo etc.).
30
31 Format version 1
32
33 A .dat package file contains a simple Table of Contents of item names,
34 followed by the items themselves:
35
36 1. ToC table
37
38 uint32_t count; - number of items
39 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
40 uint32_t nameOffset; - offset of the item name
41 uint32_t dataOffset; - offset of the item data
42 both are byte offsets from the beginning of the data
43
44 2. item name strings
45
46 All item names are stored as char * strings in one block between the ToC table
47 and the data items.
48
49 3. data items
50
51 The data items are stored following the item names block.
52 Each data item is 16-aligned.
53 The data items are stored in the sorted order of their names.
54
Therefore, the name strings block ends at (or, because of alignment padding,
just before) the offset of the first item. The length of the last item is the
difference between the .dat file length and that item's offset, and the length
of every other item (including any alignment padding) is the difference
between the next item's offset and its own.
59
60 ----------------------------------------------------------------------------- */
61
62 /* UDataInfo cf. udata.h */
63 static const UDataInfo dataInfo={
64 sizeof(UDataInfo),
65 0,
66
67 U_IS_BIG_ENDIAN,
68 U_CHARSET_FAMILY,
69 sizeof(UChar),
70 0,
71
72 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
73 {1, 0, 0, 0}, /* formatVersion */
74 {3, 0, 0, 0} /* dataVersion */
75 };
76
77 static uint32_t maxSize;
78
79 static char stringStore[STRING_STORE_SIZE];
80 static uint32_t stringTop=0, basenameTotal=0;
81
82 typedef struct {
83 char *pathname, *basename;
84 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
85 } File;
86
87 #define CHUNK_FILE_COUNT 256
88 static File *files = NULL;
89 static uint32_t fileCount=0;
90 static uint32_t fileMax = 0;
91
92
93 static char *symPrefix = NULL;
94
95 #define LINE_BUFFER_SIZE 512
96 /* prototypes --------------------------------------------------------------- */
97
98 static void
99 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
100
101 static char *
102 allocString(uint32_t length);
103
104 static int
105 compareFiles(const void *file1, const void *file2);
106
107 static char *
108 pathToFullPath(const char *path, const char *source);
109
110 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
111 static void
112 fixDirToTreePath(char *s);
113 /* -------------------------------------------------------------------------- */
114
115 U_CAPI void U_EXPORT2
createCommonDataFile(const char * destDir,const char * name,const char * entrypointName,const char * type,const char * source,const char * copyRight,const char * dataFile,uint32_t max_size,UBool sourceTOC,UBool verbose,char * gencmnFileName)116 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
117 const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
118 static char buffer[4096];
119 char *line;
120 char *linePtr;
121 char *s = NULL;
122 UErrorCode errorCode=U_ZERO_ERROR;
123 uint32_t i, fileOffset, basenameOffset, length, nread;
124 FileStream *in, *file;
125
126 line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
127 if (line == NULL) {
128 fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
129 exit(U_MEMORY_ALLOCATION_ERROR);
130 }
131
132 linePtr = line;
133
134 maxSize = max_size;
135
136 if (destDir == NULL) {
137 destDir = u_getDataDirectory();
138 }
139 if (name == NULL) {
140 name = COMMON_DATA_NAME;
141 }
142 if (type == NULL) {
143 type = DATA_TYPE;
144 }
145 if (source == NULL) {
146 source = ".";
147 }
148
149 if (dataFile == NULL) {
150 in = T_FileStream_stdin();
151 } else {
152 in = T_FileStream_open(dataFile, "r");
153 if(in == NULL) {
154 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
155 exit(U_FILE_ACCESS_ERROR);
156 }
157 }
158
159 if (verbose) {
160 if(sourceTOC) {
161 printf("generating %s_%s.c (table of contents source file)\n", name, type);
162 } else {
163 printf("generating %s.%s (common data file with table of contents)\n", name, type);
164 }
165 }
166
167 /* read the list of files and get their lengths */
168 while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
169 LINE_BUFFER_SIZE))!=NULL) {
170 /* remove trailing newline characters and parse space separated items */
171 if (s != NULL && *s != 0) {
172 line=s;
173 } else {
174 s=line;
175 }
176 while(*s!=0) {
177 if(*s==' ') {
178 *s=0;
179 ++s;
180 break;
181 } else if(*s=='\r' || *s=='\n') {
182 *s=0;
183 break;
184 }
185 ++s;
186 }
187
188 /* check for comment */
189
190 if (*line == '#') {
191 continue;
192 }
193
194 /* add the file */
195 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
196 {
197 char *t;
198 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
199 *t = U_FILE_SEP_CHAR;
200 }
201 }
202 #endif
203 addFile(getLongPathname(line), name, source, sourceTOC, verbose);
204 }
205
206 uprv_free(linePtr);
207
208 if(in!=T_FileStream_stdin()) {
209 T_FileStream_close(in);
210 }
211
212 if(fileCount==0) {
213 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
214 return;
215 }
216
217 /* sort the files by basename */
218 qsort(files, fileCount, sizeof(File), compareFiles);
219
220 if(!sourceTOC) {
221 UNewDataMemory *out;
222
223 /* determine the offsets of all basenames and files in this common one */
224 basenameOffset=4+8*fileCount;
225 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
226 for(i=0; i<fileCount; ++i) {
227 files[i].fileOffset=fileOffset;
228 fileOffset+=(files[i].fileSize+15)&~0xf;
229 files[i].basenameOffset=basenameOffset;
230 basenameOffset+=files[i].basenameLength;
231 }
232
233 /* create the output file */
234 out=udata_create(destDir, type, name,
235 &dataInfo,
236 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
237 &errorCode);
238 if(U_FAILURE(errorCode)) {
239 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
240 destDir, name, type,
241 u_errorName(errorCode));
242 exit(errorCode);
243 }
244
245 /* write the table of contents */
246 udata_write32(out, fileCount);
247 for(i=0; i<fileCount; ++i) {
248 udata_write32(out, files[i].basenameOffset);
249 udata_write32(out, files[i].fileOffset);
250 }
251
252 /* write the basenames */
253 for(i=0; i<fileCount; ++i) {
254 udata_writeString(out, files[i].basename, files[i].basenameLength);
255 }
256 length=4+8*fileCount+basenameTotal;
257
258 /* copy the files */
259 for(i=0; i<fileCount; ++i) {
260 /* pad to 16-align the next file */
261 length&=0xf;
262 if(length!=0) {
263 udata_writePadding(out, 16-length);
264 }
265
266 if (verbose) {
267 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
268 }
269
270 /* copy the next file */
271 file=T_FileStream_open(files[i].pathname, "rb");
272 if(file==NULL) {
273 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
274 exit(U_FILE_ACCESS_ERROR);
275 }
276 for(nread = 0;;) {
277 length=T_FileStream_read(file, buffer, sizeof(buffer));
278 if(length <= 0) {
279 break;
280 }
281 nread += length;
282 udata_writeBlock(out, buffer, length);
283 }
284 T_FileStream_close(file);
285 length=files[i].fileSize;
286
287 if (nread != files[i].fileSize) {
288 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
289 exit(U_FILE_ACCESS_ERROR);
290 }
291 }
292
293 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
294 length&=0xf;
295 if(length!=0) {
296 udata_writePadding(out, 16-length);
297 }
298
299 /* finish */
300 udata_finish(out, &errorCode);
301 if(U_FAILURE(errorCode)) {
302 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
303 exit(errorCode);
304 }
305 } else {
306 /* write a .c source file with the table of contents */
307 char *filename;
308 FileStream *out;
309
310 /* create the output filename */
311 filename=s=buffer;
312 uprv_strcpy(filename, destDir);
313 s=filename+uprv_strlen(filename);
314 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
315 *s++=U_FILE_SEP_CHAR;
316 }
317 uprv_strcpy(s, name);
318 if(*(type)!=0) {
319 s+=uprv_strlen(s);
320 *s++='_';
321 uprv_strcpy(s, type);
322 }
323 s+=uprv_strlen(s);
324 uprv_strcpy(s, ".c");
325
326 /* open the output file */
327 out=T_FileStream_open(filename, "w");
328 if (gencmnFileName != NULL) {
329 uprv_strcpy(gencmnFileName, filename);
330 }
331 if(out==NULL) {
332 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
333 exit(U_FILE_ACCESS_ERROR);
334 }
335
336 /* write the source file */
337 sprintf(buffer,
338 "/*\n"
339 " * ICU common data table of contents for %s.%s\n"
340 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
341 " */\n\n"
342 "#include \"unicode/utypes.h\"\n"
343 "#include \"unicode/udata.h\"\n"
344 "\n"
345 "/* external symbol declarations for data (%d files) */\n",
346 name, type, fileCount);
347 T_FileStream_writeLine(out, buffer);
348
349 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
350 T_FileStream_writeLine(out, buffer);
351 for(i=1; i<fileCount; ++i) {
352 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
353 T_FileStream_writeLine(out, buffer);
354 }
355 T_FileStream_writeLine(out, ";\n\n");
356
357 sprintf(
358 buffer,
359 "U_EXPORT struct {\n"
360 " uint16_t headerSize;\n"
361 " uint8_t magic1, magic2;\n"
362 " UDataInfo info;\n"
363 " char padding[%lu];\n"
364 " uint32_t count, reserved;\n"
365 " struct {\n"
366 " const char *name;\n"
367 " const void *data;\n"
368 " } toc[%lu];\n"
369 "} U_EXPORT2 %s_dat = {\n"
370 " 32, 0xda, 0x27, {\n"
371 " %lu, 0,\n"
372 " %u, %u, %u, 0,\n"
373 " {0x54, 0x6f, 0x43, 0x50},\n"
374 " {1, 0, 0, 0},\n"
375 " {0, 0, 0, 0}\n"
376 " },\n"
377 " \"\", %lu, 0, {\n",
378 (unsigned long)32-4-sizeof(UDataInfo),
379 (unsigned long)fileCount,
380 entrypointName,
381 (unsigned long)sizeof(UDataInfo),
382 U_IS_BIG_ENDIAN,
383 U_CHARSET_FAMILY,
384 U_SIZEOF_UCHAR,
385 (unsigned long)fileCount
386 );
387 T_FileStream_writeLine(out, buffer);
388
389 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
390 T_FileStream_writeLine(out, buffer);
391 for(i=1; i<fileCount; ++i) {
392 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
393 T_FileStream_writeLine(out, buffer);
394 }
395
396 T_FileStream_writeLine(out, "\n }\n};\n");
397 T_FileStream_close(out);
398
399 uprv_free(symPrefix);
400 }
401 }
402
403 static void
addFile(const char * filename,const char * name,const char * source,UBool sourceTOC,UBool verbose)404 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
405 char *s;
406 uint32_t length;
407 char *fullPath = NULL;
408
409 if(fileCount==fileMax) {
410 fileMax += CHUNK_FILE_COUNT;
411 files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
412 if(files==NULL) {
413 fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
414 exit(U_MEMORY_ALLOCATION_ERROR);
415 }
416 }
417
418 if(!sourceTOC) {
419 FileStream *file;
420
421 if(uprv_pathIsAbsolute(filename)) {
422 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
423 exit(U_ILLEGAL_ARGUMENT_ERROR);
424 }
425 fullPath = pathToFullPath(filename, source);
426 /* store the pathname */
427 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
428 s=allocString(length);
429 uprv_strcpy(s, name);
430 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
431 uprv_strcat(s, filename);
432
433 /* get the basename */
434 fixDirToTreePath(s);
435 files[fileCount].basename=s;
436 files[fileCount].basenameLength=length;
437
438 files[fileCount].pathname=fullPath;
439
440 basenameTotal+=length;
441
442 /* try to open the file */
443 file=T_FileStream_open(fullPath, "rb");
444 if(file==NULL) {
445 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
446 exit(U_FILE_ACCESS_ERROR);
447 }
448
449 /* get the file length */
450 length=T_FileStream_size(file);
451 if(T_FileStream_error(file) || length<=20) {
452 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
453 exit(U_FILE_ACCESS_ERROR);
454 }
455
456 T_FileStream_close(file);
457
458 /* do not add files that are longer than maxSize */
459 if(maxSize && length>maxSize) {
460 if (verbose) {
461 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
462 }
463 return;
464 }
465 files[fileCount].fileSize=length;
466 } else {
467 char *t;
468 /* get and store the basename */
469 /* need to include the package name */
470 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
471 s=allocString(length);
472 uprv_strcpy(s, name);
473 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
474 uprv_strcat(s, filename);
475 fixDirToTreePath(s);
476 files[fileCount].basename=s;
477 /* turn the basename into an entry point name and store in the pathname field */
478 t=files[fileCount].pathname=allocString(length);
479 while(--length>0) {
480 if(*s=='.' || *s=='-' || *s=='/') {
481 *t='_';
482 } else {
483 *t=*s;
484 }
485 ++s;
486 ++t;
487 }
488 *t=0;
489 }
490 ++fileCount;
491 }
492
493 static char *
allocString(uint32_t length)494 allocString(uint32_t length) {
495 uint32_t top=stringTop+length;
496 char *p;
497
498 if(top>STRING_STORE_SIZE) {
499 fprintf(stderr, "gencmn: out of memory\n");
500 exit(U_MEMORY_ALLOCATION_ERROR);
501 }
502 p=stringStore+stringTop;
503 stringTop=top;
504 return p;
505 }
506
507 static char *
pathToFullPath(const char * path,const char * source)508 pathToFullPath(const char *path, const char *source) {
509 int32_t length;
510 int32_t newLength;
511 char *fullPath;
512 int32_t n;
513
514 length = (uint32_t)(uprv_strlen(path) + 1);
515 newLength = (length + 1 + (int32_t)uprv_strlen(source));
516 fullPath = uprv_malloc(newLength);
517 if(source != NULL) {
518 uprv_strcpy(fullPath, source);
519 uprv_strcat(fullPath, U_FILE_SEP_STRING);
520 } else {
521 fullPath[0] = 0;
522 }
523 n = (int32_t)uprv_strlen(fullPath);
524 fullPath[n] = 0; /* Suppress compiler warning for unused variable n */
525 /* when conditional code below is not compiled. */
526 uprv_strcat(fullPath, path);
527
528 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
529 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
530 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
531 for(;fullPath[n];n++) {
532 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
533 fullPath[n] = U_FILE_SEP_CHAR;
534 }
535 }
536 #endif
537 #endif
538 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
539 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
540 for(;fullPath[n];n++) {
541 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
542 fullPath[n] = U_FILE_SEP_CHAR;
543 }
544 }
545 #endif
546 return fullPath;
547 }
548
549 static int
compareFiles(const void * file1,const void * file2)550 compareFiles(const void *file1, const void *file2) {
551 /* sort by basename */
552 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
553 }
554
/*
 * Convert every file separator in s to the tree separator, in place.
 * Handles U_FILE_SEP_CHAR and, where it is a distinct character,
 * U_FILE_ALT_SEP_CHAR; compiles to a no-op when no conversion is needed.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;

    for(cursor = s; *cursor != 0; ++cursor) {
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor == U_FILE_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
            continue;
        }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor == U_FILE_ALT_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
        }
#endif
    }
#else
    /* nothing to convert on this platform */
    (void)s;
#endif
}
572