1 /******************************************************************************
2 * Copyright (C) 2008, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include "unicode/utypes.h"
11 #include "unicode/putil.h"
12 #include "cmemory.h"
13 #include "cstring.h"
14 #include "filestrm.h"
15 #include "toolutil.h"
16 #include "unicode/uclean.h"
17 #include "unewdata.h"
18 #include "putilimp.h"
19 #include "pkg_gencmn.h"
20
/* capacity, in bytes, of the static stringStore[] arena served by allocString() */
#define STRING_STORE_SIZE 100000
/* capacity of the static files[] table; addFile() exits when it is full */
#define MAX_FILE_COUNT 2000

/* package name used by createCommonDataFile() when the caller passes name==NULL */
#define COMMON_DATA_NAME U_ICUDATA_NAME
/* data type (file suffix) used by createCommonDataFile() when the caller passes type==NULL */
#define DATA_TYPE "dat"
26
27 /* ICU package data file format (.dat files) ------------------------------- ***
28
29 Description of the data format after the usual ICU data file header
30 (UDataInfo etc.).
31
32 Format version 1
33
34 A .dat package file contains a simple Table of Contents of item names,
35 followed by the items themselves:
36
37 1. ToC table
38
39 uint32_t count; - number of items
40 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
41 uint32_t nameOffset; - offset of the item name
42 uint32_t dataOffset; - offset of the item data
43 both are byte offsets from the beginning of the data
44
45 2. item name strings
46
47 All item names are stored as char * strings in one block between the ToC table
48 and the data items.
49
50 3. data items
51
52 The data items are stored following the item names block.
53 Each data item is 16-aligned.
54 The data items are stored in the sorted order of their names.
55
Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of every other item is the difference
between its offset and the offset of the next item.
60
61 ----------------------------------------------------------------------------- */
62
63 /* UDataInfo cf. udata.h */
64 static const UDataInfo dataInfo={
65 sizeof(UDataInfo),
66 0,
67
68 U_IS_BIG_ENDIAN,
69 U_CHARSET_FAMILY,
70 sizeof(UChar),
71 0,
72
73 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
74 {1, 0, 0, 0}, /* formatVersion */
75 {3, 0, 0, 0} /* dataVersion */
76 };
77
78 static uint32_t maxSize;
79
80 static char stringStore[STRING_STORE_SIZE];
81 static uint32_t stringTop=0, basenameTotal=0;
82
83 typedef struct {
84 char *pathname, *basename;
85 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
86 } File;
87
88 static File files[MAX_FILE_COUNT];
89 static uint32_t fileCount=0;
90
91 static char *symPrefix = NULL;
92
93 /* prototypes --------------------------------------------------------------- */
94
95 static void
96 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
97
98 static char *
99 allocString(uint32_t length);
100
101 static int
102 compareFiles(const void *file1, const void *file2);
103
104 static char *
105 pathToFullPath(const char *path, const char *source);
106
107 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
108 static void
109 fixDirToTreePath(char *s);
110 /* -------------------------------------------------------------------------- */
111
112 U_CAPI void U_EXPORT2
createCommonDataFile(const char * destDir,const char * name,const char * entrypointName,const char * type,const char * source,const char * copyRight,const char * dataFile,uint32_t max_size,UBool sourceTOC,UBool verbose,char * gencmnFileName)113 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
114 const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
115 static char buffer[4096];
116 char line[512];
117 char *s;
118 UErrorCode errorCode=U_ZERO_ERROR;
119 uint32_t i, fileOffset, basenameOffset, length, nread;
120 FileStream *in, *file;
121
122 maxSize = max_size;
123
124 if (destDir == NULL) {
125 destDir = u_getDataDirectory();
126 }
127 if (name == NULL) {
128 name = COMMON_DATA_NAME;
129 }
130 if (type == NULL) {
131 type = DATA_TYPE;
132 }
133 if (source == NULL) {
134 source = ".";
135 }
136
137 if (dataFile == NULL) {
138 in = T_FileStream_stdin();
139 } else {
140 in = T_FileStream_open(dataFile, "r");
141 if(in == NULL) {
142 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
143 exit(U_FILE_ACCESS_ERROR);
144 }
145 }
146
147 if (verbose) {
148 if(sourceTOC) {
149 printf("generating %s_%s.c (table of contents source file)\n", name, type);
150 } else {
151 printf("generating %s.%s (common data file with table of contents)\n", name, type);
152 }
153 }
154
155 /* read the list of files and get their lengths */
156 while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
157 /* remove trailing newline characters */
158 s=line;
159 while(*s!=0) {
160 if(*s=='\r' || *s=='\n') {
161 *s=0;
162 break;
163 }
164 ++s;
165 }
166
167 /* check for comment */
168
169 if (*line == '#') {
170 continue;
171 }
172
173 /* add the file */
174 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
175 {
176 char *t;
177 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
178 *t = U_FILE_SEP_CHAR;
179 }
180 }
181 #endif
182 addFile(getLongPathname(line), name, source, sourceTOC, verbose);
183 }
184
185 if(in!=T_FileStream_stdin()) {
186 T_FileStream_close(in);
187 }
188
189 if(fileCount==0) {
190 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
191 return;
192 }
193
194 /* sort the files by basename */
195 qsort(files, fileCount, sizeof(File), compareFiles);
196
197 if(!sourceTOC) {
198 UNewDataMemory *out;
199
200 /* determine the offsets of all basenames and files in this common one */
201 basenameOffset=4+8*fileCount;
202 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
203 for(i=0; i<fileCount; ++i) {
204 files[i].fileOffset=fileOffset;
205 fileOffset+=(files[i].fileSize+15)&~0xf;
206 files[i].basenameOffset=basenameOffset;
207 basenameOffset+=files[i].basenameLength;
208 }
209
210 /* create the output file */
211 out=udata_create(destDir, type, name,
212 &dataInfo,
213 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
214 &errorCode);
215 if(U_FAILURE(errorCode)) {
216 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
217 destDir, name, type,
218 u_errorName(errorCode));
219 exit(errorCode);
220 }
221
222 /* write the table of contents */
223 udata_write32(out, fileCount);
224 for(i=0; i<fileCount; ++i) {
225 udata_write32(out, files[i].basenameOffset);
226 udata_write32(out, files[i].fileOffset);
227 }
228
229 /* write the basenames */
230 for(i=0; i<fileCount; ++i) {
231 udata_writeString(out, files[i].basename, files[i].basenameLength);
232 }
233 length=4+8*fileCount+basenameTotal;
234
235 /* copy the files */
236 for(i=0; i<fileCount; ++i) {
237 /* pad to 16-align the next file */
238 length&=0xf;
239 if(length!=0) {
240 udata_writePadding(out, 16-length);
241 }
242
243 if (verbose) {
244 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
245 }
246
247 /* copy the next file */
248 file=T_FileStream_open(files[i].pathname, "rb");
249 if(file==NULL) {
250 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
251 exit(U_FILE_ACCESS_ERROR);
252 }
253 for(nread = 0;;) {
254 length=T_FileStream_read(file, buffer, sizeof(buffer));
255 if(length <= 0) {
256 break;
257 }
258 nread += length;
259 udata_writeBlock(out, buffer, length);
260 }
261 T_FileStream_close(file);
262 length=files[i].fileSize;
263
264 if (nread != files[i].fileSize) {
265 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
266 exit(U_FILE_ACCESS_ERROR);
267 }
268 }
269
270 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
271 length&=0xf;
272 if(length!=0) {
273 udata_writePadding(out, 16-length);
274 }
275
276 /* finish */
277 udata_finish(out, &errorCode);
278 if(U_FAILURE(errorCode)) {
279 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
280 exit(errorCode);
281 }
282 } else {
283 /* write a .c source file with the table of contents */
284 char *filename;
285 FileStream *out;
286
287 /* create the output filename */
288 filename=s=buffer;
289 uprv_strcpy(filename, destDir);
290 s=filename+uprv_strlen(filename);
291 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
292 *s++=U_FILE_SEP_CHAR;
293 }
294 uprv_strcpy(s, name);
295 if(*(type)!=0) {
296 s+=uprv_strlen(s);
297 *s++='_';
298 uprv_strcpy(s, type);
299 }
300 s+=uprv_strlen(s);
301 uprv_strcpy(s, ".c");
302
303 /* open the output file */
304 out=T_FileStream_open(filename, "w");
305 if (gencmnFileName != NULL) {
306 uprv_strcpy(gencmnFileName, filename);
307 }
308 if(out==NULL) {
309 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
310 exit(U_FILE_ACCESS_ERROR);
311 }
312
313 /* write the source file */
314 sprintf(buffer,
315 "/*\n"
316 " * ICU common data table of contents for %s.%s ,\n"
317 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
318 " */\n\n"
319 "#include \"unicode/utypes.h\"\n"
320 "#include \"unicode/udata.h\"\n"
321 "\n"
322 "/* external symbol declarations for data */\n",
323 name, type);
324 T_FileStream_writeLine(out, buffer);
325
326 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
327 T_FileStream_writeLine(out, buffer);
328 for(i=1; i<fileCount; ++i) {
329 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
330 T_FileStream_writeLine(out, buffer);
331 }
332 T_FileStream_writeLine(out, ";\n\n");
333
334 sprintf(
335 buffer,
336 "U_EXPORT struct {\n"
337 " uint16_t headerSize;\n"
338 " uint8_t magic1, magic2;\n"
339 " UDataInfo info;\n"
340 " char padding[%lu];\n"
341 " uint32_t count, reserved;\n"
342 " struct {\n"
343 " const char *name;\n"
344 " const void *data;\n"
345 " } toc[%lu];\n"
346 "} U_EXPORT2 %s_dat = {\n"
347 " 32, 0xda, 0x27, {\n"
348 " %lu, 0,\n"
349 " %u, %u, %u, 0,\n"
350 " {0x54, 0x6f, 0x43, 0x50},\n"
351 " {1, 0, 0, 0},\n"
352 " {0, 0, 0, 0}\n"
353 " },\n"
354 " \"\", %lu, 0, {\n",
355 (unsigned long)32-4-sizeof(UDataInfo),
356 (unsigned long)fileCount,
357 entrypointName,
358 (unsigned long)sizeof(UDataInfo),
359 U_IS_BIG_ENDIAN,
360 U_CHARSET_FAMILY,
361 U_SIZEOF_UCHAR,
362 (unsigned long)fileCount
363 );
364 T_FileStream_writeLine(out, buffer);
365
366 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
367 T_FileStream_writeLine(out, buffer);
368 for(i=1; i<fileCount; ++i) {
369 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
370 T_FileStream_writeLine(out, buffer);
371 }
372
373 T_FileStream_writeLine(out, "\n }\n};\n");
374 T_FileStream_close(out);
375
376 uprv_free(symPrefix);
377 }
378 }
379
380 static void
addFile(const char * filename,const char * name,const char * source,UBool sourceTOC,UBool verbose)381 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
382 char *s;
383 uint32_t length;
384 char *fullPath = NULL;
385
386 if(fileCount==MAX_FILE_COUNT) {
387 fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
388 exit(U_BUFFER_OVERFLOW_ERROR);
389 }
390
391 if(!sourceTOC) {
392 FileStream *file;
393
394 if(uprv_pathIsAbsolute(filename)) {
395 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
396 exit(U_ILLEGAL_ARGUMENT_ERROR);
397 }
398 fullPath = pathToFullPath(filename, source);
399
400 /* store the pathname */
401 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
402 s=allocString(length);
403 uprv_strcpy(s, name);
404 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
405 uprv_strcat(s, filename);
406
407 /* get the basename */
408 fixDirToTreePath(s);
409 files[fileCount].basename=s;
410 files[fileCount].basenameLength=length;
411
412 files[fileCount].pathname=fullPath;
413
414 basenameTotal+=length;
415
416 /* try to open the file */
417 file=T_FileStream_open(fullPath, "rb");
418 if(file==NULL) {
419 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
420 exit(U_FILE_ACCESS_ERROR);
421 }
422
423 /* get the file length */
424 length=T_FileStream_size(file);
425 if(T_FileStream_error(file) || length<=20) {
426 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
427 exit(U_FILE_ACCESS_ERROR);
428 }
429
430 T_FileStream_close(file);
431
432 /* do not add files that are longer than maxSize */
433 if(maxSize && length>maxSize) {
434 if (verbose) {
435 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
436 }
437 return;
438 }
439 files[fileCount].fileSize=length;
440 } else {
441 char *t;
442
443 /* get and store the basename */
444 /* need to include the package name */
445 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
446 s=allocString(length);
447 uprv_strcpy(s, name);
448 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
449 uprv_strcat(s, filename);
450 fixDirToTreePath(s);
451 files[fileCount].basename=s;
452
453
454 /* turn the basename into an entry point name and store in the pathname field */
455 t=files[fileCount].pathname=allocString(length);
456 while(--length>0) {
457 if(*s=='.' || *s=='-' || *s=='/') {
458 *t='_';
459 } else {
460 *t=*s;
461 }
462 ++s;
463 ++t;
464 }
465 *t=0;
466 }
467 ++fileCount;
468 }
469
470 static char *
allocString(uint32_t length)471 allocString(uint32_t length) {
472 uint32_t top=stringTop+length;
473 char *p;
474
475 if(top>STRING_STORE_SIZE) {
476 fprintf(stderr, "gencmn: out of memory\n");
477 exit(U_MEMORY_ALLOCATION_ERROR);
478 }
479 p=stringStore+stringTop;
480 stringTop=top;
481 return p;
482 }
483
484 static char *
pathToFullPath(const char * path,const char * source)485 pathToFullPath(const char *path, const char *source) {
486 int32_t length;
487 int32_t newLength;
488 char *fullPath;
489 int32_t n;
490
491 length = (uint32_t)(uprv_strlen(path) + 1);
492 newLength = (length + 1 + (int32_t)uprv_strlen(source));
493 fullPath = uprv_malloc(newLength);
494 if(source != NULL) {
495 uprv_strcpy(fullPath, source);
496 uprv_strcat(fullPath, U_FILE_SEP_STRING);
497 } else {
498 fullPath[0] = 0;
499 }
500 n = (int32_t)uprv_strlen(fullPath);
501 uprv_strcat(fullPath, path);
502
503 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
504 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
505 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
506 for(;fullPath[n];n++) {
507 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
508 fullPath[n] = U_FILE_SEP_CHAR;
509 }
510 }
511 #endif
512 #endif
513 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
514 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
515 for(;fullPath[n];n++) {
516 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
517 fullPath[n] = U_FILE_SEP_CHAR;
518 }
519 }
520 #endif
521 return fullPath;
522 }
523
524 static int
compareFiles(const void * file1,const void * file2)525 compareFiles(const void *file1, const void *file2) {
526 /* sort by basename */
527 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
528 }
529
/*
 * Replace, in place, every file separator and every alternate file separator
 * in s with the tree entry separator. Each pass is compiled in only when the
 * respective separator character differs from the tree separator, so on some
 * platforms this function does nothing.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *hit;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    hit=uprv_strchr(s, U_FILE_SEP_CHAR);
    while(hit!=NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        /* continue from the character that was just replaced; it no longer matches */
        hit=uprv_strchr(hit, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    hit=uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(hit!=NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        hit=uprv_strchr(hit, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
547