1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "uarrsort.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23
24 #include <stdio.h>
25
26 U_NAMESPACE_USE
27
28 // TODO: Clean up and comment this code.
29
30 //----------------------------------------------------------------------
31 // BEGIN DATA
32 //
33 // This is the raw data to be output. We define the data structure,
34 // then include a machine-generated header that contains the actual
35 // data.
36
37 #include "unicode/uchar.h"
38 #include "unicode/uscript.h"
39 #include "unicode/unorm.h"
40
41 class AliasName {
42 public:
43 const char* str;
44 int32_t index;
45
46 AliasName(const char* str, int32_t index);
47
48 int compare(const AliasName& other) const;
49
operator ==(const AliasName & other) const50 UBool operator==(const AliasName& other) const {
51 return compare(other) == 0;
52 }
53
operator !=(const AliasName & other) const54 UBool operator!=(const AliasName& other) const {
55 return compare(other) != 0;
56 }
57 };
58
AliasName(const char * _str,int32_t _index)59 AliasName::AliasName(const char* _str,
60 int32_t _index) :
61 str(_str),
62 index(_index)
63 {
64 }
65
compare(const AliasName & other) const66 int AliasName::compare(const AliasName& other) const {
67 return uprv_comparePropertyNames(str, other.str);
68 }
69
/**
 * Pairs an enum value with the index of its name group.
 * getUniqueNames() uses nameGroupIndex as its starting position in
 * NAME_GROUP.
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    // Fills nameGroupIndices with this alias's distinct, non-empty
    // name indices and returns how many were written.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};
80
Alias(int32_t anEnumValue,int32_t aNameGroupIndex)81 Alias::Alias(int32_t anEnumValue,
82 int32_t aNameGroupIndex) :
83 enumValue(anEnumValue),
84 nameGroupIndex(aNameGroupIndex)
85 {
86 }
87
88 class Property : public Alias {
89 public:
90 int32_t valueCount;
91 const Alias* valueList;
92
93 Property(int32_t enumValue,
94 int32_t nameGroupIndex,
95 int32_t valueCount,
96 const Alias* valueList);
97 };
98
Property(int32_t _enumValue,int32_t _nameGroupIndex,int32_t _valueCount,const Alias * _valueList)99 Property::Property(int32_t _enumValue,
100 int32_t _nameGroupIndex,
101 int32_t _valueCount,
102 const Alias* _valueList) :
103 Alias(_enumValue, _nameGroupIndex),
104 valueCount(_valueCount),
105 valueList(_valueList)
106 {
107 }
108
109 // *** Include the data header ***
110 #include "data.h"
111
112 /* return a list of unique names, not including "", for this property
113 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
114 * elements, will be filled with indices into STRING_TABLE
115 * @return number of indices, >= 1
116 */
getUniqueNames(int32_t * stringIndices) const117 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
118 int32_t count = 0;
119 int32_t i = nameGroupIndex;
120 UBool done = FALSE;
121 while (!done) {
122 int32_t j = NAME_GROUP[i++];
123 if (j < 0) {
124 done = TRUE;
125 j = -j;
126 }
127 if (j == 0) continue; // omit "" entries
128 UBool dupe = FALSE;
129 for (int32_t k=0; k<count; ++k) {
130 if (stringIndices[k] == j) {
131 dupe = TRUE;
132 break;
133 }
134 // also do a string check for things like "age|Age"
135 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
136 //printf("Found dupe %s|%s\n",
137 // STRING_TABLE[stringIndices[k]].str,
138 // STRING_TABLE[j].str);
139 dupe = TRUE;
140 break;
141 }
142 }
143 if (dupe) continue; // omit duplicates
144 stringIndices[count++] = j;
145 }
146 return count;
147 }
148
149 // END DATA
150 //----------------------------------------------------------------------
151
// Allocates room for `count` objects of `type` with uprv_malloc() and
// casts the result to type*. The memory is not initialized and the
// result is not checked here. `count` is parenthesized so that an
// expression argument such as a+b is multiplied as a whole.
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
154
// Reports a fatal error on stderr as "Error: <msg>" and terminates the
// process with exit status 1. Does not return.
void die(const char* msg)
{
    fprintf(stderr, "Error: %s\n", msg);
    exit(1);
}
159
160 //----------------------------------------------------------------------
161
162 /**
163 * A list of Alias objects.
164 */
165 class AliasList {
166 public:
167 virtual ~AliasList();
168 virtual const Alias& operator[](int32_t i) const = 0;
169 virtual int32_t count() const = 0;
170 };
171
~AliasList()172 AliasList::~AliasList() {}
173
174 /**
175 * A single array.
176 */
177 class AliasArrayList : public AliasList {
178 const Alias* a;
179 int32_t n;
180 public:
AliasArrayList(const Alias * _a,int32_t _n)181 AliasArrayList(const Alias* _a, int32_t _n) {
182 a = _a;
183 n = _n;
184 }
operator [](int32_t i) const185 virtual const Alias& operator[](int32_t i) const {
186 return a[i];
187 }
count() const188 virtual int32_t count() const {
189 return n;
190 }
191 };
192
193 /**
194 * A single array.
195 */
196 class PropertyArrayList : public AliasList {
197 const Property* a;
198 int32_t n;
199 public:
PropertyArrayList(const Property * _a,int32_t _n)200 PropertyArrayList(const Property* _a, int32_t _n) {
201 a = _a;
202 n = _n;
203 }
operator [](int32_t i) const204 virtual const Alias& operator[](int32_t i) const {
205 return a[i];
206 }
count() const207 virtual int32_t count() const {
208 return n;
209 }
210 };
211
212 //----------------------------------------------------------------------
213
/**
 * One row of a name index: the index of a name together with the enum
 * value that name maps to.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    // a is the name index, b the enum value.
    NameToEnumEntry(int32_t a, int32_t b) : nameIndex(a), enumValue(b) {}
};
224
225 // Sort function for NameToEnumEntry (sort by name)
226 U_CFUNC int32_t
compareNameToEnumEntry(const void *,const void * e1,const void * e2)227 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
228 return
229 STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
230 compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
231 }
232
233 //----------------------------------------------------------------------
234
235 /**
236 * An element in an enum index. It maps an enum into a name group entry
237 * (given by index).
238 */
239 class EnumToNameGroupEntry {
240 public:
241 int32_t enumValue;
242 int32_t nameGroupIndex;
EnumToNameGroupEntry(int32_t a,int32_t b)243 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
244
245 // are enumValues contiguous for count entries starting with this one?
246 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const247 UBool isContiguous(int32_t count) const {
248 const EnumToNameGroupEntry* p = this;
249 for (int32_t i=1; i<count; ++i) {
250 if (p[i].enumValue != (this->enumValue + i)) {
251 return FALSE;
252 }
253 }
254 return TRUE;
255 }
256 };
257
258 // Sort function for EnumToNameGroupEntry (sort by name index)
259 U_CFUNC int32_t
compareEnumToNameGroupEntry(const void *,const void * e1,const void * e2)260 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
261 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
262 }
263
264 //----------------------------------------------------------------------
265
266 /**
267 * An element in the map from enumerated property enums to value maps.
268 */
269 class EnumToValueEntry {
270 public:
271 int32_t enumValue;
272 EnumToNameGroupEntry* enumToName;
273 int32_t enumToName_count;
274 NameToEnumEntry* nameToEnum;
275 int32_t nameToEnum_count;
276
277 // are enumValues contiguous for count entries starting with this one?
278 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const279 UBool isContiguous(int32_t count) const {
280 const EnumToValueEntry* p = this;
281 for (int32_t i=1; i<count; ++i) {
282 if (p[i].enumValue != (this->enumValue + i)) {
283 return FALSE;
284 }
285 }
286 return TRUE;
287 }
288 };
289
290 // Sort function for EnumToValueEntry (sort by enum)
291 U_CFUNC int32_t
compareEnumToValueEntry(const void *,const void * e1,const void * e2)292 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
293 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
294 }
295
296 //----------------------------------------------------------------------
297 // BEGIN Builder
298
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: x is evaluated twice.
#define IS_VALID_OFFSET(x) \
    ((0 <= (x)) && ((x) <= MAX_OFFSET))
300
301 class Builder {
302 // header:
303 PropertyAliases header;
304
305 // 0:
306 NonContiguousEnumToOffset* enumToName;
307 int32_t enumToName_size;
308 Offset enumToName_offset;
309
310 // 1: (deleted)
311
312 // 2:
313 NameToEnum* nameToEnum;
314 int32_t nameToEnum_size;
315 Offset nameToEnum_offset;
316
317 // 3:
318 NonContiguousEnumToOffset* enumToValue;
319 int32_t enumToValue_size;
320 Offset enumToValue_offset;
321
322 // 4:
323 ValueMap* valueMap;
324 int32_t valueMap_size;
325 int32_t valueMap_count;
326 Offset valueMap_offset;
327
328 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
329 // NULL and one is not. valueEnumToName_size[i] is the size of
330 // the non-NULL one. i=0..valueMapCount-1
331 // 5a:
332 EnumToOffset** valueEnumToName;
333 // 5b:
334 NonContiguousEnumToOffset** valueNCEnumToName;
335 int32_t* valueEnumToName_size;
336 Offset* valueEnumToName_offset;
337 // 6:
338 // arrays of valueMap_count pointers, sizes, & offsets
339 NameToEnum** valueNameToEnum;
340 int32_t* valueNameToEnum_size;
341 Offset* valueNameToEnum_offset;
342
343 // 98:
344 Offset* nameGroupPool;
345 int32_t nameGroupPool_count;
346 int32_t nameGroupPool_size;
347 Offset nameGroupPool_offset;
348
349 // 99:
350 char* stringPool;
351 int32_t stringPool_count;
352 int32_t stringPool_size;
353 Offset stringPool_offset;
354 Offset* stringPool_offsetArray; // relative to stringPool
355
356 int32_t total_size; // size of everything
357
358 int32_t debug;
359
360 public:
361
362 Builder(int32_t debugLevel);
363 ~Builder();
364
365 void buildTopLevelProperties(const NameToEnumEntry* propName,
366 int32_t propNameCount,
367 const EnumToNameGroupEntry* propEnum,
368 int32_t propEnumCount);
369
370 void buildValues(const EnumToValueEntry* e2v,
371 int32_t count);
372
373 void buildStringPool(const AliasName* propertyNames,
374 int32_t propertyNameCount,
375 const int32_t* nameGroupIndices,
376 int32_t nameGroupIndicesCount);
377
378 void fixup();
379
380 int8_t* createData(int32_t& length) const;
381
382 private:
383
384 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
385 int32_t count,
386 int32_t& size);
387 static NonContiguousEnumToOffset*
388 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
389 int32_t count,
390 int32_t& size);
391
392 static NonContiguousEnumToOffset*
393 buildNCEnumToValue(const EnumToValueEntry* e2v,
394 int32_t count,
395 int32_t& size);
396
397 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
398 int32_t count,
399 int32_t& size);
400
401 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
402 void fixupNameToEnum(NameToEnum* n);
403 void fixupEnumToNameGroup(EnumToOffset* e2ng);
404 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
405
406 void computeOffsets();
407 void fixupStringPoolOffsets();
408 void fixupNameGroupPoolOffsets();
409 void fixupMiscellaneousOffsets();
410
411 static int32_t align(int32_t a);
412 static void erase(void* p, int32_t size);
413 };
414
Builder(int32_t debugLevel)415 Builder::Builder(int32_t debugLevel) {
416 debug = debugLevel;
417 enumToName = 0;
418 nameToEnum = 0;
419 enumToValue = 0;
420 valueMap_count = 0;
421 valueMap = 0;
422 valueEnumToName = 0;
423 valueNCEnumToName = 0;
424 valueEnumToName_size = 0;
425 valueEnumToName_offset = 0;
426 valueNameToEnum = 0;
427 valueNameToEnum_size = 0;
428 valueNameToEnum_offset = 0;
429 nameGroupPool = 0;
430 stringPool = 0;
431 stringPool_offsetArray = 0;
432 }
433
~Builder()434 Builder::~Builder() {
435 uprv_free(enumToName);
436 uprv_free(nameToEnum);
437 uprv_free(enumToValue);
438 uprv_free(valueMap);
439 for (int32_t i=0; i<valueMap_count; ++i) {
440 uprv_free(valueEnumToName[i]);
441 uprv_free(valueNCEnumToName[i]);
442 uprv_free(valueNameToEnum[i]);
443 }
444 uprv_free(valueEnumToName);
445 uprv_free(valueNCEnumToName);
446 uprv_free(valueEnumToName_size);
447 uprv_free(valueEnumToName_offset);
448 uprv_free(valueNameToEnum);
449 uprv_free(valueNameToEnum_size);
450 uprv_free(valueNameToEnum_offset);
451 uprv_free(nameGroupPool);
452 uprv_free(stringPool);
453 uprv_free(stringPool_offsetArray);
454 }
455
align(int32_t a)456 int32_t Builder::align(int32_t a) {
457 U_ASSERT(a >= 0);
458 int32_t k = a % sizeof(int32_t);
459 if (k == 0) {
460 return a;
461 }
462 a += sizeof(int32_t) - k;
463 return a;
464 }
465
erase(void * p,int32_t size)466 void Builder::erase(void* p, int32_t size) {
467 U_ASSERT(size >= 0);
468 int8_t* q = (int8_t*) p;
469 while (size--) {
470 *q++ = 0;
471 }
472 }
473
buildEnumToOffset(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)474 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
475 int32_t count,
476 int32_t& size) {
477 U_ASSERT(e2ng->isContiguous(count));
478 size = align(EnumToOffset::getSize(count));
479 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
480 erase(result, size);
481 result->enumStart = e2ng->enumValue;
482 result->enumLimit = e2ng->enumValue + count;
483 Offset* p = result->getOffsetArray();
484 for (int32_t i=0; i<count; ++i) {
485 // set these to NGI index values
486 // fix them up to NGI offset values
487 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
488 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
489 }
490 return result;
491 }
492
493 NonContiguousEnumToOffset*
buildNCEnumToNameGroup(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)494 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
495 int32_t count,
496 int32_t& size) {
497 U_ASSERT(!e2ng->isContiguous(count));
498 size = align(NonContiguousEnumToOffset::getSize(count));
499 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
500 erase(nc, size);
501 nc->count = count;
502 EnumValue* e = nc->getEnumArray();
503 Offset* p = nc->getOffsetArray();
504 for (int32_t i=0; i<count; ++i) {
505 // set these to NGI index values
506 // fix them up to NGI offset values
507 e[i] = e2ng[i].enumValue;
508 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
509 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
510 }
511 return nc;
512 }
513
514 NonContiguousEnumToOffset*
buildNCEnumToValue(const EnumToValueEntry * e2v,int32_t count,int32_t & size)515 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
516 int32_t count,
517 int32_t& size) {
518 U_ASSERT(!e2v->isContiguous(count));
519 size = align(NonContiguousEnumToOffset::getSize(count));
520 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
521 erase(result, size);
522 result->count = count;
523 EnumValue* e = result->getEnumArray();
524 for (int32_t i=0; i<count; ++i) {
525 e[i] = e2v[i].enumValue;
526 // offset must be set later
527 }
528 return result;
529 }
530
531 /**
532 * Given an index into the string pool, return an offset. computeOffsets()
533 * must have been called already. If allowNegative is true, allow negatives
534 * and preserve their sign.
535 */
stringIndexToOffset(int32_t index,UBool allowNegative) const536 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
537 // Index 0 is ""; we turn this into an Offset of zero
538 if (index == 0) return 0;
539 if (index < 0) {
540 if (allowNegative) {
541 return -Builder::stringIndexToOffset(-index);
542 } else {
543 die("Negative string pool index");
544 }
545 } else {
546 if (index >= stringPool_count) {
547 die("String pool index too large");
548 }
549 Offset result = stringPool_offset + stringPool_offsetArray[index];
550 U_ASSERT(result >= 0 && result < total_size);
551 return result;
552 }
553 return 0; // never executed; make compiler happy
554 }
555
buildNameToEnum(const NameToEnumEntry * nameToEnum,int32_t count,int32_t & size)556 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
557 int32_t count,
558 int32_t& size) {
559 size = align(NameToEnum::getSize(count));
560 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
561 erase(n2e, size);
562 n2e->count = count;
563 Offset* p = n2e->getNameArray();
564 EnumValue* e = n2e->getEnumArray();
565 for (int32_t i=0; i<count; ++i) {
566 // set these to SP index values
567 // fix them up to SP offset values
568 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
569 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
570 e[i] = nameToEnum[i].enumValue;
571 }
572 return n2e;
573 }
574
575
buildTopLevelProperties(const NameToEnumEntry * propName,int32_t propNameCount,const EnumToNameGroupEntry * propEnum,int32_t propEnumCount)576 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
577 int32_t propNameCount,
578 const EnumToNameGroupEntry* propEnum,
579 int32_t propEnumCount) {
580 enumToName = buildNCEnumToNameGroup(propEnum,
581 propEnumCount,
582 enumToName_size);
583 nameToEnum = buildNameToEnum(propName,
584 propNameCount,
585 nameToEnum_size);
586 }
587
buildValues(const EnumToValueEntry * e2v,int32_t count)588 void Builder::buildValues(const EnumToValueEntry* e2v,
589 int32_t count) {
590 int32_t i;
591
592 U_ASSERT(!e2v->isContiguous(count));
593
594 valueMap_count = count;
595
596 enumToValue = buildNCEnumToValue(e2v, count,
597 enumToValue_size);
598
599 valueMap_size = align(count * sizeof(ValueMap));
600 valueMap = (ValueMap*) uprv_malloc(valueMap_size);
601 erase(valueMap, valueMap_size);
602
603 valueEnumToName = MALLOC(EnumToOffset*, count);
604 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
605 valueEnumToName_size = MALLOC(int32_t, count);
606 valueEnumToName_offset = MALLOC(Offset, count);
607 valueNameToEnum = MALLOC(NameToEnum*, count);
608 valueNameToEnum_size = MALLOC(int32_t, count);
609 valueNameToEnum_offset = MALLOC(Offset, count);
610
611 for (i=0; i<count; ++i) {
612 UBool isContiguous =
613 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
614 valueEnumToName[i] = 0;
615 valueNCEnumToName[i] = 0;
616 if (isContiguous) {
617 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
618 e2v[i].enumToName_count,
619 valueEnumToName_size[i]);
620 } else {
621 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
622 e2v[i].enumToName_count,
623 valueEnumToName_size[i]);
624 }
625 valueNameToEnum[i] =
626 buildNameToEnum(e2v[i].nameToEnum,
627 e2v[i].nameToEnum_count,
628 valueNameToEnum_size[i]);
629 }
630 }
631
buildStringPool(const AliasName * propertyNames,int32_t propertyNameCount,const int32_t * nameGroupIndices,int32_t nameGroupIndicesCount)632 void Builder::buildStringPool(const AliasName* propertyNames,
633 int32_t propertyNameCount,
634 const int32_t* nameGroupIndices,
635 int32_t nameGroupIndicesCount) {
636 int32_t i;
637
638 nameGroupPool_count = nameGroupIndicesCount;
639 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
640 nameGroupPool = MALLOC(Offset, nameGroupPool_count);
641
642 for (i=0; i<nameGroupPool_count; ++i) {
643 // Some indices are negative.
644 int32_t a = nameGroupIndices[i];
645 if (a < 0) a = -a;
646 U_ASSERT(IS_VALID_OFFSET(a));
647 nameGroupPool[i] = (Offset) nameGroupIndices[i];
648 }
649
650 stringPool_count = propertyNameCount;
651 stringPool_size = 0;
652 // first string must be "" -- we skip it
653 U_ASSERT(*propertyNames[0].str == 0);
654 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
655 stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
656 }
657 stringPool = MALLOC(char, stringPool_size);
658 stringPool_offsetArray = MALLOC(Offset, stringPool_count);
659 Offset soFar = 0;
660 char* p = stringPool;
661 stringPool_offsetArray[0] = -1; // we don't use this entry
662 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
663 const char* str = propertyNames[i].str;
664 int32_t len = (int32_t)uprv_strlen(str);
665 uprv_strcpy(p, str);
666 p += len;
667 *p++ = 0;
668 stringPool_offsetArray[i] = soFar;
669 soFar += (Offset)(len+1);
670 }
671 U_ASSERT(soFar == stringPool_size);
672 U_ASSERT(p == (stringPool + stringPool_size));
673 }
674
675 // Confirm that PropertyAliases is a POD (plain old data; see C++
676 // std). The following union will _fail to compile_ if
677 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
678 // macro to check this, but that's not quite right, so that test is
679 // commented out -- see below.)
680 typedef union {
681 int32_t i;
682 PropertyAliases p;
683 } PropertyAliasesPODTest;
684
computeOffsets()685 void Builder::computeOffsets() {
686 int32_t i;
687 Offset off = sizeof(header);
688
689 if (debug>0) {
690 printf("header \t offset=%4d size=%5d\n", 0, off);
691 }
692
693 // PropertyAliases must have no v-table and must be
694 // padded (if necessary) to the next 32-bit boundary.
695 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
696 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
697
698 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
699
700 #define COMPUTE_OFFSET2(foo,type) \
701 if (debug>0)\
702 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
703 foo##_offset = off;\
704 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
705 U_ASSERT(foo##_offset % sizeof(type) == 0);\
706 off = (Offset) (off + foo##_size);
707
708 COMPUTE_OFFSET(enumToName); // 0:
709 COMPUTE_OFFSET(nameToEnum); // 2:
710 COMPUTE_OFFSET(enumToValue); // 3:
711 COMPUTE_OFFSET(valueMap); // 4:
712
713 for (i=0; i<valueMap_count; ++i) {
714 if (debug>0) {
715 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
716 (int)i, off, (int)valueEnumToName_size[i]);
717 }
718
719 valueEnumToName_offset[i] = off; // 5:
720 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
721 off = (Offset) (off + valueEnumToName_size[i]);
722
723 if (debug>0) {
724 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
725 (int)i, off, (int)valueNameToEnum_size[i]);
726 }
727
728 valueNameToEnum_offset[i] = off; // 6:
729 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
730 off = (Offset) (off + valueNameToEnum_size[i]);
731 }
732
733 // These last two chunks have weaker alignment needs
734 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
735 COMPUTE_OFFSET2(stringPool,char); // 99:
736
737 total_size = off;
738 if (debug>0) printf("total size=%5d\n\n", (int)total_size);
739 U_ASSERT(total_size <= (MAX_OFFSET+1));
740 }
741
fixupNameToEnum(NameToEnum * n)742 void Builder::fixupNameToEnum(NameToEnum* n) {
743 // Fix the string pool offsets in n
744 Offset* p = n->getNameArray();
745 for (int32_t i=0; i<n->count; ++i) {
746 p[i] = stringIndexToOffset(p[i]);
747 }
748 }
749
fixupStringPoolOffsets()750 void Builder::fixupStringPoolOffsets() {
751 int32_t i;
752
753 // 2:
754 fixupNameToEnum(nameToEnum);
755
756 // 6:
757 for (i=0; i<valueMap_count; ++i) {
758 fixupNameToEnum(valueNameToEnum[i]);
759 }
760
761 // 98:
762 for (i=0; i<nameGroupPool_count; ++i) {
763 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
764 }
765 }
766
fixupEnumToNameGroup(EnumToOffset * e2ng)767 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
768 EnumValue i;
769 int32_t j;
770 Offset* p = e2ng->getOffsetArray();
771 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
772 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
773 }
774 }
775
fixupNCEnumToNameGroup(NonContiguousEnumToOffset * e2ng)776 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
777 int32_t i;
778 /*EnumValue* e = e2ng->getEnumArray();*/
779 Offset* p = e2ng->getOffsetArray();
780 for (i=0; i<e2ng->count; ++i) {
781 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
782 }
783 }
784
fixupNameGroupPoolOffsets()785 void Builder::fixupNameGroupPoolOffsets() {
786 int32_t i;
787
788 // 0:
789 fixupNCEnumToNameGroup(enumToName);
790
791 // 1: (deleted)
792
793 // 5:
794 for (i=0; i<valueMap_count; ++i) {
795 // 5a:
796 if (valueEnumToName[i] != 0) {
797 fixupEnumToNameGroup(valueEnumToName[i]);
798 }
799 // 5b:
800 if (valueNCEnumToName[i] != 0) {
801 fixupNCEnumToNameGroup(valueNCEnumToName[i]);
802 }
803 }
804 }
805
fixupMiscellaneousOffsets()806 void Builder::fixupMiscellaneousOffsets() {
807 int32_t i;
808
809 // header:
810 erase(&header, sizeof(header));
811 header.enumToName_offset = enumToName_offset;
812 header.nameToEnum_offset = nameToEnum_offset;
813 header.enumToValue_offset = enumToValue_offset;
814 // header meta-info used by Java:
815 U_ASSERT(total_size > 0 && total_size < 0x7FFF);
816 header.total_size = (int16_t) total_size;
817 header.valueMap_offset = valueMap_offset;
818 header.valueMap_count = (int16_t) valueMap_count;
819 header.nameGroupPool_offset = nameGroupPool_offset;
820 header.nameGroupPool_count = (int16_t) nameGroupPool_count;
821 header.stringPool_offset = stringPool_offset;
822 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
823
824 U_ASSERT(valueMap_count <= 0x7FFF);
825 U_ASSERT(nameGroupPool_count <= 0x7FFF);
826 U_ASSERT(stringPool_count <= 0x7FFF);
827
828 // 3:
829 Offset* p = enumToValue->getOffsetArray();
830 /*EnumValue* e = enumToValue->getEnumArray();*/
831 U_ASSERT(valueMap_count == enumToValue->count);
832 for (i=0; i<valueMap_count; ++i) {
833 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
834 }
835
836 // 4:
837 for (i=0; i<valueMap_count; ++i) {
838 ValueMap& v = valueMap[i];
839 v.enumToName_offset = v.ncEnumToName_offset = 0;
840 if (valueEnumToName[i] != 0) {
841 v.enumToName_offset = valueEnumToName_offset[i];
842 }
843 if (valueNCEnumToName[i] != 0) {
844 v.ncEnumToName_offset = valueEnumToName_offset[i];
845 }
846 v.nameToEnum_offset = valueNameToEnum_offset[i];
847 }
848 }
849
fixup()850 void Builder::fixup() {
851 computeOffsets();
852 fixupStringPoolOffsets();
853 fixupNameGroupPoolOffsets();
854 fixupMiscellaneousOffsets();
855 }
856
createData(int32_t & length) const857 int8_t* Builder::createData(int32_t& length) const {
858 length = total_size;
859 int8_t* result = MALLOC(int8_t, length);
860
861 int8_t* p = result;
862 int8_t* limit = result + length;
863
864 #define APPEND2(x, size) \
865 U_ASSERT((p+size)<=limit); \
866 uprv_memcpy(p, x, size); \
867 p += size
868
869 #define APPEND(x) APPEND2(x, x##_size)
870
871 APPEND2(&header, sizeof(header));
872 APPEND(enumToName);
873 APPEND(nameToEnum);
874 APPEND(enumToValue);
875 APPEND(valueMap);
876
877 for (int32_t i=0; i<valueMap_count; ++i) {
878 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
879 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
880 if (valueEnumToName[i] != 0) {
881 APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
882 }
883 if (valueNCEnumToName[i] != 0) {
884 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
885 }
886 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
887 }
888
889 APPEND(nameGroupPool);
890 APPEND(stringPool);
891
892 if (p != limit) {
893 fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
894 exit(1);
895 }
896 return result;
897 }
898
899 // END Builder
900 //----------------------------------------------------------------------
901
902 /* UDataInfo cf. udata.h */
903 static UDataInfo dataInfo = {
904 sizeof(UDataInfo),
905 0,
906
907 U_IS_BIG_ENDIAN,
908 U_CHARSET_FAMILY,
909 sizeof(UChar),
910 0,
911
912 {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
913 {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
914 {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
915 };
916
917 class genpname {
918
919 // command-line options
920 UBool useCopyright;
921 UBool verbose;
922 int32_t debug;
923
924 public:
925 int MMain(int argc, char *argv[]);
926
927 private:
928 NameToEnumEntry* createNameIndex(const AliasList& list,
929 int32_t& nameIndexCount);
930
931 EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
932
933 int32_t writeDataFile(const char *destdir, const Builder&);
934 };
935
main(int argc,char * argv[])936 int main(int argc, char *argv[]) {
937 UErrorCode status = U_ZERO_ERROR;
938 u_init(&status);
939 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
940 // Note: u_init() will try to open ICU property data.
941 // failures here are expected when building ICU from scratch.
942 // ignore them.
943 fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n",
944 u_errorName(status));
945 exit(1);
946 }
947
948 genpname app;
949 U_MAIN_INIT_ARGS(argc, argv);
950 int retVal = app.MMain(argc, argv);
951 u_cleanup();
952 return retVal;
953 }
954
955 static UOption options[]={
956 UOPTION_HELP_H,
957 UOPTION_HELP_QUESTION_MARK,
958 UOPTION_COPYRIGHT,
959 UOPTION_DESTDIR,
960 UOPTION_VERBOSE,
961 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
962 };
963
createNameIndex(const AliasList & list,int32_t & nameIndexCount)964 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
965 int32_t& nameIndexCount) {
966
967 // Build name => enum map
968
969 // This is an n->1 map. There are typically multiple names
970 // mapping to one enum. The name index is sorted in order of the name,
971 // as defined by the uprv_compareAliasNames() function.
972
973 int32_t i, j;
974 int32_t count = list.count();
975
976 // compute upper limit on number of names in the index
977 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
978 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
979
980 nameIndexCount = 0;
981 int32_t names[MAX_NAMES_PER_GROUP];
982 for (i=0; i<count; ++i) {
983 const Alias& p = list[i];
984 int32_t n = p.getUniqueNames(names);
985 for (j=0; j<n; ++j) {
986 U_ASSERT(nameIndexCount < nameIndexCapacity);
987 nameIndex[nameIndexCount++] =
988 NameToEnumEntry(names[j], p.enumValue);
989 }
990 }
991
992 /*
993 * use a stable sort to ensure consistent results between
994 * genpname.cpp and the propname.cpp swapping code
995 */
996 UErrorCode errorCode = U_ZERO_ERROR;
997 uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
998 compareNameToEnumEntry, NULL, TRUE, &errorCode);
999 if (debug>1) {
1000 printf("Alias names: %d\n", (int)nameIndexCount);
1001 for (i=0; i<nameIndexCount; ++i) {
1002 printf("%s => %d\n",
1003 STRING_TABLE[nameIndex[i].nameIndex].str,
1004 (int)nameIndex[i].enumValue);
1005 }
1006 printf("\n");
1007 }
1008 // make sure there are no duplicates. for a sorted list we need
1009 // only compare adjacent items. Alias.getUniqueNames() has
1010 // already eliminated duplicate names for a single property, which
1011 // does occur, so we're checking for duplicate names between two
1012 // properties, which should never occur.
1013 UBool ok = TRUE;
1014 for (i=1; i<nameIndexCount; ++i) {
1015 if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1016 STRING_TABLE[nameIndex[i].nameIndex]) {
1017 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1018 STRING_TABLE[nameIndex[i-1].nameIndex].str,
1019 STRING_TABLE[nameIndex[i].nameIndex].str);
1020 ok = FALSE;
1021 }
1022 }
1023 if (!ok) {
1024 die("Two or more duplicate names in property list");
1025 }
1026
1027 return nameIndex;
1028 }
1029
createEnumIndex(const AliasList & list)1030 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1031
1032 // Build the enum => name map
1033
1034 // This is a 1->n map. Each enum maps to 1 or more names. To
1035 // accomplish this the index entry points to an element of the
1036 // NAME_GROUP array. This is the short name (which may be empty).
1037 // From there, subsequent elements of NAME_GROUP are alternate
1038 // names for this enum, up to and including the first one that is
1039 // negative (negate for actual index).
1040
1041 int32_t i, j, k;
1042 int32_t count = list.count();
1043
1044 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1045 for (i=0; i<count; ++i) {
1046 const Alias& p = list[i];
1047 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1048 }
1049
1050 UErrorCode errorCode = U_ZERO_ERROR;
1051 uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1052 compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1053 if (debug>1) {
1054 printf("Property enums: %d\n", (int)count);
1055 for (i=0; i<count; ++i) {
1056 printf("%d => %d: ",
1057 (int)enumIndex[i].enumValue,
1058 (int)enumIndex[i].nameGroupIndex);
1059 UBool done = FALSE;
1060 for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1061 k = NAME_GROUP[j];
1062 if (k < 0) {
1063 k = -k;
1064 done = TRUE;
1065 }
1066 printf("\"%s\"", STRING_TABLE[k].str);
1067 if (!done) printf(", ");
1068 }
1069 printf("\n");
1070 }
1071 printf("\n");
1072 }
1073 return enumIndex;
1074 }
1075
MMain(int argc,char * argv[])1076 int genpname::MMain(int argc, char* argv[])
1077 {
1078 int32_t i, j;
1079 UErrorCode status = U_ZERO_ERROR;
1080
1081 u_init(&status);
1082 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1083 fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1084 status = U_ZERO_ERROR;
1085 }
1086
1087
1088 /* preset then read command line options */
1089 options[3].value=u_getDataDirectory();
1090 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1091
1092 /* error handling, printing usage message */
1093 if (argc<0) {
1094 fprintf(stderr,
1095 "error in command line argument \"%s\"\n",
1096 argv[-argc]);
1097 }
1098
1099 debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1100
1101 if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1102 debug < 0 || debug > 9) {
1103 fprintf(stderr,
1104 "usage: %s [-options]\n"
1105 "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1106 "options:\n"
1107 "\t-h or -? or --help this usage text\n"
1108 "\t-v or --verbose turn on verbose output\n"
1109 "\t-c or --copyright include a copyright notice\n"
1110 "\t-d or --destdir destination directory, followed by the path\n"
1111 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1112 argv[0]);
1113 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1114 }
1115
1116 /* get the options values */
1117 useCopyright=options[2].doesOccur;
1118 verbose = options[4].doesOccur;
1119
1120 // ------------------------------------------------------------
1121 // Do not sort the string table, instead keep it in data.h order.
1122 // This simplifies data swapping and testing thereof because the string
1123 // table itself need not be sorted during swapping.
1124 // The NameToEnum sorter sorts each such map's string offsets instead.
1125
1126 if (debug>1) {
1127 printf("String pool: %d\n", (int)STRING_COUNT);
1128 for (i=0; i<STRING_COUNT; ++i) {
1129 if (i != 0) {
1130 printf(", ");
1131 }
1132 printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1133 }
1134 printf("\n\n");
1135 }
1136
1137 // ------------------------------------------------------------
1138 // Create top-level property indices
1139
1140 PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1141 int32_t propNameCount;
1142 NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1143 EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1144
1145 // ------------------------------------------------------------
1146 // Create indices for the value list for each enumerated property
1147
1148 // This will have more entries than we need...
1149 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1150 int32_t enumToValue_count = 0;
1151 for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1152 if (PROPERTY[i].valueCount == 0) continue;
1153 AliasArrayList values(PROPERTY[i].valueList,
1154 PROPERTY[i].valueCount);
1155 enumToValue[j].enumValue = PROPERTY[i].enumValue;
1156 enumToValue[j].enumToName = createEnumIndex(values);
1157 enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1158 enumToValue[j].nameToEnum = createNameIndex(values,
1159 enumToValue[j].nameToEnum_count);
1160 ++j;
1161 }
1162 enumToValue_count = j;
1163
1164 uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1165 compareEnumToValueEntry, NULL, FALSE, &status);
1166
1167 // ------------------------------------------------------------
1168 // Build PropertyAliases layout in memory
1169
1170 Builder builder(debug);
1171
1172 builder.buildTopLevelProperties(propName,
1173 propNameCount,
1174 propEnum,
1175 PROPERTY_COUNT);
1176
1177 builder.buildValues(enumToValue,
1178 enumToValue_count);
1179
1180 builder.buildStringPool(STRING_TABLE,
1181 STRING_COUNT,
1182 NAME_GROUP,
1183 NAME_GROUP_COUNT);
1184
1185 builder.fixup();
1186
1187 ////////////////////////////////////////////////////////////
1188 // Write the output file
1189 ////////////////////////////////////////////////////////////
1190 int32_t wlen = writeDataFile(options[3].value, builder);
1191 if (verbose) {
1192 fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1193 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1194 }
1195
1196 return 0; // success
1197 }
1198
writeDataFile(const char * destdir,const Builder & builder)1199 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1200 int32_t length;
1201 int8_t* data = builder.createData(length);
1202
1203 UNewDataMemory *pdata;
1204 UErrorCode status = U_ZERO_ERROR;
1205
1206 pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1207 useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1208 if (U_FAILURE(status)) {
1209 die("Unable to create data memory");
1210 }
1211
1212 udata_writeBlock(pdata, data, length);
1213
1214 int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1215 if (U_FAILURE(status)) {
1216 die("Error writing output file");
1217 }
1218 if (dataLength != length) {
1219 die("Written file doesn't match expected size");
1220 }
1221
1222 return dataLength;
1223 }
1224
1225 //eof
1226