1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/11/02 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "uarrsort.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23
24 #include <stdio.h>
25
26 U_NAMESPACE_USE
27
28 // TODO: Clean up and comment this code.
29
30 //----------------------------------------------------------------------
31 // BEGIN DATA
32 //
33 // This is the raw data to be output. We define the data structure,
34 // then include a machine-generated header that contains the actual
35 // data.
36
37 #include "unicode/uchar.h"
38 #include "unicode/uscript.h"
39 #include "unicode/unorm.h"
40 #include "unicode/unorm2.h"
41
42 class AliasName {
43 public:
44 const char* str;
45 int32_t index;
46
47 AliasName(const char* str, int32_t index);
48
49 int compare(const AliasName& other) const;
50
operator ==(const AliasName & other) const51 UBool operator==(const AliasName& other) const {
52 return compare(other) == 0;
53 }
54
operator !=(const AliasName & other) const55 UBool operator!=(const AliasName& other) const {
56 return compare(other) != 0;
57 }
58 };
59
AliasName(const char * _str,int32_t _index)60 AliasName::AliasName(const char* _str,
61 int32_t _index) :
62 str(_str),
63 index(_index)
64 {
65 }
66
compare(const AliasName & other) const67 int AliasName::compare(const AliasName& other) const {
68 return uprv_comparePropertyNames(str, other.str);
69 }
70
/**
 * An enum value together with the index of its name group.
 * nameGroupIndex is the position in NAME_GROUP at which
 * getUniqueNames() starts reading.
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    /** Fills nameGroupIndices with this alias's distinct names; returns how many. */
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};
81
Alias(int32_t anEnumValue,int32_t aNameGroupIndex)82 Alias::Alias(int32_t anEnumValue,
83 int32_t aNameGroupIndex) :
84 enumValue(anEnumValue),
85 nameGroupIndex(aNameGroupIndex)
86 {
87 }
88
89 class Property : public Alias {
90 public:
91 int32_t valueCount;
92 const Alias* valueList;
93
94 Property(int32_t enumValue,
95 int32_t nameGroupIndex,
96 int32_t valueCount,
97 const Alias* valueList);
98 };
99
Property(int32_t _enumValue,int32_t _nameGroupIndex,int32_t _valueCount,const Alias * _valueList)100 Property::Property(int32_t _enumValue,
101 int32_t _nameGroupIndex,
102 int32_t _valueCount,
103 const Alias* _valueList) :
104 Alias(_enumValue, _nameGroupIndex),
105 valueCount(_valueCount),
106 valueList(_valueList)
107 {
108 }
109
110 // *** Include the data header ***
111 #include "data.h"
112
113 /* return a list of unique names, not including "", for this property
114 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
115 * elements, will be filled with indices into STRING_TABLE
116 * @return number of indices, >= 1
117 */
getUniqueNames(int32_t * stringIndices) const118 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
119 int32_t count = 0;
120 int32_t i = nameGroupIndex;
121 UBool done = FALSE;
122 while (!done) {
123 int32_t j = NAME_GROUP[i++];
124 if (j < 0) {
125 done = TRUE;
126 j = -j;
127 }
128 if (j == 0) continue; // omit "" entries
129 UBool dupe = FALSE;
130 for (int32_t k=0; k<count; ++k) {
131 if (stringIndices[k] == j) {
132 dupe = TRUE;
133 break;
134 }
135 // also do a string check for things like "age|Age"
136 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
137 //printf("Found dupe %s|%s\n",
138 // STRING_TABLE[stringIndices[k]].str,
139 // STRING_TABLE[j].str);
140 dupe = TRUE;
141 break;
142 }
143 }
144 if (dupe) continue; // omit duplicates
145 stringIndices[count++] = j;
146 }
147 return count;
148 }
149
150 // END DATA
151 //----------------------------------------------------------------------
152
/*
 * Allocates room for `count` objects of `type` with uprv_malloc() and
 * returns the result cast to type*.  The memory is not initialized and
 * no constructors run.  `count` is parenthesized so that an expression
 * argument such as `n + 1` is multiplied as a whole.
 */
#define MALLOC(type, count) \
    ((type*) uprv_malloc(sizeof(type) * (count)))
155
/**
 * Writes "Error: <msg>" followed by a newline to stderr and terminates
 * the process with exit status 1.  Does not return.
 */
void die(const char* msg) {
    fputs("Error: ", stderr);
    fputs(msg, stderr);
    fputc('\n', stderr);
    exit(1);
}
160
161 //----------------------------------------------------------------------
162
163 /**
164 * A list of Alias objects.
165 */
166 class AliasList {
167 public:
168 virtual ~AliasList();
169 virtual const Alias& operator[](int32_t i) const = 0;
170 virtual int32_t count() const = 0;
171 };
172
~AliasList()173 AliasList::~AliasList() {}
174
175 /**
176 * A single array.
177 */
178 class AliasArrayList : public AliasList {
179 const Alias* a;
180 int32_t n;
181 public:
AliasArrayList(const Alias * _a,int32_t _n)182 AliasArrayList(const Alias* _a, int32_t _n) {
183 a = _a;
184 n = _n;
185 }
operator [](int32_t i) const186 virtual const Alias& operator[](int32_t i) const {
187 return a[i];
188 }
count() const189 virtual int32_t count() const {
190 return n;
191 }
192 };
193
194 /**
195 * A single array.
196 */
197 class PropertyArrayList : public AliasList {
198 const Property* a;
199 int32_t n;
200 public:
PropertyArrayList(const Property * _a,int32_t _n)201 PropertyArrayList(const Property* _a, int32_t _n) {
202 a = _a;
203 n = _n;
204 }
operator [](int32_t i) const205 virtual const Alias& operator[](int32_t i) const {
206 return a[i];
207 }
count() const208 virtual int32_t count() const {
209 return n;
210 }
211 };
212
213 //----------------------------------------------------------------------
214
/**
 * One element of a name index: associates a name, identified by its
 * index, with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    /** @param a name index  @param b enum value */
    NameToEnumEntry(int32_t a, int32_t b) : nameIndex(a), enumValue(b) {}
};
225
226 // Sort function for NameToEnumEntry (sort by name)
227 U_CFUNC int32_t
compareNameToEnumEntry(const void *,const void * e1,const void * e2)228 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
229 return
230 STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
231 compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
232 }
233
234 //----------------------------------------------------------------------
235
236 /**
237 * An element in an enum index. It maps an enum into a name group entry
238 * (given by index).
239 */
240 class EnumToNameGroupEntry {
241 public:
242 int32_t enumValue;
243 int32_t nameGroupIndex;
EnumToNameGroupEntry(int32_t a,int32_t b)244 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
245
246 // are enumValues contiguous for count entries starting with this one?
247 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const248 UBool isContiguous(int32_t count) const {
249 const EnumToNameGroupEntry* p = this;
250 for (int32_t i=1; i<count; ++i) {
251 if (p[i].enumValue != (this->enumValue + i)) {
252 return FALSE;
253 }
254 }
255 return TRUE;
256 }
257 };
258
259 // Sort function for EnumToNameGroupEntry (sort by name index)
260 U_CFUNC int32_t
compareEnumToNameGroupEntry(const void *,const void * e1,const void * e2)261 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
262 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
263 }
264
265 //----------------------------------------------------------------------
266
267 /**
268 * An element in the map from enumerated property enums to value maps.
269 */
270 class EnumToValueEntry {
271 public:
272 int32_t enumValue;
273 EnumToNameGroupEntry* enumToName;
274 int32_t enumToName_count;
275 NameToEnumEntry* nameToEnum;
276 int32_t nameToEnum_count;
277
278 // are enumValues contiguous for count entries starting with this one?
279 // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const280 UBool isContiguous(int32_t count) const {
281 const EnumToValueEntry* p = this;
282 for (int32_t i=1; i<count; ++i) {
283 if (p[i].enumValue != (this->enumValue + i)) {
284 return FALSE;
285 }
286 }
287 return TRUE;
288 }
289 };
290
291 // Sort function for EnumToValueEntry (sort by enum)
292 U_CFUNC int32_t
compareEnumToValueEntry(const void *,const void * e1,const void * e2)293 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
294 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
295 }
296
297 //----------------------------------------------------------------------
298 // BEGIN Builder
299
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: x is evaluated twice.
#define IS_VALID_OFFSET(x) ((0 <= (x)) && ((x) <= MAX_OFFSET))
301
302 class Builder {
303 // header:
304 PropertyAliases header;
305
306 // 0:
307 NonContiguousEnumToOffset* enumToName;
308 int32_t enumToName_size;
309 Offset enumToName_offset;
310
311 // 1: (deleted)
312
313 // 2:
314 NameToEnum* nameToEnum;
315 int32_t nameToEnum_size;
316 Offset nameToEnum_offset;
317
318 // 3:
319 NonContiguousEnumToOffset* enumToValue;
320 int32_t enumToValue_size;
321 Offset enumToValue_offset;
322
323 // 4:
324 ValueMap* valueMap;
325 int32_t valueMap_size;
326 int32_t valueMap_count;
327 Offset valueMap_offset;
328
329 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
330 // NULL and one is not. valueEnumToName_size[i] is the size of
331 // the non-NULL one. i=0..valueMapCount-1
332 // 5a:
333 EnumToOffset** valueEnumToName;
334 // 5b:
335 NonContiguousEnumToOffset** valueNCEnumToName;
336 int32_t* valueEnumToName_size;
337 Offset* valueEnumToName_offset;
338 // 6:
339 // arrays of valueMap_count pointers, sizes, & offsets
340 NameToEnum** valueNameToEnum;
341 int32_t* valueNameToEnum_size;
342 Offset* valueNameToEnum_offset;
343
344 // 98:
345 Offset* nameGroupPool;
346 int32_t nameGroupPool_count;
347 int32_t nameGroupPool_size;
348 Offset nameGroupPool_offset;
349
350 // 99:
351 char* stringPool;
352 int32_t stringPool_count;
353 int32_t stringPool_size;
354 Offset stringPool_offset;
355 Offset* stringPool_offsetArray; // relative to stringPool
356
357 int32_t total_size; // size of everything
358
359 int32_t debug;
360
361 public:
362
363 Builder(int32_t debugLevel);
364 ~Builder();
365
366 void buildTopLevelProperties(const NameToEnumEntry* propName,
367 int32_t propNameCount,
368 const EnumToNameGroupEntry* propEnum,
369 int32_t propEnumCount);
370
371 void buildValues(const EnumToValueEntry* e2v,
372 int32_t count);
373
374 void buildStringPool(const AliasName* propertyNames,
375 int32_t propertyNameCount,
376 const int32_t* nameGroupIndices,
377 int32_t nameGroupIndicesCount);
378
379 void fixup();
380
381 int8_t* createData(int32_t& length) const;
382
383 private:
384
385 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
386 int32_t count,
387 int32_t& size);
388 static NonContiguousEnumToOffset*
389 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
390 int32_t count,
391 int32_t& size);
392
393 static NonContiguousEnumToOffset*
394 buildNCEnumToValue(const EnumToValueEntry* e2v,
395 int32_t count,
396 int32_t& size);
397
398 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
399 int32_t count,
400 int32_t& size);
401
402 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
403 void fixupNameToEnum(NameToEnum* n);
404 void fixupEnumToNameGroup(EnumToOffset* e2ng);
405 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
406
407 void computeOffsets();
408 void fixupStringPoolOffsets();
409 void fixupNameGroupPoolOffsets();
410 void fixupMiscellaneousOffsets();
411
412 static int32_t align(int32_t a);
413 static void erase(void* p, int32_t size);
414 };
415
Builder(int32_t debugLevel)416 Builder::Builder(int32_t debugLevel) {
417 debug = debugLevel;
418 enumToName = 0;
419 nameToEnum = 0;
420 enumToValue = 0;
421 valueMap_count = 0;
422 valueMap = 0;
423 valueEnumToName = 0;
424 valueNCEnumToName = 0;
425 valueEnumToName_size = 0;
426 valueEnumToName_offset = 0;
427 valueNameToEnum = 0;
428 valueNameToEnum_size = 0;
429 valueNameToEnum_offset = 0;
430 nameGroupPool = 0;
431 stringPool = 0;
432 stringPool_offsetArray = 0;
433 }
434
~Builder()435 Builder::~Builder() {
436 uprv_free(enumToName);
437 uprv_free(nameToEnum);
438 uprv_free(enumToValue);
439 uprv_free(valueMap);
440 for (int32_t i=0; i<valueMap_count; ++i) {
441 uprv_free(valueEnumToName[i]);
442 uprv_free(valueNCEnumToName[i]);
443 uprv_free(valueNameToEnum[i]);
444 }
445 uprv_free(valueEnumToName);
446 uprv_free(valueNCEnumToName);
447 uprv_free(valueEnumToName_size);
448 uprv_free(valueEnumToName_offset);
449 uprv_free(valueNameToEnum);
450 uprv_free(valueNameToEnum_size);
451 uprv_free(valueNameToEnum_offset);
452 uprv_free(nameGroupPool);
453 uprv_free(stringPool);
454 uprv_free(stringPool_offsetArray);
455 }
456
align(int32_t a)457 int32_t Builder::align(int32_t a) {
458 U_ASSERT(a >= 0);
459 int32_t k = a % sizeof(int32_t);
460 if (k == 0) {
461 return a;
462 }
463 a += sizeof(int32_t) - k;
464 return a;
465 }
466
erase(void * p,int32_t size)467 void Builder::erase(void* p, int32_t size) {
468 U_ASSERT(size >= 0);
469 int8_t* q = (int8_t*) p;
470 while (size--) {
471 *q++ = 0;
472 }
473 }
474
buildEnumToOffset(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)475 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
476 int32_t count,
477 int32_t& size) {
478 U_ASSERT(e2ng->isContiguous(count));
479 size = align(EnumToOffset::getSize(count));
480 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
481 erase(result, size);
482 result->enumStart = e2ng->enumValue;
483 result->enumLimit = e2ng->enumValue + count;
484 Offset* p = result->getOffsetArray();
485 for (int32_t i=0; i<count; ++i) {
486 // set these to NGI index values
487 // fix them up to NGI offset values
488 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
489 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
490 }
491 return result;
492 }
493
494 NonContiguousEnumToOffset*
buildNCEnumToNameGroup(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)495 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
496 int32_t count,
497 int32_t& size) {
498 U_ASSERT(!e2ng->isContiguous(count));
499 size = align(NonContiguousEnumToOffset::getSize(count));
500 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
501 erase(nc, size);
502 nc->count = count;
503 EnumValue* e = nc->getEnumArray();
504 Offset* p = nc->getOffsetArray();
505 for (int32_t i=0; i<count; ++i) {
506 // set these to NGI index values
507 // fix them up to NGI offset values
508 e[i] = e2ng[i].enumValue;
509 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
510 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
511 }
512 return nc;
513 }
514
515 NonContiguousEnumToOffset*
buildNCEnumToValue(const EnumToValueEntry * e2v,int32_t count,int32_t & size)516 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
517 int32_t count,
518 int32_t& size) {
519 U_ASSERT(!e2v->isContiguous(count));
520 size = align(NonContiguousEnumToOffset::getSize(count));
521 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
522 erase(result, size);
523 result->count = count;
524 EnumValue* e = result->getEnumArray();
525 for (int32_t i=0; i<count; ++i) {
526 e[i] = e2v[i].enumValue;
527 // offset must be set later
528 }
529 return result;
530 }
531
532 /**
533 * Given an index into the string pool, return an offset. computeOffsets()
534 * must have been called already. If allowNegative is true, allow negatives
535 * and preserve their sign.
536 */
stringIndexToOffset(int32_t index,UBool allowNegative) const537 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
538 // Index 0 is ""; we turn this into an Offset of zero
539 if (index == 0) return 0;
540 if (index < 0) {
541 if (allowNegative) {
542 return -Builder::stringIndexToOffset(-index);
543 } else {
544 die("Negative string pool index");
545 }
546 } else {
547 if (index >= stringPool_count) {
548 die("String pool index too large");
549 }
550 Offset result = stringPool_offset + stringPool_offsetArray[index];
551 U_ASSERT(result >= 0 && result < total_size);
552 return result;
553 }
554 return 0; // never executed; make compiler happy
555 }
556
buildNameToEnum(const NameToEnumEntry * nameToEnum,int32_t count,int32_t & size)557 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
558 int32_t count,
559 int32_t& size) {
560 size = align(NameToEnum::getSize(count));
561 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
562 erase(n2e, size);
563 n2e->count = count;
564 Offset* p = n2e->getNameArray();
565 EnumValue* e = n2e->getEnumArray();
566 for (int32_t i=0; i<count; ++i) {
567 // set these to SP index values
568 // fix them up to SP offset values
569 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
570 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
571 e[i] = nameToEnum[i].enumValue;
572 }
573 return n2e;
574 }
575
576
buildTopLevelProperties(const NameToEnumEntry * propName,int32_t propNameCount,const EnumToNameGroupEntry * propEnum,int32_t propEnumCount)577 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
578 int32_t propNameCount,
579 const EnumToNameGroupEntry* propEnum,
580 int32_t propEnumCount) {
581 enumToName = buildNCEnumToNameGroup(propEnum,
582 propEnumCount,
583 enumToName_size);
584 nameToEnum = buildNameToEnum(propName,
585 propNameCount,
586 nameToEnum_size);
587 }
588
buildValues(const EnumToValueEntry * e2v,int32_t count)589 void Builder::buildValues(const EnumToValueEntry* e2v,
590 int32_t count) {
591 int32_t i;
592
593 U_ASSERT(!e2v->isContiguous(count));
594
595 valueMap_count = count;
596
597 enumToValue = buildNCEnumToValue(e2v, count,
598 enumToValue_size);
599
600 valueMap_size = align(count * sizeof(ValueMap));
601 valueMap = (ValueMap*) uprv_malloc(valueMap_size);
602 erase(valueMap, valueMap_size);
603
604 valueEnumToName = MALLOC(EnumToOffset*, count);
605 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
606 valueEnumToName_size = MALLOC(int32_t, count);
607 valueEnumToName_offset = MALLOC(Offset, count);
608 valueNameToEnum = MALLOC(NameToEnum*, count);
609 valueNameToEnum_size = MALLOC(int32_t, count);
610 valueNameToEnum_offset = MALLOC(Offset, count);
611
612 for (i=0; i<count; ++i) {
613 UBool isContiguous =
614 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
615 valueEnumToName[i] = 0;
616 valueNCEnumToName[i] = 0;
617 if (isContiguous) {
618 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
619 e2v[i].enumToName_count,
620 valueEnumToName_size[i]);
621 } else {
622 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
623 e2v[i].enumToName_count,
624 valueEnumToName_size[i]);
625 }
626 valueNameToEnum[i] =
627 buildNameToEnum(e2v[i].nameToEnum,
628 e2v[i].nameToEnum_count,
629 valueNameToEnum_size[i]);
630 }
631 }
632
buildStringPool(const AliasName * propertyNames,int32_t propertyNameCount,const int32_t * nameGroupIndices,int32_t nameGroupIndicesCount)633 void Builder::buildStringPool(const AliasName* propertyNames,
634 int32_t propertyNameCount,
635 const int32_t* nameGroupIndices,
636 int32_t nameGroupIndicesCount) {
637 int32_t i;
638
639 nameGroupPool_count = nameGroupIndicesCount;
640 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
641 nameGroupPool = MALLOC(Offset, nameGroupPool_count);
642
643 for (i=0; i<nameGroupPool_count; ++i) {
644 // Some indices are negative.
645 int32_t a = nameGroupIndices[i];
646 if (a < 0) a = -a;
647 U_ASSERT(IS_VALID_OFFSET(a));
648 nameGroupPool[i] = (Offset) nameGroupIndices[i];
649 }
650
651 stringPool_count = propertyNameCount;
652 stringPool_size = 0;
653 // first string must be "" -- we skip it
654 U_ASSERT(*propertyNames[0].str == 0);
655 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
656 stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
657 }
658 stringPool = MALLOC(char, stringPool_size);
659 stringPool_offsetArray = MALLOC(Offset, stringPool_count);
660 Offset soFar = 0;
661 char* p = stringPool;
662 stringPool_offsetArray[0] = -1; // we don't use this entry
663 for (i=1 /*sic*/; i<propertyNameCount; ++i) {
664 const char* str = propertyNames[i].str;
665 int32_t len = (int32_t)uprv_strlen(str);
666 uprv_strcpy(p, str);
667 p += len;
668 *p++ = 0;
669 stringPool_offsetArray[i] = soFar;
670 soFar += (Offset)(len+1);
671 }
672 U_ASSERT(soFar == stringPool_size);
673 U_ASSERT(p == (stringPool + stringPool_size));
674 }
675
676 // Confirm that PropertyAliases is a POD (plain old data; see C++
677 // std). The following union will _fail to compile_ if
678 // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof
679 // macro to check this, but that's not quite right, so that test is
680 // commented out -- see below.)
681 typedef union {
682 int32_t i;
683 PropertyAliases p;
684 } PropertyAliasesPODTest;
685
computeOffsets()686 void Builder::computeOffsets() {
687 int32_t i;
688 Offset off = sizeof(header);
689
690 if (debug>0) {
691 printf("header \t offset=%4d size=%5d\n", 0, off);
692 }
693
694 // PropertyAliases must have no v-table and must be
695 // padded (if necessary) to the next 32-bit boundary.
696 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
697 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
698
699 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
700
701 #define COMPUTE_OFFSET2(foo,type) \
702 if (debug>0)\
703 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\
704 foo##_offset = off;\
705 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
706 U_ASSERT(foo##_offset % sizeof(type) == 0);\
707 off = (Offset) (off + foo##_size);
708
709 COMPUTE_OFFSET(enumToName); // 0:
710 COMPUTE_OFFSET(nameToEnum); // 2:
711 COMPUTE_OFFSET(enumToValue); // 3:
712 COMPUTE_OFFSET(valueMap); // 4:
713
714 for (i=0; i<valueMap_count; ++i) {
715 if (debug>0) {
716 printf(" enumToName[%d]\t offset=%4d size=%5d\n",
717 (int)i, off, (int)valueEnumToName_size[i]);
718 }
719
720 valueEnumToName_offset[i] = off; // 5:
721 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
722 off = (Offset) (off + valueEnumToName_size[i]);
723
724 if (debug>0) {
725 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n",
726 (int)i, off, (int)valueNameToEnum_size[i]);
727 }
728
729 valueNameToEnum_offset[i] = off; // 6:
730 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
731 off = (Offset) (off + valueNameToEnum_size[i]);
732 }
733
734 // These last two chunks have weaker alignment needs
735 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
736 COMPUTE_OFFSET2(stringPool,char); // 99:
737
738 total_size = off;
739 if (debug>0) printf("total size=%5d\n\n", (int)total_size);
740 U_ASSERT(total_size <= (MAX_OFFSET+1));
741 }
742
fixupNameToEnum(NameToEnum * n)743 void Builder::fixupNameToEnum(NameToEnum* n) {
744 // Fix the string pool offsets in n
745 Offset* p = n->getNameArray();
746 for (int32_t i=0; i<n->count; ++i) {
747 p[i] = stringIndexToOffset(p[i]);
748 }
749 }
750
fixupStringPoolOffsets()751 void Builder::fixupStringPoolOffsets() {
752 int32_t i;
753
754 // 2:
755 fixupNameToEnum(nameToEnum);
756
757 // 6:
758 for (i=0; i<valueMap_count; ++i) {
759 fixupNameToEnum(valueNameToEnum[i]);
760 }
761
762 // 98:
763 for (i=0; i<nameGroupPool_count; ++i) {
764 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
765 }
766 }
767
fixupEnumToNameGroup(EnumToOffset * e2ng)768 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
769 EnumValue i;
770 int32_t j;
771 Offset* p = e2ng->getOffsetArray();
772 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
773 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
774 }
775 }
776
fixupNCEnumToNameGroup(NonContiguousEnumToOffset * e2ng)777 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
778 int32_t i;
779 /*EnumValue* e = e2ng->getEnumArray();*/
780 Offset* p = e2ng->getOffsetArray();
781 for (i=0; i<e2ng->count; ++i) {
782 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
783 }
784 }
785
fixupNameGroupPoolOffsets()786 void Builder::fixupNameGroupPoolOffsets() {
787 int32_t i;
788
789 // 0:
790 fixupNCEnumToNameGroup(enumToName);
791
792 // 1: (deleted)
793
794 // 5:
795 for (i=0; i<valueMap_count; ++i) {
796 // 5a:
797 if (valueEnumToName[i] != 0) {
798 fixupEnumToNameGroup(valueEnumToName[i]);
799 }
800 // 5b:
801 if (valueNCEnumToName[i] != 0) {
802 fixupNCEnumToNameGroup(valueNCEnumToName[i]);
803 }
804 }
805 }
806
fixupMiscellaneousOffsets()807 void Builder::fixupMiscellaneousOffsets() {
808 int32_t i;
809
810 // header:
811 erase(&header, sizeof(header));
812 header.enumToName_offset = enumToName_offset;
813 header.nameToEnum_offset = nameToEnum_offset;
814 header.enumToValue_offset = enumToValue_offset;
815 // header meta-info used by Java:
816 U_ASSERT(total_size > 0 && total_size < 0x7FFF);
817 header.total_size = (int16_t) total_size;
818 header.valueMap_offset = valueMap_offset;
819 header.valueMap_count = (int16_t) valueMap_count;
820 header.nameGroupPool_offset = nameGroupPool_offset;
821 header.nameGroupPool_count = (int16_t) nameGroupPool_count;
822 header.stringPool_offset = stringPool_offset;
823 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
824
825 U_ASSERT(valueMap_count <= 0x7FFF);
826 U_ASSERT(nameGroupPool_count <= 0x7FFF);
827 U_ASSERT(stringPool_count <= 0x7FFF);
828
829 // 3:
830 Offset* p = enumToValue->getOffsetArray();
831 /*EnumValue* e = enumToValue->getEnumArray();*/
832 U_ASSERT(valueMap_count == enumToValue->count);
833 for (i=0; i<valueMap_count; ++i) {
834 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
835 }
836
837 // 4:
838 for (i=0; i<valueMap_count; ++i) {
839 ValueMap& v = valueMap[i];
840 v.enumToName_offset = v.ncEnumToName_offset = 0;
841 if (valueEnumToName[i] != 0) {
842 v.enumToName_offset = valueEnumToName_offset[i];
843 }
844 if (valueNCEnumToName[i] != 0) {
845 v.ncEnumToName_offset = valueEnumToName_offset[i];
846 }
847 v.nameToEnum_offset = valueNameToEnum_offset[i];
848 }
849 }
850
fixup()851 void Builder::fixup() {
852 computeOffsets();
853 fixupStringPoolOffsets();
854 fixupNameGroupPoolOffsets();
855 fixupMiscellaneousOffsets();
856 }
857
createData(int32_t & length) const858 int8_t* Builder::createData(int32_t& length) const {
859 length = total_size;
860 int8_t* result = MALLOC(int8_t, length);
861
862 int8_t* p = result;
863 int8_t* limit = result + length;
864
865 #define APPEND2(x, size) \
866 U_ASSERT((p+size)<=limit); \
867 uprv_memcpy(p, x, size); \
868 p += size
869
870 #define APPEND(x) APPEND2(x, x##_size)
871
872 APPEND2(&header, sizeof(header));
873 APPEND(enumToName);
874 APPEND(nameToEnum);
875 APPEND(enumToValue);
876 APPEND(valueMap);
877
878 for (int32_t i=0; i<valueMap_count; ++i) {
879 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
880 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
881 if (valueEnumToName[i] != 0) {
882 APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
883 }
884 if (valueNCEnumToName[i] != 0) {
885 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
886 }
887 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
888 }
889
890 APPEND(nameGroupPool);
891 APPEND(stringPool);
892
893 if (p != limit) {
894 fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
895 exit(1);
896 }
897 return result;
898 }
899
900 // END Builder
901 //----------------------------------------------------------------------
902
903 /* UDataInfo cf. udata.h */
904 static UDataInfo dataInfo = {
905 sizeof(UDataInfo),
906 0,
907
908 U_IS_BIG_ENDIAN,
909 U_CHARSET_FAMILY,
910 sizeof(UChar),
911 0,
912
913 {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
914 {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
915 {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
916 };
917
918 class genpname {
919
920 // command-line options
921 UBool useCopyright;
922 UBool verbose;
923 int32_t debug;
924
925 public:
926 int MMain(int argc, char *argv[]);
927
928 private:
929 NameToEnumEntry* createNameIndex(const AliasList& list,
930 int32_t& nameIndexCount);
931
932 EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
933
934 int32_t writeDataFile(const char *destdir, const Builder&);
935 };
936
main(int argc,char * argv[])937 int main(int argc, char *argv[]) {
938 UErrorCode status = U_ZERO_ERROR;
939 u_init(&status);
940 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
941 // Note: u_init() will try to open ICU property data.
942 // failures here are expected when building ICU from scratch.
943 // ignore them.
944 fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n",
945 u_errorName(status));
946 exit(1);
947 }
948
949 genpname app;
950 U_MAIN_INIT_ARGS(argc, argv);
951 int retVal = app.MMain(argc, argv);
952 u_cleanup();
953 return retVal;
954 }
955
956 static UOption options[]={
957 UOPTION_HELP_H,
958 UOPTION_HELP_QUESTION_MARK,
959 UOPTION_COPYRIGHT,
960 UOPTION_DESTDIR,
961 UOPTION_VERBOSE,
962 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
963 };
964
createNameIndex(const AliasList & list,int32_t & nameIndexCount)965 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
966 int32_t& nameIndexCount) {
967
968 // Build name => enum map
969
970 // This is an n->1 map. There are typically multiple names
971 // mapping to one enum. The name index is sorted in order of the name,
972 // as defined by the uprv_compareAliasNames() function.
973
974 int32_t i, j;
975 int32_t count = list.count();
976
977 // compute upper limit on number of names in the index
978 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
979 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
980
981 nameIndexCount = 0;
982 int32_t names[MAX_NAMES_PER_GROUP];
983 for (i=0; i<count; ++i) {
984 const Alias& p = list[i];
985 int32_t n = p.getUniqueNames(names);
986 for (j=0; j<n; ++j) {
987 U_ASSERT(nameIndexCount < nameIndexCapacity);
988 nameIndex[nameIndexCount++] =
989 NameToEnumEntry(names[j], p.enumValue);
990 }
991 }
992
993 /*
994 * use a stable sort to ensure consistent results between
995 * genpname.cpp and the propname.cpp swapping code
996 */
997 UErrorCode errorCode = U_ZERO_ERROR;
998 uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
999 compareNameToEnumEntry, NULL, TRUE, &errorCode);
1000 if (debug>1) {
1001 printf("Alias names: %d\n", (int)nameIndexCount);
1002 for (i=0; i<nameIndexCount; ++i) {
1003 printf("%s => %d\n",
1004 STRING_TABLE[nameIndex[i].nameIndex].str,
1005 (int)nameIndex[i].enumValue);
1006 }
1007 printf("\n");
1008 }
1009 // make sure there are no duplicates. for a sorted list we need
1010 // only compare adjacent items. Alias.getUniqueNames() has
1011 // already eliminated duplicate names for a single property, which
1012 // does occur, so we're checking for duplicate names between two
1013 // properties, which should never occur.
1014 UBool ok = TRUE;
1015 for (i=1; i<nameIndexCount; ++i) {
1016 if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1017 STRING_TABLE[nameIndex[i].nameIndex]) {
1018 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1019 STRING_TABLE[nameIndex[i-1].nameIndex].str,
1020 STRING_TABLE[nameIndex[i].nameIndex].str);
1021 ok = FALSE;
1022 }
1023 }
1024 if (!ok) {
1025 die("Two or more duplicate names in property list");
1026 }
1027
1028 return nameIndex;
1029 }
1030
createEnumIndex(const AliasList & list)1031 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1032
1033 // Build the enum => name map
1034
1035 // This is a 1->n map. Each enum maps to 1 or more names. To
1036 // accomplish this the index entry points to an element of the
1037 // NAME_GROUP array. This is the short name (which may be empty).
1038 // From there, subsequent elements of NAME_GROUP are alternate
1039 // names for this enum, up to and including the first one that is
1040 // negative (negate for actual index).
1041
1042 int32_t i, j, k;
1043 int32_t count = list.count();
1044
1045 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1046 for (i=0; i<count; ++i) {
1047 const Alias& p = list[i];
1048 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1049 }
1050
1051 UErrorCode errorCode = U_ZERO_ERROR;
1052 uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1053 compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1054 if (debug>1) {
1055 printf("Property enums: %d\n", (int)count);
1056 for (i=0; i<count; ++i) {
1057 printf("%d => %d: ",
1058 (int)enumIndex[i].enumValue,
1059 (int)enumIndex[i].nameGroupIndex);
1060 UBool done = FALSE;
1061 for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1062 k = NAME_GROUP[j];
1063 if (k < 0) {
1064 k = -k;
1065 done = TRUE;
1066 }
1067 printf("\"%s\"", STRING_TABLE[k].str);
1068 if (!done) printf(", ");
1069 }
1070 printf("\n");
1071 }
1072 printf("\n");
1073 }
1074 return enumIndex;
1075 }
1076
MMain(int argc,char * argv[])1077 int genpname::MMain(int argc, char* argv[])
1078 {
1079 int32_t i, j;
1080 UErrorCode status = U_ZERO_ERROR;
1081
1082 u_init(&status);
1083 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1084 fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1085 status = U_ZERO_ERROR;
1086 }
1087
1088
1089 /* preset then read command line options */
1090 options[3].value=u_getDataDirectory();
1091 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1092
1093 /* error handling, printing usage message */
1094 if (argc<0) {
1095 fprintf(stderr,
1096 "error in command line argument \"%s\"\n",
1097 argv[-argc]);
1098 }
1099
1100 debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1101
1102 if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1103 debug < 0 || debug > 9) {
1104 fprintf(stderr,
1105 "usage: %s [-options]\n"
1106 "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1107 "options:\n"
1108 "\t-h or -? or --help this usage text\n"
1109 "\t-v or --verbose turn on verbose output\n"
1110 "\t-c or --copyright include a copyright notice\n"
1111 "\t-d or --destdir destination directory, followed by the path\n"
1112 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n",
1113 argv[0]);
1114 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1115 }
1116
1117 /* get the options values */
1118 useCopyright=options[2].doesOccur;
1119 verbose = options[4].doesOccur;
1120
1121 // ------------------------------------------------------------
1122 // Do not sort the string table, instead keep it in data.h order.
1123 // This simplifies data swapping and testing thereof because the string
1124 // table itself need not be sorted during swapping.
1125 // The NameToEnum sorter sorts each such map's string offsets instead.
1126
1127 if (debug>1) {
1128 printf("String pool: %d\n", (int)STRING_COUNT);
1129 for (i=0; i<STRING_COUNT; ++i) {
1130 if (i != 0) {
1131 printf(", ");
1132 }
1133 printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1134 }
1135 printf("\n\n");
1136 }
1137
1138 // ------------------------------------------------------------
1139 // Create top-level property indices
1140
1141 PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1142 int32_t propNameCount;
1143 NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1144 EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1145
1146 // ------------------------------------------------------------
1147 // Create indices for the value list for each enumerated property
1148
1149 // This will have more entries than we need...
1150 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1151 int32_t enumToValue_count = 0;
1152 for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1153 if (PROPERTY[i].valueCount == 0) continue;
1154 AliasArrayList values(PROPERTY[i].valueList,
1155 PROPERTY[i].valueCount);
1156 enumToValue[j].enumValue = PROPERTY[i].enumValue;
1157 enumToValue[j].enumToName = createEnumIndex(values);
1158 enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1159 enumToValue[j].nameToEnum = createNameIndex(values,
1160 enumToValue[j].nameToEnum_count);
1161 ++j;
1162 }
1163 enumToValue_count = j;
1164
1165 uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1166 compareEnumToValueEntry, NULL, FALSE, &status);
1167
1168 // ------------------------------------------------------------
1169 // Build PropertyAliases layout in memory
1170
1171 Builder builder(debug);
1172
1173 builder.buildTopLevelProperties(propName,
1174 propNameCount,
1175 propEnum,
1176 PROPERTY_COUNT);
1177
1178 builder.buildValues(enumToValue,
1179 enumToValue_count);
1180
1181 builder.buildStringPool(STRING_TABLE,
1182 STRING_COUNT,
1183 NAME_GROUP,
1184 NAME_GROUP_COUNT);
1185
1186 builder.fixup();
1187
1188 ////////////////////////////////////////////////////////////
1189 // Write the output file
1190 ////////////////////////////////////////////////////////////
1191 int32_t wlen = writeDataFile(options[3].value, builder);
1192 if (verbose) {
1193 fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1194 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1195 }
1196
1197 return 0; // success
1198 }
1199
writeDataFile(const char * destdir,const Builder & builder)1200 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1201 int32_t length;
1202 int8_t* data = builder.createData(length);
1203
1204 UNewDataMemory *pdata;
1205 UErrorCode status = U_ZERO_ERROR;
1206
1207 pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1208 useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1209 if (U_FAILURE(status)) {
1210 die("Unable to create data memory");
1211 }
1212
1213 udata_writeBlock(pdata, data, length);
1214
1215 int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1216 if (U_FAILURE(status)) {
1217 die("Error writing output file");
1218 }
1219 if (dataLength != length) {
1220 die("Written file doesn't match expected size");
1221 }
1222
1223 return dataLength;
1224 }
1225
1226 //eof
1227