• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2002-2010, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   10/11/02    aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "uarrsort.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23 
24 #include <stdio.h>
25 
26 U_NAMESPACE_USE
27 
28 // TODO: Clean up and comment this code.
29 
30 //----------------------------------------------------------------------
31 // BEGIN DATA
32 //
33 // This is the raw data to be output.  We define the data structure,
34 // then include a machine-generated header that contains the actual
35 // data.
36 
37 #include "unicode/uchar.h"
38 #include "unicode/uscript.h"
39 #include "unicode/unorm.h"
40 #include "unicode/unorm2.h"
41 
42 class AliasName {
43 public:
44     const char* str;
45     int32_t     index;
46 
47     AliasName(const char* str, int32_t index);
48 
49     int compare(const AliasName& other) const;
50 
operator ==(const AliasName & other) const51     UBool operator==(const AliasName& other) const {
52         return compare(other) == 0;
53     }
54 
operator !=(const AliasName & other) const55     UBool operator!=(const AliasName& other) const {
56         return compare(other) != 0;
57     }
58 };
59 
AliasName(const char * _str,int32_t _index)60 AliasName::AliasName(const char* _str,
61                int32_t _index) :
62     str(_str),
63     index(_index)
64 {
65 }
66 
compare(const AliasName & other) const67 int AliasName::compare(const AliasName& other) const {
68     return uprv_comparePropertyNames(str, other.str);
69 }
70 
/**
 * Associates an enum value with the index of its name group, i.e. the
 * position in NAME_GROUP at which the list of names for that enum
 * value starts.
 */
class Alias {
public:
    int32_t     enumValue;
    int32_t     nameGroupIndex;

    Alias(int32_t anEnumValue, int32_t aNameGroupIndex)
        : enumValue(anEnumValue), nameGroupIndex(aNameGroupIndex) {}

    // Defined after the generated data header has been included.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};
88 
89 class Property : public Alias {
90 public:
91     int32_t         valueCount;
92     const Alias* valueList;
93 
94     Property(int32_t enumValue,
95                        int32_t nameGroupIndex,
96                        int32_t valueCount,
97                        const Alias* valueList);
98 };
99 
Property(int32_t _enumValue,int32_t _nameGroupIndex,int32_t _valueCount,const Alias * _valueList)100 Property::Property(int32_t _enumValue,
101                                        int32_t _nameGroupIndex,
102                                        int32_t _valueCount,
103                                        const Alias* _valueList) :
104     Alias(_enumValue, _nameGroupIndex),
105     valueCount(_valueCount),
106     valueList(_valueList)
107 {
108 }
109 
110 // *** Include the data header ***
111 #include "data.h"
112 
113 /* return a list of unique names, not including "", for this property
114  * @param stringIndices array of at least MAX_NAMES_PER_GROUP
115  * elements, will be filled with indices into STRING_TABLE
116  * @return number of indices, >= 1
117  */
getUniqueNames(int32_t * stringIndices) const118 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
119     int32_t count = 0;
120     int32_t i = nameGroupIndex;
121     UBool done = FALSE;
122     while (!done) {
123         int32_t j = NAME_GROUP[i++];
124         if (j < 0) {
125             done = TRUE;
126             j = -j;
127         }
128         if (j == 0) continue; // omit "" entries
129         UBool dupe = FALSE;
130         for (int32_t k=0; k<count; ++k) {
131             if (stringIndices[k] == j) {
132                 dupe = TRUE;
133                 break;
134             }
135             // also do a string check for things like "age|Age"
136             if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
137                 //printf("Found dupe %s|%s\n",
138                 //       STRING_TABLE[stringIndices[k]].str,
139                 //       STRING_TABLE[j].str);
140                 dupe = TRUE;
141                 break;
142             }
143         }
144         if (dupe) continue; // omit duplicates
145         stringIndices[count++] = j;
146     }
147     return count;
148 }
149 
150 // END DATA
151 //----------------------------------------------------------------------
152 
// Allocate uninitialized storage for `count` objects of `type` with
// uprv_malloc() and cast the result to `type*`.  `count` and the whole
// expansion are parenthesized so that expression arguments such as
// MALLOC(T, a + b) request sizeof(T) * (a + b) bytes.
#define MALLOC(type, count) \
  ((type*) uprv_malloc(sizeof(type) * (count)))
155 
/**
 * Report a fatal error and terminate the tool.
 * Writes "Error: <msg>" followed by a newline to stderr, then calls
 * exit(1); it does not return to the caller.
 */
void die(const char* msg)
{
    fprintf(stderr, "Error: %s\n", msg);
    exit(1);
}
160 
161 //----------------------------------------------------------------------
162 
163 /**
164  * A list of Alias objects.
165  */
166 class AliasList {
167 public:
168     virtual ~AliasList();
169     virtual const Alias& operator[](int32_t i) const = 0;
170     virtual int32_t count() const = 0;
171 };
172 
~AliasList()173 AliasList::~AliasList() {}
174 
175 /**
176  * A single array.
177  */
178 class AliasArrayList : public AliasList {
179     const Alias* a;
180     int32_t n;
181 public:
AliasArrayList(const Alias * _a,int32_t _n)182     AliasArrayList(const Alias* _a, int32_t _n) {
183         a = _a;
184         n = _n;
185     }
operator [](int32_t i) const186     virtual const Alias& operator[](int32_t i) const {
187         return a[i];
188     }
count() const189     virtual int32_t count() const {
190         return n;
191     }
192 };
193 
194 /**
195  * A single array.
196  */
197 class PropertyArrayList : public AliasList {
198     const Property* a;
199     int32_t n;
200 public:
PropertyArrayList(const Property * _a,int32_t _n)201     PropertyArrayList(const Property* _a, int32_t _n) {
202         a = _a;
203         n = _n;
204     }
operator [](int32_t i) const205     virtual const Alias& operator[](int32_t i) const {
206         return a[i];
207     }
count() const208     virtual int32_t count() const {
209         return n;
210     }
211 };
212 
213 //----------------------------------------------------------------------
214 
/**
 * One row of a name index: pairs a name, identified by its index into
 * the string table, with the enum value that the name denotes.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    /** @param a the name index  @param b the enum value */
    NameToEnumEntry(int32_t a, int32_t b) : nameIndex(a), enumValue(b) {}
};
225 
226 // Sort function for NameToEnumEntry (sort by name)
227 U_CFUNC int32_t
compareNameToEnumEntry(const void *,const void * e1,const void * e2)228 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
229     return
230         STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
231             compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
232 }
233 
234 //----------------------------------------------------------------------
235 
236 /**
237  * An element in an enum index.  It maps an enum into a name group entry
238  * (given by index).
239  */
240 class EnumToNameGroupEntry {
241 public:
242     int32_t enumValue;
243     int32_t nameGroupIndex;
EnumToNameGroupEntry(int32_t a,int32_t b)244     EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
245 
246     // are enumValues contiguous for count entries starting with this one?
247     // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const248     UBool isContiguous(int32_t count) const {
249         const EnumToNameGroupEntry* p = this;
250         for (int32_t i=1; i<count; ++i) {
251             if (p[i].enumValue != (this->enumValue + i)) {
252                 return FALSE;
253             }
254         }
255         return TRUE;
256     }
257 };
258 
259 // Sort function for EnumToNameGroupEntry (sort by name index)
260 U_CFUNC int32_t
compareEnumToNameGroupEntry(const void *,const void * e1,const void * e2)261 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
262     return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
263 }
264 
265 //----------------------------------------------------------------------
266 
267 /**
268  * An element in the map from enumerated property enums to value maps.
269  */
270 class EnumToValueEntry {
271 public:
272     int32_t enumValue;
273     EnumToNameGroupEntry* enumToName;
274     int32_t enumToName_count;
275     NameToEnumEntry* nameToEnum;
276     int32_t nameToEnum_count;
277 
278     // are enumValues contiguous for count entries starting with this one?
279     // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const280     UBool isContiguous(int32_t count) const {
281         const EnumToValueEntry* p = this;
282         for (int32_t i=1; i<count; ++i) {
283             if (p[i].enumValue != (this->enumValue + i)) {
284                 return FALSE;
285             }
286         }
287         return TRUE;
288     }
289 };
290 
291 // Sort function for EnumToValueEntry (sort by enum)
292 U_CFUNC int32_t
compareEnumToValueEntry(const void *,const void * e1,const void * e2)293 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
294     return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
295 }
296 
297 //----------------------------------------------------------------------
298 // BEGIN Builder
299 
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: the argument is evaluated twice.
#define IS_VALID_OFFSET(x) ((x) >= 0 && (x) <= MAX_OFFSET)
301 
302 class Builder {
303     // header:
304     PropertyAliases header;
305 
306     // 0:
307     NonContiguousEnumToOffset* enumToName;
308     int32_t enumToName_size;
309     Offset enumToName_offset;
310 
311     // 1: (deleted)
312 
313     // 2:
314     NameToEnum* nameToEnum;
315     int32_t nameToEnum_size;
316     Offset nameToEnum_offset;
317 
318     // 3:
319     NonContiguousEnumToOffset* enumToValue;
320     int32_t enumToValue_size;
321     Offset enumToValue_offset;
322 
323     // 4:
324     ValueMap* valueMap;
325     int32_t valueMap_size;
326     int32_t valueMap_count;
327     Offset valueMap_offset;
328 
329     // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
330     // NULL and one is not.  valueEnumToName_size[i] is the size of
331     // the non-NULL one.  i=0..valueMapCount-1
332     // 5a:
333     EnumToOffset** valueEnumToName;
334     // 5b:
335     NonContiguousEnumToOffset** valueNCEnumToName;
336     int32_t* valueEnumToName_size;
337     Offset* valueEnumToName_offset;
338     // 6:
339     // arrays of valueMap_count pointers, sizes, & offsets
340     NameToEnum** valueNameToEnum;
341     int32_t* valueNameToEnum_size;
342     Offset* valueNameToEnum_offset;
343 
344     // 98:
345     Offset* nameGroupPool;
346     int32_t nameGroupPool_count;
347     int32_t nameGroupPool_size;
348     Offset nameGroupPool_offset;
349 
350     // 99:
351     char* stringPool;
352     int32_t stringPool_count;
353     int32_t stringPool_size;
354     Offset stringPool_offset;
355     Offset* stringPool_offsetArray; // relative to stringPool
356 
357     int32_t total_size; // size of everything
358 
359     int32_t debug;
360 
361 public:
362 
363     Builder(int32_t debugLevel);
364     ~Builder();
365 
366     void buildTopLevelProperties(const NameToEnumEntry* propName,
367                                  int32_t propNameCount,
368                                  const EnumToNameGroupEntry* propEnum,
369                                  int32_t propEnumCount);
370 
371     void buildValues(const EnumToValueEntry* e2v,
372                      int32_t count);
373 
374     void buildStringPool(const AliasName* propertyNames,
375                          int32_t propertyNameCount,
376                          const int32_t* nameGroupIndices,
377                          int32_t nameGroupIndicesCount);
378 
379     void fixup();
380 
381     int8_t* createData(int32_t& length) const;
382 
383 private:
384 
385     static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
386                                            int32_t count,
387                                            int32_t& size);
388     static NonContiguousEnumToOffset*
389         buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
390                                int32_t count,
391                                int32_t& size);
392 
393     static NonContiguousEnumToOffset*
394         buildNCEnumToValue(const EnumToValueEntry* e2v,
395                            int32_t count,
396                            int32_t& size);
397 
398     static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
399                                        int32_t count,
400                                        int32_t& size);
401 
402     Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
403     void fixupNameToEnum(NameToEnum* n);
404     void fixupEnumToNameGroup(EnumToOffset* e2ng);
405     void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
406 
407     void computeOffsets();
408     void fixupStringPoolOffsets();
409     void fixupNameGroupPoolOffsets();
410     void fixupMiscellaneousOffsets();
411 
412     static int32_t align(int32_t a);
413     static void erase(void* p, int32_t size);
414 };
415 
Builder(int32_t debugLevel)416 Builder::Builder(int32_t debugLevel) {
417     debug = debugLevel;
418     enumToName = 0;
419     nameToEnum = 0;
420     enumToValue = 0;
421     valueMap_count = 0;
422     valueMap = 0;
423     valueEnumToName = 0;
424     valueNCEnumToName = 0;
425     valueEnumToName_size = 0;
426     valueEnumToName_offset = 0;
427     valueNameToEnum = 0;
428     valueNameToEnum_size = 0;
429     valueNameToEnum_offset = 0;
430     nameGroupPool = 0;
431     stringPool = 0;
432     stringPool_offsetArray = 0;
433 }
434 
~Builder()435 Builder::~Builder() {
436     uprv_free(enumToName);
437     uprv_free(nameToEnum);
438     uprv_free(enumToValue);
439     uprv_free(valueMap);
440     for (int32_t i=0; i<valueMap_count; ++i) {
441         uprv_free(valueEnumToName[i]);
442         uprv_free(valueNCEnumToName[i]);
443         uprv_free(valueNameToEnum[i]);
444     }
445     uprv_free(valueEnumToName);
446     uprv_free(valueNCEnumToName);
447     uprv_free(valueEnumToName_size);
448     uprv_free(valueEnumToName_offset);
449     uprv_free(valueNameToEnum);
450     uprv_free(valueNameToEnum_size);
451     uprv_free(valueNameToEnum_offset);
452     uprv_free(nameGroupPool);
453     uprv_free(stringPool);
454     uprv_free(stringPool_offsetArray);
455 }
456 
align(int32_t a)457 int32_t Builder::align(int32_t a) {
458     U_ASSERT(a >= 0);
459     int32_t k = a % sizeof(int32_t);
460     if (k == 0) {
461         return a;
462     }
463     a += sizeof(int32_t) - k;
464     return a;
465 }
466 
erase(void * p,int32_t size)467 void Builder::erase(void* p, int32_t size) {
468     U_ASSERT(size >= 0);
469     int8_t* q = (int8_t*) p;
470     while (size--) {
471         *q++ = 0;
472     }
473 }
474 
buildEnumToOffset(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)475 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
476                                          int32_t count,
477                                          int32_t& size) {
478     U_ASSERT(e2ng->isContiguous(count));
479     size = align(EnumToOffset::getSize(count));
480     EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
481     erase(result, size);
482     result->enumStart = e2ng->enumValue;
483     result->enumLimit = e2ng->enumValue + count;
484     Offset* p = result->getOffsetArray();
485     for (int32_t i=0; i<count; ++i) {
486         // set these to NGI index values
487         // fix them up to NGI offset values
488         U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
489         p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
490     }
491     return result;
492 }
493 
494 NonContiguousEnumToOffset*
buildNCEnumToNameGroup(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)495 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
496                                 int32_t count,
497                                 int32_t& size) {
498     U_ASSERT(!e2ng->isContiguous(count));
499     size = align(NonContiguousEnumToOffset::getSize(count));
500     NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
501     erase(nc, size);
502     nc->count = count;
503     EnumValue* e = nc->getEnumArray();
504     Offset* p = nc->getOffsetArray();
505     for (int32_t i=0; i<count; ++i) {
506         // set these to NGI index values
507         // fix them up to NGI offset values
508         e[i] = e2ng[i].enumValue;
509         U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
510         p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
511     }
512     return nc;
513 }
514 
515 NonContiguousEnumToOffset*
buildNCEnumToValue(const EnumToValueEntry * e2v,int32_t count,int32_t & size)516 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
517                             int32_t count,
518                             int32_t& size) {
519     U_ASSERT(!e2v->isContiguous(count));
520     size = align(NonContiguousEnumToOffset::getSize(count));
521     NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
522     erase(result, size);
523     result->count = count;
524     EnumValue* e = result->getEnumArray();
525     for (int32_t i=0; i<count; ++i) {
526         e[i] = e2v[i].enumValue;
527         // offset must be set later
528     }
529     return result;
530 }
531 
532 /**
533  * Given an index into the string pool, return an offset.  computeOffsets()
534  * must have been called already.  If allowNegative is true, allow negatives
535  * and preserve their sign.
536  */
stringIndexToOffset(int32_t index,UBool allowNegative) const537 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
538     // Index 0 is ""; we turn this into an Offset of zero
539     if (index == 0) return 0;
540     if (index < 0) {
541         if (allowNegative) {
542             return -Builder::stringIndexToOffset(-index);
543         } else {
544             die("Negative string pool index");
545         }
546     } else {
547         if (index >= stringPool_count) {
548             die("String pool index too large");
549         }
550         Offset result = stringPool_offset + stringPool_offsetArray[index];
551         U_ASSERT(result >= 0 && result < total_size);
552         return result;
553     }
554     return 0; // never executed; make compiler happy
555 }
556 
buildNameToEnum(const NameToEnumEntry * nameToEnum,int32_t count,int32_t & size)557 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
558                                      int32_t count,
559                                      int32_t& size) {
560     size = align(NameToEnum::getSize(count));
561     NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
562     erase(n2e, size);
563     n2e->count = count;
564     Offset* p = n2e->getNameArray();
565     EnumValue* e = n2e->getEnumArray();
566     for (int32_t i=0; i<count; ++i) {
567         // set these to SP index values
568         // fix them up to SP offset values
569         U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
570         p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
571         e[i] = nameToEnum[i].enumValue;
572     }
573     return n2e;
574 }
575 
576 
buildTopLevelProperties(const NameToEnumEntry * propName,int32_t propNameCount,const EnumToNameGroupEntry * propEnum,int32_t propEnumCount)577 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
578                                       int32_t propNameCount,
579                                       const EnumToNameGroupEntry* propEnum,
580                                       int32_t propEnumCount) {
581     enumToName = buildNCEnumToNameGroup(propEnum,
582                                         propEnumCount,
583                                         enumToName_size);
584     nameToEnum = buildNameToEnum(propName,
585                                  propNameCount,
586                                  nameToEnum_size);
587 }
588 
buildValues(const EnumToValueEntry * e2v,int32_t count)589 void Builder::buildValues(const EnumToValueEntry* e2v,
590                           int32_t count) {
591     int32_t i;
592 
593     U_ASSERT(!e2v->isContiguous(count));
594 
595     valueMap_count = count;
596 
597     enumToValue = buildNCEnumToValue(e2v, count,
598                                      enumToValue_size);
599 
600     valueMap_size = align(count * sizeof(ValueMap));
601     valueMap = (ValueMap*) uprv_malloc(valueMap_size);
602     erase(valueMap, valueMap_size);
603 
604     valueEnumToName = MALLOC(EnumToOffset*, count);
605     valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
606     valueEnumToName_size = MALLOC(int32_t, count);
607     valueEnumToName_offset = MALLOC(Offset, count);
608     valueNameToEnum = MALLOC(NameToEnum*, count);
609     valueNameToEnum_size = MALLOC(int32_t, count);
610     valueNameToEnum_offset = MALLOC(Offset, count);
611 
612     for (i=0; i<count; ++i) {
613         UBool isContiguous =
614             e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
615         valueEnumToName[i] = 0;
616         valueNCEnumToName[i] = 0;
617         if (isContiguous) {
618             valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
619                                                    e2v[i].enumToName_count,
620                                                    valueEnumToName_size[i]);
621         } else {
622             valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
623                                                           e2v[i].enumToName_count,
624                                                           valueEnumToName_size[i]);
625         }
626         valueNameToEnum[i] =
627             buildNameToEnum(e2v[i].nameToEnum,
628                             e2v[i].nameToEnum_count,
629                             valueNameToEnum_size[i]);
630     }
631 }
632 
buildStringPool(const AliasName * propertyNames,int32_t propertyNameCount,const int32_t * nameGroupIndices,int32_t nameGroupIndicesCount)633 void Builder::buildStringPool(const AliasName* propertyNames,
634                               int32_t propertyNameCount,
635                               const int32_t* nameGroupIndices,
636                               int32_t nameGroupIndicesCount) {
637     int32_t i;
638 
639     nameGroupPool_count = nameGroupIndicesCount;
640     nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
641     nameGroupPool = MALLOC(Offset, nameGroupPool_count);
642 
643     for (i=0; i<nameGroupPool_count; ++i) {
644         // Some indices are negative.
645         int32_t a = nameGroupIndices[i];
646         if (a < 0) a = -a;
647         U_ASSERT(IS_VALID_OFFSET(a));
648         nameGroupPool[i] = (Offset) nameGroupIndices[i];
649     }
650 
651     stringPool_count = propertyNameCount;
652     stringPool_size = 0;
653     // first string must be "" -- we skip it
654     U_ASSERT(*propertyNames[0].str == 0);
655     for (i=1 /*sic*/; i<propertyNameCount; ++i) {
656         stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
657     }
658     stringPool = MALLOC(char, stringPool_size);
659     stringPool_offsetArray = MALLOC(Offset, stringPool_count);
660     Offset soFar = 0;
661     char* p = stringPool;
662     stringPool_offsetArray[0] = -1; // we don't use this entry
663     for (i=1 /*sic*/; i<propertyNameCount; ++i) {
664         const char* str = propertyNames[i].str;
665         int32_t len = (int32_t)uprv_strlen(str);
666         uprv_strcpy(p, str);
667         p += len;
668         *p++ = 0;
669         stringPool_offsetArray[i] = soFar;
670         soFar += (Offset)(len+1);
671     }
672     U_ASSERT(soFar == stringPool_size);
673     U_ASSERT(p == (stringPool + stringPool_size));
674 }
675 
676 // Confirm that PropertyAliases is a POD (plain old data; see C++
677 // std).  The following union will _fail to compile_ if
678 // PropertyAliases is _not_ a POD.  (Note: We used to use the offsetof
679 // macro to check this, but that's not quite right, so that test is
680 // commented out -- see below.)
681 typedef union {
682     int32_t i;
683     PropertyAliases p;
684 } PropertyAliasesPODTest;
685 
computeOffsets()686 void Builder::computeOffsets() {
687     int32_t i;
688     Offset off = sizeof(header);
689 
690     if (debug>0) {
691         printf("header   \t offset=%4d  size=%5d\n", 0, off);
692     }
693 
694     // PropertyAliases must have no v-table and must be
695     // padded (if necessary) to the next 32-bit boundary.
696     //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
697     U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
698 
699     #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
700 
701     #define COMPUTE_OFFSET2(foo,type) \
702       if (debug>0)\
703         printf(#foo "\t offset=%4d  size=%5d\n", off, (int)foo##_size);\
704       foo##_offset = off;\
705       U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
706       U_ASSERT(foo##_offset % sizeof(type) == 0);\
707       off = (Offset) (off + foo##_size);
708 
709     COMPUTE_OFFSET(enumToName);     // 0:
710     COMPUTE_OFFSET(nameToEnum);     // 2:
711     COMPUTE_OFFSET(enumToValue);    // 3:
712     COMPUTE_OFFSET(valueMap);       // 4:
713 
714     for (i=0; i<valueMap_count; ++i) {
715         if (debug>0) {
716             printf(" enumToName[%d]\t offset=%4d  size=%5d\n",
717                    (int)i, off, (int)valueEnumToName_size[i]);
718         }
719 
720         valueEnumToName_offset[i] = off;   // 5:
721         U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
722         off = (Offset) (off + valueEnumToName_size[i]);
723 
724         if (debug>0) {
725             printf(" nameToEnum[%d]\t offset=%4d  size=%5d\n",
726                    (int)i, off, (int)valueNameToEnum_size[i]);
727         }
728 
729         valueNameToEnum_offset[i] = off;   // 6:
730         U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
731         off = (Offset) (off + valueNameToEnum_size[i]);
732     }
733 
734     // These last two chunks have weaker alignment needs
735     COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
736     COMPUTE_OFFSET2(stringPool,char);      // 99:
737 
738     total_size = off;
739     if (debug>0) printf("total                         size=%5d\n\n", (int)total_size);
740     U_ASSERT(total_size <= (MAX_OFFSET+1));
741 }
742 
fixupNameToEnum(NameToEnum * n)743 void Builder::fixupNameToEnum(NameToEnum* n) {
744     // Fix the string pool offsets in n
745     Offset* p = n->getNameArray();
746     for (int32_t i=0; i<n->count; ++i) {
747         p[i] = stringIndexToOffset(p[i]);
748     }
749 }
750 
fixupStringPoolOffsets()751 void Builder::fixupStringPoolOffsets() {
752     int32_t i;
753 
754     // 2:
755     fixupNameToEnum(nameToEnum);
756 
757     // 6:
758     for (i=0; i<valueMap_count; ++i) {
759         fixupNameToEnum(valueNameToEnum[i]);
760     }
761 
762     // 98:
763     for (i=0; i<nameGroupPool_count; ++i) {
764         nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
765     }
766 }
767 
fixupEnumToNameGroup(EnumToOffset * e2ng)768 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
769     EnumValue i;
770     int32_t j;
771     Offset* p = e2ng->getOffsetArray();
772     for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
773         p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
774     }
775 }
776 
fixupNCEnumToNameGroup(NonContiguousEnumToOffset * e2ng)777 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
778     int32_t i;
779     /*EnumValue* e = e2ng->getEnumArray();*/
780     Offset* p = e2ng->getOffsetArray();
781     for (i=0; i<e2ng->count; ++i) {
782         p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
783     }
784 }
785 
fixupNameGroupPoolOffsets()786 void Builder::fixupNameGroupPoolOffsets() {
787     int32_t i;
788 
789     // 0:
790     fixupNCEnumToNameGroup(enumToName);
791 
792     // 1: (deleted)
793 
794     // 5:
795     for (i=0; i<valueMap_count; ++i) {
796         // 5a:
797         if (valueEnumToName[i] != 0) {
798             fixupEnumToNameGroup(valueEnumToName[i]);
799         }
800         // 5b:
801         if (valueNCEnumToName[i] != 0) {
802             fixupNCEnumToNameGroup(valueNCEnumToName[i]);
803         }
804     }
805 }
806 
fixupMiscellaneousOffsets()807 void Builder::fixupMiscellaneousOffsets() {
808     int32_t i;
809 
810     // header:
811     erase(&header, sizeof(header));
812     header.enumToName_offset = enumToName_offset;
813     header.nameToEnum_offset = nameToEnum_offset;
814     header.enumToValue_offset = enumToValue_offset;
815     // header meta-info used by Java:
816     U_ASSERT(total_size > 0 && total_size < 0x7FFF);
817     header.total_size = (int16_t) total_size;
818     header.valueMap_offset = valueMap_offset;
819     header.valueMap_count = (int16_t) valueMap_count;
820     header.nameGroupPool_offset = nameGroupPool_offset;
821     header.nameGroupPool_count = (int16_t) nameGroupPool_count;
822     header.stringPool_offset = stringPool_offset;
823     header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
824 
825     U_ASSERT(valueMap_count <= 0x7FFF);
826     U_ASSERT(nameGroupPool_count <= 0x7FFF);
827     U_ASSERT(stringPool_count <= 0x7FFF);
828 
829     // 3:
830     Offset* p = enumToValue->getOffsetArray();
831     /*EnumValue* e = enumToValue->getEnumArray();*/
832     U_ASSERT(valueMap_count == enumToValue->count);
833     for (i=0; i<valueMap_count; ++i) {
834         p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
835     }
836 
837     // 4:
838     for (i=0; i<valueMap_count; ++i) {
839         ValueMap& v = valueMap[i];
840         v.enumToName_offset = v.ncEnumToName_offset = 0;
841         if (valueEnumToName[i] != 0) {
842             v.enumToName_offset = valueEnumToName_offset[i];
843         }
844         if (valueNCEnumToName[i] != 0) {
845             v.ncEnumToName_offset = valueEnumToName_offset[i];
846         }
847         v.nameToEnum_offset = valueNameToEnum_offset[i];
848     }
849 }
850 
fixup()851 void Builder::fixup() {
852     computeOffsets();
853     fixupStringPoolOffsets();
854     fixupNameGroupPoolOffsets();
855     fixupMiscellaneousOffsets();
856 }
857 
createData(int32_t & length) const858 int8_t* Builder::createData(int32_t& length) const {
859     length = total_size;
860     int8_t* result = MALLOC(int8_t, length);
861 
862     int8_t* p = result;
863     int8_t* limit = result + length;
864 
865     #define APPEND2(x, size)   \
866       U_ASSERT((p+size)<=limit); \
867       uprv_memcpy(p, x, size); \
868       p += size
869 
870     #define APPEND(x) APPEND2(x, x##_size)
871 
872     APPEND2(&header, sizeof(header));
873     APPEND(enumToName);
874     APPEND(nameToEnum);
875     APPEND(enumToValue);
876     APPEND(valueMap);
877 
878     for (int32_t i=0; i<valueMap_count; ++i) {
879         U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
880                (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
881         if (valueEnumToName[i] != 0) {
882             APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
883         }
884         if (valueNCEnumToName[i] != 0) {
885             APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
886         }
887         APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
888     }
889 
890     APPEND(nameGroupPool);
891     APPEND(stringPool);
892 
893     if (p != limit) {
894         fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
895         exit(1);
896     }
897     return result;
898 }
899 
900 // END Builder
901 //----------------------------------------------------------------------
902 
903 /* UDataInfo cf. udata.h */
904 static UDataInfo dataInfo = {
905     sizeof(UDataInfo),
906     0,
907 
908     U_IS_BIG_ENDIAN,
909     U_CHARSET_FAMILY,
910     sizeof(UChar),
911     0,
912 
913     {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
914     {PNAME_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
915     {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
916 };
917 
918 class genpname {
919 
920     // command-line options
921     UBool useCopyright;
922     UBool verbose;
923     int32_t debug;
924 
925 public:
926     int      MMain(int argc, char *argv[]);
927 
928 private:
929     NameToEnumEntry* createNameIndex(const AliasList& list,
930                                      int32_t& nameIndexCount);
931 
932     EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
933 
934     int32_t  writeDataFile(const char *destdir, const Builder&);
935 };
936 
main(int argc,char * argv[])937 int main(int argc, char *argv[]) {
938     UErrorCode status = U_ZERO_ERROR;
939     u_init(&status);
940     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
941         // Note: u_init() will try to open ICU property data.
942         //       failures here are expected when building ICU from scratch.
943         //       ignore them.
944         fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
945             u_errorName(status));
946         exit(1);
947     }
948 
949     genpname app;
950     U_MAIN_INIT_ARGS(argc, argv);
951     int retVal = app.MMain(argc, argv);
952     u_cleanup();
953     return retVal;
954 }
955 
956 static UOption options[]={
957     UOPTION_HELP_H,
958     UOPTION_HELP_QUESTION_MARK,
959     UOPTION_COPYRIGHT,
960     UOPTION_DESTDIR,
961     UOPTION_VERBOSE,
962     UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
963 };
964 
createNameIndex(const AliasList & list,int32_t & nameIndexCount)965 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
966                                            int32_t& nameIndexCount) {
967 
968     // Build name => enum map
969 
970     // This is an n->1 map.  There are typically multiple names
971     // mapping to one enum.  The name index is sorted in order of the name,
972     // as defined by the uprv_compareAliasNames() function.
973 
974     int32_t i, j;
975     int32_t count = list.count();
976 
977     // compute upper limit on number of names in the index
978     int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
979     NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
980 
981     nameIndexCount = 0;
982     int32_t names[MAX_NAMES_PER_GROUP];
983     for (i=0; i<count; ++i) {
984         const Alias& p = list[i];
985         int32_t n = p.getUniqueNames(names);
986         for (j=0; j<n; ++j) {
987             U_ASSERT(nameIndexCount < nameIndexCapacity);
988             nameIndex[nameIndexCount++] =
989                 NameToEnumEntry(names[j], p.enumValue);
990         }
991     }
992 
993     /*
994      * use a stable sort to ensure consistent results between
995      * genpname.cpp and the propname.cpp swapping code
996      */
997     UErrorCode errorCode = U_ZERO_ERROR;
998     uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
999                    compareNameToEnumEntry, NULL, TRUE, &errorCode);
1000     if (debug>1) {
1001         printf("Alias names: %d\n", (int)nameIndexCount);
1002         for (i=0; i<nameIndexCount; ++i) {
1003             printf("%s => %d\n",
1004                    STRING_TABLE[nameIndex[i].nameIndex].str,
1005                    (int)nameIndex[i].enumValue);
1006         }
1007         printf("\n");
1008     }
1009     // make sure there are no duplicates.  for a sorted list we need
1010     // only compare adjacent items.  Alias.getUniqueNames() has
1011     // already eliminated duplicate names for a single property, which
1012     // does occur, so we're checking for duplicate names between two
1013     // properties, which should never occur.
1014     UBool ok = TRUE;
1015     for (i=1; i<nameIndexCount; ++i) {
1016         if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1017             STRING_TABLE[nameIndex[i].nameIndex]) {
1018             printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1019                    STRING_TABLE[nameIndex[i-1].nameIndex].str,
1020                    STRING_TABLE[nameIndex[i].nameIndex].str);
1021             ok = FALSE;
1022         }
1023     }
1024     if (!ok) {
1025         die("Two or more duplicate names in property list");
1026     }
1027 
1028     return nameIndex;
1029 }
1030 
createEnumIndex(const AliasList & list)1031 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1032 
1033     // Build the enum => name map
1034 
1035     // This is a 1->n map.  Each enum maps to 1 or more names.  To
1036     // accomplish this the index entry points to an element of the
1037     // NAME_GROUP array.  This is the short name (which may be empty).
1038     // From there, subsequent elements of NAME_GROUP are alternate
1039     // names for this enum, up to and including the first one that is
1040     // negative (negate for actual index).
1041 
1042     int32_t i, j, k;
1043     int32_t count = list.count();
1044 
1045     EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1046     for (i=0; i<count; ++i) {
1047         const Alias& p = list[i];
1048         enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1049     }
1050 
1051     UErrorCode errorCode = U_ZERO_ERROR;
1052     uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1053                    compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1054     if (debug>1) {
1055         printf("Property enums: %d\n", (int)count);
1056         for (i=0; i<count; ++i) {
1057             printf("%d => %d: ",
1058                    (int)enumIndex[i].enumValue,
1059                    (int)enumIndex[i].nameGroupIndex);
1060             UBool done = FALSE;
1061             for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1062                 k = NAME_GROUP[j];
1063                 if (k < 0) {
1064                     k = -k;
1065                     done = TRUE;
1066                 }
1067                 printf("\"%s\"", STRING_TABLE[k].str);
1068                 if (!done) printf(", ");
1069             }
1070             printf("\n");
1071         }
1072         printf("\n");
1073     }
1074     return enumIndex;
1075 }
1076 
MMain(int argc,char * argv[])1077 int genpname::MMain(int argc, char* argv[])
1078 {
1079     int32_t i, j;
1080     UErrorCode status = U_ZERO_ERROR;
1081 
1082     u_init(&status);
1083     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1084         fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1085         status = U_ZERO_ERROR;
1086     }
1087 
1088 
1089     /* preset then read command line options */
1090     options[3].value=u_getDataDirectory();
1091     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1092 
1093     /* error handling, printing usage message */
1094     if (argc<0) {
1095         fprintf(stderr,
1096             "error in command line argument \"%s\"\n",
1097             argv[-argc]);
1098     }
1099 
1100     debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1101 
1102     if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1103        debug < 0 || debug > 9) {
1104         fprintf(stderr,
1105             "usage: %s [-options]\n"
1106             "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1107             "options:\n"
1108             "\t-h or -? or --help  this usage text\n"
1109             "\t-v or --verbose     turn on verbose output\n"
1110             "\t-c or --copyright   include a copyright notice\n"
1111             "\t-d or --destdir     destination directory, followed by the path\n"
1112             "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
1113             argv[0]);
1114         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1115     }
1116 
1117     /* get the options values */
1118     useCopyright=options[2].doesOccur;
1119     verbose = options[4].doesOccur;
1120 
1121     // ------------------------------------------------------------
1122     // Do not sort the string table, instead keep it in data.h order.
1123     // This simplifies data swapping and testing thereof because the string
1124     // table itself need not be sorted during swapping.
1125     // The NameToEnum sorter sorts each such map's string offsets instead.
1126 
1127     if (debug>1) {
1128         printf("String pool: %d\n", (int)STRING_COUNT);
1129         for (i=0; i<STRING_COUNT; ++i) {
1130             if (i != 0) {
1131                 printf(", ");
1132             }
1133             printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1134         }
1135         printf("\n\n");
1136     }
1137 
1138     // ------------------------------------------------------------
1139     // Create top-level property indices
1140 
1141     PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1142     int32_t propNameCount;
1143     NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1144     EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1145 
1146     // ------------------------------------------------------------
1147     // Create indices for the value list for each enumerated property
1148 
1149     // This will have more entries than we need...
1150     EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1151     int32_t enumToValue_count = 0;
1152     for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1153         if (PROPERTY[i].valueCount == 0) continue;
1154         AliasArrayList values(PROPERTY[i].valueList,
1155                               PROPERTY[i].valueCount);
1156         enumToValue[j].enumValue = PROPERTY[i].enumValue;
1157         enumToValue[j].enumToName = createEnumIndex(values);
1158         enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1159         enumToValue[j].nameToEnum = createNameIndex(values,
1160                                                     enumToValue[j].nameToEnum_count);
1161         ++j;
1162     }
1163     enumToValue_count = j;
1164 
1165     uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1166                    compareEnumToValueEntry, NULL, FALSE, &status);
1167 
1168     // ------------------------------------------------------------
1169     // Build PropertyAliases layout in memory
1170 
1171     Builder builder(debug);
1172 
1173     builder.buildTopLevelProperties(propName,
1174                                     propNameCount,
1175                                     propEnum,
1176                                     PROPERTY_COUNT);
1177 
1178     builder.buildValues(enumToValue,
1179                         enumToValue_count);
1180 
1181     builder.buildStringPool(STRING_TABLE,
1182                             STRING_COUNT,
1183                             NAME_GROUP,
1184                             NAME_GROUP_COUNT);
1185 
1186     builder.fixup();
1187 
1188     ////////////////////////////////////////////////////////////
1189     // Write the output file
1190     ////////////////////////////////////////////////////////////
1191     int32_t wlen = writeDataFile(options[3].value, builder);
1192     if (verbose) {
1193         fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1194             U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1195     }
1196 
1197     return 0; // success
1198 }
1199 
writeDataFile(const char * destdir,const Builder & builder)1200 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1201     int32_t length;
1202     int8_t* data = builder.createData(length);
1203 
1204     UNewDataMemory *pdata;
1205     UErrorCode status = U_ZERO_ERROR;
1206 
1207     pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1208                          useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1209     if (U_FAILURE(status)) {
1210         die("Unable to create data memory");
1211     }
1212 
1213     udata_writeBlock(pdata, data, length);
1214 
1215     int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1216     if (U_FAILURE(status)) {
1217         die("Error writing output file");
1218     }
1219     if (dataLength != length) {
1220         die("Written file doesn't match expected size");
1221     }
1222 
1223     return dataLength;
1224 }
1225 
1226 //eof
1227