• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2002-2006, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   10/11/02    aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 #include "unicode/putil.h"
13 #include "unicode/uclean.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "filestrm.h"
17 #include "uarrsort.h"
18 #include "unewdata.h"
19 #include "uoptions.h"
20 #include "uprops.h"
21 #include "propname.h"
22 #include "uassert.h"
23 
24 #include <stdio.h>
25 
26 U_NAMESPACE_USE
27 
28 // TODO: Clean up and comment this code.
29 
30 //----------------------------------------------------------------------
31 // BEGIN DATA
32 //
33 // This is the raw data to be output.  We define the data structure,
34 // then include a machine-generated header that contains the actual
35 // data.
36 
37 #include "unicode/uchar.h"
38 #include "unicode/uscript.h"
39 #include "unicode/unorm.h"
40 
41 class AliasName {
42 public:
43     const char* str;
44     int32_t     index;
45 
46     AliasName(const char* str, int32_t index);
47 
48     int compare(const AliasName& other) const;
49 
operator ==(const AliasName & other) const50     UBool operator==(const AliasName& other) const {
51         return compare(other) == 0;
52     }
53 
operator !=(const AliasName & other) const54     UBool operator!=(const AliasName& other) const {
55         return compare(other) != 0;
56     }
57 };
58 
AliasName(const char * _str,int32_t _index)59 AliasName::AliasName(const char* _str,
60                int32_t _index) :
61     str(_str),
62     index(_index)
63 {
64 }
65 
compare(const AliasName & other) const66 int AliasName::compare(const AliasName& other) const {
67     return uprv_comparePropertyNames(str, other.str);
68 }
69 
// Pairs an enum value with the index of its name group.
// nameGroupIndex is the position in NAME_GROUP at which
// getUniqueNames() starts reading.
70 class Alias {
71 public:
72     int32_t     enumValue;
73     int32_t     nameGroupIndex;
74 
    // Stores both arguments as given.
75     Alias(int32_t enumValue,
76              int32_t nameGroupIndex);
77 
    // Writes the distinct, non-empty string indices of this alias's
    // name group into the caller-supplied array and returns how many
    // were written.
78     int32_t getUniqueNames(int32_t* nameGroupIndices) const;
79 };
80 
Alias(int32_t anEnumValue,int32_t aNameGroupIndex)81 Alias::Alias(int32_t anEnumValue,
82                    int32_t aNameGroupIndex) :
83     enumValue(anEnumValue),
84     nameGroupIndex(aNameGroupIndex)
85 {
86 }
87 
// An Alias that additionally carries an array of value aliases.
// The valueList pointer is stored as given; this class declares no
// destructor, so it never frees the array.
88 class Property : public Alias {
89 public:
90     int32_t         valueCount;   // number of entries in valueList
91     const Alias* valueList;       // the property's value aliases
92 
    // Forwards enumValue/nameGroupIndex to Alias and stores the value list.
93     Property(int32_t enumValue,
94                        int32_t nameGroupIndex,
95                        int32_t valueCount,
96                        const Alias* valueList);
97 };
98 
Property(int32_t _enumValue,int32_t _nameGroupIndex,int32_t _valueCount,const Alias * _valueList)99 Property::Property(int32_t _enumValue,
100                                        int32_t _nameGroupIndex,
101                                        int32_t _valueCount,
102                                        const Alias* _valueList) :
103     Alias(_enumValue, _nameGroupIndex),
104     valueCount(_valueCount),
105     valueList(_valueList)
106 {
107 }
108 
109 // *** Include the data header ***
110 #include "data.h"
111 
112 /* return a list of unique names, not including "", for this property
113  * @param stringIndices array of at least MAX_NAMES_PER_GROUP
114  * elements, will be filled with indices into STRING_TABLE
115  * @return number of indices, >= 1
116  */
getUniqueNames(int32_t * stringIndices) const117 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
118     int32_t count = 0;
119     int32_t i = nameGroupIndex;
120     UBool done = FALSE;
121     while (!done) {
122         int32_t j = NAME_GROUP[i++];
123         if (j < 0) {
124             done = TRUE;
125             j = -j;
126         }
127         if (j == 0) continue; // omit "" entries
128         UBool dupe = FALSE;
129         for (int32_t k=0; k<count; ++k) {
130             if (stringIndices[k] == j) {
131                 dupe = TRUE;
132                 break;
133             }
134             // also do a string check for things like "age|Age"
135             if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
136                 //printf("Found dupe %s|%s\n",
137                 //       STRING_TABLE[stringIndices[k]].str,
138                 //       STRING_TABLE[j].str);
139                 dupe = TRUE;
140                 break;
141             }
142         }
143         if (dupe) continue; // omit duplicates
144         stringIndices[count++] = j;
145     }
146     return count;
147 }
148 
149 // END DATA
150 //----------------------------------------------------------------------
151 
// Allocate an uninitialized array of `count` objects of `type` with
// uprv_malloc() and cast the result to type*.  `count` and the whole
// expansion are parenthesized so that expression arguments such as
// MALLOC(T, a + b) allocate sizeof(T) * (a + b) bytes.
#define MALLOC(type, count) \
  ((type*) uprv_malloc(sizeof(type) * (count)))
154 
// Report a fatal error on stderr as "Error: <msg>" and terminate the
// process with exit status 1.  Does not return.
void die(const char* msg) {
    const int failureStatus = 1;
    fprintf(stderr, "Error: %s\n", msg);
    exit(failureStatus);
}
159 
160 //----------------------------------------------------------------------
161 
162 /**
163  * A list of Alias objects.
164  */
// Abstract, read-only, indexable sequence of Alias objects.  Concrete
// subclasses supply the storage.
165 class AliasList {
166 public:
167     virtual ~AliasList();
    // Returns the i-th element.
168     virtual const Alias& operator[](int32_t i) const = 0;
    // Returns the number of elements.
169     virtual int32_t count() const = 0;
170 };
171 
~AliasList()172 AliasList::~AliasList() {}
173 
174 /**
175  * A single array.
176  */
177 class AliasArrayList : public AliasList {
178     const Alias* a;
179     int32_t n;
180 public:
AliasArrayList(const Alias * _a,int32_t _n)181     AliasArrayList(const Alias* _a, int32_t _n) {
182         a = _a;
183         n = _n;
184     }
operator [](int32_t i) const185     virtual const Alias& operator[](int32_t i) const {
186         return a[i];
187     }
count() const188     virtual int32_t count() const {
189         return n;
190     }
191 };
192 
193 /**
194  * A single array.
195  */
196 class PropertyArrayList : public AliasList {
197     const Property* a;
198     int32_t n;
199 public:
PropertyArrayList(const Property * _a,int32_t _n)200     PropertyArrayList(const Property* _a, int32_t _n) {
201         a = _a;
202         n = _n;
203     }
operator [](int32_t i) const204     virtual const Alias& operator[](int32_t i) const {
205         return a[i];
206     }
count() const207     virtual int32_t count() const {
208         return n;
209     }
210 };
211 
212 //----------------------------------------------------------------------
213 
/**
 * One row of a name index: associates a name, identified by its index
 * into STRING_TABLE, with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    // a is the name index, b is the enum value.
    NameToEnumEntry(int32_t a, int32_t b) : nameIndex(a), enumValue(b) {}
};
224 
225 // Sort function for NameToEnumEntry (sort by name)
226 U_CFUNC int32_t
compareNameToEnumEntry(const void *,const void * e1,const void * e2)227 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
228     return
229         STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
230             compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
231 }
232 
233 //----------------------------------------------------------------------
234 
235 /**
236  * An element in an enum index.  It maps an enum into a name group entry
237  * (given by index).
238  */
239 class EnumToNameGroupEntry {
240 public:
241     int32_t enumValue;
242     int32_t nameGroupIndex;
EnumToNameGroupEntry(int32_t a,int32_t b)243     EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
244 
245     // are enumValues contiguous for count entries starting with this one?
246     // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const247     UBool isContiguous(int32_t count) const {
248         const EnumToNameGroupEntry* p = this;
249         for (int32_t i=1; i<count; ++i) {
250             if (p[i].enumValue != (this->enumValue + i)) {
251                 return FALSE;
252             }
253         }
254         return TRUE;
255     }
256 };
257 
258 // Sort function for EnumToNameGroupEntry (sort by name index)
259 U_CFUNC int32_t
compareEnumToNameGroupEntry(const void *,const void * e1,const void * e2)260 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
261     return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
262 }
263 
264 //----------------------------------------------------------------------
265 
266 /**
267  * An element in the map from enumerated property enums to value maps.
268  */
269 class EnumToValueEntry {
270 public:
271     int32_t enumValue;
272     EnumToNameGroupEntry* enumToName;
273     int32_t enumToName_count;
274     NameToEnumEntry* nameToEnum;
275     int32_t nameToEnum_count;
276 
277     // are enumValues contiguous for count entries starting with this one?
278     // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
isContiguous(int32_t count) const279     UBool isContiguous(int32_t count) const {
280         const EnumToValueEntry* p = this;
281         for (int32_t i=1; i<count; ++i) {
282             if (p[i].enumValue != (this->enumValue + i)) {
283                 return FALSE;
284             }
285         }
286         return TRUE;
287     }
288 };
289 
290 // Sort function for EnumToValueEntry (sort by enum)
291 U_CFUNC int32_t
compareEnumToValueEntry(const void *,const void * e1,const void * e2)292 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
293     return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
294 }
295 
296 //----------------------------------------------------------------------
297 // BEGIN Builder
298 
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: x is evaluated twice.
#define IS_VALID_OFFSET(x) ((0<=(x)) && (MAX_OFFSET>=(x)))
300 
// Assembles the binary property-name data image.
//
// Usage, as the public interface suggests: call the build*() methods
// to populate the tables, then fixup() to turn indices into byte
// offsets, then createData() to get one contiguous buffer.
//
// The numbered comments below (0:, 2:, 3:, ...) label the chunks; the
// chunks are laid out in that order, after the header, by
// computeOffsets() and createData().  For each chunk `foo`, foo_size
// is its size in bytes and foo_offset is its byte offset in the image.
301 class Builder {
302     // header:
303     PropertyAliases header;
304 
305     // 0:
306     NonContiguousEnumToOffset* enumToName;
307     int32_t enumToName_size;
308     Offset enumToName_offset;
309 
310     // 1: (deleted)
311 
312     // 2:
313     NameToEnum* nameToEnum;
314     int32_t nameToEnum_size;
315     Offset nameToEnum_offset;
316 
317     // 3:
318     NonContiguousEnumToOffset* enumToValue;
319     int32_t enumToValue_size;
320     Offset enumToValue_offset;
321 
322     // 4:
323     ValueMap* valueMap;
324     int32_t valueMap_size;
325     int32_t valueMap_count;
326     Offset valueMap_offset;
327 
328     // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
329     // NULL and one is not.  valueEnumToName_size[i] is the size of
330     // the non-NULL one.  i=0..valueMapCount-1
331     // 5a:
332     EnumToOffset** valueEnumToName;
333     // 5b:
334     NonContiguousEnumToOffset** valueNCEnumToName;
335     int32_t* valueEnumToName_size;
336     Offset* valueEnumToName_offset;
337     // 6:
338     // arrays of valueMap_count pointers, sizes, & offsets
339     NameToEnum** valueNameToEnum;
340     int32_t* valueNameToEnum_size;
341     Offset* valueNameToEnum_offset;
342 
343     // 98:
    // Name group entries.  Hold string indices until fixup() converts
    // them to string pool offsets (the sign of each entry is preserved).
344     Offset* nameGroupPool;
345     int32_t nameGroupPool_count;
346     int32_t nameGroupPool_size;
347     Offset nameGroupPool_offset;
348 
349     // 99:
    // All names except the leading "" entry, stored back to back, each
    // NUL-terminated.
350     char* stringPool;
351     int32_t stringPool_count;
352     int32_t stringPool_size;
353     Offset stringPool_offset;
354     Offset* stringPool_offsetArray; // relative to stringPool
355 
356     int32_t total_size; // size of everything
357 
    // Values > 0 make computeOffsets() print the layout to stdout.
358     int32_t debug;
359 
360 public:
361 
362     Builder(int32_t debugLevel);
363     ~Builder();
364 
    // Builds chunks 0 (enumToName) and 2 (nameToEnum).
365     void buildTopLevelProperties(const NameToEnumEntry* propName,
366                                  int32_t propNameCount,
367                                  const EnumToNameGroupEntry* propEnum,
368                                  int32_t propEnumCount);
369 
    // Builds chunks 3, 4, 5 and 6 from `count` EnumToValueEntry rows.
370     void buildValues(const EnumToValueEntry* e2v,
371                      int32_t count);
372 
    // Builds chunks 98 (name group pool) and 99 (string pool).
373     void buildStringPool(const AliasName* propertyNames,
374                          int32_t propertyNameCount,
375                          const int32_t* nameGroupIndices,
376                          int32_t nameGroupIndicesCount);
377 
    // Computes all offsets and rewrites stored indices as offsets.
378     void fixup();
379 
    // Returns a newly allocated buffer holding the whole image and
    // sets `length` to its size in bytes (total_size).
380     int8_t* createData(int32_t& length) const;
381 
382 private:
383 
    // The static build*() helpers allocate one table with uprv_malloc
    // and report its aligned byte size through `size`.
384     static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
385                                            int32_t count,
386                                            int32_t& size);
387     static NonContiguousEnumToOffset*
388         buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
389                                int32_t count,
390                                int32_t& size);
391 
392     static NonContiguousEnumToOffset*
393         buildNCEnumToValue(const EnumToValueEntry* e2v,
394                            int32_t count,
395                            int32_t& size);
396 
397     static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
398                                        int32_t count,
399                                        int32_t& size);
400 
401     Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
402     void fixupNameToEnum(NameToEnum* n);
403     void fixupEnumToNameGroup(EnumToOffset* e2ng);
404     void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
405 
    // The four steps of fixup(), called in this order.
406     void computeOffsets();
407     void fixupStringPoolOffsets();
408     void fixupNameGroupPoolOffsets();
409     void fixupMiscellaneousOffsets();
410 
    // Rounds a up to a multiple of sizeof(int32_t).
411     static int32_t align(int32_t a);
    // Zero-fills size bytes at p.
412     static void erase(void* p, int32_t size);
413 };
414 
Builder(int32_t debugLevel)415 Builder::Builder(int32_t debugLevel) {
416     debug = debugLevel;
417     enumToName = 0;
418     nameToEnum = 0;
419     enumToValue = 0;
420     valueMap_count = 0;
421     valueMap = 0;
422     valueEnumToName = 0;
423     valueNCEnumToName = 0;
424     valueEnumToName_size = 0;
425     valueEnumToName_offset = 0;
426     valueNameToEnum = 0;
427     valueNameToEnum_size = 0;
428     valueNameToEnum_offset = 0;
429     nameGroupPool = 0;
430     stringPool = 0;
431     stringPool_offsetArray = 0;
432 }
433 
~Builder()434 Builder::~Builder() {
435     uprv_free(enumToName);
436     uprv_free(nameToEnum);
437     uprv_free(enumToValue);
438     uprv_free(valueMap);
439     for (int32_t i=0; i<valueMap_count; ++i) {
440         uprv_free(valueEnumToName[i]);
441         uprv_free(valueNCEnumToName[i]);
442         uprv_free(valueNameToEnum[i]);
443     }
444     uprv_free(valueEnumToName);
445     uprv_free(valueNCEnumToName);
446     uprv_free(valueEnumToName_size);
447     uprv_free(valueEnumToName_offset);
448     uprv_free(valueNameToEnum);
449     uprv_free(valueNameToEnum_size);
450     uprv_free(valueNameToEnum_offset);
451     uprv_free(nameGroupPool);
452     uprv_free(stringPool);
453     uprv_free(stringPool_offsetArray);
454 }
455 
align(int32_t a)456 int32_t Builder::align(int32_t a) {
457     U_ASSERT(a >= 0);
458     int32_t k = a % sizeof(int32_t);
459     if (k == 0) {
460         return a;
461     }
462     a += sizeof(int32_t) - k;
463     return a;
464 }
465 
erase(void * p,int32_t size)466 void Builder::erase(void* p, int32_t size) {
467     U_ASSERT(size >= 0);
468     int8_t* q = (int8_t*) p;
469     while (size--) {
470         *q++ = 0;
471     }
472 }
473 
buildEnumToOffset(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)474 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
475                                          int32_t count,
476                                          int32_t& size) {
477     U_ASSERT(e2ng->isContiguous(count));
478     size = align(EnumToOffset::getSize(count));
479     EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
480     erase(result, size);
481     result->enumStart = e2ng->enumValue;
482     result->enumLimit = e2ng->enumValue + count;
483     Offset* p = result->getOffsetArray();
484     for (int32_t i=0; i<count; ++i) {
485         // set these to NGI index values
486         // fix them up to NGI offset values
487         U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
488         p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
489     }
490     return result;
491 }
492 
493 NonContiguousEnumToOffset*
buildNCEnumToNameGroup(const EnumToNameGroupEntry * e2ng,int32_t count,int32_t & size)494 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
495                                 int32_t count,
496                                 int32_t& size) {
497     U_ASSERT(!e2ng->isContiguous(count));
498     size = align(NonContiguousEnumToOffset::getSize(count));
499     NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
500     erase(nc, size);
501     nc->count = count;
502     EnumValue* e = nc->getEnumArray();
503     Offset* p = nc->getOffsetArray();
504     for (int32_t i=0; i<count; ++i) {
505         // set these to NGI index values
506         // fix them up to NGI offset values
507         e[i] = e2ng[i].enumValue;
508         U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
509         p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
510     }
511     return nc;
512 }
513 
514 NonContiguousEnumToOffset*
buildNCEnumToValue(const EnumToValueEntry * e2v,int32_t count,int32_t & size)515 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
516                             int32_t count,
517                             int32_t& size) {
518     U_ASSERT(!e2v->isContiguous(count));
519     size = align(NonContiguousEnumToOffset::getSize(count));
520     NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
521     erase(result, size);
522     result->count = count;
523     EnumValue* e = result->getEnumArray();
524     for (int32_t i=0; i<count; ++i) {
525         e[i] = e2v[i].enumValue;
526         // offset must be set later
527     }
528     return result;
529 }
530 
531 /**
532  * Given an index into the string pool, return an offset.  computeOffsets()
533  * must have been called already.  If allowNegative is true, allow negatives
534  * and preserve their sign.
535  */
stringIndexToOffset(int32_t index,UBool allowNegative) const536 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
537     // Index 0 is ""; we turn this into an Offset of zero
538     if (index == 0) return 0;
539     if (index < 0) {
540         if (allowNegative) {
541             return -Builder::stringIndexToOffset(-index);
542         } else {
543             die("Negative string pool index");
544         }
545     } else {
546         if (index >= stringPool_count) {
547             die("String pool index too large");
548         }
549         Offset result = stringPool_offset + stringPool_offsetArray[index];
550         U_ASSERT(result >= 0 && result < total_size);
551         return result;
552     }
553     return 0; // never executed; make compiler happy
554 }
555 
buildNameToEnum(const NameToEnumEntry * nameToEnum,int32_t count,int32_t & size)556 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
557                                      int32_t count,
558                                      int32_t& size) {
559     size = align(NameToEnum::getSize(count));
560     NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
561     erase(n2e, size);
562     n2e->count = count;
563     Offset* p = n2e->getNameArray();
564     EnumValue* e = n2e->getEnumArray();
565     for (int32_t i=0; i<count; ++i) {
566         // set these to SP index values
567         // fix them up to SP offset values
568         U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
569         p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
570         e[i] = nameToEnum[i].enumValue;
571     }
572     return n2e;
573 }
574 
575 
buildTopLevelProperties(const NameToEnumEntry * propName,int32_t propNameCount,const EnumToNameGroupEntry * propEnum,int32_t propEnumCount)576 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
577                                       int32_t propNameCount,
578                                       const EnumToNameGroupEntry* propEnum,
579                                       int32_t propEnumCount) {
580     enumToName = buildNCEnumToNameGroup(propEnum,
581                                         propEnumCount,
582                                         enumToName_size);
583     nameToEnum = buildNameToEnum(propName,
584                                  propNameCount,
585                                  nameToEnum_size);
586 }
587 
buildValues(const EnumToValueEntry * e2v,int32_t count)588 void Builder::buildValues(const EnumToValueEntry* e2v,
589                           int32_t count) {
590     int32_t i;
591 
592     U_ASSERT(!e2v->isContiguous(count));
593 
594     valueMap_count = count;
595 
596     enumToValue = buildNCEnumToValue(e2v, count,
597                                      enumToValue_size);
598 
599     valueMap_size = align(count * sizeof(ValueMap));
600     valueMap = (ValueMap*) uprv_malloc(valueMap_size);
601     erase(valueMap, valueMap_size);
602 
603     valueEnumToName = MALLOC(EnumToOffset*, count);
604     valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
605     valueEnumToName_size = MALLOC(int32_t, count);
606     valueEnumToName_offset = MALLOC(Offset, count);
607     valueNameToEnum = MALLOC(NameToEnum*, count);
608     valueNameToEnum_size = MALLOC(int32_t, count);
609     valueNameToEnum_offset = MALLOC(Offset, count);
610 
611     for (i=0; i<count; ++i) {
612         UBool isContiguous =
613             e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
614         valueEnumToName[i] = 0;
615         valueNCEnumToName[i] = 0;
616         if (isContiguous) {
617             valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
618                                                    e2v[i].enumToName_count,
619                                                    valueEnumToName_size[i]);
620         } else {
621             valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
622                                                           e2v[i].enumToName_count,
623                                                           valueEnumToName_size[i]);
624         }
625         valueNameToEnum[i] =
626             buildNameToEnum(e2v[i].nameToEnum,
627                             e2v[i].nameToEnum_count,
628                             valueNameToEnum_size[i]);
629     }
630 }
631 
buildStringPool(const AliasName * propertyNames,int32_t propertyNameCount,const int32_t * nameGroupIndices,int32_t nameGroupIndicesCount)632 void Builder::buildStringPool(const AliasName* propertyNames,
633                               int32_t propertyNameCount,
634                               const int32_t* nameGroupIndices,
635                               int32_t nameGroupIndicesCount) {
636     int32_t i;
637 
638     nameGroupPool_count = nameGroupIndicesCount;
639     nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
640     nameGroupPool = MALLOC(Offset, nameGroupPool_count);
641 
642     for (i=0; i<nameGroupPool_count; ++i) {
643         // Some indices are negative.
644         int32_t a = nameGroupIndices[i];
645         if (a < 0) a = -a;
646         U_ASSERT(IS_VALID_OFFSET(a));
647         nameGroupPool[i] = (Offset) nameGroupIndices[i];
648     }
649 
650     stringPool_count = propertyNameCount;
651     stringPool_size = 0;
652     // first string must be "" -- we skip it
653     U_ASSERT(*propertyNames[0].str == 0);
654     for (i=1 /*sic*/; i<propertyNameCount; ++i) {
655         stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
656     }
657     stringPool = MALLOC(char, stringPool_size);
658     stringPool_offsetArray = MALLOC(Offset, stringPool_count);
659     Offset soFar = 0;
660     char* p = stringPool;
661     stringPool_offsetArray[0] = -1; // we don't use this entry
662     for (i=1 /*sic*/; i<propertyNameCount; ++i) {
663         const char* str = propertyNames[i].str;
664         int32_t len = (int32_t)uprv_strlen(str);
665         uprv_strcpy(p, str);
666         p += len;
667         *p++ = 0;
668         stringPool_offsetArray[i] = soFar;
669         soFar += (Offset)(len+1);
670     }
671     U_ASSERT(soFar == stringPool_size);
672     U_ASSERT(p == (stringPool + stringPool_size));
673 }
674 
675 // Confirm that PropertyAliases is a POD (plain old data; see C++
676 // std).  The following union will _fail to compile_ if
677 // PropertyAliases is _not_ a POD.  (Note: We used to use the offsetof
678 // macro to check this, but that's not quite right, so that test is
679 // commented out -- see below.)
// Compile-time check only: the type exists solely so that the compiler
// rejects this file if PropertyAliases cannot be a union member.
680 typedef union {
681     int32_t i;
682     PropertyAliases p;
683 } PropertyAliasesPODTest;
684 
// Assigns a byte offset to every chunk and sets total_size.
//
// Chunks are laid out back to back in this order: header, enumToName,
// nameToEnum, enumToValue, valueMap, then for each value map i its
// enum-to-name table followed by its name-to-enum table, then
// nameGroupPool, then stringPool.  `off` is the running offset of the
// next chunk.  When debug > 0 the layout is printed to stdout.
computeOffsets()685 void Builder::computeOffsets() {
686     int32_t i;
687     Offset off = sizeof(header);
688 
689     if (debug>0) {
690         printf("header   \t offset=%4d  size=%5d\n", 0, off);
691     }
692 
693     // PropertyAliases must have no v-table and must be
694     // padded (if necessary) to the next 32-bit boundary.
695     //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
696     U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
697 
    // COMPUTE_OFFSET2(foo,type): record `off` as foo_offset, assert
    // that the chunk end is a valid offset and that foo_offset is a
    // multiple of sizeof(type), then advance `off` by foo_size.
    // COMPUTE_OFFSET(foo) is the same with int32_t alignment.
    // Neither macro is #undef'd after use.
698     #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
699 
700     #define COMPUTE_OFFSET2(foo,type) \
701       if (debug>0)\
702         printf(#foo "\t offset=%4d  size=%5d\n", off, (int)foo##_size);\
703       foo##_offset = off;\
704       U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
705       U_ASSERT(foo##_offset % sizeof(type) == 0);\
706       off = (Offset) (off + foo##_size);
707 
708     COMPUTE_OFFSET(enumToName);     // 0:
709     COMPUTE_OFFSET(nameToEnum);     // 2:
710     COMPUTE_OFFSET(enumToValue);    // 3:
711     COMPUTE_OFFSET(valueMap);       // 4:
712 
    // Per-value tables: same bookkeeping as the macro, done by hand
    // because sizes and offsets live in arrays indexed by i.
713     for (i=0; i<valueMap_count; ++i) {
714         if (debug>0) {
715             printf(" enumToName[%d]\t offset=%4d  size=%5d\n",
716                    (int)i, off, (int)valueEnumToName_size[i]);
717         }
718 
719         valueEnumToName_offset[i] = off;   // 5:
720         U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
721         off = (Offset) (off + valueEnumToName_size[i]);
722 
723         if (debug>0) {
724             printf(" nameToEnum[%d]\t offset=%4d  size=%5d\n",
725                    (int)i, off, (int)valueNameToEnum_size[i]);
726         }
727 
728         valueNameToEnum_offset[i] = off;   // 6:
729         U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
730         off = (Offset) (off + valueNameToEnum_size[i]);
731     }
732 
733     // These last two chunks have weaker alignment needs
734     COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
735     COMPUTE_OFFSET2(stringPool,char);      // 99:
736 
    // `off` now points one past the last chunk.
737     total_size = off;
738     if (debug>0) printf("total                         size=%5d\n\n", (int)total_size);
739     U_ASSERT(total_size <= (MAX_OFFSET+1));
740 }
741 
fixupNameToEnum(NameToEnum * n)742 void Builder::fixupNameToEnum(NameToEnum* n) {
743     // Fix the string pool offsets in n
744     Offset* p = n->getNameArray();
745     for (int32_t i=0; i<n->count; ++i) {
746         p[i] = stringIndexToOffset(p[i]);
747     }
748 }
749 
fixupStringPoolOffsets()750 void Builder::fixupStringPoolOffsets() {
751     int32_t i;
752 
753     // 2:
754     fixupNameToEnum(nameToEnum);
755 
756     // 6:
757     for (i=0; i<valueMap_count; ++i) {
758         fixupNameToEnum(valueNameToEnum[i]);
759     }
760 
761     // 98:
762     for (i=0; i<nameGroupPool_count; ++i) {
763         nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
764     }
765 }
766 
fixupEnumToNameGroup(EnumToOffset * e2ng)767 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
768     EnumValue i;
769     int32_t j;
770     Offset* p = e2ng->getOffsetArray();
771     for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
772         p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
773     }
774 }
775 
fixupNCEnumToNameGroup(NonContiguousEnumToOffset * e2ng)776 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
777     int32_t i;
778     /*EnumValue* e = e2ng->getEnumArray();*/
779     Offset* p = e2ng->getOffsetArray();
780     for (i=0; i<e2ng->count; ++i) {
781         p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
782     }
783 }
784 
fixupNameGroupPoolOffsets()785 void Builder::fixupNameGroupPoolOffsets() {
786     int32_t i;
787 
788     // 0:
789     fixupNCEnumToNameGroup(enumToName);
790 
791     // 1: (deleted)
792 
793     // 5:
794     for (i=0; i<valueMap_count; ++i) {
795         // 5a:
796         if (valueEnumToName[i] != 0) {
797             fixupEnumToNameGroup(valueEnumToName[i]);
798         }
799         // 5b:
800         if (valueNCEnumToName[i] != 0) {
801             fixupNCEnumToNameGroup(valueNCEnumToName[i]);
802         }
803     }
804 }
805 
fixupMiscellaneousOffsets()806 void Builder::fixupMiscellaneousOffsets() {
807     int32_t i;
808 
809     // header:
810     erase(&header, sizeof(header));
811     header.enumToName_offset = enumToName_offset;
812     header.nameToEnum_offset = nameToEnum_offset;
813     header.enumToValue_offset = enumToValue_offset;
814     // header meta-info used by Java:
815     U_ASSERT(total_size > 0 && total_size < 0x7FFF);
816     header.total_size = (int16_t) total_size;
817     header.valueMap_offset = valueMap_offset;
818     header.valueMap_count = (int16_t) valueMap_count;
819     header.nameGroupPool_offset = nameGroupPool_offset;
820     header.nameGroupPool_count = (int16_t) nameGroupPool_count;
821     header.stringPool_offset = stringPool_offset;
822     header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
823 
824     U_ASSERT(valueMap_count <= 0x7FFF);
825     U_ASSERT(nameGroupPool_count <= 0x7FFF);
826     U_ASSERT(stringPool_count <= 0x7FFF);
827 
828     // 3:
829     Offset* p = enumToValue->getOffsetArray();
830     /*EnumValue* e = enumToValue->getEnumArray();*/
831     U_ASSERT(valueMap_count == enumToValue->count);
832     for (i=0; i<valueMap_count; ++i) {
833         p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
834     }
835 
836     // 4:
837     for (i=0; i<valueMap_count; ++i) {
838         ValueMap& v = valueMap[i];
839         v.enumToName_offset = v.ncEnumToName_offset = 0;
840         if (valueEnumToName[i] != 0) {
841             v.enumToName_offset = valueEnumToName_offset[i];
842         }
843         if (valueNCEnumToName[i] != 0) {
844             v.ncEnumToName_offset = valueEnumToName_offset[i];
845         }
846         v.nameToEnum_offset = valueNameToEnum_offset[i];
847     }
848 }
849 
fixup()850 void Builder::fixup() {
851     computeOffsets();
852     fixupStringPoolOffsets();
853     fixupNameGroupPoolOffsets();
854     fixupMiscellaneousOffsets();
855 }
856 
createData(int32_t & length) const857 int8_t* Builder::createData(int32_t& length) const {
858     length = total_size;
859     int8_t* result = MALLOC(int8_t, length);
860 
861     int8_t* p = result;
862     int8_t* limit = result + length;
863 
864     #define APPEND2(x, size)   \
865       U_ASSERT((p+size)<=limit); \
866       uprv_memcpy(p, x, size); \
867       p += size
868 
869     #define APPEND(x) APPEND2(x, x##_size)
870 
871     APPEND2(&header, sizeof(header));
872     APPEND(enumToName);
873     APPEND(nameToEnum);
874     APPEND(enumToValue);
875     APPEND(valueMap);
876 
877     for (int32_t i=0; i<valueMap_count; ++i) {
878         U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
879                (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
880         if (valueEnumToName[i] != 0) {
881             APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
882         }
883         if (valueNCEnumToName[i] != 0) {
884             APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
885         }
886         APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
887     }
888 
889     APPEND(nameGroupPool);
890     APPEND(stringPool);
891 
892     if (p != limit) {
893         fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
894         exit(1);
895     }
896     return result;
897 }
898 
899 // END Builder
900 //----------------------------------------------------------------------
901 
902 /* UDataInfo cf. udata.h */
903 static UDataInfo dataInfo = {
904     sizeof(UDataInfo),
905     0,
906 
907     U_IS_BIG_ENDIAN,
908     U_CHARSET_FAMILY,
909     sizeof(UChar),
910     0,
911 
912     {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
913     {PNAME_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
914     {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
915 };
916 
917 class genpname {
918 
919     // command-line options
920     UBool useCopyright;
921     UBool verbose;
922     int32_t debug;
923 
924 public:
925     int      MMain(int argc, char *argv[]);
926 
927 private:
928     NameToEnumEntry* createNameIndex(const AliasList& list,
929                                      int32_t& nameIndexCount);
930 
931     EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
932 
933     int32_t  writeDataFile(const char *destdir, const Builder&);
934 };
935 
main(int argc,char * argv[])936 int main(int argc, char *argv[]) {
937     UErrorCode status = U_ZERO_ERROR;
938     u_init(&status);
939     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
940         // Note: u_init() will try to open ICU property data.
941         //       failures here are expected when building ICU from scratch.
942         //       ignore them.
943         fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
944             u_errorName(status));
945         exit(1);
946     }
947 
948     genpname app;
949     U_MAIN_INIT_ARGS(argc, argv);
950     int retVal = app.MMain(argc, argv);
951     u_cleanup();
952     return retVal;
953 }
954 
955 static UOption options[]={
956     UOPTION_HELP_H,
957     UOPTION_HELP_QUESTION_MARK,
958     UOPTION_COPYRIGHT,
959     UOPTION_DESTDIR,
960     UOPTION_VERBOSE,
961     UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
962 };
963 
createNameIndex(const AliasList & list,int32_t & nameIndexCount)964 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
965                                            int32_t& nameIndexCount) {
966 
967     // Build name => enum map
968 
969     // This is an n->1 map.  There are typically multiple names
970     // mapping to one enum.  The name index is sorted in order of the name,
971     // as defined by the uprv_compareAliasNames() function.
972 
973     int32_t i, j;
974     int32_t count = list.count();
975 
976     // compute upper limit on number of names in the index
977     int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
978     NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
979 
980     nameIndexCount = 0;
981     int32_t names[MAX_NAMES_PER_GROUP];
982     for (i=0; i<count; ++i) {
983         const Alias& p = list[i];
984         int32_t n = p.getUniqueNames(names);
985         for (j=0; j<n; ++j) {
986             U_ASSERT(nameIndexCount < nameIndexCapacity);
987             nameIndex[nameIndexCount++] =
988                 NameToEnumEntry(names[j], p.enumValue);
989         }
990     }
991 
992     /*
993      * use a stable sort to ensure consistent results between
994      * genpname.cpp and the propname.cpp swapping code
995      */
996     UErrorCode errorCode = U_ZERO_ERROR;
997     uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
998                    compareNameToEnumEntry, NULL, TRUE, &errorCode);
999     if (debug>1) {
1000         printf("Alias names: %d\n", (int)nameIndexCount);
1001         for (i=0; i<nameIndexCount; ++i) {
1002             printf("%s => %d\n",
1003                    STRING_TABLE[nameIndex[i].nameIndex].str,
1004                    (int)nameIndex[i].enumValue);
1005         }
1006         printf("\n");
1007     }
1008     // make sure there are no duplicates.  for a sorted list we need
1009     // only compare adjacent items.  Alias.getUniqueNames() has
1010     // already eliminated duplicate names for a single property, which
1011     // does occur, so we're checking for duplicate names between two
1012     // properties, which should never occur.
1013     UBool ok = TRUE;
1014     for (i=1; i<nameIndexCount; ++i) {
1015         if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1016             STRING_TABLE[nameIndex[i].nameIndex]) {
1017             printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1018                    STRING_TABLE[nameIndex[i-1].nameIndex].str,
1019                    STRING_TABLE[nameIndex[i].nameIndex].str);
1020             ok = FALSE;
1021         }
1022     }
1023     if (!ok) {
1024         die("Two or more duplicate names in property list");
1025     }
1026 
1027     return nameIndex;
1028 }
1029 
createEnumIndex(const AliasList & list)1030 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1031 
1032     // Build the enum => name map
1033 
1034     // This is a 1->n map.  Each enum maps to 1 or more names.  To
1035     // accomplish this the index entry points to an element of the
1036     // NAME_GROUP array.  This is the short name (which may be empty).
1037     // From there, subsequent elements of NAME_GROUP are alternate
1038     // names for this enum, up to and including the first one that is
1039     // negative (negate for actual index).
1040 
1041     int32_t i, j, k;
1042     int32_t count = list.count();
1043 
1044     EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1045     for (i=0; i<count; ++i) {
1046         const Alias& p = list[i];
1047         enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1048     }
1049 
1050     UErrorCode errorCode = U_ZERO_ERROR;
1051     uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1052                    compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1053     if (debug>1) {
1054         printf("Property enums: %d\n", (int)count);
1055         for (i=0; i<count; ++i) {
1056             printf("%d => %d: ",
1057                    (int)enumIndex[i].enumValue,
1058                    (int)enumIndex[i].nameGroupIndex);
1059             UBool done = FALSE;
1060             for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1061                 k = NAME_GROUP[j];
1062                 if (k < 0) {
1063                     k = -k;
1064                     done = TRUE;
1065                 }
1066                 printf("\"%s\"", STRING_TABLE[k].str);
1067                 if (!done) printf(", ");
1068             }
1069             printf("\n");
1070         }
1071         printf("\n");
1072     }
1073     return enumIndex;
1074 }
1075 
MMain(int argc,char * argv[])1076 int genpname::MMain(int argc, char* argv[])
1077 {
1078     int32_t i, j;
1079     UErrorCode status = U_ZERO_ERROR;
1080 
1081     u_init(&status);
1082     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1083         fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1084         status = U_ZERO_ERROR;
1085     }
1086 
1087 
1088     /* preset then read command line options */
1089     options[3].value=u_getDataDirectory();
1090     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1091 
1092     /* error handling, printing usage message */
1093     if (argc<0) {
1094         fprintf(stderr,
1095             "error in command line argument \"%s\"\n",
1096             argv[-argc]);
1097     }
1098 
1099     debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1100 
1101     if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1102        debug < 0 || debug > 9) {
1103         fprintf(stderr,
1104             "usage: %s [-options]\n"
1105             "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1106             "options:\n"
1107             "\t-h or -? or --help  this usage text\n"
1108             "\t-v or --verbose     turn on verbose output\n"
1109             "\t-c or --copyright   include a copyright notice\n"
1110             "\t-d or --destdir     destination directory, followed by the path\n"
1111             "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
1112             argv[0]);
1113         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1114     }
1115 
1116     /* get the options values */
1117     useCopyright=options[2].doesOccur;
1118     verbose = options[4].doesOccur;
1119 
1120     // ------------------------------------------------------------
1121     // Do not sort the string table, instead keep it in data.h order.
1122     // This simplifies data swapping and testing thereof because the string
1123     // table itself need not be sorted during swapping.
1124     // The NameToEnum sorter sorts each such map's string offsets instead.
1125 
1126     if (debug>1) {
1127         printf("String pool: %d\n", (int)STRING_COUNT);
1128         for (i=0; i<STRING_COUNT; ++i) {
1129             if (i != 0) {
1130                 printf(", ");
1131             }
1132             printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1133         }
1134         printf("\n\n");
1135     }
1136 
1137     // ------------------------------------------------------------
1138     // Create top-level property indices
1139 
1140     PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1141     int32_t propNameCount;
1142     NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1143     EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1144 
1145     // ------------------------------------------------------------
1146     // Create indices for the value list for each enumerated property
1147 
1148     // This will have more entries than we need...
1149     EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1150     int32_t enumToValue_count = 0;
1151     for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1152         if (PROPERTY[i].valueCount == 0) continue;
1153         AliasArrayList values(PROPERTY[i].valueList,
1154                               PROPERTY[i].valueCount);
1155         enumToValue[j].enumValue = PROPERTY[i].enumValue;
1156         enumToValue[j].enumToName = createEnumIndex(values);
1157         enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1158         enumToValue[j].nameToEnum = createNameIndex(values,
1159                                                     enumToValue[j].nameToEnum_count);
1160         ++j;
1161     }
1162     enumToValue_count = j;
1163 
1164     uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1165                    compareEnumToValueEntry, NULL, FALSE, &status);
1166 
1167     // ------------------------------------------------------------
1168     // Build PropertyAliases layout in memory
1169 
1170     Builder builder(debug);
1171 
1172     builder.buildTopLevelProperties(propName,
1173                                     propNameCount,
1174                                     propEnum,
1175                                     PROPERTY_COUNT);
1176 
1177     builder.buildValues(enumToValue,
1178                         enumToValue_count);
1179 
1180     builder.buildStringPool(STRING_TABLE,
1181                             STRING_COUNT,
1182                             NAME_GROUP,
1183                             NAME_GROUP_COUNT);
1184 
1185     builder.fixup();
1186 
1187     ////////////////////////////////////////////////////////////
1188     // Write the output file
1189     ////////////////////////////////////////////////////////////
1190     int32_t wlen = writeDataFile(options[3].value, builder);
1191     if (verbose) {
1192         fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1193             U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1194     }
1195 
1196     return 0; // success
1197 }
1198 
writeDataFile(const char * destdir,const Builder & builder)1199 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1200     int32_t length;
1201     int8_t* data = builder.createData(length);
1202 
1203     UNewDataMemory *pdata;
1204     UErrorCode status = U_ZERO_ERROR;
1205 
1206     pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1207                          useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1208     if (U_FAILURE(status)) {
1209         die("Unable to create data memory");
1210     }
1211 
1212     udata_writeBlock(pdata, data, length);
1213 
1214     int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1215     if (U_FAILURE(status)) {
1216         die("Error writing output file");
1217     }
1218     if (dataLength != length) {
1219         die("Written file doesn't match expected size");
1220     }
1221 
1222     return dataLength;
1223 }
1224 
1225 //eof
1226