1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2002-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 10/11/02 aliu Creation.
10 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
11 * 2011dec18 Markus Scherer Moved genpname/genpname.cpp to genprops/pnamesbuilder.cpp.
12 **********************************************************************
13 */
14
15 #include "unicode/utypes.h"
16 #include "unicode/bytestrie.h"
17 #include "unicode/bytestriebuilder.h"
18 #include "unicode/putil.h"
19 #include "unicode/uclean.h"
20 #include "charstr.h"
21 #include "cstring.h"
22 #include "denseranges.h"
23 #include "genprops.h"
24 #include "propname.h"
25 #include "toolutil.h"
26 #include "uhash.h"
27 #include "uinvchar.h"
28 #include "unewdata.h"
29 #include "uvectr32.h"
30 #include "writesrc.h"
31
32 #include <stdio.h>
33
34 // We test for ASCII delimiters and White_Space, and build ASCII string BytesTries.
35 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY
36 # error This builder requires U_CHARSET_FAMILY==U_ASCII_FAMILY.
37 #endif
38
39 U_NAMESPACE_USE
40
41 //----------------------------------------------------------------------
42 // BEGIN DATA
43 //
44 // This is the raw data to be output. We define the data structure,
45 // then include a machine-generated header that contains the actual
46 // data.
47
48 #include "unicode/uchar.h"
49 #include "unicode/unorm2.h"
50 #include "unicode/uscript.h"
51
// Capacity, in aliases, of the per-Value pointer arrays.
// This really wants to be MAX_ALIASES, but MAX_ALIASES only becomes visible
// once the generated data header has been included, and the Value class
// must be defined before that header.
// PNamesBuilderImpl::build() therefore checks at runtime
// that this constant is >=MAX_ALIASES.
static const int32_t VALUE_MAX_ALIASES=4;

// Capacity, in bytes including the terminating NUL, of the per-Value buffers
// that receive the split and the normalized copies of the joined aliases.
static const int32_t JOINED_ALIASES_CAPACITY=100;
59
60 class Value {
61 public:
Value(int32_t enumValue,const char * joinedAliases)62 Value(int32_t enumValue, const char *joinedAliases)
63 : enumValue(enumValue), joinedAliases(joinedAliases), count(0) {
64 if(uprv_strlen(joinedAliases)>=JOINED_ALIASES_CAPACITY) {
65 fprintf(stderr,
66 "genprops error: pnamesbuilder.cpp Value::Value(%ld, \"%s\"): "
67 "joined aliases too long: "
68 "increase JOINED_ALIASES_CAPACITY, to at least %ld\n",
69 (long)enumValue, joinedAliases, uprv_strlen(joinedAliases)+1);
70 exit(U_BUFFER_OVERFLOW_ERROR);
71 }
72 // Copy the space-separated aliases into NUL-separated ones and count them.
73 // Write a normalized version of each one.
74 const char *j=joinedAliases;
75 char *a=aliasesBuffer;
76 char *n=normalizedBuffer;
77 char c;
78 do {
79 aliases[count]=a;
80 normalized[count++]=n;
81 while((c=*j++)!=' ' && c!=0) {
82 *a++=c;
83 // Ignore delimiters '-' and '_'.
84 if(!(c=='-' || c=='_')) {
85 *n++=uprv_tolower(c);
86 }
87 }
88 *a++=0;
89 *n++=0;
90 } while(c!=0);
91 }
92
93 /**
94 * Writes at most MAX_ALIASES pointers for unique normalized aliases
95 * (no empty strings) to dest and returns how many there are.
96 */
getUniqueNormalizedAliases(const char * dest[]) const97 int32_t getUniqueNormalizedAliases(const char *dest[]) const {
98 int32_t numUnique=0;
99 for(int32_t i=0; i<count; ++i) {
100 const char *s=normalized[i];
101 if(*s!=0) { // Omit empty strings.
102 for(int32_t j=0;; ++j) {
103 if(j==numUnique) {
104 // s is a new unique alias.
105 dest[numUnique++]=s;
106 break;
107 }
108 if(0==uprv_strcmp(s, dest[j])) {
109 // s is equal or equivalent to an earlier alias.
110 break;
111 }
112 }
113 }
114 }
115 return numUnique;
116 }
117
118 int32_t enumValue;
119 const char *joinedAliases;
120 char aliasesBuffer[JOINED_ALIASES_CAPACITY];
121 char normalizedBuffer[JOINED_ALIASES_CAPACITY];
122 const char *aliases[VALUE_MAX_ALIASES];
123 const char *normalized[VALUE_MAX_ALIASES];
124 int32_t count;
125 };
126
127 class Property : public Value {
128 public:
129 // A property with a values array.
Property(int32_t enumValue,const char * joinedAliases,const Value * values,int32_t valueCount)130 Property(int32_t enumValue, const char *joinedAliases,
131 const Value *values, int32_t valueCount)
132 : Value(enumValue, joinedAliases),
133 values(values), valueCount(valueCount) {}
134 // A binary property (enumValue<UCHAR_BINARY_LIMIT), or one without values.
135 Property(int32_t enumValue, const char *joinedAliases);
136
137 const Value *values;
138 int32_t valueCount;
139 };
140
141 // *** Include the data header ***
142 #include "pnames_data.h"
143
Property(int32_t enumValue,const char * joinedAliases)144 Property::Property(int32_t enumValue, const char *joinedAliases)
145 : Value(enumValue, joinedAliases),
146 values(enumValue<UCHAR_BINARY_LIMIT ? VALUES_binprop : NULL),
147 valueCount(enumValue<UCHAR_BINARY_LIMIT ? 2 : 0) {}
148
149 // END DATA
150 //----------------------------------------------------------------------
151
152 class PNamesPropertyNames : public PropertyNames {
153 public:
PNamesPropertyNames()154 PNamesPropertyNames()
155 : valueMaps(NULL), bytesTries(NULL) {}
init(const int32_t * vm,const uint8_t * bt)156 void init(const int32_t *vm, const uint8_t *bt) {
157 valueMaps=vm;
158 bytesTries=bt;
159 }
160 virtual int32_t getPropertyEnum(const char *name) const;
161 virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
162 private:
163 int32_t findProperty(int32_t property) const;
164 UBool containsName(BytesTrie &trie, const char *name) const;
165 int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) const;
166
167 const int32_t *valueMaps;
168 const uint8_t *bytesTries;
169 };
170
171 class PNamesBuilderImpl : public PNamesBuilder {
172 public:
PNamesBuilderImpl(UErrorCode & errorCode)173 PNamesBuilderImpl(UErrorCode &errorCode)
174 : valueMaps(errorCode), btb(errorCode), maxNameLength(0),
175 nameGroupToOffset(NULL) {}
176
~PNamesBuilderImpl()177 ~PNamesBuilderImpl() {
178 uhash_close(nameGroupToOffset);
179 }
180
build(UErrorCode & errorCode)181 virtual void build(UErrorCode &errorCode) {
182 if(U_FAILURE(errorCode)) { return; }
183 if(VALUE_MAX_ALIASES<MAX_ALIASES) {
184 fprintf(stderr,
185 "genprops error: pnamesbuilder.cpp VALUE_MAX_ALIASES=%d<%d=MAX_ALIASES -- "
186 "need to change VALUE_MAX_ALIASES to at least %d\n",
187 (int)VALUE_MAX_ALIASES, (int)MAX_ALIASES, (int)MAX_ALIASES);
188 errorCode=U_INTERNAL_PROGRAM_ERROR;
189 return;
190 }
191 nameGroupToOffset=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
192 // Build main property aliases value map at value map offset 0,
193 // so that we need not store another offset for it.
194 UVector32 propEnums(errorCode);
195 int32_t propIndex;
196 for(propIndex=0; propIndex<LENGTHOF(PROPERTIES); ++propIndex) {
197 propEnums.sortedInsert(PROPERTIES[propIndex].enumValue, errorCode);
198 }
199 int32_t ranges[10][2];
200 int32_t numPropRanges=uprv_makeDenseRanges(propEnums.getBuffer(),
201 LENGTHOF(PROPERTIES), 0x100,
202 ranges, LENGTHOF(ranges));
203 valueMaps.addElement(numPropRanges, errorCode);
204 int32_t i, j;
205 for(i=0; i<numPropRanges; ++i) {
206 valueMaps.addElement(ranges[i][0], errorCode);
207 valueMaps.addElement(ranges[i][1]+1, errorCode);
208 for(j=ranges[i][0]; j<=ranges[i][1]; ++j) {
209 // Reserve two slots per property for the name group offset and the value-map offset.
210 valueMaps.addElement(0, errorCode);
211 valueMaps.addElement(0, errorCode);
212 }
213 }
214
215 // Build the properties trie first, at BytesTrie offset 0,
216 // so that we need not store another offset for it.
217 buildPropertiesBytesTrie(PROPERTIES, LENGTHOF(PROPERTIES), errorCode);
218
219 // Build the name group for the first property, at nameGroups offset 0.
220 // Name groups for *value* aliases must not start at offset 0
221 // because that is a missing-value marker for sparse value ranges.
222 setPropertyInt(PROPERTIES[0].enumValue, 0,
223 writeValueAliases(PROPERTIES[0], errorCode));
224
225 // Build the known-repeated binary properties once.
226 int32_t binPropsValueMapOffset=valueMaps.size();
227 int32_t bytesTrieOffset=buildValuesBytesTrie(VALUES_binprop, 2, errorCode);
228 valueMaps.addElement(bytesTrieOffset, errorCode);
229 buildValueMap(VALUES_binprop, 2, errorCode);
230
231 // Note: It is slightly wasteful to store binary properties like all others.
232 // Since we know that they are in the lowest range of property enum values
233 // and share the same name group and BytesTrie,
234 // we could just store those two indexes once.
235 // (This would save 8 bytes per binary property, or about half a kilobyte.)
236
237 // Build the known-repeated canonical combining class properties once.
238 int32_t cccValueMapOffset=valueMaps.size();
239 bytesTrieOffset=buildValuesBytesTrie(VALUES_ccc, LENGTHOF(VALUES_ccc), errorCode);
240 valueMaps.addElement(bytesTrieOffset, errorCode);
241 buildValueMap(VALUES_ccc, LENGTHOF(VALUES_ccc), errorCode);
242
243 // Build the rest of the data.
244 for(propIndex=0; propIndex<LENGTHOF(PROPERTIES); ++propIndex) {
245 if(propIndex>0) {
246 // writeValueAliases(PROPERTIES[0], ...) already done
247 setPropertyInt(PROPERTIES[propIndex].enumValue, 0,
248 writeValueAliases(PROPERTIES[propIndex], errorCode));
249 }
250 int32_t valueCount=PROPERTIES[propIndex].valueCount;
251 if(valueCount>0) {
252 int32_t valueMapOffset;
253 const Value *values=PROPERTIES[propIndex].values;
254 if(values==VALUES_binprop) {
255 valueMapOffset=binPropsValueMapOffset;
256 } else if(values==VALUES_ccc || values==VALUES_lccc || values==VALUES_tccc) {
257 valueMapOffset=cccValueMapOffset;
258 } else {
259 valueMapOffset=valueMaps.size();
260 bytesTrieOffset=buildValuesBytesTrie(values, valueCount, errorCode);
261 valueMaps.addElement(bytesTrieOffset, errorCode);
262 buildValueMap(values, valueCount, errorCode);
263 }
264 setPropertyInt(PROPERTIES[propIndex].enumValue, 1, valueMapOffset);
265 }
266 }
267
268 // Write the indexes.
269 int32_t offset=(int32_t)sizeof(indexes);
270 indexes[PropNameData::IX_VALUE_MAPS_OFFSET]=offset;
271 offset+=valueMaps.size()*4;
272 indexes[PropNameData::IX_BYTE_TRIES_OFFSET]=offset;
273 offset+=bytesTries.length();
274 indexes[PropNameData::IX_NAME_GROUPS_OFFSET]=offset;
275 offset+=nameGroups.length();
276 for(i=PropNameData::IX_RESERVED3_OFFSET; i<=PropNameData::IX_TOTAL_SIZE; ++i) {
277 indexes[i]=offset;
278 }
279 indexes[PropNameData::IX_MAX_NAME_LENGTH]=maxNameLength;
280 for(i=PropNameData::IX_RESERVED7; i<PropNameData::IX_COUNT; ++i) {
281 indexes[i]=0;
282 }
283
284 if(!beQuiet) {
285 puts("* pnames.icu stats *");
286 printf("length of all value maps: %6ld\n", (long)valueMaps.size());
287 printf("length of all BytesTries: %6ld\n", (long)bytesTries.length());
288 printf("length of all name groups: %6ld\n", (long)nameGroups.length());
289 printf("length of pnames.icu data: %6ld\n", (long)indexes[PropNameData::IX_TOTAL_SIZE]);
290 }
291 }
292
293 virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
294 virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
295
writeValueAliases(const Value & value,UErrorCode & errorCode)296 int32_t writeValueAliases(const Value &value, UErrorCode &errorCode) {
297 int32_t nameOffset=uhash_geti(nameGroupToOffset, (void *)value.joinedAliases);
298 if(nameOffset!=0) {
299 // The same list of aliases has been written already.
300 return nameOffset-1; // Was incremented to reserve 0 for "not found".
301 }
302 // Write this not-yet-seen list of aliases.
303 nameOffset=nameGroups.length();
304 uhash_puti(nameGroupToOffset, (void *)value.joinedAliases,
305 nameOffset+1, &errorCode);
306 // The first byte tells us how many aliases there are.
307 // We use only values 0..0x1f in the first byte because when we write
308 // the name groups as an invariant-character string into a source file,
309 // those values (C0 control codes) are written as numbers rather than as characters.
310 int32_t count=value.count;
311 if(count>=0x20) {
312 fprintf(stderr, "Error: Too many aliases in \"%s\"\n", value.joinedAliases);
313 exit(U_INDEX_OUTOFBOUNDS_ERROR);
314 }
315 nameGroups.append((char)count, errorCode);
316 // There is at least a short name (sometimes empty) and a long name. (count>=2)
317 // Note: Sometimes the short and long names are the same.
318 // In such a case, we could set a flag and omit the duplicate,
319 // but that would save only about 1.35% of total data size (Unicode 6.0/ICU 4.6)
320 // which is not worth the trouble.
321 // Note: In Unicode 6.1, there are more duplicates due to newly added
322 // short names for blocks and other properties.
323 // It might now be worth changing the data structure.
324 for(int32_t i=0; i<count; ++i) {
325 const char *s=value.aliases[i];
326 int32_t sLength=uprv_strlen(s)+1;
327 if(sLength>maxNameLength) {
328 maxNameLength=sLength;
329 }
330 nameGroups.append(s, sLength, errorCode); // including NUL
331 }
332 return nameOffset;
333 }
334
buildValueMap(const Value values[],int32_t length,UErrorCode & errorCode)335 void buildValueMap(const Value values[], int32_t length, UErrorCode &errorCode) {
336 UVector32 sortedValues(errorCode);
337 UVector32 nameOffsets(errorCode); // Parallel to values[].
338 int32_t i;
339 for(i=0; i<length; ++i) {
340 sortedValues.sortedInsert(values[i].enumValue, errorCode);
341 nameOffsets.addElement(writeValueAliases(values[i], errorCode), errorCode);
342 }
343 int32_t ranges[10][2];
344 int32_t numRanges=uprv_makeDenseRanges(sortedValues.getBuffer(), length, 0xe0,
345 ranges, LENGTHOF(ranges));
346 if(numRanges>0) {
347 valueMaps.addElement(numRanges, errorCode);
348 for(i=0; i<numRanges; ++i) {
349 valueMaps.addElement(ranges[i][0], errorCode);
350 valueMaps.addElement(ranges[i][1]+1, errorCode);
351 for(int32_t j=ranges[i][0]; j<=ranges[i][1]; ++j) {
352 // The range might not be completely dense, so j might not have an entry,
353 // in which case we write a nameOffset of 0.
354 // Real nameOffsets for property values are never 0.
355 // (The first name group is for the first property name.)
356 int32_t valueIndex=valuesIndexOf(values, length, j);
357 int32_t nameOffset= valueIndex>=0 ? nameOffsets.elementAti(valueIndex) : 0;
358 valueMaps.addElement(nameOffset, errorCode);
359 }
360 }
361 } else {
362 // No dense ranges.
363 valueMaps.addElement(0x10+length, errorCode);
364 for(i=0; i<length; ++i) {
365 valueMaps.addElement(sortedValues.elementAti(i), errorCode);
366 }
367 for(i=0; i<length; ++i) {
368 valueMaps.addElement(
369 nameOffsets.elementAti(
370 valuesIndexOf(values, length,
371 sortedValues.elementAti(i))), errorCode);
372 }
373 }
374 }
375
valuesIndexOf(const Value values[],int32_t length,int32_t value)376 static int32_t valuesIndexOf(const Value values[], int32_t length, int32_t value) {
377 for(int32_t i=0;; ++i) {
378 if(values[i].enumValue==value) {
379 return i;
380 }
381 }
382 return -1;
383 }
384
setPropertyInt(int32_t prop,int32_t subIndex,int32_t value)385 void setPropertyInt(int32_t prop, int32_t subIndex, int32_t value) {
386 // Assume that prop is in the valueMaps.elementAti(0) ranges.
387 int32_t index=1;
388 for(;;) {
389 int32_t rangeStart=valueMaps.elementAti(index);
390 int32_t rangeLimit=valueMaps.elementAti(index+1);
391 index+=2;
392 if(rangeStart<=prop && prop<rangeLimit) {
393 valueMaps.setElementAt(value, index+2*(prop-rangeStart)+subIndex);
394 break;
395 }
396 index+=2*(rangeLimit-rangeStart);
397 }
398 }
399
addValueToBytesTrie(const Value & value,UErrorCode & errorCode)400 void addValueToBytesTrie(const Value &value, UErrorCode &errorCode) {
401 const char *aliases[MAX_ALIASES];
402 int32_t numAliases=value.getUniqueNormalizedAliases(aliases);
403 for(int32_t i=0; i<numAliases; ++i) {
404 btb.add(aliases[i], value.enumValue, errorCode);
405 }
406 }
407
buildValuesBytesTrie(const Value values[],int32_t length,UErrorCode & errorCode)408 int32_t buildValuesBytesTrie(const Value values[], int32_t length, UErrorCode &errorCode) {
409 btb.clear();
410 for(int32_t i=0; i<length; ++i) {
411 addValueToBytesTrie(values[i], errorCode);
412 }
413 int32_t bytesTrieOffset=bytesTries.length();
414 bytesTries.append(btb.buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode), errorCode);
415 return bytesTrieOffset;
416 }
417
418 // Variant of buildValuesBytesTrie() for Property.
419 // Property is-a Value, and the source code is the same,
420 // but when we iterate through the array we need to increment by the right object size.
buildPropertiesBytesTrie(const Property properties[],int32_t length,UErrorCode & errorCode)421 int32_t buildPropertiesBytesTrie(const Property properties[], int32_t length,
422 UErrorCode &errorCode) {
423 btb.clear();
424 for(int32_t i=0; i<length; ++i) {
425 addValueToBytesTrie(properties[i], errorCode);
426 }
427 int32_t bytesTrieOffset=bytesTries.length();
428 bytesTries.append(btb.buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode), errorCode);
429 return bytesTrieOffset;
430 }
431
getPropertyNames()432 virtual const PropertyNames *getPropertyNames() {
433 pnames.init(valueMaps.getBuffer(),
434 reinterpret_cast<const uint8_t *>(bytesTries.data()));
435 return &pnames;
436 }
437
438 private:
439 int32_t indexes[PropNameData::IX_COUNT];
440 UVector32 valueMaps;
441 BytesTrieBuilder btb;
442 CharString bytesTries;
443 CharString nameGroups;
444 int32_t maxNameLength;
445 PNamesPropertyNames pnames;
446 UHashtable *nameGroupToOffset;
447 };
448
449 /* UDataInfo cf. udata.h */
450 static const UDataInfo dataInfo = {
451 sizeof(UDataInfo),
452 0,
453
454 U_IS_BIG_ENDIAN,
455 U_CHARSET_FAMILY,
456 sizeof(UChar),
457 0,
458
459 { PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3 },
460 { 2, 0, 0, 0 }, /* formatVersion */
461 UNICODE_VERSION
462 };
463
464 void
writeBinaryData(const char * path,UBool withCopyright,UErrorCode & errorCode)465 PNamesBuilderImpl::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
466 if(U_FAILURE(errorCode)) { return; }
467 UNewDataMemory *pdata=udata_create(path, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
468 withCopyright ? U_COPYRIGHT_STRING : 0, &errorCode);
469 if(U_FAILURE(errorCode)) {
470 fprintf(stderr, "genprops: udata_create(%s, pnames.icu) failed - %s\n",
471 path, u_errorName(errorCode));
472 return;
473 }
474
475 udata_writeBlock(pdata, indexes, PropNameData::IX_COUNT*4);
476 udata_writeBlock(pdata, valueMaps.getBuffer(), valueMaps.size()*4);
477 udata_writeBlock(pdata, bytesTries.data(), bytesTries.length());
478 udata_writeBlock(pdata, nameGroups.data(), nameGroups.length());
479
480 int32_t dataLength=(int32_t)udata_finish(pdata, &errorCode);
481 if(dataLength!=indexes[PropNameData::IX_TOTAL_SIZE]) {
482 fprintf(stderr,
483 "udata_finish(pnames.icu) reports %ld bytes written but should be %ld\n",
484 (long)dataLength, (long)indexes[PropNameData::IX_TOTAL_SIZE]);
485 errorCode=U_INTERNAL_PROGRAM_ERROR;
486 }
487 }
488
489 void
writeCSourceFile(const char * path,UErrorCode & errorCode)490 PNamesBuilderImpl::writeCSourceFile(const char *path, UErrorCode &errorCode) {
491 if(U_FAILURE(errorCode)) { return; }
492 FILE *f=usrc_create(path, "propname_data.h", 2016,
493 "icu/tools/unicode/c/genprops/pnamesbuilder.cpp");
494 if(f==NULL) {
495 errorCode=U_FILE_ACCESS_ERROR;
496 return;
497 }
498
499 fputs("#ifdef INCLUDED_FROM_PROPNAME_CPP\n\n"
500 "U_NAMESPACE_BEGIN\n\n", f);
501
502 usrc_writeArray(f, "const int32_t PropNameData::indexes[%ld]={",
503 indexes, 32, PropNameData::IX_COUNT,
504 "};\n\n");
505 usrc_writeArray(f, "const int32_t PropNameData::valueMaps[%ld]={\n",
506 valueMaps.getBuffer(), 32, valueMaps.size(),
507 "\n};\n\n");
508 usrc_writeArray(f, "const uint8_t PropNameData::bytesTries[%ld]={\n",
509 bytesTries.data(), 8, bytesTries.length(),
510 "\n};\n\n");
511 usrc_writeArrayOfMostlyInvChars(
512 f, "const char PropNameData::nameGroups[%ld]={\n",
513 nameGroups.data(), nameGroups.length(),
514 "\n};\n\n");
515
516 fputs("U_NAMESPACE_END\n\n"
517 "#endif // INCLUDED_FROM_PROPNAME_CPP\n", f);
518
519 fclose(f);
520 }
521
522 PNamesBuilder *
createPNamesBuilder(UErrorCode & errorCode)523 createPNamesBuilder(UErrorCode &errorCode) {
524 if(U_FAILURE(errorCode)) { return NULL; }
525 PNamesBuilder *pb=new PNamesBuilderImpl(errorCode);
526 if(pb==NULL) {
527 errorCode=U_MEMORY_ALLOCATION_ERROR;
528 }
529 return pb;
530 }
531
532 // Note: The following is a partial copy of runtime propname.cpp code.
533 // Consider changing that into a semi-public API to avoid duplication.
534
findProperty(int32_t property) const535 int32_t PNamesPropertyNames::findProperty(int32_t property) const {
536 int32_t i=1; // valueMaps index, initially after numRanges
537 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
538 // Read and skip the start and limit of this range.
539 int32_t start=valueMaps[i];
540 int32_t limit=valueMaps[i+1];
541 i+=2;
542 if(property<start) {
543 break;
544 }
545 if(property<limit) {
546 return i+(property-start)*2;
547 }
548 i+=(limit-start)*2; // Skip all entries for this range.
549 }
550 return 0;
551 }
552
containsName(BytesTrie & trie,const char * name) const553 UBool PNamesPropertyNames::containsName(BytesTrie &trie, const char *name) const {
554 if(name==NULL) {
555 return FALSE;
556 }
557 UStringTrieResult result=USTRINGTRIE_NO_VALUE;
558 char c;
559 while((c=*name++)!=0) {
560 c=uprv_invCharToLowercaseAscii(c);
561 // Ignore delimiters '-', '_', and ASCII White_Space.
562 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
563 continue;
564 }
565 if(!USTRINGTRIE_HAS_NEXT(result)) {
566 return FALSE;
567 }
568 result=trie.next((uint8_t)c);
569 }
570 return USTRINGTRIE_HAS_VALUE(result);
571 }
572
getPropertyOrValueEnum(int32_t bytesTrieOffset,const char * alias) const573 int32_t PNamesPropertyNames::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) const {
574 BytesTrie trie(bytesTries+bytesTrieOffset);
575 if(containsName(trie, alias)) {
576 return trie.getValue();
577 } else {
578 return UCHAR_INVALID_CODE;
579 }
580 }
581
582 int32_t
getPropertyEnum(const char * alias) const583 PNamesPropertyNames::getPropertyEnum(const char *alias) const {
584 return getPropertyOrValueEnum(0, alias);
585 }
586
587 int32_t
getPropertyValueEnum(int32_t property,const char * alias) const588 PNamesPropertyNames::getPropertyValueEnum(int32_t property, const char *alias) const {
589 int32_t valueMapIndex=findProperty(property);
590 if(valueMapIndex==0) {
591 return UCHAR_INVALID_CODE; // Not a known property.
592 }
593 valueMapIndex=valueMaps[valueMapIndex+1];
594 if(valueMapIndex==0) {
595 return UCHAR_INVALID_CODE; // The property does not have named values.
596 }
597 // valueMapIndex is the start of the property's valueMap,
598 // where the first word is the BytesTrie offset.
599 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
600 }
601