/*
**********************************************************************
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
**********************************************************************
*/
12 #include "propname.h"
13 #include "unicode/uchar.h"
14 #include "unicode/udata.h"
15 #include "unicode/uscript.h"
16 #include "umutex.h"
17 #include "cmemory.h"
18 #include "cstring.h"
19 #include "ucln_cmn.h"
20 #include "uarrsort.h"
21 #include "uinvchar.h"
22 
23 #define INCLUDED_FROM_PROPNAME_CPP
24 #include "propname_data.h"
25 
26 U_CDECL_BEGIN
27 
28 /**
29  * Get the next non-ignorable ASCII character from a property name
30  * and lowercases it.
31  * @return ((advance count for the name)<<8)|character
32  */
33 static inline int32_t
getASCIIPropertyNameChar(const char * name)34 getASCIIPropertyNameChar(const char *name) {
35     int32_t i;
36     char c;
37 
38     /* Ignore delimiters '-', '_', and ASCII White_Space */
39     for(i=0;
40         (c=name[i++])==0x2d || c==0x5f ||
41         c==0x20 || (0x09<=c && c<=0x0d);
42     ) {}
43 
44     if(c!=0) {
45         return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
46     } else {
47         return i<<8;
48     }
49 }
50 
51 /**
52  * Get the next non-ignorable EBCDIC character from a property name
53  * and lowercases it.
54  * @return ((advance count for the name)<<8)|character
55  */
56 static inline int32_t
getEBCDICPropertyNameChar(const char * name)57 getEBCDICPropertyNameChar(const char *name) {
58     int32_t i;
59     char c;
60 
61     /* Ignore delimiters '-', '_', and EBCDIC White_Space */
62     for(i=0;
63         (c=name[i++])==0x60 || c==0x6d ||
64         c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
65     ) {}
66 
67     if(c!=0) {
68         return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
69     } else {
70         return i<<8;
71     }
72 }
73 
74 /**
75  * Unicode property names and property value names are compared "loosely".
76  *
77  * UCD.html 4.0.1 says:
78  *   For all property names, property value names, and for property values for
79  *   Enumerated, Binary, or Catalog properties, use the following
80  *   loose matching rule:
81  *
82  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
83  *
84  * This function does just that, for (char *) name strings.
85  * It is almost identical to ucnv_compareNames() but also ignores
86  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
87  *
88  * @internal
89  */
90 
91 U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char * name1,const char * name2)92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
93     int32_t rc, r1, r2;
94 
95     for(;;) {
96         r1=getASCIIPropertyNameChar(name1);
97         r2=getASCIIPropertyNameChar(name2);
98 
99         /* If we reach the ends of both strings then they match */
100         if(((r1|r2)&0xff)==0) {
101             return 0;
102         }
103 
104         /* Compare the lowercased characters */
105         if(r1!=r2) {
106             rc=(r1&0xff)-(r2&0xff);
107             if(rc!=0) {
108                 return rc;
109             }
110         }
111 
112         name1+=r1>>8;
113         name2+=r2>>8;
114     }
115 }
116 
117 U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char * name1,const char * name2)118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
119     int32_t rc, r1, r2;
120 
121     for(;;) {
122         r1=getEBCDICPropertyNameChar(name1);
123         r2=getEBCDICPropertyNameChar(name2);
124 
125         /* If we reach the ends of both strings then they match */
126         if(((r1|r2)&0xff)==0) {
127             return 0;
128         }
129 
130         /* Compare the lowercased characters */
131         if(r1!=r2) {
132             rc=(r1&0xff)-(r2&0xff);
133             if(rc!=0) {
134                 return rc;
135             }
136         }
137 
138         name1+=r1>>8;
139         name2+=r2>>8;
140     }
141 }
142 
143 U_CDECL_END
144 
145 U_NAMESPACE_BEGIN
146 
findProperty(int32_t property)147 int32_t PropNameData::findProperty(int32_t property) {
148     int32_t i=1;  // valueMaps index, initially after numRanges
149     for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
150         // Read and skip the start and limit of this range.
151         int32_t start=valueMaps[i];
152         int32_t limit=valueMaps[i+1];
153         i+=2;
154         if(property<start) {
155             break;
156         }
157         if(property<limit) {
158             return i+(property-start)*2;
159         }
160         i+=(limit-start)*2;  // Skip all entries for this range.
161     }
162     return 0;
163 }
164 
findPropertyValueNameGroup(int32_t valueMapIndex,int32_t value)165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
166     if(valueMapIndex==0) {
167         return 0;  // The property does not have named values.
168     }
169     ++valueMapIndex;  // Skip the BytesTrie offset.
170     int32_t numRanges=valueMaps[valueMapIndex++];
171     if(numRanges<0x10) {
172         // Ranges of values.
173         for(; numRanges>0; --numRanges) {
174             // Read and skip the start and limit of this range.
175             int32_t start=valueMaps[valueMapIndex];
176             int32_t limit=valueMaps[valueMapIndex+1];
177             valueMapIndex+=2;
178             if(value<start) {
179                 break;
180             }
181             if(value<limit) {
182                 return valueMaps[valueMapIndex+value-start];
183             }
184             valueMapIndex+=limit-start;  // Skip all entries for this range.
185         }
186     } else {
187         // List of values.
188         int32_t valuesStart=valueMapIndex;
189         int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
190         do {
191             int32_t v=valueMaps[valueMapIndex];
192             if(value<v) {
193                 break;
194             }
195             if(value==v) {
196                 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
197             }
198         } while(++valueMapIndex<nameGroupOffsetsStart);
199     }
200     return 0;
201 }
202 
getName(const char * nameGroup,int32_t nameIndex)203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
204     int32_t numNames=*nameGroup++;
205     if(nameIndex<0 || numNames<=nameIndex) {
206         return NULL;
207     }
208     // Skip nameIndex names.
209     for(; nameIndex>0; --nameIndex) {
210         nameGroup=uprv_strchr(nameGroup, 0)+1;
211     }
212     if(*nameGroup==0) {
213         return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
214     }
215     return nameGroup;
216 }
217 
containsName(BytesTrie & trie,const char * name)218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
219     if(name==NULL) {
220         return FALSE;
221     }
222     UStringTrieResult result=USTRINGTRIE_NO_VALUE;
223     char c;
224     while((c=*name++)!=0) {
225         c=uprv_invCharToLowercaseAscii(c);
226         // Ignore delimiters '-', '_', and ASCII White_Space.
227         if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
228             continue;
229         }
230         if(!USTRINGTRIE_HAS_NEXT(result)) {
231             return FALSE;
232         }
233         result=trie.next((uint8_t)c);
234     }
235     return USTRINGTRIE_HAS_VALUE(result);
236 }
237 
getPropertyName(int32_t property,int32_t nameChoice)238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
239     int32_t valueMapIndex=findProperty(property);
240     if(valueMapIndex==0) {
241         return NULL;  // Not a known property.
242     }
243     return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
244 }
245 
getPropertyValueName(int32_t property,int32_t value,int32_t nameChoice)246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
247     int32_t valueMapIndex=findProperty(property);
248     if(valueMapIndex==0) {
249         return NULL;  // Not a known property.
250     }
251     int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
252     if(nameGroupOffset==0) {
253         return NULL;
254     }
255     return getName(nameGroups+nameGroupOffset, nameChoice);
256 }
257 
getPropertyOrValueEnum(int32_t bytesTrieOffset,const char * alias)258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
259     BytesTrie trie(bytesTries+bytesTrieOffset);
260     if(containsName(trie, alias)) {
261         return trie.getValue();
262     } else {
263         return UCHAR_INVALID_CODE;
264     }
265 }
266 
getPropertyEnum(const char * alias)267 int32_t PropNameData::getPropertyEnum(const char *alias) {
268     return getPropertyOrValueEnum(0, alias);
269 }
270 
getPropertyValueEnum(int32_t property,const char * alias)271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
272     int32_t valueMapIndex=findProperty(property);
273     if(valueMapIndex==0) {
274         return UCHAR_INVALID_CODE;  // Not a known property.
275     }
276     valueMapIndex=valueMaps[valueMapIndex+1];
277     if(valueMapIndex==0) {
278         return UCHAR_INVALID_CODE;  // The property does not have named values.
279     }
280     // valueMapIndex is the start of the property's valueMap,
281     // where the first word is the BytesTrie offset.
282     return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
283 }
284 U_NAMESPACE_END
285 
286 //----------------------------------------------------------------------
287 // Public API implementation
288 
289 U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,UPropertyNameChoice nameChoice)290 u_getPropertyName(UProperty property,
291                   UPropertyNameChoice nameChoice) {
292     U_NAMESPACE_USE
293     return PropNameData::getPropertyName(property, nameChoice);
294 }
295 
296 U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char * alias)297 u_getPropertyEnum(const char* alias) {
298     U_NAMESPACE_USE
299     return (UProperty)PropNameData::getPropertyEnum(alias);
300 }
301 
302 U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,int32_t value,UPropertyNameChoice nameChoice)303 u_getPropertyValueName(UProperty property,
304                        int32_t value,
305                        UPropertyNameChoice nameChoice) {
306     U_NAMESPACE_USE
307     return PropNameData::getPropertyValueName(property, value, nameChoice);
308 }
309 
310 U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,const char * alias)311 u_getPropertyValueEnum(UProperty property,
312                        const char* alias) {
313     U_NAMESPACE_USE
314     return PropNameData::getPropertyValueEnum(property, alias);
315 }
316 
317 U_CAPI const char*  U_EXPORT2
uscript_getName(UScriptCode scriptCode)318 uscript_getName(UScriptCode scriptCode){
319     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
320                                   U_LONG_PROPERTY_NAME);
321 }
322 
323 U_CAPI const char*  U_EXPORT2
uscript_getShortName(UScriptCode scriptCode)324 uscript_getShortName(UScriptCode scriptCode){
325     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
326                                   U_SHORT_PROPERTY_NAME);
327 }
328