1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: October 30 2002
8 * Since: ICU 2.4
9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
10 **********************************************************************
11 */
12 #include "propname.h"
13 #include "unicode/uchar.h"
14 #include "unicode/udata.h"
15 #include "unicode/uscript.h"
16 #include "umutex.h"
17 #include "cmemory.h"
18 #include "cstring.h"
19 #include "ucln_cmn.h"
20 #include "uarrsort.h"
21 #include "uinvchar.h"
22
23 #define INCLUDED_FROM_PROPNAME_CPP
24 #include "propname_data.h"
25
26 U_CDECL_BEGIN
27
28 /**
29 * Get the next non-ignorable ASCII character from a property name
30 * and lowercases it.
31 * @return ((advance count for the name)<<8)|character
32 */
33 static inline int32_t
getASCIIPropertyNameChar(const char * name)34 getASCIIPropertyNameChar(const char *name) {
35 int32_t i;
36 char c;
37
38 /* Ignore delimiters '-', '_', and ASCII White_Space */
39 for(i=0;
40 (c=name[i++])==0x2d || c==0x5f ||
41 c==0x20 || (0x09<=c && c<=0x0d);
42 ) {}
43
44 if(c!=0) {
45 return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
46 } else {
47 return i<<8;
48 }
49 }
50
51 /**
52 * Get the next non-ignorable EBCDIC character from a property name
53 * and lowercases it.
54 * @return ((advance count for the name)<<8)|character
55 */
56 static inline int32_t
getEBCDICPropertyNameChar(const char * name)57 getEBCDICPropertyNameChar(const char *name) {
58 int32_t i;
59 char c;
60
61 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
62 for(i=0;
63 (c=name[i++])==0x60 || c==0x6d ||
64 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
65 ) {}
66
67 if(c!=0) {
68 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
69 } else {
70 return i<<8;
71 }
72 }
73
74 /**
75 * Unicode property names and property value names are compared "loosely".
76 *
77 * UCD.html 4.0.1 says:
78 * For all property names, property value names, and for property values for
79 * Enumerated, Binary, or Catalog properties, use the following
80 * loose matching rule:
81 *
82 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
83 *
84 * This function does just that, for (char *) name strings.
85 * It is almost identical to ucnv_compareNames() but also ignores
86 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
87 *
88 * @internal
89 */
90
91 U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char * name1,const char * name2)92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
93 int32_t rc, r1, r2;
94
95 for(;;) {
96 r1=getASCIIPropertyNameChar(name1);
97 r2=getASCIIPropertyNameChar(name2);
98
99 /* If we reach the ends of both strings then they match */
100 if(((r1|r2)&0xff)==0) {
101 return 0;
102 }
103
104 /* Compare the lowercased characters */
105 if(r1!=r2) {
106 rc=(r1&0xff)-(r2&0xff);
107 if(rc!=0) {
108 return rc;
109 }
110 }
111
112 name1+=r1>>8;
113 name2+=r2>>8;
114 }
115 }
116
117 U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char * name1,const char * name2)118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
119 int32_t rc, r1, r2;
120
121 for(;;) {
122 r1=getEBCDICPropertyNameChar(name1);
123 r2=getEBCDICPropertyNameChar(name2);
124
125 /* If we reach the ends of both strings then they match */
126 if(((r1|r2)&0xff)==0) {
127 return 0;
128 }
129
130 /* Compare the lowercased characters */
131 if(r1!=r2) {
132 rc=(r1&0xff)-(r2&0xff);
133 if(rc!=0) {
134 return rc;
135 }
136 }
137
138 name1+=r1>>8;
139 name2+=r2>>8;
140 }
141 }
142
143 U_CDECL_END
144
145 U_NAMESPACE_BEGIN
146
findProperty(int32_t property)147 int32_t PropNameData::findProperty(int32_t property) {
148 int32_t i=1; // valueMaps index, initially after numRanges
149 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
150 // Read and skip the start and limit of this range.
151 int32_t start=valueMaps[i];
152 int32_t limit=valueMaps[i+1];
153 i+=2;
154 if(property<start) {
155 break;
156 }
157 if(property<limit) {
158 return i+(property-start)*2;
159 }
160 i+=(limit-start)*2; // Skip all entries for this range.
161 }
162 return 0;
163 }
164
findPropertyValueNameGroup(int32_t valueMapIndex,int32_t value)165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
166 if(valueMapIndex==0) {
167 return 0; // The property does not have named values.
168 }
169 ++valueMapIndex; // Skip the BytesTrie offset.
170 int32_t numRanges=valueMaps[valueMapIndex++];
171 if(numRanges<0x10) {
172 // Ranges of values.
173 for(; numRanges>0; --numRanges) {
174 // Read and skip the start and limit of this range.
175 int32_t start=valueMaps[valueMapIndex];
176 int32_t limit=valueMaps[valueMapIndex+1];
177 valueMapIndex+=2;
178 if(value<start) {
179 break;
180 }
181 if(value<limit) {
182 return valueMaps[valueMapIndex+value-start];
183 }
184 valueMapIndex+=limit-start; // Skip all entries for this range.
185 }
186 } else {
187 // List of values.
188 int32_t valuesStart=valueMapIndex;
189 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
190 do {
191 int32_t v=valueMaps[valueMapIndex];
192 if(value<v) {
193 break;
194 }
195 if(value==v) {
196 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
197 }
198 } while(++valueMapIndex<nameGroupOffsetsStart);
199 }
200 return 0;
201 }
202
getName(const char * nameGroup,int32_t nameIndex)203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
204 int32_t numNames=*nameGroup++;
205 if(nameIndex<0 || numNames<=nameIndex) {
206 return NULL;
207 }
208 // Skip nameIndex names.
209 for(; nameIndex>0; --nameIndex) {
210 nameGroup=uprv_strchr(nameGroup, 0)+1;
211 }
212 if(*nameGroup==0) {
213 return NULL; // no name (Property[Value]Aliases.txt has "n/a")
214 }
215 return nameGroup;
216 }
217
containsName(BytesTrie & trie,const char * name)218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
219 if(name==NULL) {
220 return FALSE;
221 }
222 UStringTrieResult result=USTRINGTRIE_NO_VALUE;
223 char c;
224 while((c=*name++)!=0) {
225 c=uprv_invCharToLowercaseAscii(c);
226 // Ignore delimiters '-', '_', and ASCII White_Space.
227 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
228 continue;
229 }
230 if(!USTRINGTRIE_HAS_NEXT(result)) {
231 return FALSE;
232 }
233 result=trie.next((uint8_t)c);
234 }
235 return USTRINGTRIE_HAS_VALUE(result);
236 }
237
getPropertyName(int32_t property,int32_t nameChoice)238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
239 int32_t valueMapIndex=findProperty(property);
240 if(valueMapIndex==0) {
241 return NULL; // Not a known property.
242 }
243 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
244 }
245
getPropertyValueName(int32_t property,int32_t value,int32_t nameChoice)246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
247 int32_t valueMapIndex=findProperty(property);
248 if(valueMapIndex==0) {
249 return NULL; // Not a known property.
250 }
251 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
252 if(nameGroupOffset==0) {
253 return NULL;
254 }
255 return getName(nameGroups+nameGroupOffset, nameChoice);
256 }
257
getPropertyOrValueEnum(int32_t bytesTrieOffset,const char * alias)258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
259 BytesTrie trie(bytesTries+bytesTrieOffset);
260 if(containsName(trie, alias)) {
261 return trie.getValue();
262 } else {
263 return UCHAR_INVALID_CODE;
264 }
265 }
266
getPropertyEnum(const char * alias)267 int32_t PropNameData::getPropertyEnum(const char *alias) {
268 return getPropertyOrValueEnum(0, alias);
269 }
270
getPropertyValueEnum(int32_t property,const char * alias)271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
272 int32_t valueMapIndex=findProperty(property);
273 if(valueMapIndex==0) {
274 return UCHAR_INVALID_CODE; // Not a known property.
275 }
276 valueMapIndex=valueMaps[valueMapIndex+1];
277 if(valueMapIndex==0) {
278 return UCHAR_INVALID_CODE; // The property does not have named values.
279 }
280 // valueMapIndex is the start of the property's valueMap,
281 // where the first word is the BytesTrie offset.
282 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
283 }
284 U_NAMESPACE_END
285
286 //----------------------------------------------------------------------
287 // Public API implementation
288
289 U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,UPropertyNameChoice nameChoice)290 u_getPropertyName(UProperty property,
291 UPropertyNameChoice nameChoice) {
292 U_NAMESPACE_USE
293 return PropNameData::getPropertyName(property, nameChoice);
294 }
295
296 U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char * alias)297 u_getPropertyEnum(const char* alias) {
298 U_NAMESPACE_USE
299 return (UProperty)PropNameData::getPropertyEnum(alias);
300 }
301
302 U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,int32_t value,UPropertyNameChoice nameChoice)303 u_getPropertyValueName(UProperty property,
304 int32_t value,
305 UPropertyNameChoice nameChoice) {
306 U_NAMESPACE_USE
307 return PropNameData::getPropertyValueName(property, value, nameChoice);
308 }
309
310 U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,const char * alias)311 u_getPropertyValueEnum(UProperty property,
312 const char* alias) {
313 U_NAMESPACE_USE
314 return PropNameData::getPropertyValueEnum(property, alias);
315 }
316
317 U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode)318 uscript_getName(UScriptCode scriptCode){
319 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
320 U_LONG_PROPERTY_NAME);
321 }
322
323 U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode)324 uscript_getShortName(UScriptCode scriptCode){
325 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
326 U_SHORT_PROPERTY_NAME);
327 }
328