1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2004-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: ucase.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004aug30
16 * created by: Markus W. Scherer
17 *
18 * Low-level Unicode character/string case mapping code.
19 * Much code moved here (and modified) from uchar.c.
20 */
21
22 #include "unicode/utypes.h"
23 #include "unicode/unistr.h"
24 #include "unicode/uset.h"
25 #include "unicode/udata.h" /* UDataInfo */
26 #include "unicode/utf16.h"
27 #include "ucmndata.h" /* DataHeader */
28 #include "udatamem.h"
29 #include "umutex.h"
30 #include "uassert.h"
31 #include "cmemory.h"
32 #include "utrie2.h"
33 #include "ucase.h"
34
/*
 * Aggregate holding the case-mapping property data that the functions in
 * this file read through ucase_props_singleton.
 * NOTE(review): the singleton itself is not defined in the visible part of
 * this file; presumably it comes from the generated ucase_props_data.h,
 * so the member order here must stay in sync with that initializer - confirm.
 */
struct UCaseProps {
    UDataMemory *mem;               /* not referenced by the code visible here */
    const int32_t *indexes;         /* not referenced by the code visible here */
    const uint16_t *exceptions;     /* base of the exception words, addressed via GET_EXCEPTIONS() */
    const uint16_t *unfold;         /* reverse case folding table; ucase_addStringCaseClosure() treats NULL as "no data" */

    UTrie2 trie;                    /* code point -> 16-bit properties word, read with UTRIE2_GET16() */
    uint8_t formatVersion[4];       /* not referenced by the code visible here */
};
44
45 /* ucase_props_data.h is machine-generated by gencase --csource */
46 #define INCLUDED_FROM_UCASE_CPP
47 #include "ucase_props_data.h"
48
49 /* set of property starts for UnicodeSet ------------------------------------ */
50
51 static UBool U_CALLCONV
_enumPropertyStartsRange(const void * context,UChar32 start,UChar32,uint32_t)52 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
53 /* add the start code point to the USet */
54 const USetAdder *sa=(const USetAdder *)context;
55 sa->add(sa->set, start);
56 return TRUE;
57 }
58
59 U_CFUNC void U_EXPORT2
ucase_addPropertyStarts(const USetAdder * sa,UErrorCode * pErrorCode)60 ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
61 if(U_FAILURE(*pErrorCode)) {
62 return;
63 }
64
65 /* add the start code point of each same-value range of the trie */
66 utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
67
68 /* add code points with hardcoded properties, plus the ones following them */
69
70 /* (none right now, see comment below) */
71
72 /*
73 * Omit code points with hardcoded specialcasing properties
74 * because we do not build property UnicodeSets for them right now.
75 */
76 }
77
78 /* data access primitives --------------------------------------------------- */
79
80 U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie()81 ucase_getTrie() {
82 return &ucase_props_singleton.trie;
83 }
84
/*
 * Pointer to the exception data for a properties word:
 * (csp)->exceptions advanced by the index stored in props above UCASE_EXC_SHIFT.
 */
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))

/*
 * number of bits in an 8-bit integer value
 * (flagsOffset[b] is the count of 1-bits in b; used by SLOT_OFFSET() below)
 */
static const uint8_t flagsOffset[256]={
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};

/* Non-zero if bit idx is set in flags, i.e. the optional slot idx is present. */
#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
/*
 * Number of slots that precede slot idx: the count of set bits in flags
 * below bit idx. The masked value indexes flagsOffset[256] directly.
 */
#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
109
/*
 * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
 * The macro itself does not test HAS_SLOT(); the caller must do so first.
 *
 * If UCASE_EXC_DOUBLE_SLOTS is not set in excWord, every slot is one
 * uint16_t; otherwise every slot is two uint16_t which are combined
 * as (first<<16)|second.
 *
 * Written as a single do { } while(0) statement so that it is safe inside
 * unbraced if/else; every invocation must be followed by a semicolon.
 *
 * @param excWord (in) initial exceptions word
 * @param idx (in) desired slot index
 * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
 *               moved to the last uint16_t of the value, use +1 for beginning of next slot
 * @param value (out) int32_t or uint32_t output; receives the slot value
 */
#define GET_SLOT_VALUE(excWord, idx, pExc16, value) \
    do { \
        if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
            (pExc16)+=SLOT_OFFSET(excWord, idx); \
            (value)=*(pExc16); \
        } else { \
            (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
            (value)=*(pExc16)++; \
            (value)=((value)<<16)|*(pExc16); \
        } \
    } while(0)
128
129 /* simple case mappings ----------------------------------------------------- */
130
131 U_CAPI UChar32 U_EXPORT2
ucase_tolower(UChar32 c)132 ucase_tolower(UChar32 c) {
133 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
134 if(!UCASE_HAS_EXCEPTION(props)) {
135 if(UCASE_IS_UPPER_OR_TITLE(props)) {
136 c+=UCASE_GET_DELTA(props);
137 }
138 } else {
139 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
140 uint16_t excWord=*pe++;
141 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
142 int32_t delta;
143 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
144 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
145 }
146 if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
147 GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
148 }
149 }
150 return c;
151 }
152
153 U_CAPI UChar32 U_EXPORT2
ucase_toupper(UChar32 c)154 ucase_toupper(UChar32 c) {
155 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
156 if(!UCASE_HAS_EXCEPTION(props)) {
157 if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
158 c+=UCASE_GET_DELTA(props);
159 }
160 } else {
161 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
162 uint16_t excWord=*pe++;
163 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
164 int32_t delta;
165 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
166 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
167 }
168 if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
169 GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
170 }
171 }
172 return c;
173 }
174
175 U_CAPI UChar32 U_EXPORT2
ucase_totitle(UChar32 c)176 ucase_totitle(UChar32 c) {
177 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
178 if(!UCASE_HAS_EXCEPTION(props)) {
179 if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
180 c+=UCASE_GET_DELTA(props);
181 }
182 } else {
183 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
184 uint16_t excWord=*pe++;
185 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
186 int32_t delta;
187 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
188 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
189 }
190 int32_t idx;
191 if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
192 idx=UCASE_EXC_TITLE;
193 } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
194 idx=UCASE_EXC_UPPER;
195 } else {
196 return c;
197 }
198 GET_SLOT_VALUE(excWord, idx, pe, c);
199 }
200 return c;
201 }
202
/*
 * Fixed UTF-16 sequences built from an i/j-like letter followed by
 * U+0307 COMBINING DOT ABOVE, some with one more combining accent.
 * Within the visible part of this file only iDot is used
 * (ucase_addCaseClosure(), for U+0130).
 */
static const UChar iDot[2] = { 0x69, 0x307 };               /* i, dot above */
static const UChar jDot[2] = { 0x6a, 0x307 };               /* j, dot above */
/*
 * NOTE(review): declared with 3 elements but only 2 initializers, so the
 * third element is zero-initialized; presumably [2] was intended - confirm
 * that all users pass a length of 2.
 */
static const UChar iOgonekDot[3] = { 0x12f, 0x307 };        /* U+012F (i with ogonek), dot above */
static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };   /* i, dot above, U+0300 (grave) */
static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };   /* i, dot above, U+0301 (acute) */
static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };   /* i, dot above, U+0303 (tilde) */
209
210
211 U_CFUNC void U_EXPORT2
ucase_addCaseClosure(UChar32 c,const USetAdder * sa)212 ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
213 uint16_t props;
214
215 /*
216 * Hardcode the case closure of i and its relatives and ignore the
217 * data file data for these characters.
218 * The Turkic dotless i and dotted I with their case mapping conditions
219 * and case folding option make the related characters behave specially.
220 * This code matches their closure behavior to their case folding behavior.
221 */
222
223 switch(c) {
224 case 0x49:
225 /* regular i and I are in one equivalence class */
226 sa->add(sa->set, 0x69);
227 return;
228 case 0x69:
229 sa->add(sa->set, 0x49);
230 return;
231 case 0x130:
232 /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
233 sa->addString(sa->set, iDot, 2);
234 return;
235 case 0x131:
236 /* dotless i is in a class by itself */
237 return;
238 default:
239 /* otherwise use the data file data */
240 break;
241 }
242
243 props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
244 if(!UCASE_HAS_EXCEPTION(props)) {
245 if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
246 /* add the one simple case mapping, no matter what type it is */
247 int32_t delta=UCASE_GET_DELTA(props);
248 if(delta!=0) {
249 sa->add(sa->set, c+delta);
250 }
251 }
252 } else {
253 /*
254 * c has exceptions, so there may be multiple simple and/or
255 * full case mappings. Add them all.
256 */
257 const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
258 const UChar *closure;
259 uint16_t excWord=*pe++;
260 int32_t idx, closureLength, fullLength, length;
261
262 pe0=pe;
263
264 /* add all simple case mappings */
265 for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
266 if(HAS_SLOT(excWord, idx)) {
267 pe=pe0;
268 GET_SLOT_VALUE(excWord, idx, pe, c);
269 sa->add(sa->set, c);
270 }
271 }
272 if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
273 pe=pe0;
274 int32_t delta;
275 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
276 sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
277 }
278
279 /* get the closure string pointer & length */
280 if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
281 pe=pe0;
282 GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
283 closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
284 closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
285 } else {
286 closureLength=0;
287 closure=NULL;
288 }
289
290 /* add the full case folding */
291 if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
292 pe=pe0;
293 GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
294
295 /* start of full case mapping strings */
296 ++pe;
297
298 fullLength&=0xffff; /* bits 16 and higher are reserved */
299
300 /* skip the lowercase result string */
301 pe+=fullLength&UCASE_FULL_LOWER;
302 fullLength>>=4;
303
304 /* add the full case folding string */
305 length=fullLength&0xf;
306 if(length!=0) {
307 sa->addString(sa->set, (const UChar *)pe, length);
308 pe+=length;
309 }
310
311 /* skip the uppercase and titlecase strings */
312 fullLength>>=4;
313 pe+=fullLength&0xf;
314 fullLength>>=4;
315 pe+=fullLength;
316
317 closure=(const UChar *)pe; /* behind full case mappings */
318 }
319
320 /* add each code point in the closure string */
321 for(idx=0; idx<closureLength;) {
322 U16_NEXT_UNSAFE(closure, idx, c);
323 sa->add(sa->set, c);
324 }
325 }
326 }
327
328 /*
329 * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
330 * must be length>0 and max>0 and length<=max
331 */
332 static inline int32_t
strcmpMax(const UChar * s,int32_t length,const UChar * t,int32_t max)333 strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
334 int32_t c1, c2;
335
336 max-=length; /* we require length<=max, so no need to decrement max in the loop */
337 do {
338 c1=*s++;
339 c2=*t++;
340 if(c2==0) {
341 return 1; /* reached the end of t but not of s */
342 }
343 c1-=c2;
344 if(c1!=0) {
345 return c1; /* return difference result */
346 }
347 } while(--length>0);
348 /* ends with length==0 */
349
350 if(max==0 || *t==0) {
351 return 0; /* equal to length of both strings */
352 } else {
353 return -max; /* return lengh difference */
354 }
355 }
356
357 U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UChar * s,int32_t length,const USetAdder * sa)358 ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
359 int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
360
361 if(ucase_props_singleton.unfold==NULL || s==NULL) {
362 return FALSE; /* no reverse case folding data, or no string */
363 }
364 if(length<=1) {
365 /* the string is too short to find any match */
366 /*
367 * more precise would be:
368 * if(!u_strHasMoreChar32Than(s, length, 1))
369 * but this does not make much practical difference because
370 * a single supplementary code point would just not be found
371 */
372 return FALSE;
373 }
374
375 const uint16_t *unfold=ucase_props_singleton.unfold;
376 unfoldRows=unfold[UCASE_UNFOLD_ROWS];
377 unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
378 unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
379 unfold+=unfoldRowWidth;
380
381 if(length>unfoldStringWidth) {
382 /* the string is too long to find any match */
383 return FALSE;
384 }
385
386 /* do a binary search for the string */
387 start=0;
388 limit=unfoldRows;
389 while(start<limit) {
390 i=(start+limit)/2;
391 const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth));
392 result=strcmpMax(s, length, p, unfoldStringWidth);
393
394 if(result==0) {
395 /* found the string: add each code point, and its case closure */
396 UChar32 c;
397
398 for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
399 U16_NEXT_UNSAFE(p, i, c);
400 sa->add(sa->set, c);
401 ucase_addCaseClosure(c, sa);
402 }
403 return TRUE;
404 } else if(result<0) {
405 limit=i;
406 } else /* result>0 */ {
407 start=i+1;
408 }
409 }
410
411 return FALSE; /* string not found */
412 }
413
414 U_NAMESPACE_BEGIN
415
/*
 * Prepares iteration over the reverse case folding ("unfold") table of
 * ucase_props_singleton: reads the row count, row width and string-column
 * width from the header row, then moves unfold past that header so that
 * row 0 is the first data row.
 *
 * NOTE(review): the initializers index through the unfold member, so this
 * relies on unfold being declared before the other members in the class -
 * confirm against the class declaration.
 * NOTE(review): unlike ucase_addStringCaseClosure(), there is no check for
 * a NULL unfold pointer here - confirm that it cannot be NULL.
 */
FullCaseFoldingIterator::FullCaseFoldingIterator()
        : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
          unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
          unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
          unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
          currentRow(0),
          rowCpIndex(unfoldStringWidth) {  // code points start right after the string column
    unfold+=unfoldRowWidth;  // skip the header row
}
425
426 UChar32
next(UnicodeString & full)427 FullCaseFoldingIterator::next(UnicodeString &full) {
428 // Advance past the last-delivered code point.
429 const UChar *p=unfold+(currentRow*unfoldRowWidth);
430 if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
431 ++currentRow;
432 p+=unfoldRowWidth;
433 rowCpIndex=unfoldStringWidth;
434 }
435 if(currentRow>=unfoldRows) { return U_SENTINEL; }
436 // Set "full" to the NUL-terminated string in the first unfold column.
437 int32_t length=unfoldStringWidth;
438 while(length>0 && p[length-1]==0) { --length; }
439 full.setTo(FALSE, p, length);
440 // Return the code point.
441 UChar32 c;
442 U16_NEXT_UNSAFE(p, rowCpIndex, c);
443 return c;
444 }
445
// Lookup tables of small signed values for the first code points.
// Each table lists 384 initializers (24 rows of 16), i.e. one entry for each
// of U+0000..U+017F; e.g. in TO_LOWER_NORMAL the entries for U+0041..U+005A
// (A-Z) are 32 and in TO_UPPER_NORMAL the entries for U+0061..U+007A are -32.
// NOTE(review): the entries are presumably deltas that a caller adds to the
// code point, with EXC marking code points that need the regular (slower)
// mapping code; LIMIT and EXC are declared outside this file - confirm.
namespace LatinCase {

// Lowercasing. NOTE(review): presumably for locales without special
// lowercasing behavior - confirm against the users of this table.
const int8_t TO_LOWER_NORMAL[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,

    0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};

// Lowercasing variant. Differs from TO_LOWER_NORMAL only in additional EXC
// entries: U+0049, U+004A, U+00CC, U+00CD, U+0128 and U+012E.
// NOTE(review): going by the name, presumably for Turkic (tr) and
// Lithuanian (lt) lowercasing - confirm.
const int8_t TO_LOWER_TR_LT[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
    EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,

    0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};

// Uppercasing. NOTE(review): presumably for locales without special
// uppercasing behavior - confirm against the users of this table.
const int8_t TO_UPPER_NORMAL[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

    -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};

// Uppercasing variant. Differs from TO_UPPER_NORMAL only in the entry for
// U+0069 (i), which is EXC here.
// NOTE(review): going by the name, presumably for Turkic (tr) uppercasing - confirm.
const int8_t TO_UPPER_TR[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

    -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};

}  // namespace LatinCase
577
578 U_NAMESPACE_END
579
580 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
581 U_CAPI int32_t U_EXPORT2
ucase_getType(UChar32 c)582 ucase_getType(UChar32 c) {
583 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
584 return UCASE_GET_TYPE(props);
585 }
586
587 /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
588 U_CAPI int32_t U_EXPORT2
ucase_getTypeOrIgnorable(UChar32 c)589 ucase_getTypeOrIgnorable(UChar32 c) {
590 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
591 return UCASE_GET_TYPE_AND_IGNORABLE(props);
592 }
593
594 /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
595 static inline int32_t
getDotType(UChar32 c)596 getDotType(UChar32 c) {
597 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
598 if(!UCASE_HAS_EXCEPTION(props)) {
599 return props&UCASE_DOT_MASK;
600 } else {
601 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
602 return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
603 }
604 }
605
606 U_CAPI UBool U_EXPORT2
ucase_isSoftDotted(UChar32 c)607 ucase_isSoftDotted(UChar32 c) {
608 return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
609 }
610
611 U_CAPI UBool U_EXPORT2
ucase_isCaseSensitive(UChar32 c)612 ucase_isCaseSensitive(UChar32 c) {
613 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
614 if(!UCASE_HAS_EXCEPTION(props)) {
615 return (UBool)((props&UCASE_SENSITIVE)!=0);
616 } else {
617 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
618 return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0);
619 }
620 }
621
622 /* string casing ------------------------------------------------------------ */
623
624 /*
625 * These internal functions form the core of string case mappings.
626 * They map single code points to result code points or strings and take
627 * all necessary conditions (context, locale ID, options) into account.
628 *
629 * They do not iterate over the source or write to the destination
630 * so that the same functions are useful for non-standard string storage,
631 * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
632 * For the same reason, the "surrounding text" context is passed in as a
633 * UCaseContextIterator which does not make any assumptions about
634 * the underlying storage.
635 *
636 * This section contains helper functions that check for conditions
637 * in the input text surrounding the current code point
638 * according to SpecialCasing.txt.
639 *
640 * Each helper function gets the index
641 * - after the current code point if it looks at following text
642 * - before the current code point if it looks at preceding text
643 *
644 * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
645 *
646 * Final_Sigma
647 * C is preceded by a sequence consisting of
648 * a cased letter and a case-ignorable sequence,
649 * and C is not followed by a sequence consisting of
650 * an ignorable sequence and then a cased letter.
651 *
652 * More_Above
653 * C is followed by one or more characters of combining class 230 (ABOVE)
654 * in the combining character sequence.
655 *
656 * After_Soft_Dotted
657 * The last preceding character with combining class of zero before C
658 * was Soft_Dotted,
659 * and there is no intervening combining character class 230 (ABOVE).
660 *
661 * Before_Dot
662 * C is followed by combining dot above (U+0307).
663 * Any sequence of characters with a combining class that is neither 0 nor 230
664 * may intervene between the current character and the combining dot above.
665 *
666 * The erratum from 2002-10-31 adds the condition
667 *
668 * After_I
669 * The last preceding base character was an uppercase I, and there is no
670 * intervening combining character class 230 (ABOVE).
671 *
672 * (See Jitterbug 2344 and the comments on After_I below.)
673 *
674 * Helper definitions in Unicode 3.2 UAX 21:
675 *
676 * D1. A character C is defined to be cased
677 * if it meets any of the following criteria:
678 *
679 * - The general category of C is Titlecase Letter (Lt)
680 * - In [CoreProps], C has one of the properties Uppercase, or Lowercase
681 * - Given D = NFD(C), then it is not the case that:
682 * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
 *     (This third criterion does not add any characters to the list
684 * for Unicode 3.2. Ignored.)
685 *
686 * D2. A character C is defined to be case-ignorable
687 * if it meets either of the following criteria:
688 *
689 * - The general category of C is
690 * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
691 * Letter Modifier (Lm), or Symbol Modifier (Sk)
692 * - C is one of the following characters
693 * U+0027 APOSTROPHE
694 * U+00AD SOFT HYPHEN (SHY)
695 * U+2019 RIGHT SINGLE QUOTATION MARK
696 * (the preferred character for apostrophe)
697 *
698 * D3. A case-ignorable sequence is a sequence of
699 * zero or more case-ignorable characters.
700 */
701
/*
 * Case-insensitive tests of one char against a single letter, written with
 * character literals; used by ucase_getCaseLocale() to recognize language codes.
 * Each macro evaluates its argument more than once, so do not pass
 * expressions with side effects.
 */
#define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L')
#define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U')
#define is_z(c) ((c)=='z' || (c)=='Z')

/* separator? ('_', '-', or the terminating NUL) */
#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
713
714 /**
715 * Requires non-NULL locale ID but otherwise does the equivalent of
716 * checking for language codes as if uloc_getLanguage() were called:
717 * Accepts both 2- and 3-letter codes and accepts case variants.
718 */
719 U_CFUNC int32_t
ucase_getCaseLocale(const char * locale)720 ucase_getCaseLocale(const char *locale) {
721 /*
722 * This function used to use uloc_getLanguage(), but the current code
723 * removes the dependency of this low-level code on uloc implementation code
724 * and is faster because not the whole locale ID has to be
725 * examined and copied/transformed.
726 *
727 * Because this code does not want to depend on uloc, the caller must
728 * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
729 */
730 char c=*locale++;
731 // Fastpath for English "en" which is often used for default (=root locale) case mappings,
732 // and for Chinese "zh": Very common but no special case mapping behavior.
733 // Then check lowercase vs. uppercase to reduce the number of comparisons
734 // for other locales without special behavior.
735 if(c=='e') {
736 /* el or ell? */
737 c=*locale++;
738 if(is_l(c)) {
739 c=*locale++;
740 if(is_l(c)) {
741 c=*locale;
742 }
743 if(is_sep(c)) {
744 return UCASE_LOC_GREEK;
745 }
746 }
747 // en, es, ... -> root
748 } else if(c=='z') {
749 return UCASE_LOC_ROOT;
750 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
751 } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
752 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
753 } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
754 #else
755 # error Unknown charset family!
756 #endif
757 // lowercase c
758 if(c=='t') {
759 /* tr or tur? */
760 c=*locale++;
761 if(is_u(c)) {
762 c=*locale++;
763 }
764 if(is_r(c)) {
765 c=*locale;
766 if(is_sep(c)) {
767 return UCASE_LOC_TURKISH;
768 }
769 }
770 } else if(c=='a') {
771 /* az or aze? */
772 c=*locale++;
773 if(is_z(c)) {
774 c=*locale++;
775 if(is_e(c)) {
776 c=*locale;
777 }
778 if(is_sep(c)) {
779 return UCASE_LOC_TURKISH;
780 }
781 }
782 } else if(c=='l') {
783 /* lt or lit? */
784 c=*locale++;
785 if(is_i(c)) {
786 c=*locale++;
787 }
788 if(is_t(c)) {
789 c=*locale;
790 if(is_sep(c)) {
791 return UCASE_LOC_LITHUANIAN;
792 }
793 }
794 } else if(c=='n') {
795 /* nl or nld? */
796 c=*locale++;
797 if(is_l(c)) {
798 c=*locale++;
799 if(is_d(c)) {
800 c=*locale;
801 }
802 if(is_sep(c)) {
803 return UCASE_LOC_DUTCH;
804 }
805 }
806 }
807 } else {
808 // uppercase c
809 // Same code as for lowercase c but also check for 'E'.
810 if(c=='T') {
811 /* tr or tur? */
812 c=*locale++;
813 if(is_u(c)) {
814 c=*locale++;
815 }
816 if(is_r(c)) {
817 c=*locale;
818 if(is_sep(c)) {
819 return UCASE_LOC_TURKISH;
820 }
821 }
822 } else if(c=='A') {
823 /* az or aze? */
824 c=*locale++;
825 if(is_z(c)) {
826 c=*locale++;
827 if(is_e(c)) {
828 c=*locale;
829 }
830 if(is_sep(c)) {
831 return UCASE_LOC_TURKISH;
832 }
833 }
834 } else if(c=='L') {
835 /* lt or lit? */
836 c=*locale++;
837 if(is_i(c)) {
838 c=*locale++;
839 }
840 if(is_t(c)) {
841 c=*locale;
842 if(is_sep(c)) {
843 return UCASE_LOC_LITHUANIAN;
844 }
845 }
846 } else if(c=='E') {
847 /* el or ell? */
848 c=*locale++;
849 if(is_l(c)) {
850 c=*locale++;
851 if(is_l(c)) {
852 c=*locale;
853 }
854 if(is_sep(c)) {
855 return UCASE_LOC_GREEK;
856 }
857 }
858 } else if(c=='N') {
859 /* nl or nld? */
860 c=*locale++;
861 if(is_l(c)) {
862 c=*locale++;
863 if(is_d(c)) {
864 c=*locale;
865 }
866 if(is_sep(c)) {
867 return UCASE_LOC_DUTCH;
868 }
869 }
870 }
871 }
872 return UCASE_LOC_ROOT;
873 }
874
875 /*
876 * Is followed by
877 * {case-ignorable}* cased
878 * ?
879 * (dir determines looking forward/backward)
880 * If a character is case-ignorable, it is skipped regardless of whether
881 * it is also cased or not.
882 */
883 static UBool
isFollowedByCasedLetter(UCaseContextIterator * iter,void * context,int8_t dir)884 isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
885 UChar32 c;
886
887 if(iter==NULL) {
888 return FALSE;
889 }
890
891 for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
892 int32_t type=ucase_getTypeOrIgnorable(c);
893 if(type&4) {
894 /* case-ignorable, continue with the loop */
895 } else if(type!=UCASE_NONE) {
896 return TRUE; /* followed by cased letter */
897 } else {
898 return FALSE; /* uncased and not case-ignorable */
899 }
900 }
901
902 return FALSE; /* not followed by cased letter */
903 }
904
905 /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
906 static UBool
isPrecededBySoftDotted(UCaseContextIterator * iter,void * context)907 isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
908 UChar32 c;
909 int32_t dotType;
910 int8_t dir;
911
912 if(iter==NULL) {
913 return FALSE;
914 }
915
916 for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
917 dotType=getDotType(c);
918 if(dotType==UCASE_SOFT_DOTTED) {
919 return TRUE; /* preceded by TYPE_i */
920 } else if(dotType!=UCASE_OTHER_ACCENT) {
921 return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
922 }
923 }
924
925 return FALSE; /* not preceded by TYPE_i */
926 }
927
928 /*
929 * See Jitterbug 2344:
930 * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
931 * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
932 * we made those releases compatible with Unicode 3.2 which had not fixed
933 * a related bug in SpecialCasing.txt.
934 *
935 * From the Jitterbug 2344 text:
936 * ... this bug is listed as a Unicode erratum
937 * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
938 * <quote>
939 * There are two errors in SpecialCasing.txt.
940 * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
941 * 2. An incorrect context definition. Correct as follows:
942 * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
943 * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
944 * ---
945 * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
946 * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
947 * where the context After_I is defined as:
948 * The last preceding base character was an uppercase I, and there is no
949 * intervening combining character class 230 (ABOVE).
950 * </quote>
951 *
952 * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
953 *
954 * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
955 * # This matches the behavior of the canonically equivalent I-dot_above
956 *
957 * See also the description in this place in older versions of uchar.c (revision 1.100).
958 *
959 * Markus W. Scherer 2003-feb-15
960 */
961
962 /* Is preceded by base character 'I' with no intervening cc=230 ? */
963 static UBool
isPrecededBy_I(UCaseContextIterator * iter,void * context)964 isPrecededBy_I(UCaseContextIterator *iter, void *context) {
965 UChar32 c;
966 int32_t dotType;
967 int8_t dir;
968
969 if(iter==NULL) {
970 return FALSE;
971 }
972
973 for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
974 if(c==0x49) {
975 return TRUE; /* preceded by I */
976 }
977 dotType=getDotType(c);
978 if(dotType!=UCASE_OTHER_ACCENT) {
979 return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
980 }
981 }
982
983 return FALSE; /* not preceded by I */
984 }
985
986 /* Is followed by one or more cc==230 ? */
987 static UBool
isFollowedByMoreAbove(UCaseContextIterator * iter,void * context)988 isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
989 UChar32 c;
990 int32_t dotType;
991 int8_t dir;
992
993 if(iter==NULL) {
994 return FALSE;
995 }
996
997 for(dir=1; (c=iter(context, dir))>=0; dir=0) {
998 dotType=getDotType(c);
999 if(dotType==UCASE_ABOVE) {
1000 return TRUE; /* at least one cc==230 following */
1001 } else if(dotType!=UCASE_OTHER_ACCENT) {
1002 return FALSE; /* next base character, no more cc==230 following */
1003 }
1004 }
1005
1006 return FALSE; /* no more cc==230 following */
1007 }
1008
1009 /* Is followed by a dot above (without cc==230 in between) ? */
1010 static UBool
isFollowedByDotAbove(UCaseContextIterator * iter,void * context)1011 isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
1012 UChar32 c;
1013 int32_t dotType;
1014 int8_t dir;
1015
1016 if(iter==NULL) {
1017 return FALSE;
1018 }
1019
1020 for(dir=1; (c=iter(context, dir))>=0; dir=0) {
1021 if(c==0x307) {
1022 return TRUE;
1023 }
1024 dotType=getDotType(c);
1025 if(dotType!=UCASE_OTHER_ACCENT) {
1026 return FALSE; /* next base character or cc==230 in between */
1027 }
1028 }
1029
1030 return FALSE; /* no dot above following */
1031 }
1032
1033 U_CAPI int32_t U_EXPORT2
ucase_toFullLower(UChar32 c,UCaseContextIterator * iter,void * context,const UChar ** pString,int32_t loc)1034 ucase_toFullLower(UChar32 c,
1035 UCaseContextIterator *iter, void *context,
1036 const UChar **pString,
1037 int32_t loc) {
1038 // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1039 U_ASSERT(c >= 0);
1040 UChar32 result=c;
1041 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1042 if(!UCASE_HAS_EXCEPTION(props)) {
1043 if(UCASE_IS_UPPER_OR_TITLE(props)) {
1044 result=c+UCASE_GET_DELTA(props);
1045 }
1046 } else {
1047 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
1048 uint16_t excWord=*pe++;
1049 int32_t full;
1050
1051 pe2=pe;
1052
1053 if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
1054 /* use hardcoded conditions and mappings */
1055
1056 /*
1057 * Test for conditional mappings first
1058 * (otherwise the unconditional default mappings are always taken),
1059 * then test for characters that have unconditional mappings in SpecialCasing.txt,
1060 * then get the UnicodeData.txt mappings.
1061 */
1062 if( loc==UCASE_LOC_LITHUANIAN &&
1063 /* base characters, find accents above */
1064 (((c==0x49 || c==0x4a || c==0x12e) &&
1065 isFollowedByMoreAbove(iter, context)) ||
1066 /* precomposed with accent above, no need to find one */
1067 (c==0xcc || c==0xcd || c==0x128))
1068 ) {
1069 /*
1070 # Lithuanian
1071
1072 # Lithuanian retains the dot in a lowercase i when followed by accents.
1073
1074 # Introduce an explicit dot above when lowercasing capital I's and J's
1075 # whenever there are more accents above.
1076 # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
1077
1078 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
1079 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
1080 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
1081 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
1082 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
1083 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
1084 */
1085 switch(c) {
1086 case 0x49: /* LATIN CAPITAL LETTER I */
1087 *pString=iDot;
1088 return 2;
1089 case 0x4a: /* LATIN CAPITAL LETTER J */
1090 *pString=jDot;
1091 return 2;
1092 case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
1093 *pString=iOgonekDot;
1094 return 2;
1095 case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
1096 *pString=iDotGrave;
1097 return 3;
1098 case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
1099 *pString=iDotAcute;
1100 return 3;
1101 case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
1102 *pString=iDotTilde;
1103 return 3;
1104 default:
1105 return 0; /* will not occur */
1106 }
1107 /* # Turkish and Azeri */
1108 } else if(loc==UCASE_LOC_TURKISH && c==0x130) {
1109 /*
1110 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1111 # The following rules handle those cases.
1112
1113 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
1114 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
1115 */
1116 return 0x69;
1117 } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
1118 /*
1119 # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
1120 # This matches the behavior of the canonically equivalent I-dot_above
1121
1122 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
1123 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
1124 */
1125 *pString=nullptr;
1126 return 0; /* remove the dot (continue without output) */
1127 } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
1128 /*
1129 # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
1130
1131 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
1132 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
1133 */
1134 return 0x131;
1135 } else if(c==0x130) {
1136 /*
1137 # Preserve canonical equivalence for I with dot. Turkic is handled below.
1138
1139 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1140 */
1141 *pString=iDot;
1142 return 2;
1143 } else if( c==0x3a3 &&
1144 !isFollowedByCasedLetter(iter, context, 1) &&
1145 isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
1146 ) {
1147 /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
1148 /*
1149 # Special case for final form of sigma
1150
1151 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
1152 */
1153 return 0x3c2; /* greek small final sigma */
1154 } else {
1155 /* no known conditional special case mapping, use a normal mapping */
1156 }
1157 } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1158 GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1159 full&=UCASE_FULL_LOWER;
1160 if(full!=0) {
1161 /* set the output pointer to the lowercase mapping */
1162 *pString=reinterpret_cast<const UChar *>(pe+1);
1163
1164 /* return the string length */
1165 return full;
1166 }
1167 }
1168
1169 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1170 int32_t delta;
1171 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1172 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1173 }
1174 if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1175 GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
1176 }
1177 }
1178
1179 return (result==c) ? ~result : result;
1180 }
1181
1182 /* internal */
1183 static int32_t
toUpperOrTitle(UChar32 c,UCaseContextIterator * iter,void * context,const UChar ** pString,int32_t loc,UBool upperNotTitle)1184 toUpperOrTitle(UChar32 c,
1185 UCaseContextIterator *iter, void *context,
1186 const UChar **pString,
1187 int32_t loc,
1188 UBool upperNotTitle) {
1189 // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1190 U_ASSERT(c >= 0);
1191 UChar32 result=c;
1192 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1193 if(!UCASE_HAS_EXCEPTION(props)) {
1194 if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
1195 result=c+UCASE_GET_DELTA(props);
1196 }
1197 } else {
1198 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
1199 uint16_t excWord=*pe++;
1200 int32_t full, idx;
1201
1202 pe2=pe;
1203
1204 if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
1205 /* use hardcoded conditions and mappings */
1206 if(loc==UCASE_LOC_TURKISH && c==0x69) {
1207 /*
1208 # Turkish and Azeri
1209
1210 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1211 # The following rules handle those cases.
1212
1213 # When uppercasing, i turns into a dotted capital I
1214
1215 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
1216 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
1217 */
1218 return 0x130;
1219 } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
1220 /*
1221 # Lithuanian
1222
1223 # Lithuanian retains the dot in a lowercase i when followed by accents.
1224
1225 # Remove DOT ABOVE after "i" with upper or titlecase
1226
1227 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
1228 */
1229 *pString=nullptr;
1230 return 0; /* remove the dot (continue without output) */
1231 } else {
1232 /* no known conditional special case mapping, use a normal mapping */
1233 }
1234 } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1235 GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1236
1237 /* start of full case mapping strings */
1238 ++pe;
1239
1240 /* skip the lowercase and case-folding result strings */
1241 pe+=full&UCASE_FULL_LOWER;
1242 full>>=4;
1243 pe+=full&0xf;
1244 full>>=4;
1245
1246 if(upperNotTitle) {
1247 full&=0xf;
1248 } else {
1249 /* skip the uppercase result string */
1250 pe+=full&0xf;
1251 full=(full>>4)&0xf;
1252 }
1253
1254 if(full!=0) {
1255 /* set the output pointer to the result string */
1256 *pString=reinterpret_cast<const UChar *>(pe);
1257
1258 /* return the string length */
1259 return full;
1260 }
1261 }
1262
1263 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
1264 int32_t delta;
1265 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1266 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1267 }
1268 if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
1269 idx=UCASE_EXC_TITLE;
1270 } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
1271 /* here, titlecase is same as uppercase */
1272 idx=UCASE_EXC_UPPER;
1273 } else {
1274 return ~c;
1275 }
1276 GET_SLOT_VALUE(excWord, idx, pe2, result);
1277 }
1278
1279 return (result==c) ? ~result : result;
1280 }
1281
1282 U_CAPI int32_t U_EXPORT2
ucase_toFullUpper(UChar32 c,UCaseContextIterator * iter,void * context,const UChar ** pString,int32_t caseLocale)1283 ucase_toFullUpper(UChar32 c,
1284 UCaseContextIterator *iter, void *context,
1285 const UChar **pString,
1286 int32_t caseLocale) {
1287 return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
1288 }
1289
1290 U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(UChar32 c,UCaseContextIterator * iter,void * context,const UChar ** pString,int32_t caseLocale)1291 ucase_toFullTitle(UChar32 c,
1292 UCaseContextIterator *iter, void *context,
1293 const UChar **pString,
1294 int32_t caseLocale) {
1295 return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
1296 }
1297
1298 /* case folding ------------------------------------------------------------- */
1299
1300 /*
1301 * Case folding is similar to lowercasing.
1302 * The result may be a simple mapping, i.e., a single code point, or
1303 * a full mapping, i.e., a string.
1304 * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
1305 * then only the lowercase mapping is stored.
1306 *
1307 * Some special cases are hardcoded because their conditions cannot be
1308 * parsed and processed from CaseFolding.txt.
1309 *
1310 * Unicode 3.2 CaseFolding.txt specifies for its status field:
1311
1312 # C: common case folding, common mappings shared by both simple and full mappings.
1313 # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
1314 # S: simple case folding, mappings to single characters where different from F.
1315 # T: special case for uppercase I and dotted uppercase I
1316 # - For non-Turkic languages, this mapping is normally not used.
1317 # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
1318 #
1319 # Usage:
1320 # A. To do a simple case folding, use the mappings with status C + S.
1321 # B. To do a full case folding, use the mappings with status C + F.
1322 #
1323 # The mappings with status T can be used or omitted depending on the desired case-folding
1324 # behavior. (The default option is to exclude them.)
1325
1326 * Unicode 3.2 has 'T' mappings as follows:
1327
1328 0049; T; 0131; # LATIN CAPITAL LETTER I
1329 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1330
1331 * while the default mappings for these code points are:
1332
1333 0049; C; 0069; # LATIN CAPITAL LETTER I
1334 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1335
1336 * U+0130 has no simple case folding (simple-case-folds to itself).
1337 */
1338
1339 /* return the simple case folding mapping for c */
1340 U_CAPI UChar32 U_EXPORT2
ucase_fold(UChar32 c,uint32_t options)1341 ucase_fold(UChar32 c, uint32_t options) {
1342 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1343 if(!UCASE_HAS_EXCEPTION(props)) {
1344 if(UCASE_IS_UPPER_OR_TITLE(props)) {
1345 c+=UCASE_GET_DELTA(props);
1346 }
1347 } else {
1348 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
1349 uint16_t excWord=*pe++;
1350 int32_t idx;
1351 if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
1352 /* special case folding mappings, hardcoded */
1353 if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
1354 /* default mappings */
1355 if(c==0x49) {
1356 /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1357 return 0x69;
1358 } else if(c==0x130) {
1359 /* no simple case folding for U+0130 */
1360 return c;
1361 }
1362 } else {
1363 /* Turkic mappings */
1364 if(c==0x49) {
1365 /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1366 return 0x131;
1367 } else if(c==0x130) {
1368 /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1369 return 0x69;
1370 }
1371 }
1372 }
1373 if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1374 return c;
1375 }
1376 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1377 int32_t delta;
1378 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
1379 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1380 }
1381 if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
1382 idx=UCASE_EXC_FOLD;
1383 } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1384 idx=UCASE_EXC_LOWER;
1385 } else {
1386 return c;
1387 }
1388 GET_SLOT_VALUE(excWord, idx, pe, c);
1389 }
1390 return c;
1391 }
1392
1393 /*
1394 * Issue for canonical caseless match (UAX #21):
1395 * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
1396 * canonical equivalence, unlike default-option casefolding.
1397 * For example, I-grave and I + grave fold to strings that are not canonically
1398 * equivalent.
1399 * For more details, see the comment in unorm_compare() in unorm.cpp
1400 * and the intermediate prototype changes for Jitterbug 2021.
1401 * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
1402 *
1403 * This did not get fixed because it appears that it is not possible to fix
1404 * it for uppercase and lowercase characters (I-grave vs. i-grave)
1405 * together in a way that they still fold to common result strings.
1406 */
1407
1408 U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(UChar32 c,const UChar ** pString,uint32_t options)1409 ucase_toFullFolding(UChar32 c,
1410 const UChar **pString,
1411 uint32_t options) {
1412 // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1413 U_ASSERT(c >= 0);
1414 UChar32 result=c;
1415 uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1416 if(!UCASE_HAS_EXCEPTION(props)) {
1417 if(UCASE_IS_UPPER_OR_TITLE(props)) {
1418 result=c+UCASE_GET_DELTA(props);
1419 }
1420 } else {
1421 const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
1422 uint16_t excWord=*pe++;
1423 int32_t full, idx;
1424
1425 pe2=pe;
1426
1427 if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
1428 /* use hardcoded conditions and mappings */
1429 if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
1430 /* default mappings */
1431 if(c==0x49) {
1432 /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1433 return 0x69;
1434 } else if(c==0x130) {
1435 /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1436 *pString=iDot;
1437 return 2;
1438 }
1439 } else {
1440 /* Turkic mappings */
1441 if(c==0x49) {
1442 /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1443 return 0x131;
1444 } else if(c==0x130) {
1445 /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1446 return 0x69;
1447 }
1448 }
1449 } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1450 GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1451
1452 /* start of full case mapping strings */
1453 ++pe;
1454
1455 /* skip the lowercase result string */
1456 pe+=full&UCASE_FULL_LOWER;
1457 full=(full>>4)&0xf;
1458
1459 if(full!=0) {
1460 /* set the output pointer to the result string */
1461 *pString=reinterpret_cast<const UChar *>(pe);
1462
1463 /* return the string length */
1464 return full;
1465 }
1466 }
1467
1468 if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1469 return ~c;
1470 }
1471 if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1472 int32_t delta;
1473 GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1474 return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1475 }
1476 if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
1477 idx=UCASE_EXC_FOLD;
1478 } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1479 idx=UCASE_EXC_LOWER;
1480 } else {
1481 return ~c;
1482 }
1483 GET_SLOT_VALUE(excWord, idx, pe2, result);
1484 }
1485
1486 return (result==c) ? ~result : result;
1487 }
1488
1489 /* case mapping properties API ---------------------------------------------- */
1490
1491 /* public API (see uchar.h) */
1492
1493 U_CAPI UBool U_EXPORT2
u_isULowercase(UChar32 c)1494 u_isULowercase(UChar32 c) {
1495 return (UBool)(UCASE_LOWER==ucase_getType(c));
1496 }
1497
1498 U_CAPI UBool U_EXPORT2
u_isUUppercase(UChar32 c)1499 u_isUUppercase(UChar32 c) {
1500 return (UBool)(UCASE_UPPER==ucase_getType(c));
1501 }
1502
1503 /* Transforms the Unicode character to its lower case equivalent.*/
1504 U_CAPI UChar32 U_EXPORT2
u_tolower(UChar32 c)1505 u_tolower(UChar32 c) {
1506 return ucase_tolower(c);
1507 }
1508
1509 /* Transforms the Unicode character to its upper case equivalent.*/
1510 U_CAPI UChar32 U_EXPORT2
u_toupper(UChar32 c)1511 u_toupper(UChar32 c) {
1512 return ucase_toupper(c);
1513 }
1514
1515 /* Transforms the Unicode character to its title case equivalent.*/
1516 U_CAPI UChar32 U_EXPORT2
u_totitle(UChar32 c)1517 u_totitle(UChar32 c) {
1518 return ucase_totitle(c);
1519 }
1520
1521 /* return the simple case folding mapping for c */
1522 U_CAPI UChar32 U_EXPORT2
u_foldCase(UChar32 c,uint32_t options)1523 u_foldCase(UChar32 c, uint32_t options) {
1524 return ucase_fold(c, options);
1525 }
1526
1527 U_CFUNC int32_t U_EXPORT2
ucase_hasBinaryProperty(UChar32 c,UProperty which)1528 ucase_hasBinaryProperty(UChar32 c, UProperty which) {
1529 /* case mapping properties */
1530 const UChar *resultString;
1531 switch(which) {
1532 case UCHAR_LOWERCASE:
1533 return (UBool)(UCASE_LOWER==ucase_getType(c));
1534 case UCHAR_UPPERCASE:
1535 return (UBool)(UCASE_UPPER==ucase_getType(c));
1536 case UCHAR_SOFT_DOTTED:
1537 return ucase_isSoftDotted(c);
1538 case UCHAR_CASE_SENSITIVE:
1539 return ucase_isCaseSensitive(c);
1540 case UCHAR_CASED:
1541 return (UBool)(UCASE_NONE!=ucase_getType(c));
1542 case UCHAR_CASE_IGNORABLE:
1543 return (UBool)(ucase_getTypeOrIgnorable(c)>>2);
1544 /*
1545 * Note: The following Changes_When_Xyz are defined as testing whether
1546 * the NFD form of the input changes when Xyz-case-mapped.
1547 * However, this simpler implementation of these properties,
1548 * ignoring NFD, passes the tests.
1549 * The implementation needs to be changed if the tests start failing.
1550 * When that happens, optimizations should be used to work with the
1551 * per-single-code point ucase_toFullXyz() functions unless
1552 * the NFD form has more than one code point,
1553 * and the property starts set needs to be the union of the
1554 * start sets for normalization and case mappings.
1555 */
1556 case UCHAR_CHANGES_WHEN_LOWERCASED:
1557 return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1558 case UCHAR_CHANGES_WHEN_UPPERCASED:
1559 return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1560 case UCHAR_CHANGES_WHEN_TITLECASED:
1561 return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1562 /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
1563 case UCHAR_CHANGES_WHEN_CASEMAPPED:
1564 return (UBool)(
1565 ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
1566 ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
1567 ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1568 default:
1569 return FALSE;
1570 }
1571 }
1572