• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  *
7  *   Copyright (C) 2004-2015, International Business Machines
8  *   Corporation and others.  All Rights Reserved.
9  *
10  *******************************************************************************
11  *   file name:  UCaseProps.java
12  *   encoding:   US-ASCII
13  *   tab size:   8 (not used)
14  *   indentation:4
15  *
16  *   created on: 2005jan29
17  *   created by: Markus W. Scherer
18  *
19  *   Low-level Unicode character/string case mapping code.
20  *   Java port of ucase.h/.c.
21  */
22 
23 package ohos.global.icu.impl;
24 
25 import java.io.IOException;
26 import java.nio.ByteBuffer;
27 import java.util.Iterator;
28 import java.util.Locale;
29 
30 import ohos.global.icu.lang.UCharacter;
31 import ohos.global.icu.lang.UProperty;
32 import ohos.global.icu.text.UTF16;
33 import ohos.global.icu.text.UnicodeSet;
34 import ohos.global.icu.util.ICUUncheckedIOException;
35 import ohos.global.icu.util.ULocale;
36 
37 /**
38  * @hide exposed on OHOS
39  */
40 public final class UCaseProps {
41 
42     // constructors etc. --------------------------------------------------- ***
43 
/**
 * Port of ucase_openProps(): fetches the required data for DATA_FILE_NAME
 * through ICUBinary and hands it to readData() for parsing.
 *
 * @throws IOException if readData() rejects the data
 */
private UCaseProps() throws IOException {
    readData(ICUBinary.getRequiredData(DATA_FILE_NAME));
}
49 
readData(ByteBuffer bytes)50     private final void readData(ByteBuffer bytes) throws IOException {
51         // read the header
52         ICUBinary.readHeader(bytes, FMT, new IsAcceptable());
53 
54         // read indexes[]
55         int count=bytes.getInt();
56         if(count<IX_TOP) {
57             throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
58         }
59         indexes=new int[count];
60 
61         indexes[0]=count;
62         for(int i=1; i<count; ++i) {
63             indexes[i]=bytes.getInt();
64         }
65 
66         // read the trie
67         trie=Trie2_16.createFromSerialized(bytes);
68         int expectedTrieLength=indexes[IX_TRIE_SIZE];
69         int trieLength=trie.getSerializedLength();
70         if(trieLength>expectedTrieLength) {
71             throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
72         }
73         // skip padding after trie bytes
74         ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength);
75 
76         // read exceptions[]
77         count=indexes[IX_EXC_LENGTH];
78         if(count>0) {
79             exceptions=ICUBinary.getString(bytes, count, 0);
80         }
81 
82         // read unfold[]
83         count=indexes[IX_UNFOLD_LENGTH];
84         if(count>0) {
85             unfold=ICUBinary.getChars(bytes, count, 0);
86         }
87     }
88 
89     // implement ICUBinary.Authenticate
90     private final static class IsAcceptable implements ICUBinary.Authenticate {
91         @Override
isDataVersionAcceptable(byte version[])92         public boolean isDataVersionAcceptable(byte version[]) {
93             return version[0]==4;
94         }
95     }
96 
97     // set of property starts for UnicodeSet ------------------------------- ***
98 
addPropertyStarts(UnicodeSet set)99     public final void addPropertyStarts(UnicodeSet set) {
100         /* add the start code point of each same-value range of the trie */
101         Iterator<Trie2.Range> trieIterator=trie.iterator();
102         Trie2.Range range;
103         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
104             set.add(range.startCodePoint);
105         }
106 
107         /* add code points with hardcoded properties, plus the ones following them */
108 
109         /* (none right now, see comment below) */
110 
111         /*
112          * Omit code points with hardcoded specialcasing properties
113          * because we do not build property UnicodeSets for them right now.
114          */
115     }
116 
117     // data access primitives ---------------------------------------------- ***
getExceptionsOffset(int props)118     private static final int getExceptionsOffset(int props) {
119         return props>>EXC_SHIFT;
120     }
121 
propsHasException(int props)122     static final boolean propsHasException(int props) {
123         return (props&EXCEPTION)!=0;
124     }
125 
126     /* number of bits in an 8-bit integer value */
127     private static final byte flagsOffset[/*256*/]={
128         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
129         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
130         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
131         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
132         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
133         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
134         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
135         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
136         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
137         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
138         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
139         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
140         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
141         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
142         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
143         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
144     };
145 
hasSlot(int flags, int index)146     private static final boolean hasSlot(int flags, int index) {
147         return (flags&(1<<index))!=0;
148     }
slotOffset(int flags, int index)149     private static final byte slotOffset(int flags, int index) {
150         return flagsOffset[flags&((1<<index)-1)];
151     }
152 
153     /*
154      * Get the value of an optional-value slot where hasSlot(excWord, index).
155      *
156      * @param excWord (in) initial exceptions word
157      * @param index (in) desired slot index
158      * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
159      * @return bits 31..0: slot value
160      *             63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
161      */
getSlotValueAndOffset(int excWord, int index, int excOffset)162     private final long getSlotValueAndOffset(int excWord, int index, int excOffset) {
163         long value;
164         if((excWord&EXC_DOUBLE_SLOTS)==0) {
165             excOffset+=slotOffset(excWord, index);
166             value=exceptions.charAt(excOffset);
167         } else {
168             excOffset+=2*slotOffset(excWord, index);
169             value=exceptions.charAt(excOffset++);
170             value=(value<<16)|exceptions.charAt(excOffset);
171         }
172         return value |((long)excOffset<<32);
173     }
174 
175     /* same as getSlotValueAndOffset() but does not return the slot offset */
getSlotValue(int excWord, int index, int excOffset)176     private final int getSlotValue(int excWord, int index, int excOffset) {
177         int value;
178         if((excWord&EXC_DOUBLE_SLOTS)==0) {
179             excOffset+=slotOffset(excWord, index);
180             value=exceptions.charAt(excOffset);
181         } else {
182             excOffset+=2*slotOffset(excWord, index);
183             value=exceptions.charAt(excOffset++);
184             value=(value<<16)|exceptions.charAt(excOffset);
185         }
186         return value;
187     }
188 
189     // simple case mappings ------------------------------------------------ ***
190 
tolower(int c)191     public final int tolower(int c) {
192         int props=trie.get(c);
193         if(!propsHasException(props)) {
194             if(isUpperOrTitleFromProps(props)) {
195                 c+=getDelta(props);
196             }
197         } else {
198             int excOffset=getExceptionsOffset(props);
199             int excWord=exceptions.charAt(excOffset++);
200             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
201                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
202                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
203             }
204             if(hasSlot(excWord, EXC_LOWER)) {
205                 c=getSlotValue(excWord, EXC_LOWER, excOffset);
206             }
207         }
208         return c;
209     }
210 
toupper(int c)211     public final int toupper(int c) {
212         int props=trie.get(c);
213         if(!propsHasException(props)) {
214             if(getTypeFromProps(props)==LOWER) {
215                 c+=getDelta(props);
216             }
217         } else {
218             int excOffset=getExceptionsOffset(props);
219             int excWord=exceptions.charAt(excOffset++);
220             if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
221                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
222                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
223             }
224             if(hasSlot(excWord, EXC_UPPER)) {
225                 c=getSlotValue(excWord, EXC_UPPER, excOffset);
226             }
227         }
228         return c;
229     }
230 
totitle(int c)231     public final int totitle(int c) {
232         int props=trie.get(c);
233         if(!propsHasException(props)) {
234             if(getTypeFromProps(props)==LOWER) {
235                 c+=getDelta(props);
236             }
237         } else {
238             int excOffset=getExceptionsOffset(props);
239             int excWord=exceptions.charAt(excOffset++);
240             if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
241                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
242                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
243             }
244             int index;
245             if(hasSlot(excWord, EXC_TITLE)) {
246                 index=EXC_TITLE;
247             } else if(hasSlot(excWord, EXC_UPPER)) {
248                 index=EXC_UPPER;
249             } else {
250                 return c;
251             }
252             c=getSlotValue(excWord, index, excOffset);
253         }
254         return c;
255     }
256 
257     /**
258      * Adds all simple case mappings and the full case folding for c to sa,
259      * and also adds special case closure mappings.
260      * c itself is not added.
261      * For example, the mappings
262      * - for s include long s
263      * - for sharp s include ss
264      * - for k include the Kelvin sign
265      */
addCaseClosure(int c, UnicodeSet set)266     public final void addCaseClosure(int c, UnicodeSet set) {
267         /*
268          * Hardcode the case closure of i and its relatives and ignore the
269          * data file data for these characters.
270          * The Turkic dotless i and dotted I with their case mapping conditions
271          * and case folding option make the related characters behave specially.
272          * This code matches their closure behavior to their case folding behavior.
273          */
274 
275         switch(c) {
276         case 0x49:
277             /* regular i and I are in one equivalence class */
278             set.add(0x69);
279             return;
280         case 0x69:
281             set.add(0x49);
282             return;
283         case 0x130:
284             /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
285             set.add(iDot);
286             return;
287         case 0x131:
288             /* dotless i is in a class by itself */
289             return;
290         default:
291             /* otherwise use the data file data */
292             break;
293         }
294 
295         int props=trie.get(c);
296         if(!propsHasException(props)) {
297             if(getTypeFromProps(props)!=NONE) {
298                 /* add the one simple case mapping, no matter what type it is */
299                 int delta=getDelta(props);
300                 if(delta!=0) {
301                     set.add(c+delta);
302                 }
303             }
304         } else {
305             /*
306              * c has exceptions, so there may be multiple simple and/or
307              * full case mappings. Add them all.
308              */
309             int excOffset0, excOffset=getExceptionsOffset(props);
310             int closureOffset;
311             int excWord=exceptions.charAt(excOffset++);
312             int index, closureLength, fullLength, length;
313 
314             excOffset0=excOffset;
315 
316             /* add all simple case mappings */
317             for(index=EXC_LOWER; index<=EXC_TITLE; ++index) {
318                 if(hasSlot(excWord, index)) {
319                     excOffset=excOffset0;
320                     c=getSlotValue(excWord, index, excOffset);
321                     set.add(c);
322                 }
323             }
324             if(hasSlot(excWord, EXC_DELTA)) {
325                 excOffset=excOffset0;
326                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
327                 set.add((excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
328             }
329 
330             /* get the closure string pointer & length */
331             if(hasSlot(excWord, EXC_CLOSURE)) {
332                 excOffset=excOffset0;
333                 long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
334                 closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */
335                 closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */
336             } else {
337                 closureLength=0;
338                 closureOffset=0;
339             }
340 
341             /* add the full case folding */
342             if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
343                 excOffset=excOffset0;
344                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
345                 fullLength=(int)value;
346 
347                 /* start of full case mapping strings */
348                 excOffset=(int)(value>>32)+1;
349 
350                 fullLength&=0xffff; /* bits 16 and higher are reserved */
351 
352                 /* skip the lowercase result string */
353                 excOffset+=fullLength&FULL_LOWER;
354                 fullLength>>=4;
355 
356                 /* add the full case folding string */
357                 length=fullLength&0xf;
358                 if(length!=0) {
359                     set.add(exceptions.substring(excOffset, excOffset+length));
360                     excOffset+=length;
361                 }
362 
363                 /* skip the uppercase and titlecase strings */
364                 fullLength>>=4;
365                 excOffset+=fullLength&0xf;
366                 fullLength>>=4;
367                 excOffset+=fullLength;
368 
369                 closureOffset=excOffset; /* behind full case mappings */
370             }
371 
372             /* add each code point in the closure string */
373             int limit=closureOffset+closureLength;
374             for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
375                 c=exceptions.codePointAt(index);
376                 set.add(c);
377             }
378         }
379     }
380 
381     /*
382      * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated
383      * must be s.length()>0 and max>0 and s.length()<=max
384      */
strcmpMax(String s, int unfoldOffset, int max)385     private final int strcmpMax(String s, int unfoldOffset, int max) {
386         int i1, length, c1, c2;
387 
388         length=s.length();
389         max-=length; /* we require length<=max, so no need to decrement max in the loop */
390         i1=0;
391         do {
392             c1=s.charAt(i1++);
393             c2=unfold[unfoldOffset++];
394             if(c2==0) {
395                 return 1; /* reached the end of t but not of s */
396             }
397             c1-=c2;
398             if(c1!=0) {
399                 return c1; /* return difference result */
400             }
401         } while(--length>0);
402         /* ends with length==0 */
403 
404         if(max==0 || unfold[unfoldOffset]==0) {
405             return 0; /* equal to length of both strings */
406         } else {
407             return -max; /* return lengh difference */
408         }
409     }
410 
411     /**
412      * Maps the string to single code points and adds the associated case closure
413      * mappings.
414      * The string is mapped to code points if it is their full case folding string.
415      * In other words, this performs a reverse full case folding and then
416      * adds the case closure items of the resulting code points.
417      * If the string is found and its closure applied, then
418      * the string itself is added as well as part of its code points' closure.
419      *
420      * @return true if the string was found
421      */
addStringCaseClosure(String s, UnicodeSet set)422     public final boolean addStringCaseClosure(String s, UnicodeSet set) {
423         int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth;
424 
425         if(unfold==null || s==null) {
426             return false; /* no reverse case folding data, or no string */
427         }
428         length=s.length();
429         if(length<=1) {
430             /* the string is too short to find any match */
431             /*
432              * more precise would be:
433              * if(!u_strHasMoreChar32Than(s, length, 1))
434              * but this does not make much practical difference because
435              * a single supplementary code point would just not be found
436              */
437             return false;
438         }
439 
440         unfoldRows=unfold[UNFOLD_ROWS];
441         unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH];
442         unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH];
443         //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth;
444 
445         if(length>unfoldStringWidth) {
446             /* the string is too long to find any match */
447             return false;
448         }
449 
450         /* do a binary search for the string */
451         start=0;
452         limit=unfoldRows;
453         while(start<limit) {
454             i=(start+limit)/2;
455             unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above
456             result=strcmpMax(s, unfoldOffset, unfoldStringWidth);
457 
458             if(result==0) {
459                 /* found the string: add each code point, and its case closure */
460                 int c;
461 
462                 for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) {
463                     c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i);
464                     set.add(c);
465                     addCaseClosure(c, set);
466                 }
467                 return true;
468             } else if(result<0) {
469                 limit=i;
470             } else /* result>0 */ {
471                 start=i+1;
472             }
473         }
474 
475         return false; /* string not found */
476     }
477 
478     /** @return NONE, LOWER, UPPER, TITLE */
getType(int c)479     public final int getType(int c) {
480         return getTypeFromProps(trie.get(c));
481     }
482 
483     /** @return like getType() but also sets IGNORABLE if c is case-ignorable */
getTypeOrIgnorable(int c)484     public final int getTypeOrIgnorable(int c) {
485         return getTypeAndIgnorableFromProps(trie.get(c));
486     }
487 
488     /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */
getDotType(int c)489     public final int getDotType(int c) {
490         int props=trie.get(c);
491         if(!propsHasException(props)) {
492             return props&DOT_MASK;
493         } else {
494             return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
495         }
496     }
497 
isSoftDotted(int c)498     public final boolean isSoftDotted(int c) {
499         return getDotType(c)==SOFT_DOTTED;
500     }
501 
isCaseSensitive(int c)502     public final boolean isCaseSensitive(int c) {
503         int props=trie.get(c);
504         if(!propsHasException(props)) {
505             return (props&SENSITIVE)!=0;
506         } else {
507             return (exceptions.charAt(getExceptionsOffset(props))&EXC_SENSITIVE)!=0;
508         }
509     }
510 
511     // string casing ------------------------------------------------------- ***
512 
513     /*
514      * These internal functions form the core of string case mappings.
515      * They map single code points to result code points or strings and take
516      * all necessary conditions (context, locale ID, options) into account.
517      *
518      * They do not iterate over the source or write to the destination
519      * so that the same functions are useful for non-standard string storage,
520      * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
521      * For the same reason, the "surrounding text" context is passed in as a
522      * ContextIterator which does not make any assumptions about
523      * the underlying storage.
524      *
525      * This section contains helper functions that check for conditions
526      * in the input text surrounding the current code point
527      * according to SpecialCasing.txt.
528      *
529      * Each helper function gets the index
530      * - after the current code point if it looks at following text
531      * - before the current code point if it looks at preceding text
532      *
533      * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
534      *
535      * Final_Sigma
536      *   C is preceded by a sequence consisting of
537      *     a cased letter and a case-ignorable sequence,
538      *   and C is not followed by a sequence consisting of
539      *     an ignorable sequence and then a cased letter.
540      *
541      * More_Above
542      *   C is followed by one or more characters of combining class 230 (ABOVE)
543      *   in the combining character sequence.
544      *
545      * After_Soft_Dotted
546      *   The last preceding character with combining class of zero before C
547      *   was Soft_Dotted,
548      *   and there is no intervening combining character class 230 (ABOVE).
549      *
550      * Before_Dot
551      *   C is followed by combining dot above (U+0307).
552      *   Any sequence of characters with a combining class that is neither 0 nor 230
553      *   may intervene between the current character and the combining dot above.
554      *
555      * The erratum from 2002-10-31 adds the condition
556      *
557      * After_I
558      *   The last preceding base character was an uppercase I, and there is no
559      *   intervening combining character class 230 (ABOVE).
560      *
561      *   (See Jitterbug 2344 and the comments on After_I below.)
562      *
563      * Helper definitions in Unicode 3.2 UAX 21:
564      *
565      * D1. A character C is defined to be cased
566      *     if it meets any of the following criteria:
567      *
568      *   - The general category of C is Titlecase Letter (Lt)
569      *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
570      *   - Given D = NFD(C), then it is not the case that:
571      *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
     *     (This third criterion does not add any characters to the list
573      *      for Unicode 3.2. Ignored.)
574      *
575      * D2. A character C is defined to be case-ignorable
576      *     if it meets either of the following criteria:
577      *
578      *   - The general category of C is
579      *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
580      *     Letter Modifier (Lm), or Symbol Modifier (Sk)
581      *   - C is one of the following characters
582      *     U+0027 APOSTROPHE
583      *     U+00AD SOFT HYPHEN (SHY)
584      *     U+2019 RIGHT SINGLE QUOTATION MARK
585      *            (the preferred character for apostrophe)
586      *
587      * D3. A case-ignorable sequence is a sequence of
588      *     zero or more case-ignorable characters.
589      */
590 
/**
 * Iterator for string case mappings, which need to look at the
 * context (surrounding text) of a given character for conditional mappings.
 *
 * The iterator only needs to move backward or forward away from the
 * character in question. No indexes are used on this interface, and
 * neither random access nor an arbitrary change of iteration direction
 * is supported.
 *
 * The code point being case-mapped itself is never returned by
 * this iterator.
 * @hide exposed on OHOS
 */
public interface ContextIterator {
    /**
     * Reset the iterator for forward or backward iteration.
     * @param dir >0: Begin iterating forward from the first code point
     * after the one that is being case-mapped.
     *            <0: Begin iterating backward from the first code point
     * before the one that is being case-mapped.
     */
    void reset(int dir);

    /**
     * Iterate and return the next code point, moving in the direction
     * determined by the reset() call.
     * @return Next code point, or <0 when the iteration is done.
     */
    int next();
}
620 
621     /**
622      * Fast case mapping data for ASCII/Latin.
623      * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
624      * Deltas must not cross the ASCII boundary, or else they cannot be easily used
625      * in simple UTF-8 code.
626      */
627     static final class LatinCase {
628         /** Case mapping/folding data for code points up to U+017F. */
629         static final char LIMIT = 0x180;
630         /** U+017F case-folds and uppercases crossing the ASCII boundary. */
631         static final char LONG_S = 0x17f;
632         /** Exception: Complex mapping, or too-large delta. */
633         static final byte EXC = -0x80;
634 
635         /** Deltas for lowercasing for most locales, and default case folding. */
636         static final byte[] TO_LOWER_NORMAL = {
637             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641 
642             0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
643             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
644             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646 
647             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650             0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651 
652             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
653             32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
654             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
655             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
656 
657             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
658             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
659             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
660             EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
661 
662             0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
663             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
664             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
665             1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
666         };
667 
668         /** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
669         static final byte[] TO_LOWER_TR_LT = {
670             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
671             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
672             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
673             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674 
675             0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
676             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
677             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
678             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
679 
680             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
681             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
682             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
683             0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
684 
685             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
686             32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
687             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
688             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
689 
690             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
691             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
692             1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
693             EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
694 
695             0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
696             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
697             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
698             1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
699         };
700 
        /**
         * Deltas for uppercasing for most locales.
         *
         * The table holds 24 rows of 16 entries, i.e. one entry per code point
         * U+0000..U+017F, indexed by the code point itself: for example the
         * entries for 'a'..'z' (0x61..0x7a) are -32 and the entry for U+00FF is 121.
         * Entries set to EXC are placeholders rather than deltas; presumably they
         * mark code points that need the regular (non-table) lookup -- confirm
         * against the code that reads this table.
         */
        static final byte[] TO_UPPER_NORMAL = {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

            -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
        };
733 
        /**
         * Deltas for uppercasing for tr/az.
         *
         * Same layout as the table for most locales: 24 rows of 16 entries, one
         * per code point U+0000..U+017F, indexed by the code point.
         * The only entry that differs from TO_UPPER_NORMAL is index 0x69 ('i'),
         * which is EXC here instead of -32, so that 'i' is not uppercased with
         * the plain ASCII delta for these languages.
         */
        static final byte[] TO_UPPER_TR = {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

            -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
        };
766     }
767 
    /**
     * Largest possible length (0x1f = 31) of a string result of a case mapping,
     * and the threshold for interpreting the int returned by the mapping functions.
     *
     * For string case mappings, a single character (a code point) is mapped
     * either to itself (in which case in-place mapping functions do nothing),
     * or to another single code point, or to a string.
     * Aside from the string contents, these are indicated with a single int
     * value as follows:
     *
     * Mapping to self: Negative values (~self instead of -self to support U+0000)
     *
     * Mapping to another code point: Positive values >MAX_STRING_LENGTH
     *
     * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is
     * returned. Note that the string result may indeed have zero length.
     */
    public static final int MAX_STRING_LENGTH=0x1f;
783 
    // Case locale identifiers as returned by getCaseLocale().
    // The value 0 is left unused by the constants below:
    //private static final int LOC_UNKNOWN=0;
    /** Languages without special case-mapping behavior; also the fallback result of getCaseLocale(). */
    public static final int LOC_ROOT=1;
    /** Turkish and Azeri (language subtags "tr"/"tur" and "az"/"aze"). */
    static final int LOC_TURKISH=2;
    /** Lithuanian (language subtags "lt"/"lit"). */
    static final int LOC_LITHUANIAN=3;
    /** Greek (language subtags "el"/"ell"). */
    static final int LOC_GREEK=4;
    /** Dutch (language subtags "nl"/"nld"). */
    public static final int LOC_DUTCH=5;
790 
getCaseLocale(Locale locale)791     public static final int getCaseLocale(Locale locale) {
792         return getCaseLocale(locale.getLanguage());
793     }
getCaseLocale(ULocale locale)794     public static final int getCaseLocale(ULocale locale) {
795         return getCaseLocale(locale.getLanguage());
796     }
797     /** Accepts both 2- and 3-letter language subtags. */
getCaseLocale(String language)798     private static final int getCaseLocale(String language) {
799         // Check the subtag length to reduce the number of comparisons
800         // for locales without special behavior.
801         // Fastpath for English "en" which is often used for default (=root locale) case mappings,
802         // and for Chinese "zh": Very common but no special case mapping behavior.
803         if(language.length()==2) {
804             if(language.equals("en") || language.charAt(0)>'t') {
805                 return LOC_ROOT;
806             } else if(language.equals("tr") || language.equals("az")) {
807                 return LOC_TURKISH;
808             } else if(language.equals("el")) {
809                 return LOC_GREEK;
810             } else if(language.equals("lt")) {
811                 return LOC_LITHUANIAN;
812             } else if(language.equals("nl")) {
813                 return LOC_DUTCH;
814             }
815         } else if(language.length()==3) {
816             if(language.equals("tur") || language.equals("aze")) {
817                 return LOC_TURKISH;
818             } else if(language.equals("ell")) {
819                 return LOC_GREEK;
820             } else if(language.equals("lit")) {
821                 return LOC_LITHUANIAN;
822             } else if(language.equals("nld")) {
823                 return LOC_DUTCH;
824             }
825         }
826         return LOC_ROOT;
827     }
828 
829     /* Is followed by {case-ignorable}* cased  ? (dir determines looking forward/backward) */
isFollowedByCasedLetter(ContextIterator iter, int dir)830     private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) {
831         int c;
832 
833         if(iter==null) {
834             return false;
835         }
836 
837         for(iter.reset(dir); (c=iter.next())>=0;) {
838             int type=getTypeOrIgnorable(c);
839             if((type&4)!=0) {
840                 /* case-ignorable, continue with the loop */
841             } else if(type!=NONE) {
842                 return true; /* followed by cased letter */
843             } else {
844                 return false; /* uncased and not case-ignorable */
845             }
846         }
847 
848         return false; /* not followed by cased letter */
849     }
850 
851     /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
isPrecededBySoftDotted(ContextIterator iter)852     private final boolean isPrecededBySoftDotted(ContextIterator iter) {
853         int c;
854         int dotType;
855 
856         if(iter==null) {
857             return false;
858         }
859 
860         for(iter.reset(-1); (c=iter.next())>=0;) {
861             dotType=getDotType(c);
862             if(dotType==SOFT_DOTTED) {
863                 return true; /* preceded by TYPE_i */
864             } else if(dotType!=OTHER_ACCENT) {
865                 return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
866             }
867         }
868 
869         return false; /* not preceded by TYPE_i */
870     }
871 
872     /*
873      * See Jitterbug 2344:
874      * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
875      * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
876      * we made those releases compatible with Unicode 3.2 which had not fixed
877      * a related bug in SpecialCasing.txt.
878      *
879      * From the Jitterbug 2344 text:
880      * ... this bug is listed as a Unicode erratum
881      * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
882      * <quote>
883      * There are two errors in SpecialCasing.txt.
884      * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
885      * 2. An incorrect context definition. Correct as follows:
886      * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
887      * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
888      * ---
889      * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
890      * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
891      * where the context After_I is defined as:
892      * The last preceding base character was an uppercase I, and there is no
893      * intervening combining character class 230 (ABOVE).
894      * </quote>
895      *
896      * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
897      *
898      * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
899      * # This matches the behavior of the canonically equivalent I-dot_above
900      *
901      * See also the description in this place in older versions of uchar.c (revision 1.100).
902      *
903      * Markus W. Scherer 2003-feb-15
904      */
905 
906     /* Is preceded by base character 'I' with no intervening cc=230 ? */
isPrecededBy_I(ContextIterator iter)907     private final boolean isPrecededBy_I(ContextIterator iter) {
908         int c;
909         int dotType;
910 
911         if(iter==null) {
912             return false;
913         }
914 
915         for(iter.reset(-1); (c=iter.next())>=0;) {
916             if(c==0x49) {
917                 return true; /* preceded by I */
918             }
919             dotType=getDotType(c);
920             if(dotType!=OTHER_ACCENT) {
921                 return false; /* preceded by different base character (not I), or intervening cc==230 */
922             }
923         }
924 
925         return false; /* not preceded by I */
926     }
927 
928     /* Is followed by one or more cc==230 ? */
isFollowedByMoreAbove(ContextIterator iter)929     private final boolean isFollowedByMoreAbove(ContextIterator iter) {
930         int c;
931         int dotType;
932 
933         if(iter==null) {
934             return false;
935         }
936 
937         for(iter.reset(1); (c=iter.next())>=0;) {
938             dotType=getDotType(c);
939             if(dotType==ABOVE) {
940                 return true; /* at least one cc==230 following */
941             } else if(dotType!=OTHER_ACCENT) {
942                 return false; /* next base character, no more cc==230 following */
943             }
944         }
945 
946         return false; /* no more cc==230 following */
947     }
948 
949     /* Is followed by a dot above (without cc==230 in between) ? */
isFollowedByDotAbove(ContextIterator iter)950     private final boolean isFollowedByDotAbove(ContextIterator iter) {
951         int c;
952         int dotType;
953 
954         if(iter==null) {
955             return false;
956         }
957 
958         for(iter.reset(1); (c=iter.next())>=0; ) {
959             if(c==0x307) {
960                 return true;
961             }
962             dotType=getDotType(c);
963             if(dotType!=OTHER_ACCENT) {
964                 return false; /* next base character or cc==230 in between */
965             }
966         }
967 
968         return false; /* no dot above following */
969     }
970 
971     private static final String
972         iDot=       "i\u0307",
973         jDot=       "j\u0307",
974         iOgonekDot= "\u012f\u0307",
975         iDotGrave=  "i\u0307\u0300",
976         iDotAcute=  "i\u0307\u0301",
977         iDotTilde=  "i\u0307\u0303";
978 
979     /**
980      * Get the full lowercase mapping for c.
981      *
982      * @param c Character to be mapped.
983      * @param iter Character iterator, used for context-sensitive mappings.
984      *             See ContextIterator for details.
985      *             If iter==null then a context-independent result is returned.
986      * @param out If the mapping result is a string, then it is appended to out.
987      * @param caseLocale Case locale value from ucase_getCaseLocale().
988      * @return Output code point or string length, see MAX_STRING_LENGTH.
989      *
990      * @see ContextIterator
991      * @see #MAX_STRING_LENGTH
992      * @hide draft / provisional / internal are hidden on OHOS
993      */
toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale)994     public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
995         int result, props;
996 
997         result=c;
998         props=trie.get(c);
999         if(!propsHasException(props)) {
1000             if(isUpperOrTitleFromProps(props)) {
1001                 result=c+getDelta(props);
1002             }
1003         } else {
1004             int excOffset=getExceptionsOffset(props), excOffset2;
1005             int excWord=exceptions.charAt(excOffset++);
1006             int full;
1007 
1008             excOffset2=excOffset;
1009 
1010             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
1011                 /* use hardcoded conditions and mappings */
1012                 /*
1013                  * Test for conditional mappings first
1014                  *   (otherwise the unconditional default mappings are always taken),
1015                  * then test for characters that have unconditional mappings in SpecialCasing.txt,
1016                  * then get the UnicodeData.txt mappings.
1017                  */
1018                 if( caseLocale==LOC_LITHUANIAN &&
1019                         /* base characters, find accents above */
1020                         (((c==0x49 || c==0x4a || c==0x12e) &&
1021                             isFollowedByMoreAbove(iter)) ||
1022                         /* precomposed with accent above, no need to find one */
1023                         (c==0xcc || c==0xcd || c==0x128))
1024                 ) {
1025                     /*
1026                         # Lithuanian
1027 
1028                         # Lithuanian retains the dot in a lowercase i when followed by accents.
1029 
1030                         # Introduce an explicit dot above when lowercasing capital I's and J's
1031                         # whenever there are more accents above.
1032                         # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
1033 
1034                         0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
1035                         004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
1036                         012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
1037                         00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
1038                         00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
1039                         0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
1040                      */
1041                     try {
1042                         switch(c) {
1043                         case 0x49:  /* LATIN CAPITAL LETTER I */
1044                             out.append(iDot);
1045                             return 2;
1046                         case 0x4a:  /* LATIN CAPITAL LETTER J */
1047                             out.append(jDot);
1048                             return 2;
1049                         case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
1050                             out.append(iOgonekDot);
1051                             return 2;
1052                         case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
1053                             out.append(iDotGrave);
1054                             return 3;
1055                         case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
1056                             out.append(iDotAcute);
1057                             return 3;
1058                         case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
1059                             out.append(iDotTilde);
1060                             return 3;
1061                         default:
1062                             return 0; /* will not occur */
1063                         }
1064                     } catch (IOException e) {
1065                         throw new ICUUncheckedIOException(e);
1066                     }
1067                 /* # Turkish and Azeri */
1068                 } else if(caseLocale==LOC_TURKISH && c==0x130) {
1069                     /*
1070                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1071                         # The following rules handle those cases.
1072 
1073                         0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
1074                         0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
1075                      */
1076                     return 0x69;
1077                 } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
1078                     /*
1079                         # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
1080                         # This matches the behavior of the canonically equivalent I-dot_above
1081 
1082                         0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
1083                         0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
1084                      */
1085                     return 0; /* remove the dot (continue without output) */
1086                 } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
1087                     /*
1088                         # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
1089 
1090                         0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
1091                         0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
1092                      */
1093                     return 0x131;
1094                 } else if(c==0x130) {
1095                     /*
1096                         # Preserve canonical equivalence for I with dot. Turkic is handled below.
1097 
1098                         0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1099                      */
1100                     try {
1101                         out.append(iDot);
1102                         return 2;
1103                     } catch (IOException e) {
1104                         throw new ICUUncheckedIOException(e);
1105                     }
1106                 } else if(  c==0x3a3 &&
1107                             !isFollowedByCasedLetter(iter, 1) &&
1108                             isFollowedByCasedLetter(iter, -1) /* -1=preceded */
1109                 ) {
1110                     /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
1111                     /*
1112                         # Special case for final form of sigma
1113 
1114                         03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
1115                      */
1116                     return 0x3c2; /* greek small final sigma */
1117                 } else {
1118                     /* no known conditional special case mapping, use a normal mapping */
1119                 }
1120             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1121                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1122                 full=(int)value&FULL_LOWER;
1123                 if(full!=0) {
1124                     /* start of full case mapping strings */
1125                     excOffset=(int)(value>>32)+1;
1126 
1127                     try {
1128                         // append the lowercase mapping
1129                         out.append(exceptions, excOffset, excOffset+full);
1130 
1131                         /* return the string length */
1132                         return full;
1133                     } catch (IOException e) {
1134                         throw new ICUUncheckedIOException(e);
1135                     }
1136                 }
1137             }
1138 
1139             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
1140                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
1141                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1142             }
1143             if(hasSlot(excWord, EXC_LOWER)) {
1144                 result=getSlotValue(excWord, EXC_LOWER, excOffset2);
1145             }
1146         }
1147 
1148         return (result==c) ? ~result : result;
1149     }
1150 
1151     /* internal */
toUpperOrTitle(int c, ContextIterator iter, Appendable out, int loc, boolean upperNotTitle)1152     private final int toUpperOrTitle(int c, ContextIterator iter,
1153                                      Appendable out,
1154                                      int loc,
1155                                      boolean upperNotTitle) {
1156         int result;
1157         int props;
1158 
1159         result=c;
1160         props=trie.get(c);
1161         if(!propsHasException(props)) {
1162             if(getTypeFromProps(props)==LOWER) {
1163                 result=c+getDelta(props);
1164             }
1165         } else {
1166             int excOffset=getExceptionsOffset(props), excOffset2;
1167             int excWord=exceptions.charAt(excOffset++);
1168             int full, index;
1169 
1170             excOffset2=excOffset;
1171 
1172             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
1173                 /* use hardcoded conditions and mappings */
1174                 if(loc==LOC_TURKISH && c==0x69) {
1175                     /*
1176                         # Turkish and Azeri
1177 
1178                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1179                         # The following rules handle those cases.
1180 
1181                         # When uppercasing, i turns into a dotted capital I
1182 
1183                         0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
1184                         0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
1185                     */
1186                     return 0x130;
1187                 } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
1188                     /*
1189                         # Lithuanian
1190 
1191                         # Lithuanian retains the dot in a lowercase i when followed by accents.
1192 
1193                         # Remove DOT ABOVE after "i" with upper or titlecase
1194 
1195                         0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
1196                      */
1197                     return 0; /* remove the dot (continue without output) */
1198                 } else {
1199                     /* no known conditional special case mapping, use a normal mapping */
1200                 }
1201             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1202                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1203                 full=(int)value&0xffff;
1204 
1205                 /* start of full case mapping strings */
1206                 excOffset=(int)(value>>32)+1;
1207 
1208                 /* skip the lowercase and case-folding result strings */
1209                 excOffset+=full&FULL_LOWER;
1210                 full>>=4;
1211                 excOffset+=full&0xf;
1212                 full>>=4;
1213 
1214                 if(upperNotTitle) {
1215                     full&=0xf;
1216                 } else {
1217                     /* skip the uppercase result string */
1218                     excOffset+=full&0xf;
1219                     full=(full>>4)&0xf;
1220                 }
1221 
1222                 if(full!=0) {
1223                     try {
1224                         // append the result string
1225                         out.append(exceptions, excOffset, excOffset+full);
1226 
1227                         /* return the string length */
1228                         return full;
1229                     } catch (IOException e) {
1230                         throw new ICUUncheckedIOException(e);
1231                     }
1232                 }
1233             }
1234 
1235             if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
1236                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
1237                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1238             }
1239             if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
1240                 index=EXC_TITLE;
1241             } else if(hasSlot(excWord, EXC_UPPER)) {
1242                 /* here, titlecase is same as uppercase */
1243                 index=EXC_UPPER;
1244             } else {
1245                 return ~c;
1246             }
1247             result=getSlotValue(excWord, index, excOffset2);
1248         }
1249 
1250         return (result==c) ? ~result : result;
1251     }
1252 
toFullUpper(int c, ContextIterator iter, Appendable out, int caseLocale)1253     public final int toFullUpper(int c, ContextIterator iter,
1254                                  Appendable out,
1255                                  int caseLocale) {
1256         return toUpperOrTitle(c, iter, out, caseLocale, true);
1257     }
1258 
toFullTitle(int c, ContextIterator iter, Appendable out, int caseLocale)1259     public final int toFullTitle(int c, ContextIterator iter,
1260                                  Appendable out,
1261                                  int caseLocale) {
1262         return toUpperOrTitle(c, iter, out, caseLocale, false);
1263     }
1264 
1265     /* case folding ------------------------------------------------------------- */
1266 
1267     /*
1268      * Case folding is similar to lowercasing.
1269      * The result may be a simple mapping, i.e., a single code point, or
1270      * a full mapping, i.e., a string.
1271      * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
1272      * then only the lowercase mapping is stored.
1273      *
1274      * Some special cases are hardcoded because their conditions cannot be
1275      * parsed and processed from CaseFolding.txt.
1276      *
1277      * Unicode 3.2 CaseFolding.txt specifies for its status field:
1278 
1279     # C: common case folding, common mappings shared by both simple and full mappings.
1280     # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
1281     # S: simple case folding, mappings to single characters where different from F.
1282     # T: special case for uppercase I and dotted uppercase I
1283     #    - For non-Turkic languages, this mapping is normally not used.
1284     #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
1285     #
1286     # Usage:
1287     #  A. To do a simple case folding, use the mappings with status C + S.
1288     #  B. To do a full case folding, use the mappings with status C + F.
1289     #
1290     #    The mappings with status T can be used or omitted depending on the desired case-folding
1291     #    behavior. (The default option is to exclude them.)
1292 
1293      * Unicode 3.2 has 'T' mappings as follows:
1294 
1295     0049; T; 0131; # LATIN CAPITAL LETTER I
1296     0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1297 
1298      * while the default mappings for these code points are:
1299 
1300     0049; C; 0069; # LATIN CAPITAL LETTER I
1301     0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1302 
1303      * U+0130 has no simple case folding (simple-case-folds to itself).
1304      */
1305 
    /**
     * Bit mask for getting just the options from a string compare options word
     * that are relevant for case folding (of a single string or code point).
     *
     * The mask value 7 selects the three lowest bits of the options word.
     * Currently only bit 0 for FOLD_CASE_EXCLUDE_SPECIAL_I.
     * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
     * It is conceivable that at some point we might want the option to use only simple case foldings
     * when operating on strings.
     *
     * The folding functions compare the masked options with UCharacter.FOLD_CASE_DEFAULT
     * to choose between the default and the Turkic mappings.
     *
     * @hide draft / provisional / internal are hidden on OHOS
     */
    static final int FOLD_CASE_OPTIONS_MASK = 7;
1318 
1319     /* return the simple case folding mapping for c */
fold(int c, int options)1320     public final int fold(int c, int options) {
1321         int props=trie.get(c);
1322         if(!propsHasException(props)) {
1323             if(isUpperOrTitleFromProps(props)) {
1324                 c+=getDelta(props);
1325             }
1326         } else {
1327             int excOffset=getExceptionsOffset(props);
1328             int excWord=exceptions.charAt(excOffset++);
1329             int index;
1330             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
1331                 /* special case folding mappings, hardcoded */
1332                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
1333                     /* default mappings */
1334                     if(c==0x49) {
1335                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1336                         return 0x69;
1337                     } else if(c==0x130) {
1338                         /* no simple case folding for U+0130 */
1339                         return c;
1340                     }
1341                 } else {
1342                     /* Turkic mappings */
1343                     if(c==0x49) {
1344                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1345                         return 0x131;
1346                     } else if(c==0x130) {
1347                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1348                         return 0x69;
1349                     }
1350                 }
1351             }
1352             if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1353                 return c;
1354             }
1355             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
1356                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
1357                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1358             }
1359             if(hasSlot(excWord, EXC_FOLD)) {
1360                 index=EXC_FOLD;
1361             } else if(hasSlot(excWord, EXC_LOWER)) {
1362                 index=EXC_LOWER;
1363             } else {
1364                 return c;
1365             }
1366             c=getSlotValue(excWord, index, excOffset);
1367         }
1368         return c;
1369     }
1370 
1371     /*
1372      * Issue for canonical caseless match (UAX #21):
1373      * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
1374      * canonical equivalence, unlike default-option casefolding.
1375      * For example, I-grave and I + grave fold to strings that are not canonically
1376      * equivalent.
1377      * For more details, see the comment in unorm_compare() in unorm.cpp
1378      * and the intermediate prototype changes for Jitterbug 2021.
1379      * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
1380      *
1381      * This did not get fixed because it appears that it is not possible to fix
1382      * it for uppercase and lowercase characters (I-grave vs. i-grave)
1383      * together in a way that they still fold to common result strings.
1384      */
1385 
toFullFolding(int c, Appendable out, int options)1386     public final int toFullFolding(int c, Appendable out, int options) {
1387         int result;
1388         int props;
1389 
1390         result=c;
1391         props=trie.get(c);
1392         if(!propsHasException(props)) {
1393             if(isUpperOrTitleFromProps(props)) {
1394                 result=c+getDelta(props);
1395             }
1396         } else {
1397             int excOffset=getExceptionsOffset(props), excOffset2;
1398             int excWord=exceptions.charAt(excOffset++);
1399             int full, index;
1400 
1401             excOffset2=excOffset;
1402 
1403             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
1404                 /* use hardcoded conditions and mappings */
1405                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
1406                     /* default mappings */
1407                     if(c==0x49) {
1408                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1409                         return 0x69;
1410                     } else if(c==0x130) {
1411                         /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1412                         try {
1413                             out.append(iDot);
1414                             return 2;
1415                         } catch (IOException e) {
1416                             throw new ICUUncheckedIOException(e);
1417                         }
1418                     }
1419                 } else {
1420                     /* Turkic mappings */
1421                     if(c==0x49) {
1422                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1423                         return 0x131;
1424                     } else if(c==0x130) {
1425                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1426                         return 0x69;
1427                     }
1428                 }
1429             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1430                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1431                 full=(int)value&0xffff;
1432 
1433                 /* start of full case mapping strings */
1434                 excOffset=(int)(value>>32)+1;
1435 
1436                 /* skip the lowercase result string */
1437                 excOffset+=full&FULL_LOWER;
1438                 full=(full>>4)&0xf;
1439 
1440                 if(full!=0) {
1441                     try {
1442                         // append the result string
1443                         out.append(exceptions, excOffset, excOffset+full);
1444 
1445                         /* return the string length */
1446                         return full;
1447                     } catch (IOException e) {
1448                         throw new ICUUncheckedIOException(e);
1449                     }
1450                 }
1451             }
1452 
1453             if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1454                 return ~c;
1455             }
1456             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
1457                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
1458                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1459             }
1460             if(hasSlot(excWord, EXC_FOLD)) {
1461                 index=EXC_FOLD;
1462             } else if(hasSlot(excWord, EXC_LOWER)) {
1463                 index=EXC_LOWER;
1464             } else {
1465                 return ~c;
1466             }
1467             result=getSlotValue(excWord, index, excOffset2);
1468         }
1469 
1470         return (result==c) ? ~result : result;
1471     }
1472 
1473     /* case mapping properties API ---------------------------------------------- */
1474 
1475     /*
1476      * We need a StringBuilder for multi-code point output from the
1477      * full case mapping functions. However, we do not actually use that output,
1478      * we just check whether the input character was mapped to anything else.
1479      * We use a shared StringBuilder to avoid allocating a new one in each call.
1480      * We remove its contents each time so that it does not grow large over time.
1481      *
         * NOTE(review): java.lang.StringBuilder is not synchronized, and this
         * instance is public, static and mutable, so all users on all threads
         * share one buffer. The contents are never read back, but confirm that
         * concurrent mutation is acceptable for every caller.
         *
1482      * @internal
1483      */
1484     public static final StringBuilder dummyStringBuilder = new StringBuilder();
1485 
hasBinaryProperty(int c, int which)1486     public final boolean hasBinaryProperty(int c, int which) {
1487         switch(which) {
1488         case UProperty.LOWERCASE:
1489             return LOWER==getType(c);
1490         case UProperty.UPPERCASE:
1491             return UPPER==getType(c);
1492         case UProperty.SOFT_DOTTED:
1493             return isSoftDotted(c);
1494         case UProperty.CASE_SENSITIVE:
1495             return isCaseSensitive(c);
1496         case UProperty.CASED:
1497             return NONE!=getType(c);
1498         case UProperty.CASE_IGNORABLE:
1499             return (getTypeOrIgnorable(c)>>2)!=0;
1500         /*
1501          * Note: The following Changes_When_Xyz are defined as testing whether
1502          * the NFD form of the input changes when Xyz-case-mapped.
1503          * However, this simpler implementation of these properties,
1504          * ignoring NFD, passes the tests.
1505          * The implementation needs to be changed if the tests start failing.
1506          * When that happens, optimizations should be used to work with the
1507          * per-single-code point ucase_toFullXyz() functions unless
1508          * the NFD form has more than one code point,
1509          * and the property starts set needs to be the union of the
1510          * start sets for normalization and case mappings.
1511          */
1512         case UProperty.CHANGES_WHEN_LOWERCASED:
1513             dummyStringBuilder.setLength(0);
1514             return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1515         case UProperty.CHANGES_WHEN_UPPERCASED:
1516             dummyStringBuilder.setLength(0);
1517             return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1518         case UProperty.CHANGES_WHEN_TITLECASED:
1519             dummyStringBuilder.setLength(0);
1520             return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1521         /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
1522         case UProperty.CHANGES_WHEN_CASEMAPPED:
1523             dummyStringBuilder.setLength(0);
1524             return
1525                 toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
1526                 toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
1527                 toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1528         default:
1529             return false;
1530         }
1531     }
1532 
1533     // data members -------------------------------------------------------- ***
1534     private int indexes[];  // header index array; readData() rejects data whose indexes[0] is below IX_TOP
1535     private String exceptions;  // exception words, their optional slots and full mapping strings, addressed by char offset
1536     private char unfold[];  // presumably the reverse case folding ("unfold") table; its use is not visible here
1537 
1538     private Trie2_16 trie;  // maps a code point to its case properties word via trie.get(c)
1539 
1540     // data format constants ----------------------------------------------- ***
1541     private static final String DATA_NAME="ucase";
1542     private static final String DATA_TYPE="icu";
1543     private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;  // "ucase.icu", loaded by the constructor through ICUBinary
1544 
1545     /* format "cAsE" */
1546     private static final int FMT=0x63415345;  // ASCII bytes 'c' 'A' 'S' 'E'; passed to ICUBinary.readHeader()
1547 
1548     /* indexes into indexes[] */
1549     //private static final int IX_INDEX_TOP=0;
1550     //private static final int IX_LENGTH=1;
1551     private static final int IX_TRIE_SIZE=2;
1552     private static final int IX_EXC_LENGTH=3;
1553     private static final int IX_UNFOLD_LENGTH=4;
1554 
1555     //private static final int IX_MAX_FULL_LENGTH=15;
1556     private static final int IX_TOP=16;  // minimum accepted value of indexes[0]; smaller values make readData() throw
1557 
1558     // definitions for 16-bit case properties word ------------------------- ***
1559 
getTrie()1560     static Trie2_16 getTrie() {
1561         return INSTANCE.trie;
1562     }
1563 
1564     /* 2-bit constants for types of cased characters */
1565     public static final int TYPE_MASK=3;  // selects the low two bits of a props word, see getTypeFromProps()
1566     public static final int NONE=0;  // not cased: hasBinaryProperty(CASED) tests getType(c)!=NONE
1567     public static final int LOWER=1;
1568     public static final int UPPER=2;  // UPPER and TITLE both have bit 1 set, which isUpperOrTitleFromProps() tests
1569     public static final int TITLE=3;
1570 
1571     /** @return NONE, LOWER, UPPER, TITLE */
getTypeFromProps(int props)1572     static final int getTypeFromProps(int props) {
1573         return props&TYPE_MASK;
1574     }
1575 
1576     /** @return like getTypeFromProps() but also sets IGNORABLE if props indicate case-ignorable */
getTypeAndIgnorableFromProps(int props)1577     private static final int getTypeAndIgnorableFromProps(int props) {
1578         return props&7;
1579     }
1580 
isUpperOrTitleFromProps(int props)1581     static final boolean isUpperOrTitleFromProps(int props) {
1582         return (props & 2) != 0;
1583     }
1584 
1585     static final int IGNORABLE=4;  // bit 2 of the props word; hasBinaryProperty(CASE_IGNORABLE) tests (getTypeOrIgnorable(c)>>2)!=0
1586     private static final int EXCEPTION=     8;  // presumably the bit tested by propsHasException(); that helper is not visible here
1587     private static final int SENSITIVE=     0x10;  // presumably the case-sensitive flag; its reader is not visible here
1588 
1589     private static final int DOT_MASK=      0x60;  // the two bits that hold one of the dot/accent values below
1590     //private static final int NO_DOT=        0;      /* normal characters with cc=0 */
1591     private static final int SOFT_DOTTED=   0x20;   /* soft-dotted characters with cc=0 */
1592     private static final int ABOVE=         0x40;   /* "above" accents with cc=230 */
1593     private static final int OTHER_ACCENT=  0x60;   /* other accent character (0<cc!=230) */
1594 
1595     /* no exception: bits 15..7 are a 9-bit signed case mapping delta */
1596     private static final int DELTA_SHIFT=   7;  // getDelta() shifts the sign-extended 16-bit props word right by this amount
1597     //private static final int DELTA_MASK=    0xff80;
1598     //private static final int MAX_DELTA=     0xff;
1599     //private static final int MIN_DELTA=     (-MAX_DELTA-1);
1600 
getDelta(int props)1601     static final int getDelta(int props) {
1602         return (short)props>>DELTA_SHIFT;
1603     }
1604 
1605     /* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
1606     private static final int EXC_SHIFT=     4;
1607     //private static final int EXC_MASK=      0xfff0;
1608     //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);
1609 
1610     /* definitions for 16-bit main exceptions word ------------------------------ */
1611 
1612     /* first 8 bits indicate values in optional slots */
1613     private static final int EXC_LOWER=0;  // toFullFolding() falls back to this slot when there is no EXC_FOLD slot
1614     private static final int EXC_FOLD=1;  // preferred slot for the simple case folding in toFullFolding()
1615     private static final int EXC_UPPER=2;
1616     private static final int EXC_TITLE=3;
1617     private static final int EXC_DELTA=4;  // unsigned delta; added to or subtracted from c depending on EXC_DELTA_IS_NEGATIVE
1618     //private static final int EXC_5=5;           /* reserved */
1619     private static final int EXC_CLOSURE=6;
1620     private static final int EXC_FULL_MAPPINGS=7;  // slot holding the lengths word; the full mapping strings follow it
1621     //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */
1622 
1623     /* each slot is 2 uint16_t instead of 1 */
1624     private static final int EXC_DOUBLE_SLOTS=          0x100;
1625 
1626     private static final int EXC_NO_SIMPLE_CASE_FOLDING=0x200;  // when set, toFullFolding() returns ~c instead of reading a simple folding slot
1627     private static final int EXC_DELTA_IS_NEGATIVE=0x400;  // when set, toFullFolding() computes c-delta rather than c+delta
1628     private static final int EXC_SENSITIVE=0x800;
1629 
1630     /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
1631     private static final int EXC_DOT_SHIFT=7;  // 0x60<<7 is 0x3000, the commented-out EXC_DOT_MASK value below
1632 
1633     /* normally stored in the main word, but pushed out for larger exception indexes */
1634     //private static final int EXC_DOT_MASK=              0x3000;
1635     //private static final int EXC_NO_DOT=                0;
1636     //private static final int EXC_SOFT_DOTTED=           0x1000;
1637     //private static final int EXC_ABOVE=                 0x2000; /* "above" accents with cc=230 */
1638     //private static final int EXC_OTHER_ACCENT=          0x3000; /* other character (0<cc!=230) */
1639 
1640     /* complex/conditional mappings */
1641     private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
1642     private static final int EXC_CONDITIONAL_FOLD=      0x8000;  // toFullFolding() then uses hardcoded mappings for U+0049 and U+0130
1643 
1644     /* definitions for lengths word for full case mappings */
1645     private static final int FULL_LOWER=    0xf;  // low nibble: length of the lowercase string, which toFullFolding() skips to reach the folding string
1646     //private static final int FULL_FOLDING=  0xf0;
1647     //private static final int FULL_UPPER=    0xf00;
1648     //private static final int FULL_TITLE=    0xf000;
1649 
1650     /* maximum lengths */
1651     //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
1652     private static final int CLOSURE_MAX_LENGTH=0xf;
1653 
1654     /* constants for reverse case folding ("unfold") data */
1655     private static final int UNFOLD_ROWS=0;  // NOTE(review): these three look like indexes into the unfold[] header; confirm against the reader, which is not visible here
1656     private static final int UNFOLD_ROW_WIDTH=1;
1657     private static final int UNFOLD_STRING_WIDTH=2;
1658 
/*
 * public singleton instance
 */
public static final UCaseProps INSTANCE;

// Keep this static initializer block after
// the other static member initialization.
static {
    UCaseProps loaded;
    try {
        loaded=new UCaseProps();
    } catch (IOException e) {
        // the constructor's checked exception is rethrown unchecked, with its cause
        throw new ICUUncheckedIOException(e);
    }
    INSTANCE=loaded;
}
1673 }
1674