• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 /*
5  *******************************************************************************
6  *
7  *   Copyright (C) 2004-2015, International Business Machines
8  *   Corporation and others.  All Rights Reserved.
9  *
10  *******************************************************************************
11  *   file name:  UCaseProps.java
12  *   encoding:   US-ASCII
13  *   tab size:   8 (not used)
14  *   indentation:4
15  *
16  *   created on: 2005jan29
17  *   created by: Markus W. Scherer
18  *
19  *   Low-level Unicode character/string case mapping code.
20  *   Java port of ucase.h/.c.
21  */
22 
23 package android.icu.impl;
24 
25 import java.io.IOException;
26 import java.nio.ByteBuffer;
27 import java.util.Iterator;
28 import java.util.Locale;
29 
30 import android.icu.lang.UCharacter;
31 import android.icu.lang.UProperty;
32 import android.icu.text.UTF16;
33 import android.icu.text.UnicodeSet;
34 import android.icu.util.ICUUncheckedIOException;
35 import android.icu.util.ULocale;
36 
37 /**
38  * @hide Only a subset of ICU is exposed in Android
39  */
40 public final class UCaseProps {
41 
42     // constructors etc. --------------------------------------------------- ***
43 
44     // port of ucase_openProps()
UCaseProps()45     private UCaseProps() throws IOException {
46         ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME);
47         readData(bytes);
48     }
49 
readData(ByteBuffer bytes)50     private final void readData(ByteBuffer bytes) throws IOException {
51         // read the header
52         ICUBinary.readHeader(bytes, FMT, new IsAcceptable());
53 
54         // read indexes[]
55         int count=bytes.getInt();
56         if(count<IX_TOP) {
57             throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
58         }
59         indexes=new int[count];
60 
61         indexes[0]=count;
62         for(int i=1; i<count; ++i) {
63             indexes[i]=bytes.getInt();
64         }
65 
66         // read the trie
67         trie=Trie2_16.createFromSerialized(bytes);
68         int expectedTrieLength=indexes[IX_TRIE_SIZE];
69         int trieLength=trie.getSerializedLength();
70         if(trieLength>expectedTrieLength) {
71             throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
72         }
73         // skip padding after trie bytes
74         ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength);
75 
76         // read exceptions[]
77         count=indexes[IX_EXC_LENGTH];
78         if(count>0) {
79             exceptions=ICUBinary.getString(bytes, count, 0);
80         }
81 
82         // read unfold[]
83         count=indexes[IX_UNFOLD_LENGTH];
84         if(count>0) {
85             unfold=ICUBinary.getChars(bytes, count, 0);
86         }
87     }
88 
89     // implement ICUBinary.Authenticate
90     private final static class IsAcceptable implements ICUBinary.Authenticate {
91         @Override
isDataVersionAcceptable(byte version[])92         public boolean isDataVersionAcceptable(byte version[]) {
93             return version[0]==4;
94         }
95     }
96 
97     // set of property starts for UnicodeSet ------------------------------- ***
98 
addPropertyStarts(UnicodeSet set)99     public final void addPropertyStarts(UnicodeSet set) {
100         /* add the start code point of each same-value range of the trie */
101         Iterator<Trie2.Range> trieIterator=trie.iterator();
102         Trie2.Range range;
103         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
104             set.add(range.startCodePoint);
105         }
106 
107         /* add code points with hardcoded properties, plus the ones following them */
108 
109         /* (none right now, see comment below) */
110 
111         /*
112          * Omit code points with hardcoded specialcasing properties
113          * because we do not build property UnicodeSets for them right now.
114          */
115     }
116 
117     // data access primitives ---------------------------------------------- ***
getExceptionsOffset(int props)118     private static final int getExceptionsOffset(int props) {
119         return props>>EXC_SHIFT;
120     }
121 
propsHasException(int props)122     static final boolean propsHasException(int props) {
123         return (props&EXCEPTION)!=0;
124     }
125 
126     /*
         * Bit-count lookup table: flagsOffset[b] is the number of 1 bits in the 8-bit value b.
         * slotOffset() indexes it with the flag bits below a slot index,
         * which yields the number of slots present before that slot.
         */
127     private static final byte flagsOffset[/*256*/]={
128         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
129         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
130         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
131         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
132         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
133         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
134         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
135         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
136         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
137         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
138         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
139         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
140         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
141         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
142         3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
143         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
144     };
145 
hasSlot(int flags, int index)146     private static final boolean hasSlot(int flags, int index) {
147         return (flags&(1<<index))!=0;
148     }
slotOffset(int flags, int index)149     private static final byte slotOffset(int flags, int index) {
150         return flagsOffset[flags&((1<<index)-1)];
151     }
152 
153     /*
154      * Get the value of an optional-value slot where hasSlot(excWord, index).
155      *
156      * @param excWord (in) initial exceptions word
157      * @param index (in) desired slot index
158      * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
159      * @return bits 31..0: slot value
160      *             63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
161      */
getSlotValueAndOffset(int excWord, int index, int excOffset)162     private final long getSlotValueAndOffset(int excWord, int index, int excOffset) {
163         long value;
164         if((excWord&EXC_DOUBLE_SLOTS)==0) {
165             excOffset+=slotOffset(excWord, index);
166             value=exceptions.charAt(excOffset);
167         } else {
168             excOffset+=2*slotOffset(excWord, index);
169             value=exceptions.charAt(excOffset++);
170             value=(value<<16)|exceptions.charAt(excOffset);
171         }
172         return value |((long)excOffset<<32);
173     }
174 
175     /* same as getSlotValueAndOffset() but does not return the slot offset */
getSlotValue(int excWord, int index, int excOffset)176     private final int getSlotValue(int excWord, int index, int excOffset) {
177         int value;
178         if((excWord&EXC_DOUBLE_SLOTS)==0) {
179             excOffset+=slotOffset(excWord, index);
180             value=exceptions.charAt(excOffset);
181         } else {
182             excOffset+=2*slotOffset(excWord, index);
183             value=exceptions.charAt(excOffset++);
184             value=(value<<16)|exceptions.charAt(excOffset);
185         }
186         return value;
187     }
188 
189     // simple case mappings ------------------------------------------------ ***
190 
tolower(int c)191     public final int tolower(int c) {
192         int props=trie.get(c);
193         if(!propsHasException(props)) {
194             if(isUpperOrTitleFromProps(props)) {
195                 c+=getDelta(props);
196             }
197         } else {
198             int excOffset=getExceptionsOffset(props);
199             int excWord=exceptions.charAt(excOffset++);
200             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
201                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
202                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
203             }
204             if(hasSlot(excWord, EXC_LOWER)) {
205                 c=getSlotValue(excWord, EXC_LOWER, excOffset);
206             }
207         }
208         return c;
209     }
210 
toupper(int c)211     public final int toupper(int c) {
212         int props=trie.get(c);
213         if(!propsHasException(props)) {
214             if(getTypeFromProps(props)==LOWER) {
215                 c+=getDelta(props);
216             }
217         } else {
218             int excOffset=getExceptionsOffset(props);
219             int excWord=exceptions.charAt(excOffset++);
220             if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
221                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
222                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
223             }
224             if(hasSlot(excWord, EXC_UPPER)) {
225                 c=getSlotValue(excWord, EXC_UPPER, excOffset);
226             }
227         }
228         return c;
229     }
230 
totitle(int c)231     public final int totitle(int c) {
232         int props=trie.get(c);
233         if(!propsHasException(props)) {
234             if(getTypeFromProps(props)==LOWER) {
235                 c+=getDelta(props);
236             }
237         } else {
238             int excOffset=getExceptionsOffset(props);
239             int excWord=exceptions.charAt(excOffset++);
240             if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
241                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
242                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
243             }
244             int index;
245             if(hasSlot(excWord, EXC_TITLE)) {
246                 index=EXC_TITLE;
247             } else if(hasSlot(excWord, EXC_UPPER)) {
248                 index=EXC_UPPER;
249             } else {
250                 return c;
251             }
252             c=getSlotValue(excWord, index, excOffset);
253         }
254         return c;
255     }
256 
257     /**
258      * Adds all simple case mappings and the full case folding for c to sa,
259      * and also adds special case closure mappings.
260      * c itself is not added.
261      * For example, the mappings
262      * - for s include long s
263      * - for sharp s include ss
264      * - for k include the Kelvin sign
265      */
addCaseClosure(int c, UnicodeSet set)266     public final void addCaseClosure(int c, UnicodeSet set) {
267         /*
268          * Hardcode the case closure of i and its relatives and ignore the
269          * data file data for these characters.
270          * The Turkic dotless i and dotted I with their case mapping conditions
271          * and case folding option make the related characters behave specially.
272          * This code matches their closure behavior to their case folding behavior.
273          */
274 
275         switch(c) {
276         case 0x49:
277             /* regular i and I are in one equivalence class */
278             set.add(0x69);
279             return;
280         case 0x69:
281             set.add(0x49);
282             return;
283         case 0x130:
284             /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
285             set.add(iDot);
286             return;
287         case 0x131:
288             /* dotless i is in a class by itself */
289             return;
290         default:
291             /* otherwise use the data file data */
292             break;
293         }
294 
295         int props=trie.get(c);
296         if(!propsHasException(props)) {
297             if(getTypeFromProps(props)!=NONE) {
298                 /* add the one simple case mapping, no matter what type it is */
299                 int delta=getDelta(props);
300                 if(delta!=0) {
301                     set.add(c+delta);
302                 }
303             }
304         } else {
305             /*
306              * c has exceptions, so there may be multiple simple and/or
307              * full case mappings. Add them all.
308              */
309             int excOffset0, excOffset=getExceptionsOffset(props);
310             int closureOffset;
311             int excWord=exceptions.charAt(excOffset++);
312             int index, closureLength, fullLength, length;
313 
314             excOffset0=excOffset;
315 
316             /* add all simple case mappings */
317             for(index=EXC_LOWER; index<=EXC_TITLE; ++index) {
318                 if(hasSlot(excWord, index)) {
319                     excOffset=excOffset0;
320                     c=getSlotValue(excWord, index, excOffset);
321                     set.add(c);
322                 }
323             }
324             if(hasSlot(excWord, EXC_DELTA)) {
325                 excOffset=excOffset0;
326                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
327                 set.add((excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
328             }
329 
330             /* get the closure string pointer & length */
331             if(hasSlot(excWord, EXC_CLOSURE)) {
332                 excOffset=excOffset0;
333                 long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset);
334                 closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */
335                 closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */
336             } else {
337                 closureLength=0;
338                 closureOffset=0;
339             }
340 
341             /* add the full case folding */
342             if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
343                 excOffset=excOffset0;
344                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
345                 fullLength=(int)value;
346 
347                 /* start of full case mapping strings */
348                 excOffset=(int)(value>>32)+1;
349 
350                 fullLength&=0xffff; /* bits 16 and higher are reserved */
351 
352                 /* skip the lowercase result string */
353                 excOffset+=fullLength&FULL_LOWER;
354                 fullLength>>=4;
355 
356                 /* add the full case folding string */
357                 length=fullLength&0xf;
358                 if(length!=0) {
359                     set.add(exceptions.substring(excOffset, excOffset+length));
360                     excOffset+=length;
361                 }
362 
363                 /* skip the uppercase and titlecase strings */
364                 fullLength>>=4;
365                 excOffset+=fullLength&0xf;
366                 fullLength>>=4;
367                 excOffset+=fullLength;
368 
369                 closureOffset=excOffset; /* behind full case mappings */
370             }
371 
372             /* add each code point in the closure string */
373             int limit=closureOffset+closureLength;
374             for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
375                 c=exceptions.codePointAt(index);
376                 set.add(c);
377             }
378         }
379     }
380 
381     /*
382      * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated
383      * must be s.length()>0 and max>0 and s.length()<=max
384      */
strcmpMax(String s, int unfoldOffset, int max)385     private final int strcmpMax(String s, int unfoldOffset, int max) {
386         int i1, length, c1, c2;
387 
388         length=s.length();
389         max-=length; /* we require length<=max, so no need to decrement max in the loop */
390         i1=0;
391         do {
392             c1=s.charAt(i1++);
393             c2=unfold[unfoldOffset++];
394             if(c2==0) {
395                 return 1; /* reached the end of t but not of s */
396             }
397             c1-=c2;
398             if(c1!=0) {
399                 return c1; /* return difference result */
400             }
401         } while(--length>0);
402         /* ends with length==0 */
403 
404         if(max==0 || unfold[unfoldOffset]==0) {
405             return 0; /* equal to length of both strings */
406         } else {
407             return -max; /* return lengh difference */
408         }
409     }
410 
411     /**
412      * Maps the string to single code points and adds the associated case closure
413      * mappings.
414      * The string is mapped to code points if it is their full case folding string.
415      * In other words, this performs a reverse full case folding and then
416      * adds the case closure items of the resulting code points.
417      * If the string is found and its closure applied, then
418      * the string itself is added as well as part of its code points' closure.
419      *
420      * @return true if the string was found
421      */
addStringCaseClosure(String s, UnicodeSet set)422     public final boolean addStringCaseClosure(String s, UnicodeSet set) {
423         int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth;
424 
425         if(unfold==null || s==null) {
426             return false; /* no reverse case folding data, or no string */
427         }
428         length=s.length();
429         if(length<=1) {
430             /* the string is too short to find any match */
431             /*
432              * more precise would be:
433              * if(!u_strHasMoreChar32Than(s, length, 1))
434              * but this does not make much practical difference because
435              * a single supplementary code point would just not be found
436              */
437             return false;
438         }
439 
440         unfoldRows=unfold[UNFOLD_ROWS];
441         unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH];
442         unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH];
443         //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth;
444 
445         if(length>unfoldStringWidth) {
446             /* the string is too long to find any match */
447             return false;
448         }
449 
450         /* do a binary search for the string */
451         start=0;
452         limit=unfoldRows;
453         while(start<limit) {
454             i=(start+limit)/2;
455             unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above
456             result=strcmpMax(s, unfoldOffset, unfoldStringWidth);
457 
458             if(result==0) {
459                 /* found the string: add each code point, and its case closure */
460                 int c;
461 
462                 for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) {
463                     c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i);
464                     set.add(c);
465                     addCaseClosure(c, set);
466                 }
467                 return true;
468             } else if(result<0) {
469                 limit=i;
470             } else /* result>0 */ {
471                 start=i+1;
472             }
473         }
474 
475         return false; /* string not found */
476     }
477 
478     /** @return NONE, LOWER, UPPER, TITLE */
getType(int c)479     public final int getType(int c) {
480         return getTypeFromProps(trie.get(c));
481     }
482 
483     /** @return like getType() but also sets IGNORABLE if c is case-ignorable */
getTypeOrIgnorable(int c)484     public final int getTypeOrIgnorable(int c) {
485         return getTypeAndIgnorableFromProps(trie.get(c));
486     }
487 
488     /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */
getDotType(int c)489     public final int getDotType(int c) {
490         int props=trie.get(c);
491         if(!propsHasException(props)) {
492             return props&DOT_MASK;
493         } else {
494             return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
495         }
496     }
497 
isSoftDotted(int c)498     public final boolean isSoftDotted(int c) {
499         return getDotType(c)==SOFT_DOTTED;
500     }
501 
isCaseSensitive(int c)502     public final boolean isCaseSensitive(int c) {
503         int props=trie.get(c);
504         if(!propsHasException(props)) {
505             return (props&SENSITIVE)!=0;
506         } else {
507             return (exceptions.charAt(getExceptionsOffset(props))&EXC_SENSITIVE)!=0;
508         }
509     }
510 
511     // string casing ------------------------------------------------------- ***
512 
513     /*
514      * These internal functions form the core of string case mappings.
515      * They map single code points to result code points or strings and take
516      * all necessary conditions (context, locale ID, options) into account.
517      *
518      * They do not iterate over the source or write to the destination
519      * so that the same functions are useful for non-standard string storage,
520      * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
521      * For the same reason, the "surrounding text" context is passed in as a
522      * ContextIterator which does not make any assumptions about
523      * the underlying storage.
524      *
525      * This section contains helper functions that check for conditions
526      * in the input text surrounding the current code point
527      * according to SpecialCasing.txt.
528      *
529      * Each helper function gets the index
530      * - after the current code point if it looks at following text
531      * - before the current code point if it looks at preceding text
532      *
533      * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
534      *
535      * Final_Sigma
536      *   C is preceded by a sequence consisting of
537      *     a cased letter and a case-ignorable sequence,
538      *   and C is not followed by a sequence consisting of
539      *     an ignorable sequence and then a cased letter.
540      *
541      * More_Above
542      *   C is followed by one or more characters of combining class 230 (ABOVE)
543      *   in the combining character sequence.
544      *
545      * After_Soft_Dotted
546      *   The last preceding character with combining class of zero before C
547      *   was Soft_Dotted,
548      *   and there is no intervening combining character class 230 (ABOVE).
549      *
550      * Before_Dot
551      *   C is followed by combining dot above (U+0307).
552      *   Any sequence of characters with a combining class that is neither 0 nor 230
553      *   may intervene between the current character and the combining dot above.
554      *
555      * The erratum from 2002-10-31 adds the condition
556      *
557      * After_I
558      *   The last preceding base character was an uppercase I, and there is no
559      *   intervening combining character class 230 (ABOVE).
560      *
561      *   (See Jitterbug 2344 and the comments on After_I below.)
562      *
563      * Helper definitions in Unicode 3.2 UAX 21:
564      *
565      * D1. A character C is defined to be cased
566      *     if it meets any of the following criteria:
567      *
568      *   - The general category of C is Titlecase Letter (Lt)
569      *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
570      *   - Given D = NFD(C), then it is not the case that:
571      *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
572      *     (This third criterion does not add any characters to the list
573      *      for Unicode 3.2. Ignored.)
574      *
575      * D2. A character C is defined to be case-ignorable
576      *     if it meets either of the following criteria:
577      *
578      *   - The general category of C is
579      *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
580      *     Letter Modifier (Lm), or Symbol Modifier (Sk)
581      *   - C is one of the following characters
582      *     U+0027 APOSTROPHE
583      *     U+00AD SOFT HYPHEN (SHY)
584      *     U+2019 RIGHT SINGLE QUOTATION MARK
585      *            (the preferred character for apostrophe)
586      *
587      * D3. A case-ignorable sequence is a sequence of
588      *     zero or more case-ignorable characters.
589      */
590 
591     /**
592      * Iterator for string case mappings, which need to look at the
593      * context (surrounding text) of a given character for conditional mappings.
594      *
595      * The iterator only needs to go backward or forward away from the
596      * character in question. It does not use any indexes on this interface.
597      * It does not support random access or an arbitrary change of
598      * iteration direction.
599      *
600      * The code point being case-mapped itself is never returned by
601      * this iterator.
602      * @hide Only a subset of ICU is exposed in Android
603      */
604     public interface ContextIterator {
605         /**
606          * Reset the iterator for forward or backward iteration.
607          * @param dir >0: Begin iterating forward from the first code point
608          * after the one that is being case-mapped.
609          *            <0: Begin iterating backward from the first code point
610          * before the one that is being case-mapped.
611          */
reset(int dir)612         public void reset(int dir);
613         /**
614          * Iterate and return the next code point, moving in the direction
615          * determined by the reset() call.
616          * @return Next code point, or <0 when the iteration is done.
617          */
next()618         public int next();
619     }
620 
621     /**
622      * Fast case mapping data for ASCII/Latin.
623      * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
624      * Deltas must not cross the ASCII boundary, or else they cannot be easily used
625      * in simple UTF-8 code.
626      */
627     static final class LatinCase {
628         /** Case mapping/folding data for code points up to U+017F. */
629         static final char LIMIT = 0x180;
630         /** U+017F case-folds and uppercases crossing the ASCII boundary. */
631         static final char LONG_S = 0x17f;
632         /** Exception: Complex mapping, or too-large delta. */
633         static final byte EXC = -0x80;
634 
635         /** Deltas for lowercasing for most locales, and default case folding. */
636         static final byte[] TO_LOWER_NORMAL = {
637             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641 
642             0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
643             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
644             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646 
647             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650             0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651 
652             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
653             32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
654             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
655             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
656 
657             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
658             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
659             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
660             EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
661 
662             0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
663             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
664             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
665             1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
666         };
667 
668         /** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
669         static final byte[] TO_LOWER_TR_LT = {
670             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
671             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
672             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
673             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674 
675             0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
676             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
677             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
678             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
679 
680             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
681             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
682             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
683             0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
684 
685             32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
686             32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
687             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
688             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
689 
690             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
691             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
692             1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
693             EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
694 
695             0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
696             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
697             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
698             1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
699         };
700 
        /**
         * Deltas for uppercasing for most locales.
         *
         * The table holds 0x180 entries, written as 16 per row in groups of 0x40.
         * The values suggest that an entry's position is the code point it applies to
         * (for example -32 at 0x61..0x7a, 121 at 0xff) -- TODO confirm against the
         * code that reads this table.
         * 0 means no change; EXC (defined outside this table) marks positions
         * 0xb5, 0xdf, 0x131, 0x149 and 0x17f, which presumably cannot be handled
         * with a simple delta -- verify against the definition of EXC.
         */
        static final byte[] TO_UPPER_NORMAL = {
            /* entries 0x00..0x3f */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            /* entries 0x40..0x7f */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

            /* entries 0x80..0xbf */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            /* entries 0xc0..0xff */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

            /* entries 0x100..0x13f */
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

            /* entries 0x140..0x17f */
            -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
        };
733 
        /**
         * Deltas for uppercasing for tr/az.
         *
         * Same layout as the table for most locales: 0x180 entries, 16 per row,
         * apparently indexed by code point -- TODO confirm against the reader.
         * The only difference visible here is position 0x69, which holds EXC
         * instead of -32; the other EXC positions are again
         * 0xb5, 0xdf, 0x131, 0x149 and 0x17f.
         */
        static final byte[] TO_UPPER_TR = {
            /* entries 0x00..0x3f */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            /* entries 0x40..0x7f; 0x69 is EXC for tr/az */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

            /* entries 0x80..0xbf */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            /* entries 0xc0..0xff */
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

            /* entries 0x100..0x13f */
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

            /* entries 0x140..0x17f */
            -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
        };
766     }
767 
    /**
     * Largest string length that a full case mapping function reports
     * through its int return value.
     *
     * For string case mappings, a single character (a code point) is mapped
     * either to itself (in which case in-place mapping functions do nothing),
     * or to another single code point, or to a string.
     * Aside from the string contents, these are indicated with a single int
     * value as follows:
     * <ul>
     * <li>Mapping to self: Negative values (~self instead of -self to support U+0000)</li>
     * <li>Mapping to another code point: Positive values &gt;MAX_STRING_LENGTH</li>
     * <li>Mapping to a string: The string length (0..MAX_STRING_LENGTH) is
     *     returned. Note that the string result may indeed have zero length.</li>
     * </ul>
     */
    public static final int MAX_STRING_LENGTH=0x1f;
783 
    // Case locale identifiers: the values returned by getCaseLocale() and passed
    // as the caseLocale/loc argument of the full case mapping functions.
    //private static final int LOC_UNKNOWN=0;
    /** Any language without special handling in getCaseLocale(). */
    public static final int LOC_ROOT=1;
    /** Turkish and Azerbaijani: "tr", "az", "tur", "aze". */
    static final int LOC_TURKISH=2;
    /** Lithuanian: "lt", "lit". */
    static final int LOC_LITHUANIAN=3;
    /** Greek: "el", "ell". */
    static final int LOC_GREEK=4;
    /** Dutch: "nl", "nld". */
    public static final int LOC_DUTCH=5;
    /** Armenian: "hy", "hye" (but not "hyw"). */
    static final int LOC_ARMENIAN=6;
791 
getCaseLocale(Locale locale)792     public static final int getCaseLocale(Locale locale) {
793         return getCaseLocale(locale.getLanguage());
794     }
getCaseLocale(ULocale locale)795     public static final int getCaseLocale(ULocale locale) {
796         return getCaseLocale(locale.getLanguage());
797     }
798     /** Accepts both 2- and 3-letter language subtags. */
getCaseLocale(String language)799     private static final int getCaseLocale(String language) {
800         // Check the subtag length to reduce the number of comparisons
801         // for locales without special behavior.
802         // Fastpath for English "en" which is often used for default (=root locale) case mappings,
803         // and for Chinese "zh": Very common but no special case mapping behavior.
804         if(language.length()==2) {
805             if(language.equals("en") || language.charAt(0)>'t') {
806                 return LOC_ROOT;
807             } else if(language.equals("tr") || language.equals("az")) {
808                 return LOC_TURKISH;
809             } else if(language.equals("el")) {
810                 return LOC_GREEK;
811             } else if(language.equals("lt")) {
812                 return LOC_LITHUANIAN;
813             } else if(language.equals("nl")) {
814                 return LOC_DUTCH;
815             } else if(language.equals("hy")) {
816                 return LOC_ARMENIAN;
817             }
818         } else if(language.length()==3) {
819             if(language.equals("tur") || language.equals("aze")) {
820                 return LOC_TURKISH;
821             } else if(language.equals("ell")) {
822                 return LOC_GREEK;
823             } else if(language.equals("lit")) {
824                 return LOC_LITHUANIAN;
825             } else if(language.equals("nld")) {
826                 return LOC_DUTCH;
827             } else if(language.equals("hye")) {  // *not* hyw
828                 return LOC_ARMENIAN;
829             }
830         }
831         return LOC_ROOT;
832     }
833 
834     /* Is followed by {case-ignorable}* cased  ? (dir determines looking forward/backward) */
isFollowedByCasedLetter(ContextIterator iter, int dir)835     private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) {
836         int c;
837 
838         if(iter==null) {
839             return false;
840         }
841 
842         for(iter.reset(dir); (c=iter.next())>=0;) {
843             int type=getTypeOrIgnorable(c);
844             if((type&4)!=0) {
845                 /* case-ignorable, continue with the loop */
846             } else if(type!=NONE) {
847                 return true; /* followed by cased letter */
848             } else {
849                 return false; /* uncased and not case-ignorable */
850             }
851         }
852 
853         return false; /* not followed by cased letter */
854     }
855 
856     /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
isPrecededBySoftDotted(ContextIterator iter)857     private final boolean isPrecededBySoftDotted(ContextIterator iter) {
858         int c;
859         int dotType;
860 
861         if(iter==null) {
862             return false;
863         }
864 
865         for(iter.reset(-1); (c=iter.next())>=0;) {
866             dotType=getDotType(c);
867             if(dotType==SOFT_DOTTED) {
868                 return true; /* preceded by TYPE_i */
869             } else if(dotType!=OTHER_ACCENT) {
870                 return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
871             }
872         }
873 
874         return false; /* not preceded by TYPE_i */
875     }
876 
877     /*
878      * See Jitterbug 2344:
879      * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
880      * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
881      * we made those releases compatible with Unicode 3.2 which had not fixed
882      * a related bug in SpecialCasing.txt.
883      *
884      * From the Jitterbug 2344 text:
885      * ... this bug is listed as a Unicode erratum
886      * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
887      * <quote>
888      * There are two errors in SpecialCasing.txt.
889      * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
890      * 2. An incorrect context definition. Correct as follows:
891      * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
892      * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
893      * ---
894      * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
895      * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
896      * where the context After_I is defined as:
897      * The last preceding base character was an uppercase I, and there is no
898      * intervening combining character class 230 (ABOVE).
899      * </quote>
900      *
901      * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
902      *
903      * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
904      * # This matches the behavior of the canonically equivalent I-dot_above
905      *
906      * See also the description in this place in older versions of uchar.c (revision 1.100).
907      *
908      * Markus W. Scherer 2003-feb-15
909      */
910 
911     /* Is preceded by base character 'I' with no intervening cc=230 ? */
isPrecededBy_I(ContextIterator iter)912     private final boolean isPrecededBy_I(ContextIterator iter) {
913         int c;
914         int dotType;
915 
916         if(iter==null) {
917             return false;
918         }
919 
920         for(iter.reset(-1); (c=iter.next())>=0;) {
921             if(c==0x49) {
922                 return true; /* preceded by I */
923             }
924             dotType=getDotType(c);
925             if(dotType!=OTHER_ACCENT) {
926                 return false; /* preceded by different base character (not I), or intervening cc==230 */
927             }
928         }
929 
930         return false; /* not preceded by I */
931     }
932 
933     /* Is followed by one or more cc==230 ? */
isFollowedByMoreAbove(ContextIterator iter)934     private final boolean isFollowedByMoreAbove(ContextIterator iter) {
935         int c;
936         int dotType;
937 
938         if(iter==null) {
939             return false;
940         }
941 
942         for(iter.reset(1); (c=iter.next())>=0;) {
943             dotType=getDotType(c);
944             if(dotType==ABOVE) {
945                 return true; /* at least one cc==230 following */
946             } else if(dotType!=OTHER_ACCENT) {
947                 return false; /* next base character, no more cc==230 following */
948             }
949         }
950 
951         return false; /* no more cc==230 following */
952     }
953 
954     /* Is followed by a dot above (without cc==230 in between) ? */
isFollowedByDotAbove(ContextIterator iter)955     private final boolean isFollowedByDotAbove(ContextIterator iter) {
956         int c;
957         int dotType;
958 
959         if(iter==null) {
960             return false;
961         }
962 
963         for(iter.reset(1); (c=iter.next())>=0; ) {
964             if(c==0x307) {
965                 return true;
966             }
967             dotType=getDotType(c);
968             if(dotType!=OTHER_ACCENT) {
969                 return false; /* next base character or cc==230 in between */
970             }
971         }
972 
973         return false; /* no dot above following */
974     }
975 
    // Hard-coded result strings for special lowercase mappings that keep or add
    // an explicit U+0307 COMBINING DOT ABOVE; appended to the output by toFullLower()
    // (iDot is also used for the default full case folding of U+0130).
    private static final String
        iDot=       "i\u0307",           // i + dot above
        jDot=       "j\u0307",           // j + dot above
        iOgonekDot= "\u012f\u0307",      // i with ogonek + dot above
        iDotGrave=  "i\u0307\u0300",     // i + dot above + grave
        iDotAcute=  "i\u0307\u0301",     // i + dot above + acute
        iDotTilde=  "i\u0307\u0303";     // i + dot above + tilde
983 
984     /**
985      * Get the full lowercase mapping for c.
986      *
987      * @param c Character to be mapped.
988      * @param iter Character iterator, used for context-sensitive mappings.
989      *             See ContextIterator for details.
990      *             If iter==null then a context-independent result is returned.
991      * @param out If the mapping result is a string, then it is appended to out.
992      * @param caseLocale Case locale value from ucase_getCaseLocale().
993      * @return Output code point or string length, see MAX_STRING_LENGTH.
994      *
995      * @see ContextIterator
996      * @see #MAX_STRING_LENGTH
997      * @hide draft / provisional / internal are hidden on Android
998      */
toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale)999     public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
1000         int result, props;
1001 
1002         result=c;
1003         props=trie.get(c);
1004         if(!propsHasException(props)) {
1005             if(isUpperOrTitleFromProps(props)) {
1006                 result=c+getDelta(props);
1007             }
1008         } else {
1009             int excOffset=getExceptionsOffset(props), excOffset2;
1010             int excWord=exceptions.charAt(excOffset++);
1011             int full;
1012 
1013             excOffset2=excOffset;
1014 
1015             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
1016                 /* use hardcoded conditions and mappings */
1017                 /*
1018                  * Test for conditional mappings first
1019                  *   (otherwise the unconditional default mappings are always taken),
1020                  * then test for characters that have unconditional mappings in SpecialCasing.txt,
1021                  * then get the UnicodeData.txt mappings.
1022                  */
1023                 if( caseLocale==LOC_LITHUANIAN &&
1024                         /* base characters, find accents above */
1025                         (((c==0x49 || c==0x4a || c==0x12e) &&
1026                             isFollowedByMoreAbove(iter)) ||
1027                         /* precomposed with accent above, no need to find one */
1028                         (c==0xcc || c==0xcd || c==0x128))
1029                 ) {
1030                     /*
1031                         # Lithuanian
1032 
1033                         # Lithuanian retains the dot in a lowercase i when followed by accents.
1034 
1035                         # Introduce an explicit dot above when lowercasing capital I's and J's
1036                         # whenever there are more accents above.
1037                         # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
1038 
1039                         0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
1040                         004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
1041                         012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
1042                         00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
1043                         00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
1044                         0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
1045                      */
1046                     try {
1047                         switch(c) {
1048                         case 0x49:  /* LATIN CAPITAL LETTER I */
1049                             out.append(iDot);
1050                             return 2;
1051                         case 0x4a:  /* LATIN CAPITAL LETTER J */
1052                             out.append(jDot);
1053                             return 2;
1054                         case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
1055                             out.append(iOgonekDot);
1056                             return 2;
1057                         case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
1058                             out.append(iDotGrave);
1059                             return 3;
1060                         case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
1061                             out.append(iDotAcute);
1062                             return 3;
1063                         case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
1064                             out.append(iDotTilde);
1065                             return 3;
1066                         default:
1067                             return 0; /* will not occur */
1068                         }
1069                     } catch (IOException e) {
1070                         throw new ICUUncheckedIOException(e);
1071                     }
1072                 /* # Turkish and Azeri */
1073                 } else if(caseLocale==LOC_TURKISH && c==0x130) {
1074                     /*
1075                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1076                         # The following rules handle those cases.
1077 
1078                         0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
1079                         0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
1080                      */
1081                     return 0x69;
1082                 } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
1083                     /*
1084                         # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
1085                         # This matches the behavior of the canonically equivalent I-dot_above
1086 
1087                         0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
1088                         0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
1089                      */
1090                     return 0; /* remove the dot (continue without output) */
1091                 } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
1092                     /*
1093                         # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
1094 
1095                         0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
1096                         0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
1097                      */
1098                     return 0x131;
1099                 } else if(c==0x130) {
1100                     /*
1101                         # Preserve canonical equivalence for I with dot. Turkic is handled below.
1102 
1103                         0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1104                      */
1105                     try {
1106                         out.append(iDot);
1107                         return 2;
1108                     } catch (IOException e) {
1109                         throw new ICUUncheckedIOException(e);
1110                     }
1111                 } else if(  c==0x3a3 &&
1112                             !isFollowedByCasedLetter(iter, 1) &&
1113                             isFollowedByCasedLetter(iter, -1) /* -1=preceded */
1114                 ) {
1115                     /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
1116                     /*
1117                         # Special case for final form of sigma
1118 
1119                         03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
1120                      */
1121                     return 0x3c2; /* greek small final sigma */
1122                 } else {
1123                     /* no known conditional special case mapping, use a normal mapping */
1124                 }
1125             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1126                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1127                 full=(int)value&FULL_LOWER;
1128                 if(full!=0) {
1129                     /* start of full case mapping strings */
1130                     excOffset=(int)(value>>32)+1;
1131 
1132                     try {
1133                         // append the lowercase mapping
1134                         out.append(exceptions, excOffset, excOffset+full);
1135 
1136                         /* return the string length */
1137                         return full;
1138                     } catch (IOException e) {
1139                         throw new ICUUncheckedIOException(e);
1140                     }
1141                 }
1142             }
1143 
1144             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
1145                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
1146                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1147             }
1148             if(hasSlot(excWord, EXC_LOWER)) {
1149                 result=getSlotValue(excWord, EXC_LOWER, excOffset2);
1150             }
1151         }
1152 
1153         return (result==c) ? ~result : result;
1154     }
1155 
1156     /* internal */
toUpperOrTitle(int c, ContextIterator iter, Appendable out, int loc, boolean upperNotTitle)1157     private final int toUpperOrTitle(int c, ContextIterator iter,
1158                                      Appendable out,
1159                                      int loc,
1160                                      boolean upperNotTitle) {
1161         int result;
1162         int props;
1163 
1164         result=c;
1165         props=trie.get(c);
1166         if(!propsHasException(props)) {
1167             if(getTypeFromProps(props)==LOWER) {
1168                 result=c+getDelta(props);
1169             }
1170         } else {
1171             int excOffset=getExceptionsOffset(props), excOffset2;
1172             int excWord=exceptions.charAt(excOffset++);
1173             int full, index;
1174 
1175             excOffset2=excOffset;
1176 
1177             if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
1178                 /* use hardcoded conditions and mappings */
1179                 if(loc==LOC_TURKISH && c==0x69) {
1180                     /*
1181                         # Turkish and Azeri
1182 
1183                         # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1184                         # The following rules handle those cases.
1185 
1186                         # When uppercasing, i turns into a dotted capital I
1187 
1188                         0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
1189                         0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
1190                     */
1191                     return 0x130;
1192                 } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
1193                     /*
1194                         # Lithuanian
1195 
1196                         # Lithuanian retains the dot in a lowercase i when followed by accents.
1197 
1198                         # Remove DOT ABOVE after "i" with upper or titlecase
1199 
1200                         0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
1201                      */
1202                     return 0; /* remove the dot (continue without output) */
1203                 } else if(c==0x0587) {
1204                     // See ICU-13416:
1205                     // և ligature ech-yiwn
1206                     // uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
1207                     // but to ԵՎ=ech+vew in Eastern Armenian.
1208                     try {
1209                         if(loc==LOC_ARMENIAN) {
1210                             out.append(upperNotTitle ? "ԵՎ" : "Եվ");
1211                         } else {
1212                             out.append(upperNotTitle ? "ԵՒ" : "Եւ");
1213                         }
1214                         return 2;
1215                     } catch (IOException e) {
1216                         throw new ICUUncheckedIOException(e);
1217                     }
1218                 } else {
1219                     /* no known conditional special case mapping, use a normal mapping */
1220                 }
1221             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1222                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1223                 full=(int)value&0xffff;
1224 
1225                 /* start of full case mapping strings */
1226                 excOffset=(int)(value>>32)+1;
1227 
1228                 /* skip the lowercase and case-folding result strings */
1229                 excOffset+=full&FULL_LOWER;
1230                 full>>=4;
1231                 excOffset+=full&0xf;
1232                 full>>=4;
1233 
1234                 if(upperNotTitle) {
1235                     full&=0xf;
1236                 } else {
1237                     /* skip the uppercase result string */
1238                     excOffset+=full&0xf;
1239                     full=(full>>4)&0xf;
1240                 }
1241 
1242                 if(full!=0) {
1243                     try {
1244                         // append the result string
1245                         out.append(exceptions, excOffset, excOffset+full);
1246 
1247                         /* return the string length */
1248                         return full;
1249                     } catch (IOException e) {
1250                         throw new ICUUncheckedIOException(e);
1251                     }
1252                 }
1253             }
1254 
1255             if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
1256                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
1257                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1258             }
1259             if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
1260                 index=EXC_TITLE;
1261             } else if(hasSlot(excWord, EXC_UPPER)) {
1262                 /* here, titlecase is same as uppercase */
1263                 index=EXC_UPPER;
1264             } else {
1265                 return ~c;
1266             }
1267             result=getSlotValue(excWord, index, excOffset2);
1268         }
1269 
1270         return (result==c) ? ~result : result;
1271     }
1272 
toFullUpper(int c, ContextIterator iter, Appendable out, int caseLocale)1273     public final int toFullUpper(int c, ContextIterator iter,
1274                                  Appendable out,
1275                                  int caseLocale) {
1276         return toUpperOrTitle(c, iter, out, caseLocale, true);
1277     }
1278 
toFullTitle(int c, ContextIterator iter, Appendable out, int caseLocale)1279     public final int toFullTitle(int c, ContextIterator iter,
1280                                  Appendable out,
1281                                  int caseLocale) {
1282         return toUpperOrTitle(c, iter, out, caseLocale, false);
1283     }
1284 
1285     /* case folding ------------------------------------------------------------- */
1286 
1287     /*
1288      * Case folding is similar to lowercasing.
1289      * The result may be a simple mapping, i.e., a single code point, or
1290      * a full mapping, i.e., a string.
1291      * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
1292      * then only the lowercase mapping is stored.
1293      *
1294      * Some special cases are hardcoded because their conditions cannot be
1295      * parsed and processed from CaseFolding.txt.
1296      *
1297      * Unicode 3.2 CaseFolding.txt specifies for its status field:
1298 
1299     # C: common case folding, common mappings shared by both simple and full mappings.
1300     # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
1301     # S: simple case folding, mappings to single characters where different from F.
1302     # T: special case for uppercase I and dotted uppercase I
1303     #    - For non-Turkic languages, this mapping is normally not used.
1304     #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
1305     #
1306     # Usage:
1307     #  A. To do a simple case folding, use the mappings with status C + S.
1308     #  B. To do a full case folding, use the mappings with status C + F.
1309     #
1310     #    The mappings with status T can be used or omitted depending on the desired case-folding
1311     #    behavior. (The default option is to exclude them.)
1312 
1313      * Unicode 3.2 has 'T' mappings as follows:
1314 
1315     0049; T; 0131; # LATIN CAPITAL LETTER I
1316     0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1317 
1318      * while the default mappings for these code points are:
1319 
1320     0049; C; 0069; # LATIN CAPITAL LETTER I
1321     0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1322 
1323      * U+0130 has no simple case folding (simple-case-folds to itself).
1324      */
1325 
    /**
     * Bit mask for getting just the options from a string compare options word
     * that are relevant for case folding (of a single string or code point).
     *
     * The value 7 keeps the three lowest bits of the options word.
     * Currently only bit 0 for FOLD_CASE_EXCLUDE_SPECIAL_I is in use.
     * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
     * It is conceivable that at some point we might want the option to use only simple case foldings
     * when operating on strings.
     *
     * fold() and toFullFolding() compare the masked options with
     * UCharacter.FOLD_CASE_DEFAULT to choose between the default and the
     * Turkic mappings for U+0049 and U+0130.
     *
     * @hide draft / provisional / internal are hidden on Android
     */
    static final int FOLD_CASE_OPTIONS_MASK = 7;
1338 
1339     /* return the simple case folding mapping for c */
fold(int c, int options)1340     public final int fold(int c, int options) {
1341         int props=trie.get(c);
1342         if(!propsHasException(props)) {
1343             if(isUpperOrTitleFromProps(props)) {
1344                 c+=getDelta(props);
1345             }
1346         } else {
1347             int excOffset=getExceptionsOffset(props);
1348             int excWord=exceptions.charAt(excOffset++);
1349             int index;
1350             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
1351                 /* special case folding mappings, hardcoded */
1352                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
1353                     /* default mappings */
1354                     if(c==0x49) {
1355                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1356                         return 0x69;
1357                     } else if(c==0x130) {
1358                         /* no simple case folding for U+0130 */
1359                         return c;
1360                     }
1361                 } else {
1362                     /* Turkic mappings */
1363                     if(c==0x49) {
1364                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1365                         return 0x131;
1366                     } else if(c==0x130) {
1367                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1368                         return 0x69;
1369                     }
1370                 }
1371             }
1372             if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1373                 return c;
1374             }
1375             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
1376                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset);
1377                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1378             }
1379             if(hasSlot(excWord, EXC_FOLD)) {
1380                 index=EXC_FOLD;
1381             } else if(hasSlot(excWord, EXC_LOWER)) {
1382                 index=EXC_LOWER;
1383             } else {
1384                 return c;
1385             }
1386             c=getSlotValue(excWord, index, excOffset);
1387         }
1388         return c;
1389     }
1390 
1391     /*
1392      * Issue for canonical caseless match (UAX #21):
1393      * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
1394      * canonical equivalence, unlike default-option casefolding.
1395      * For example, I-grave and I + grave fold to strings that are not canonically
1396      * equivalent.
1397      * For more details, see the comment in unorm_compare() in unorm.cpp
1398      * and the intermediate prototype changes for Jitterbug 2021.
1399      * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
1400      *
1401      * This did not get fixed because it appears that it is not possible to fix
1402      * it for uppercase and lowercase characters (I-grave vs. i-grave)
1403      * together in a way that they still fold to common result strings.
1404      */
1405 
toFullFolding(int c, Appendable out, int options)1406     public final int toFullFolding(int c, Appendable out, int options) {
1407         int result;
1408         int props;
1409 
1410         result=c;
1411         props=trie.get(c);
1412         if(!propsHasException(props)) {
1413             if(isUpperOrTitleFromProps(props)) {
1414                 result=c+getDelta(props);
1415             }
1416         } else {
1417             int excOffset=getExceptionsOffset(props), excOffset2;
1418             int excWord=exceptions.charAt(excOffset++);
1419             int full, index;
1420 
1421             excOffset2=excOffset;
1422 
1423             if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
1424                 /* use hardcoded conditions and mappings */
1425                 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
1426                     /* default mappings */
1427                     if(c==0x49) {
1428                         /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1429                         return 0x69;
1430                     } else if(c==0x130) {
1431                         /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1432                         try {
1433                             out.append(iDot);
1434                             return 2;
1435                         } catch (IOException e) {
1436                             throw new ICUUncheckedIOException(e);
1437                         }
1438                     }
1439                 } else {
1440                     /* Turkic mappings */
1441                     if(c==0x49) {
1442                         /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1443                         return 0x131;
1444                     } else if(c==0x130) {
1445                         /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1446                         return 0x69;
1447                     }
1448                 }
1449             } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
1450                 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
1451                 full=(int)value&0xffff;
1452 
1453                 /* start of full case mapping strings */
1454                 excOffset=(int)(value>>32)+1;
1455 
1456                 /* skip the lowercase result string */
1457                 excOffset+=full&FULL_LOWER;
1458                 full=(full>>4)&0xf;
1459 
1460                 if(full!=0) {
1461                     try {
1462                         // append the result string
1463                         out.append(exceptions, excOffset, excOffset+full);
1464 
1465                         /* return the string length */
1466                         return full;
1467                     } catch (IOException e) {
1468                         throw new ICUUncheckedIOException(e);
1469                     }
1470                 }
1471             }
1472 
1473             if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1474                 return ~c;
1475             }
1476             if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
1477                 int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
1478                 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1479             }
1480             if(hasSlot(excWord, EXC_FOLD)) {
1481                 index=EXC_FOLD;
1482             } else if(hasSlot(excWord, EXC_LOWER)) {
1483                 index=EXC_LOWER;
1484             } else {
1485                 return ~c;
1486             }
1487             result=getSlotValue(excWord, index, excOffset2);
1488         }
1489 
1490         return (result==c) ? ~result : result;
1491     }
1492 
1493     /* case mapping properties API ---------------------------------------------- */
1494 
1495     /*
1496      * We need a StringBuilder for multi-code point output from the
1497      * full case mapping functions. However, we do not actually use that output,
1498      * we just check whether the input character was mapped to anything else.
1499      * We use a shared StringBuilder to avoid allocating a new one in each call.
1500      * We remove its contents each time so that it does not grow large over time.
1501      *
1502      * @internal
1503      */
1504     public static final StringBuilder dummyStringBuilder = new StringBuilder();
1505 
hasBinaryProperty(int c, int which)1506     public final boolean hasBinaryProperty(int c, int which) {
1507         switch(which) {
1508         case UProperty.LOWERCASE:
1509             return LOWER==getType(c);
1510         case UProperty.UPPERCASE:
1511             return UPPER==getType(c);
1512         case UProperty.SOFT_DOTTED:
1513             return isSoftDotted(c);
1514         case UProperty.CASE_SENSITIVE:
1515             return isCaseSensitive(c);
1516         case UProperty.CASED:
1517             return NONE!=getType(c);
1518         case UProperty.CASE_IGNORABLE:
1519             return (getTypeOrIgnorable(c)>>2)!=0;
1520         /*
1521          * Note: The following Changes_When_Xyz are defined as testing whether
1522          * the NFD form of the input changes when Xyz-case-mapped.
1523          * However, this simpler implementation of these properties,
1524          * ignoring NFD, passes the tests.
1525          * The implementation needs to be changed if the tests start failing.
1526          * When that happens, optimizations should be used to work with the
1527          * per-single-code point ucase_toFullXyz() functions unless
1528          * the NFD form has more than one code point,
1529          * and the property starts set needs to be the union of the
1530          * start sets for normalization and case mappings.
1531          */
1532         case UProperty.CHANGES_WHEN_LOWERCASED:
1533             dummyStringBuilder.setLength(0);
1534             return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1535         case UProperty.CHANGES_WHEN_UPPERCASED:
1536             dummyStringBuilder.setLength(0);
1537             return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1538         case UProperty.CHANGES_WHEN_TITLECASED:
1539             dummyStringBuilder.setLength(0);
1540             return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1541         /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
1542         case UProperty.CHANGES_WHEN_CASEMAPPED:
1543             dummyStringBuilder.setLength(0);
1544             return
1545                 toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
1546                 toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
1547                 toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
1548         default:
1549             return false;
1550         }
1551     }
1552 
1553     // data members -------------------------------------------------------- ***
1554     private int indexes[];
1555     private String exceptions;
1556     private char unfold[];
1557 
1558     private Trie2_16 trie;
1559 
1560     // data format constants ----------------------------------------------- ***
1561     private static final String DATA_NAME="ucase";
1562     private static final String DATA_TYPE="icu";
1563     private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;
1564 
1565     /* format "cAsE" */
1566     private static final int FMT=0x63415345;
1567 
1568     /* indexes into indexes[] */
1569     //private static final int IX_INDEX_TOP=0;
1570     //private static final int IX_LENGTH=1;
1571     private static final int IX_TRIE_SIZE=2;
1572     private static final int IX_EXC_LENGTH=3;
1573     private static final int IX_UNFOLD_LENGTH=4;
1574 
1575     //private static final int IX_MAX_FULL_LENGTH=15;
1576     private static final int IX_TOP=16;
1577 
1578     // definitions for 16-bit case properties word ------------------------- ***
1579 
getTrie()1580     static Trie2_16 getTrie() {
1581         return INSTANCE.trie;
1582     }
1583 
1584     /* 2-bit constants for types of cased characters */
1585     public static final int TYPE_MASK=3;
1586     public static final int NONE=0;
1587     public static final int LOWER=1;
1588     public static final int UPPER=2;
1589     public static final int TITLE=3;
1590 
1591     /** @return NONE, LOWER, UPPER, TITLE */
getTypeFromProps(int props)1592     static final int getTypeFromProps(int props) {
1593         return props&TYPE_MASK;
1594     }
1595 
1596     /** @return like getTypeFromProps() but also sets IGNORABLE if props indicate case-ignorable */
getTypeAndIgnorableFromProps(int props)1597     private static final int getTypeAndIgnorableFromProps(int props) {
1598         return props&7;
1599     }
1600 
isUpperOrTitleFromProps(int props)1601     static final boolean isUpperOrTitleFromProps(int props) {
1602         return (props & 2) != 0;
1603     }
1604 
1605     static final int IGNORABLE=4;
1606     private static final int EXCEPTION=     8;
1607     private static final int SENSITIVE=     0x10;
1608 
1609     private static final int DOT_MASK=      0x60;
1610     //private static final int NO_DOT=        0;      /* normal characters with cc=0 */
1611     private static final int SOFT_DOTTED=   0x20;   /* soft-dotted characters with cc=0 */
1612     private static final int ABOVE=         0x40;   /* "above" accents with cc=230 */
1613     private static final int OTHER_ACCENT=  0x60;   /* other accent character (0<cc!=230) */
1614 
1615     /* no exception: bits 15..7 are a 9-bit signed case mapping delta */
1616     private static final int DELTA_SHIFT=   7;
1617     //private static final int DELTA_MASK=    0xff80;
1618     //private static final int MAX_DELTA=     0xff;
1619     //private static final int MIN_DELTA=     (-MAX_DELTA-1);
1620 
getDelta(int props)1621     static final int getDelta(int props) {
1622         return (short)props>>DELTA_SHIFT;
1623     }
1624 
1625     /* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
1626     private static final int EXC_SHIFT=     4;
1627     //private static final int EXC_MASK=      0xfff0;
1628     //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);
1629 
1630     /* definitions for 16-bit main exceptions word ------------------------------ */
1631 
1632     /* first 8 bits indicate values in optional slots */
1633     private static final int EXC_LOWER=0;
1634     private static final int EXC_FOLD=1;
1635     private static final int EXC_UPPER=2;
1636     private static final int EXC_TITLE=3;
1637     private static final int EXC_DELTA=4;
1638     //private static final int EXC_5=5;           /* reserved */
1639     private static final int EXC_CLOSURE=6;
1640     private static final int EXC_FULL_MAPPINGS=7;
1641     //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */
1642 
1643     /* each slot is 2 uint16_t instead of 1 */
1644     private static final int EXC_DOUBLE_SLOTS=          0x100;
1645 
1646     private static final int EXC_NO_SIMPLE_CASE_FOLDING=0x200;
1647     private static final int EXC_DELTA_IS_NEGATIVE=0x400;
1648     private static final int EXC_SENSITIVE=0x800;
1649 
1650     /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
1651     private static final int EXC_DOT_SHIFT=7;
1652 
1653     /* normally stored in the main word, but pushed out for larger exception indexes */
1654     //private static final int EXC_DOT_MASK=              0x3000;
1655     //private static final int EXC_NO_DOT=                0;
1656     //private static final int EXC_SOFT_DOTTED=           0x1000;
1657     //private static final int EXC_ABOVE=                 0x2000; /* "above" accents with cc=230 */
1658     //private static final int EXC_OTHER_ACCENT=          0x3000; /* other character (0<cc!=230) */
1659 
1660     /* complex/conditional mappings */
1661     private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
1662     private static final int EXC_CONDITIONAL_FOLD=      0x8000;
1663 
1664     /* definitions for lengths word for full case mappings */
1665     private static final int FULL_LOWER=    0xf;
1666     //private static final int FULL_FOLDING=  0xf0;
1667     //private static final int FULL_UPPER=    0xf00;
1668     //private static final int FULL_TITLE=    0xf000;
1669 
1670     /* maximum lengths */
1671     //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
1672     private static final int CLOSURE_MAX_LENGTH=0xf;
1673 
1674     /* constants for reverse case folding ("unfold") data */
1675     private static final int UNFOLD_ROWS=0;
1676     private static final int UNFOLD_ROW_WIDTH=1;
1677     private static final int UNFOLD_STRING_WIDTH=2;
1678 
1679     /*
1680      * public singleton instance
1681      */
1682     public static final UCaseProps INSTANCE;
1683 
1684     // This static initializer block must be placed after
1685     // other static member initialization
1686     static {
1687         try {
1688             INSTANCE = new UCaseProps();
1689         } catch (IOException e) {
1690             throw new ICUUncheckedIOException(e);
1691         }
1692     }
1693 }
1694