• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 package ohos.global.icu.impl;
5 
6 import java.io.IOException;
7 import java.text.CharacterIterator;
8 import java.util.Locale;
9 
10 import ohos.global.icu.lang.UCharacter;
11 import ohos.global.icu.lang.UCharacterCategory;
12 import ohos.global.icu.text.BreakIterator;
13 import ohos.global.icu.text.Edits;
14 import ohos.global.icu.util.ICUUncheckedIOException;
15 import ohos.global.icu.util.ULocale;
16 
17 /**
18  * @hide exposed on OHOS
19  */
20 public final class CaseMapImpl {
21     /**
22      * Implementation of UCaseProps.ContextIterator, iterates over a String.
23      * See ustrcase.c/utf16_caseContextIterator().
24      * @hide exposed on OHOS
25      */
26     public static final class StringContextIterator implements UCaseProps.ContextIterator {
27         /**
28          * Constructor.
29          * @param src String to iterate over.
30          */
StringContextIterator(CharSequence src)31         public StringContextIterator(CharSequence src) {
32             this.s=src;
33             limit=src.length();
34             cpStart=cpLimit=index=0;
35             dir=0;
36         }
37 
38         /**
39          * Constructor.
40          * @param src String to iterate over.
41          * @param cpStart Start index of the current code point.
42          * @param cpLimit Limit index of the current code point.
43          */
StringContextIterator(CharSequence src, int cpStart, int cpLimit)44         public StringContextIterator(CharSequence src, int cpStart, int cpLimit) {
45             s = src;
46             index = 0;
47             limit = src.length();
48             this.cpStart = cpStart;
49             this.cpLimit = cpLimit;
50             dir = 0;
51         }
52 
53         /**
54          * Set the iteration limit for nextCaseMapCP() to an index within the string.
55          * If the limit parameter is negative or past the string, then the
56          * string length is restored as the iteration limit.
57          *
58          * <p>This limit does not affect the next() function which always
59          * iterates to the very end of the string.
60          *
61          * @param lim The iteration limit.
62          */
setLimit(int lim)63         public void setLimit(int lim) {
64             if(0<=lim && lim<=s.length()) {
65                 limit=lim;
66             } else {
67                 limit=s.length();
68             }
69         }
70 
71         /**
72          * Move to the iteration limit without fetching code points up to there.
73          */
moveToLimit()74         public void moveToLimit() {
75             cpStart=cpLimit=limit;
76         }
77 
78         /**
79          * Iterate forward through the string to fetch the next code point
80          * to be case-mapped, and set the context indexes for it.
81          *
82          * <p>When the iteration limit is reached (and -1 is returned),
83          * getCPStart() will be at the iteration limit.
84          *
85          * <p>Iteration with next() does not affect the position for nextCaseMapCP().
86          *
87          * @return The next code point to be case-mapped, or <0 when the iteration is done.
88          */
nextCaseMapCP()89         public int nextCaseMapCP() {
90             cpStart=cpLimit;
91             if(cpLimit<limit) {
92                 int c=Character.codePointAt(s, cpLimit);
93                 cpLimit+=Character.charCount(c);
94                 return c;
95             } else {
96                 return -1;
97             }
98         }
99 
setCPStartAndLimit(int s, int l)100         public void setCPStartAndLimit(int s, int l) {
101             cpStart = s;
102             cpLimit = l;
103             dir = 0;
104         }
105         /**
106          * Returns the start of the code point that was last returned
107          * by nextCaseMapCP().
108          */
getCPStart()109         public int getCPStart() {
110             return cpStart;
111         }
112 
113         /**
114          * Returns the limit of the code point that was last returned
115          * by nextCaseMapCP().
116          */
getCPLimit()117         public int getCPLimit() {
118             return cpLimit;
119         }
120 
getCPLength()121         public int getCPLength() {
122             return cpLimit-cpStart;
123         }
124 
125         // implement UCaseProps.ContextIterator
126         // The following code is not used anywhere in this private class
127         @Override
reset(int direction)128         public void reset(int direction) {
129             if(direction>0) {
130                 /* reset for forward iteration */
131                 dir=1;
132                 index=cpLimit;
133             } else if(direction<0) {
134                 /* reset for backward iteration */
135                 dir=-1;
136                 index=cpStart;
137             } else {
138                 // not a valid direction
139                 dir=0;
140                 index=0;
141             }
142         }
143 
144         @Override
next()145         public int next() {
146             int c;
147 
148             if(dir>0 && index<s.length()) {
149                 c=Character.codePointAt(s, index);
150                 index+=Character.charCount(c);
151                 return c;
152             } else if(dir<0 && index>0) {
153                 c=Character.codePointBefore(s, index);
154                 index-=Character.charCount(c);
155                 return c;
156             }
157             return -1;
158         }
159 
160         // variables
161         protected CharSequence s;
162         protected int index, limit, cpStart, cpLimit;
163         protected int dir; // 0=initial state  >0=forward  <0=backward
164     }
165 
/**
 * Titlecasing iterator option: getTitleBreakIterator() answers it with a
 * break iterator that treats the whole string as a single segment.
 */
public static final int TITLECASE_WHOLE_STRING = 0x20;
/**
 * Titlecasing iterator option: getTitleBreakIterator() answers it with a
 * sentence break iterator.
 */
public static final int TITLECASE_SENTENCES = 0x40;

/**
 * Bit mask for the titlecasing iterator options bit field.
 * Currently only 3 out of 8 values are used:
 * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES.
 * See stringoptions.h.
 * @hide draft / provisional / internal are hidden on OHOS
 */
private static final int TITLECASE_ITERATOR_MASK = 0xe0;

/**
 * Titlecasing index adjustment option: when set, toTitle() moves the titlecasing
 * index to the next cased character instead of the next
 * letter/number/symbol/private use character.
 */
public static final int TITLECASE_ADJUST_TO_CASED = 0x400;

/**
 * Bit mask for the titlecasing index adjustment options bit set.
 * Currently two bits are defined:
 * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED.
 * See stringoptions.h.
 * @hide draft / provisional / internal are hidden on OHOS
 */
private static final int TITLECASE_ADJUSTMENT_MASK = 0x600;
188 
addTitleAdjustmentOption(int options, int newOption)189     public static int addTitleAdjustmentOption(int options, int newOption) {
190         int adjOptions = options & TITLECASE_ADJUSTMENT_MASK;
191         if (adjOptions !=0 && adjOptions != newOption) {
192             throw new IllegalArgumentException("multiple titlecasing index adjustment options");
193         }
194         return options | newOption;
195     }
196 
// Bit set with one bit per UCharacterCategory value (bit index == category value).
// isLNS() tests a code point's general category against this set: letters (except
// modifier letters, which isLNS() handles separately), numbers, symbols and private use.
private static final int LNS =
        (1 << UCharacterCategory.UPPERCASE_LETTER) |
        (1 << UCharacterCategory.LOWERCASE_LETTER) |
        (1 << UCharacterCategory.TITLECASE_LETTER) |
        // Not MODIFIER_LETTER: We count only cased modifier letters.
        (1 << UCharacterCategory.OTHER_LETTER) |

        (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) |
        (1 << UCharacterCategory.LETTER_NUMBER) |
        (1 << UCharacterCategory.OTHER_NUMBER) |

        (1 << UCharacterCategory.MATH_SYMBOL) |
        (1 << UCharacterCategory.CURRENCY_SYMBOL) |
        (1 << UCharacterCategory.MODIFIER_SYMBOL) |
        (1 << UCharacterCategory.OTHER_SYMBOL) |

        (1 << UCharacterCategory.PRIVATE_USE);
214 
isLNS(int c)215     private static boolean isLNS(int c) {
216         // Letter, number, symbol,
217         // or a private use code point because those are typically used as letters or numbers.
218         // Consider modifier letters only if they are cased.
219         int gc = UCharacterProperty.INSTANCE.getType(c);
220         return ((1 << gc) & LNS) != 0 ||
221                 (gc == UCharacterCategory.MODIFIER_LETTER &&
222                     UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE);
223     }
224 
addTitleIteratorOption(int options, int newOption)225     public static int addTitleIteratorOption(int options, int newOption) {
226         int iterOptions = options & TITLECASE_ITERATOR_MASK;
227         if (iterOptions !=0 && iterOptions != newOption) {
228             throw new IllegalArgumentException("multiple titlecasing iterator options");
229         }
230         return options | newOption;
231     }
232 
getTitleBreakIterator( Locale locale, int options, BreakIterator iter)233     public static BreakIterator getTitleBreakIterator(
234             Locale locale, int options, BreakIterator iter) {
235         options &= TITLECASE_ITERATOR_MASK;
236         if (options != 0 && iter != null) {
237             throw new IllegalArgumentException(
238                     "titlecasing iterator option together with an explicit iterator");
239         }
240         if (iter == null) {
241             switch (options) {
242             case 0:
243                 iter = BreakIterator.getWordInstance(locale);
244                 break;
245             case TITLECASE_WHOLE_STRING:
246                 iter = new WholeStringBreakIterator();
247                 break;
248             case TITLECASE_SENTENCES:
249                 iter = BreakIterator.getSentenceInstance(locale);
250                 break;
251             default:
252                 throw new IllegalArgumentException("unknown titlecasing iterator option");
253             }
254         }
255         return iter;
256     }
257 
getTitleBreakIterator( ULocale locale, int options, BreakIterator iter)258     public static BreakIterator getTitleBreakIterator(
259             ULocale locale, int options, BreakIterator iter) {
260         options &= TITLECASE_ITERATOR_MASK;
261         if (options != 0 && iter != null) {
262             throw new IllegalArgumentException(
263                     "titlecasing iterator option together with an explicit iterator");
264         }
265         if (iter == null) {
266             switch (options) {
267             case 0:
268                 iter = BreakIterator.getWordInstance(locale);
269                 break;
270             case TITLECASE_WHOLE_STRING:
271                 iter = new WholeStringBreakIterator();
272                 break;
273             case TITLECASE_SENTENCES:
274                 iter = BreakIterator.getSentenceInstance(locale);
275                 break;
276             default:
277                 throw new IllegalArgumentException("unknown titlecasing iterator option");
278             }
279         }
280         return iter;
281     }
282 
/**
 * Omit unchanged text when case-mapping with Edits.
 *
 * <p>When this bit is set, appendResult() and appendUnchanged() still record
 * unchanged spans in the Edits object (if there is one) but do not copy the
 * unchanged text to the destination. The String-returning toLower(), toUpper()
 * and toTitle() set it internally for inputs of at most 100 chars and rebuild
 * the result with applyEdits().
 */
public static final int OMIT_UNCHANGED_TEXT = 0x4000;
287 
288     private static final class WholeStringBreakIterator extends BreakIterator {
289         private int length;
290 
notImplemented()291         private static void notImplemented() {
292             throw new UnsupportedOperationException("should not occur");
293         }
294 
295         @Override
first()296         public int first() {
297             return 0;
298         }
299 
300         @Override
last()301         public int last() {
302             notImplemented();
303             return 0;
304         }
305 
306         @Override
next(int n)307         public int next(int n) {
308             notImplemented();
309             return 0;
310         }
311 
312         @Override
next()313         public int next() {
314             return length;
315         }
316 
317         @Override
previous()318         public int previous() {
319             notImplemented();
320             return 0;
321         }
322 
323         @Override
following(int offset)324         public int following(int offset) {
325             notImplemented();
326             return 0;
327         }
328 
329         @Override
current()330         public int current() {
331             notImplemented();
332             return 0;
333         }
334 
335         @Override
getText()336         public CharacterIterator getText() {
337             notImplemented();
338             return null;
339         }
340 
341         @Override
setText(CharacterIterator newText)342         public void setText(CharacterIterator newText) {
343             length = newText.getEndIndex();
344         }
345 
346         @Override
setText(CharSequence newText)347         public void setText(CharSequence newText) {
348             length = newText.length();
349         }
350 
351         @Override
setText(String newText)352         public void setText(String newText) {
353             length = newText.length();
354         }
355     }
356 
appendCodePoint(Appendable a, int c)357     private static int appendCodePoint(Appendable a, int c) throws IOException {
358         if (c <= Character.MAX_VALUE) {
359             a.append((char)c);
360             return 1;
361         } else {
362             a.append((char)(0xd7c0 + (c >> 10)));
363             a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
364             return 2;
365         }
366     }
367 
368     /**
369      * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
370      * @throws IOException
371      */
appendResult(int result, Appendable dest, int cpLength, int options, Edits edits)372     private static void appendResult(int result, Appendable dest,
373             int cpLength, int options, Edits edits) throws IOException {
374         // Decode the result.
375         if (result < 0) {
376             // (not) original code point
377             if (edits != null) {
378                 edits.addUnchanged(cpLength);
379             }
380             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
381                 return;
382             }
383             appendCodePoint(dest, ~result);
384         } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
385             // The mapping has already been appended to result.
386             if (edits != null) {
387                 edits.addReplace(cpLength, result);
388             }
389         } else {
390             // Append the single-code point mapping.
391             int length = appendCodePoint(dest, result);
392             if (edits != null) {
393                 edits.addReplace(cpLength, length);
394             }
395         }
396     }
397 
appendUnchanged(CharSequence src, int start, int length, Appendable dest, int options, Edits edits)398     private static final void appendUnchanged(CharSequence src, int start, int length,
399             Appendable dest, int options, Edits edits) throws IOException {
400         if (length > 0) {
401             if (edits != null) {
402                 edits.addUnchanged(length);
403             }
404             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
405                 return;
406             }
407             dest.append(src, start, start + length);
408         }
409     }
410 
applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits)411     private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) {
412         if (!edits.hasChanges()) {
413             return src.toString();
414         }
415         StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta());
416         for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
417             if (ei.hasChange()) {
418                 int i = ei.replacementIndex();
419                 result.append(replacementChars, i, i + ei.newLength());
420             } else {
421                 int i = ei.sourceIndex();
422                 result.append(src, i, i + ei.oldLength());
423             }
424         }
425         return result.toString();
426     }
427 
// Case properties trie, fetched once. The fast paths in internalToLower() and
// internalToUpper() use it to look up the properties of a single char
// that is at or above LatinCase.LONG_S and below 0xd800.
private static final Trie2_16 CASE_TRIE = UCaseProps.getTrie();
429 
430     /**
431      * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
432      * caseLocale < 0: Case-folds [srcStart..srcLimit[.
433      */
private static void internalToLower(int caseLocale, int options,
        CharSequence src, int srcStart, int srcLimit, StringContextIterator iter,
        Appendable dest, Edits edits) throws IOException {
    // Pick the Latin fast-path table: the normal one for the root locale, for any
    // lowercasing locale other than Turkish/Lithuanian, and for default case folding;
    // the TR/LT table otherwise.
    byte[] latinToLower;
    if (caseLocale == UCaseProps.LOC_ROOT ||
            (caseLocale >= 0 ?
                !(caseLocale == UCaseProps.LOC_TURKISH || caseLocale == UCaseProps.LOC_LITHUANIAN) :
                (options & UCaseProps.FOLD_CASE_OPTIONS_MASK) == UCharacter.FOLD_CASE_DEFAULT)) {
        latinToLower = UCaseProps.LatinCase.TO_LOWER_NORMAL;
    } else {
        latinToLower = UCaseProps.LatinCase.TO_LOWER_TR_LT;
    }
    // prev: start of the pending run of unchanged text that has not been flushed yet.
    // srcIndex: index of the next char to read.
    int prev = srcStart;
    int srcIndex = srcStart;
    outerLoop:
    for (;;) {
        // fast path for simple cases
        char lead;
        for (;;) {
            if (srcIndex >= srcLimit) {
                break outerLoop;
            }
            lead = src.charAt(srcIndex);
            int delta;
            if (lead < UCaseProps.LatinCase.LONG_S) {
                // Table lookup: EXC sends the char to the slow path,
                // 0 means unchanged, anything else is the delta to add.
                byte d = latinToLower[lead];
                if (d == UCaseProps.LatinCase.EXC) { break; }
                ++srcIndex;
                if (d == 0) { continue; }
                delta = d;
            } else if (lead >= 0xd800) {
                break;  // surrogate or higher
            } else {
                // Trie lookup: chars with exception data go to the slow path;
                // chars that are not upper/title or have a zero delta stay unchanged.
                int props = CASE_TRIE.getFromU16SingleLead(lead);
                if (UCaseProps.propsHasException(props)) { break; }
                ++srcIndex;
                if (!UCaseProps.isUpperOrTitleFromProps(props) ||
                        (delta = UCaseProps.getDelta(props)) == 0) {
                    continue;
                }
            }
            // Simple 1:1 mapping: flush the unchanged run before this char
            // (srcIndex has already moved past it), then emit the mapped char.
            lead += delta;
            appendUnchanged(src, prev, srcIndex - 1 - prev, dest, options, edits);
            dest.append(lead);
            if (edits != null) {
                edits.addReplace(1, 1);
            }
            prev = srcIndex;
        }
        // slow path
        int cpStart = srcIndex++;
        char trail;
        int c;
        // Combine a lead surrogate with a following trail surrogate inside the limit;
        // otherwise treat the single char as the code point.
        if (Character.isHighSurrogate(lead) && srcIndex < srcLimit &&
                Character.isLowSurrogate(trail = src.charAt(srcIndex))) {
            c = Character.toCodePoint(lead, trail);
            ++srcIndex;
        } else {
            c = lead;
        }
        // We need to append unchanged text before calling the UCaseProps.toFullXyz() methods
        // because they will sometimes append their mapping to dest,
        // and that must be after copying the previous text.
        appendUnchanged(src, prev, cpStart - prev, dest, options, edits);
        prev = cpStart;
        if (caseLocale >= 0) {
            // Lowercasing: point the (lazily created) context iterator at this code point.
            if (iter == null) {
                iter = new StringContextIterator(src, cpStart, srcIndex);
            } else {
                iter.setCPStartAndLimit(cpStart, srcIndex);
            }
            c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
        } else {
            // caseLocale < 0: case folding.
            c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
        }
        // A negative result is not passed on: prev stays at cpStart, so the code point
        // remains part of the pending unchanged run.
        // NOTE(review): presumably negative means "unchanged" - confirm in UCaseProps.
        if (c >= 0) {
            appendResult(c, dest, srcIndex - cpStart, options, edits);
            prev = srcIndex;
        }
    }
    // Flush whatever unchanged text is still pending.
    appendUnchanged(src, prev, srcIndex - prev, dest, options, edits);
}
516 
/**
 * Uppercases all of {@code src} into {@code dest}, recording changes in {@code edits}
 * when it is not null. Simple 1:1 mappings take a fast path; everything else goes
 * through UCaseProps.toFullUpper().
 */
private static void internalToUpper(int caseLocale, int options,
        CharSequence src, Appendable dest, Edits edits) throws IOException {
    // Created lazily, the first time the slow path needs context.
    StringContextIterator iter = null;
    // Pick the Latin fast-path table; only Turkish gets its own.
    byte[] latinToUpper;
    if (caseLocale == UCaseProps.LOC_TURKISH) {
        latinToUpper = UCaseProps.LatinCase.TO_UPPER_TR;
    } else {
        latinToUpper = UCaseProps.LatinCase.TO_UPPER_NORMAL;
    }
    // prev: start of the pending run of unchanged text that has not been flushed yet.
    // srcIndex: index of the next char to read.
    int prev = 0;
    int srcIndex = 0;
    int srcLength = src.length();
    outerLoop:
    for (;;) {
        // fast path for simple cases
        char lead;
        for (;;) {
            if (srcIndex >= srcLength) {
                break outerLoop;
            }
            lead = src.charAt(srcIndex);
            int delta;
            if (lead < UCaseProps.LatinCase.LONG_S) {
                // Table lookup: EXC sends the char to the slow path,
                // 0 means unchanged, anything else is the delta to add.
                byte d = latinToUpper[lead];
                if (d == UCaseProps.LatinCase.EXC) { break; }
                ++srcIndex;
                if (d == 0) { continue; }
                delta = d;
            } else if (lead >= 0xd800) {
                break;  // surrogate or higher
            } else {
                // Trie lookup: chars with exception data go to the slow path;
                // chars that are not lowercase or have a zero delta stay unchanged.
                int props = CASE_TRIE.getFromU16SingleLead(lead);
                if (UCaseProps.propsHasException(props)) { break; }
                ++srcIndex;
                if (UCaseProps.getTypeFromProps(props) != UCaseProps.LOWER ||
                        (delta = UCaseProps.getDelta(props)) == 0) {
                    continue;
                }
            }
            // Simple 1:1 mapping: flush the unchanged run before this char
            // (srcIndex has already moved past it), then emit the mapped char.
            lead += delta;
            appendUnchanged(src, prev, srcIndex - 1 - prev, dest, options, edits);
            dest.append(lead);
            if (edits != null) {
                edits.addReplace(1, 1);
            }
            prev = srcIndex;
        }
        // slow path
        int cpStart = srcIndex++;
        char trail;
        int c;
        // Combine a lead surrogate with a following trail surrogate;
        // otherwise treat the single char as the code point.
        if (Character.isHighSurrogate(lead) && srcIndex < srcLength &&
                Character.isLowSurrogate(trail = src.charAt(srcIndex))) {
            c = Character.toCodePoint(lead, trail);
            ++srcIndex;
        } else {
            c = lead;
        }
        if (iter == null) {
            iter = new StringContextIterator(src, cpStart, srcIndex);
        } else {
            iter.setCPStartAndLimit(cpStart, srcIndex);
        }
        // We need to append unchanged text before calling UCaseProps.toFullUpper()
        // because it will sometimes append its mapping to dest,
        // and that must be after copying the previous text.
        appendUnchanged(src, prev, cpStart - prev, dest, options, edits);
        prev = cpStart;
        c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
        // A negative result is not passed on: prev stays at cpStart, so the code point
        // remains part of the pending unchanged run.
        // NOTE(review): presumably negative means "unchanged" - confirm in UCaseProps.
        if (c >= 0) {
            appendResult(c, dest, srcIndex - cpStart, options, edits);
            prev = srcIndex;
        }
    }
    // Flush whatever unchanged text is still pending.
    appendUnchanged(src, prev, srcIndex - prev, dest, options, edits);
}
593 
toLower(int caseLocale, int options, CharSequence src)594     public static String toLower(int caseLocale, int options, CharSequence src) {
595         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
596             if (src.length() == 0) {
597                 return src.toString();
598             }
599             // Collect and apply only changes.
600             // Good if no or few changes. Bad (slow) if many changes.
601             Edits edits = new Edits();
602             StringBuilder replacementChars = toLower(
603                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
604             return applyEdits(src, replacementChars, edits);
605         } else {
606             return toLower(caseLocale, options, src,
607                     new StringBuilder(src.length()), null).toString();
608         }
609     }
610 
toLower(int caseLocale, int options, CharSequence src, A dest, Edits edits)611     public static <A extends Appendable> A toLower(int caseLocale, int options,
612             CharSequence src, A dest, Edits edits) {
613         try {
614             if (edits != null) {
615                 edits.reset();
616             }
617             internalToLower(caseLocale, options, src, 0, src.length(), null, dest, edits);
618             return dest;
619         } catch (IOException e) {
620             throw new ICUUncheckedIOException(e);
621         }
622     }
623 
toUpper(int caseLocale, int options, CharSequence src)624     public static String toUpper(int caseLocale, int options, CharSequence src) {
625         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
626             if (src.length() == 0) {
627                 return src.toString();
628             }
629             // Collect and apply only changes.
630             // Good if no or few changes. Bad (slow) if many changes.
631             Edits edits = new Edits();
632             StringBuilder replacementChars = toUpper(
633                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
634             return applyEdits(src, replacementChars, edits);
635         } else {
636             return toUpper(caseLocale, options, src,
637                     new StringBuilder(src.length()), null).toString();
638         }
639     }
640 
toUpper(int caseLocale, int options, CharSequence src, A dest, Edits edits)641     public static <A extends Appendable> A toUpper(int caseLocale, int options,
642             CharSequence src, A dest, Edits edits) {
643         try {
644             if (edits != null) {
645                 edits.reset();
646             }
647             if (caseLocale == UCaseProps.LOC_GREEK) {
648                 return GreekUpper.toUpper(options, src, dest, edits);
649             }
650             internalToUpper(caseLocale, options, src, dest, edits);
651             return dest;
652         } catch (IOException e) {
653             throw new ICUUncheckedIOException(e);
654         }
655     }
656 
toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src)657     public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
658         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
659             if (src.length() == 0) {
660                 return src.toString();
661             }
662             // Collect and apply only changes.
663             // Good if no or few changes. Bad (slow) if many changes.
664             Edits edits = new Edits();
665             StringBuilder replacementChars = toTitle(
666                     caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
667                     new StringBuilder(), edits);
668             return applyEdits(src, replacementChars, edits);
669         } else {
670             return toTitle(caseLocale, options, iter, src,
671                     new StringBuilder(src.length()), null).toString();
672         }
673     }
674 
toTitle( int caseLocale, int options, BreakIterator titleIter, CharSequence src, A dest, Edits edits)675     public static <A extends Appendable> A toTitle(
676             int caseLocale, int options, BreakIterator titleIter,
677             CharSequence src, A dest, Edits edits) {
678         try {
679             if (edits != null) {
680                 edits.reset();
681             }
682 
683             /* set up local variables */
684             StringContextIterator iter = new StringContextIterator(src);
685             int srcLength = src.length();
686             int prev=0;
687             boolean isFirstIndex=true;
688 
689             /* titlecasing loop */
690             while(prev<srcLength) {
691                 /* find next index where to titlecase */
692                 int index;
693                 if(isFirstIndex) {
694                     isFirstIndex=false;
695                     index=titleIter.first();
696                 } else {
697                     index=titleIter.next();
698                 }
699                 if(index==BreakIterator.DONE || index>srcLength) {
700                     index=srcLength;
701                 }
702 
703                 /*
704                  * Segment [prev..index[ into 3 parts:
705                  * a) skipped characters (copy as-is) [prev..titleStart[
706                  * b) first letter (titlecase)              [titleStart..titleLimit[
707                  * c) subsequent characters (lowercase)                 [titleLimit..index[
708                  */
709                 if(prev<index) {
710                     // Find and copy skipped characters [prev..titleStart[
711                     int titleStart=prev;
712                     iter.setLimit(index);
713                     int c=iter.nextCaseMapCP();
714                     if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
715                         // Adjust the titlecasing index to the next cased character,
716                         // or to the next letter/number/symbol/private use.
717                         // Stop with titleStart<titleLimit<=index
718                         // if there is a character to be titlecased,
719                         // or else stop with titleStart==titleLimit==index.
720                         boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0;
721                         while ((toCased ?
722                                     UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) :
723                                         !CaseMapImpl.isLNS(c)) &&
724                                 (c=iter.nextCaseMapCP())>=0) {}
725                         // If c<0 then we have only uncased characters in [prev..index[
726                         // and stopped with titleStart==titleLimit==index.
727                         titleStart=iter.getCPStart();
728                         if (prev < titleStart) {
729                             appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
730                         }
731                     }
732 
733                     if(titleStart<index) {
734                         int titleLimit=iter.getCPLimit();
735                         // titlecase c which is from [titleStart..titleLimit[
736                         c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
737                         appendResult(c, dest, iter.getCPLength(), options, edits);
738 
739                         // Special case Dutch IJ titlecasing
740                         if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
741                             char c1 = src.charAt(titleStart);
742                             if ((c1 == 'i' || c1 == 'I')) {
743                                 char c2 = src.charAt(titleStart+1);
744                                 if (c2 == 'j') {
745                                     dest.append('J');
746                                     if (edits != null) {
747                                         edits.addReplace(1, 1);
748                                     }
749                                     c = iter.nextCaseMapCP();
750                                     titleLimit++;
751                                     assert c == c2;
752                                     assert titleLimit == iter.getCPLimit();
753                                 } else if (c2 == 'J') {
754                                     // Keep the capital J from getting lowercased.
755                                     appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
756                                     c = iter.nextCaseMapCP();
757                                     titleLimit++;
758                                     assert c == c2;
759                                     assert titleLimit == iter.getCPLimit();
760                                 }
761                             }
762                         }
763 
764                         // lowercase [titleLimit..index[
765                         if(titleLimit<index) {
766                             if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
767                                 // Normal operation: Lowercase the rest of the word.
768                                 internalToLower(caseLocale, options,
769                                         src, titleLimit, index, iter, dest, edits);
770                             } else {
771                                 // Optionally just copy the rest of the word unchanged.
772                                 appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
773                             }
774                             iter.moveToLimit();
775                         }
776                     }
777                 }
778 
779                 prev=index;
780             }
781             return dest;
782         } catch (IOException e) {
783             throw new ICUUncheckedIOException(e);
784         }
785     }
786 
fold(int options, CharSequence src)787     public static String fold(int options, CharSequence src) {
788         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
789             if (src.length() == 0) {
790                 return src.toString();
791             }
792             // Collect and apply only changes.
793             // Good if no or few changes. Bad (slow) if many changes.
794             Edits edits = new Edits();
795             StringBuilder replacementChars = fold(
796                     options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
797             return applyEdits(src, replacementChars, edits);
798         } else {
799             return fold(options, src, new StringBuilder(src.length()), null).toString();
800         }
801     }
802 
fold(int options, CharSequence src, A dest, Edits edits)803     public static <A extends Appendable> A fold(int options,
804             CharSequence src, A dest, Edits edits) {
805         try {
806             if (edits != null) {
807                 edits.reset();
808             }
809             internalToLower(-1, options, src, 0, src.length(), null, dest, edits);
810             return dest;
811         } catch (IOException e) {
812             throw new ICUUncheckedIOException(e);
813         }
814     }
815 
816     private static final class GreekUpper {
817         // Data bits.
818         private static final int UPPER_MASK = 0x3ff;
819         private static final int HAS_VOWEL = 0x1000;
820         private static final int HAS_YPOGEGRAMMENI = 0x2000;
821         private static final int HAS_ACCENT = 0x4000;
822         private static final int HAS_DIALYTIKA = 0x8000;
823         // Further bits during data building and processing, not stored in the data map.
824         private static final int HAS_COMBINING_DIALYTIKA = 0x10000;
825         private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000;
826 
827         private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
828         private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
829                 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
830         private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
831 
832         // State bits.
833         private static final int AFTER_CASED = 1;
834         private static final int AFTER_VOWEL_WITH_ACCENT = 2;
835 
836         // Data generated by prototype code, see
837         // http://site.icu-project.org/design/case/greek-upper
838         // TODO: Move this data into ucase.icu.
839         private static final char[] data0370 = {
840             // U+0370..03FF
841             0x0370,  // Ͱ
842             0x0370,  // ͱ
843             0x0372,  // Ͳ
844             0x0372,  // ͳ
845             0,
846             0,
847             0x0376,  // Ͷ
848             0x0376,  // ͷ
849             0,
850             0,
851             0x037A,  // ͺ
852             0x03FD,  // ͻ
853             0x03FE,  // ͼ
854             0x03FF,  // ͽ
855             0,
856             0x037F,  // Ϳ
857             0,
858             0,
859             0,
860             0,
861             0,
862             0,
863             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ά
864             0,
865             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Έ
866             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ή
867             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ί
868             0,
869             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ό
870             0,
871             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ύ
872             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ώ
873             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΐ
874             0x0391 | HAS_VOWEL,  // Α
875             0x0392,  // Β
876             0x0393,  // Γ
877             0x0394,  // Δ
878             0x0395 | HAS_VOWEL,  // Ε
879             0x0396,  // Ζ
880             0x0397 | HAS_VOWEL,  // Η
881             0x0398,  // Θ
882             0x0399 | HAS_VOWEL,  // Ι
883             0x039A,  // Κ
884             0x039B,  // Λ
885             0x039C,  // Μ
886             0x039D,  // Ν
887             0x039E,  // Ξ
888             0x039F | HAS_VOWEL,  // Ο
889             0x03A0,  // Π
890             0x03A1,  // Ρ
891             0,
892             0x03A3,  // Σ
893             0x03A4,  // Τ
894             0x03A5 | HAS_VOWEL,  // Υ
895             0x03A6,  // Φ
896             0x03A7,  // Χ
897             0x03A8,  // Ψ
898             0x03A9 | HAS_VOWEL,  // Ω
899             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // Ϊ
900             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // Ϋ
901             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ά
902             0x0395 | HAS_VOWEL | HAS_ACCENT,  // έ
903             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ή
904             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ί
905             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΰ
906             0x0391 | HAS_VOWEL,  // α
907             0x0392,  // β
908             0x0393,  // γ
909             0x0394,  // δ
910             0x0395 | HAS_VOWEL,  // ε
911             0x0396,  // ζ
912             0x0397 | HAS_VOWEL,  // η
913             0x0398,  // θ
914             0x0399 | HAS_VOWEL,  // ι
915             0x039A,  // κ
916             0x039B,  // λ
917             0x039C,  // μ
918             0x039D,  // ν
919             0x039E,  // ξ
920             0x039F | HAS_VOWEL,  // ο
921             0x03A0,  // π
922             0x03A1,  // ρ
923             0x03A3,  // ς
924             0x03A3,  // σ
925             0x03A4,  // τ
926             0x03A5 | HAS_VOWEL,  // υ
927             0x03A6,  // φ
928             0x03A7,  // χ
929             0x03A8,  // ψ
930             0x03A9 | HAS_VOWEL,  // ω
931             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // ϊ
932             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // ϋ
933             0x039F | HAS_VOWEL | HAS_ACCENT,  // ό
934             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ύ
935             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ώ
936             0x03CF,  // Ϗ
937             0x0392,  // ϐ
938             0x0398,  // ϑ
939             0x03D2,  // ϒ
940             0x03D2 | HAS_ACCENT,  // ϓ
941             0x03D2 | HAS_DIALYTIKA,  // ϔ
942             0x03A6,  // ϕ
943             0x03A0,  // ϖ
944             0x03CF,  // ϗ
945             0x03D8,  // Ϙ
946             0x03D8,  // ϙ
947             0x03DA,  // Ϛ
948             0x03DA,  // ϛ
949             0x03DC,  // Ϝ
950             0x03DC,  // ϝ
951             0x03DE,  // Ϟ
952             0x03DE,  // ϟ
953             0x03E0,  // Ϡ
954             0x03E0,  // ϡ
955             0,
956             0,
957             0,
958             0,
959             0,
960             0,
961             0,
962             0,
963             0,
964             0,
965             0,
966             0,
967             0,
968             0,
969             0x039A,  // ϰ
970             0x03A1,  // ϱ
971             0x03F9,  // ϲ
972             0x037F,  // ϳ
973             0x03F4,  // ϴ
974             0x0395 | HAS_VOWEL,  // ϵ
975             0,
976             0x03F7,  // Ϸ
977             0x03F7,  // ϸ
978             0x03F9,  // Ϲ
979             0x03FA,  // Ϻ
980             0x03FA,  // ϻ
981             0x03FC,  // ϼ
982             0x03FD,  // Ͻ
983             0x03FE,  // Ͼ
984             0x03FF,  // Ͽ
985         };
986 
987         private static final char[] data1F00 = {
988             // U+1F00..1FFF
989             0x0391 | HAS_VOWEL,  // ἀ
990             0x0391 | HAS_VOWEL,  // ἁ
991             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἂ
992             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἃ
993             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἄ
994             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἅ
995             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἆ
996             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἇ
997             0x0391 | HAS_VOWEL,  // Ἀ
998             0x0391 | HAS_VOWEL,  // Ἁ
999             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἂ
1000             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἃ
1001             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἄ
1002             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἅ
1003             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἆ
1004             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἇ
1005             0x0395 | HAS_VOWEL,  // ἐ
1006             0x0395 | HAS_VOWEL,  // ἑ
1007             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἒ
1008             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἓ
1009             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἔ
1010             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἕ
1011             0,
1012             0,
1013             0x0395 | HAS_VOWEL,  // Ἐ
1014             0x0395 | HAS_VOWEL,  // Ἑ
1015             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἒ
1016             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἓ
1017             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἔ
1018             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἕ
1019             0,
1020             0,
1021             0x0397 | HAS_VOWEL,  // ἠ
1022             0x0397 | HAS_VOWEL,  // ἡ
1023             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἢ
1024             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἣ
1025             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἤ
1026             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἥ
1027             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἦ
1028             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἧ
1029             0x0397 | HAS_VOWEL,  // Ἠ
1030             0x0397 | HAS_VOWEL,  // Ἡ
1031             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἢ
1032             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἣ
1033             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἤ
1034             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἥ
1035             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἦ
1036             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἧ
1037             0x0399 | HAS_VOWEL,  // ἰ
1038             0x0399 | HAS_VOWEL,  // ἱ
1039             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἲ
1040             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἳ
1041             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἴ
1042             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἵ
1043             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἶ
1044             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἷ
1045             0x0399 | HAS_VOWEL,  // Ἰ
1046             0x0399 | HAS_VOWEL,  // Ἱ
1047             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἲ
1048             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἳ
1049             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἴ
1050             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἵ
1051             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἶ
1052             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἷ
1053             0x039F | HAS_VOWEL,  // ὀ
1054             0x039F | HAS_VOWEL,  // ὁ
1055             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὂ
1056             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὃ
1057             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὄ
1058             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὅ
1059             0,
1060             0,
1061             0x039F | HAS_VOWEL,  // Ὀ
1062             0x039F | HAS_VOWEL,  // Ὁ
1063             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὂ
1064             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὃ
1065             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὄ
1066             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὅ
1067             0,
1068             0,
1069             0x03A5 | HAS_VOWEL,  // ὐ
1070             0x03A5 | HAS_VOWEL,  // ὑ
1071             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὒ
1072             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὓ
1073             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὔ
1074             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὕ
1075             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὖ
1076             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὗ
1077             0,
1078             0x03A5 | HAS_VOWEL,  // Ὑ
1079             0,
1080             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὓ
1081             0,
1082             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὕ
1083             0,
1084             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὗ
1085             0x03A9 | HAS_VOWEL,  // ὠ
1086             0x03A9 | HAS_VOWEL,  // ὡ
1087             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὢ
1088             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὣ
1089             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὤ
1090             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὥ
1091             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὦ
1092             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὧ
1093             0x03A9 | HAS_VOWEL,  // Ὠ
1094             0x03A9 | HAS_VOWEL,  // Ὡ
1095             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὢ
1096             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὣ
1097             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὤ
1098             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὥ
1099             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὦ
1100             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὧ
1101             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ὰ
1102             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ά
1103             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ὲ
1104             0x0395 | HAS_VOWEL | HAS_ACCENT,  // έ
1105             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ὴ
1106             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ή
1107             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ὶ
1108             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ί
1109             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὸ
1110             0x039F | HAS_VOWEL | HAS_ACCENT,  // ό
1111             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὺ
1112             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ύ
1113             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὼ
1114             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ώ
1115             0,
1116             0,
1117             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾀ
1118             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾁ
1119             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾂ
1120             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾃ
1121             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾄ
1122             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾅ
1123             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾆ
1124             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾇ
1125             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾈ
1126             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾉ
1127             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾊ
1128             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾋ
1129             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾌ
1130             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾍ
1131             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾎ
1132             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾏ
1133             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾐ
1134             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾑ
1135             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾒ
1136             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾓ
1137             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾔ
1138             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾕ
1139             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾖ
1140             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾗ
1141             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾘ
1142             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾙ
1143             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾚ
1144             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾛ
1145             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾜ
1146             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾝ
1147             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾞ
1148             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾟ
1149             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾠ
1150             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾡ
1151             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾢ
1152             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾣ
1153             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾤ
1154             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾥ
1155             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾦ
1156             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾧ
1157             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾨ
1158             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾩ
1159             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾪ
1160             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾫ
1161             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾬ
1162             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾭ
1163             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾮ
1164             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾯ
1165             0x0391 | HAS_VOWEL,  // ᾰ
1166             0x0391 | HAS_VOWEL,  // ᾱ
1167             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾲ
1168             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾳ
1169             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾴ
1170             0,
1171             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ᾶ
1172             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾷ
1173             0x0391 | HAS_VOWEL,  // Ᾰ
1174             0x0391 | HAS_VOWEL,  // Ᾱ
1175             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ὰ
1176             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ά
1177             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾼ
1178             0,
1179             0x0399 | HAS_VOWEL,  // ι
1180             0,
1181             0,
1182             0,
1183             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῂ
1184             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῃ
1185             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῄ
1186             0,
1187             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ῆ
1188             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῇ
1189             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ὲ
1190             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Έ
1191             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ὴ
1192             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ή
1193             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῌ
1194             0,
1195             0,
1196             0,
1197             0x0399 | HAS_VOWEL,  // ῐ
1198             0x0399 | HAS_VOWEL,  // ῑ
1199             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῒ
1200             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΐ
1201             0,
1202             0,
1203             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ῖ
1204             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῗ
1205             0x0399 | HAS_VOWEL,  // Ῐ
1206             0x0399 | HAS_VOWEL,  // Ῑ
1207             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ὶ
1208             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ί
1209             0,
1210             0,
1211             0,
1212             0,
1213             0x03A5 | HAS_VOWEL,  // ῠ
1214             0x03A5 | HAS_VOWEL,  // ῡ
1215             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῢ
1216             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΰ
1217             0x03A1,  // ῤ
1218             0x03A1,  // ῥ
1219             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ῦ
1220             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῧ
1221             0x03A5 | HAS_VOWEL,  // Ῠ
1222             0x03A5 | HAS_VOWEL,  // Ῡ
1223             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὺ
1224             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ύ
1225             0x03A1,  // Ῥ
1226             0,
1227             0,
1228             0,
1229             0,
1230             0,
1231             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῲ
1232             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῳ
1233             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῴ
1234             0,
1235             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ῶ
1236             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῷ
1237             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὸ
1238             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ό
1239             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὼ
1240             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ώ
1241             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῼ
1242             0,
1243             0,
1244             0,
1245         };
1246 
1247         // U+2126 Ohm sign
1248         private static final char data2126 = 0x03A9 | HAS_VOWEL;  // Ω
1249 
getLetterData(int c)1250         private static final int getLetterData(int c) {
1251             if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) {
1252                 return 0;
1253             } else if (c <= 0x3ff) {
1254                 return data0370[c - 0x370];
1255             } else if (c <= 0x1fff) {
1256                 return data1F00[c - 0x1f00];
1257             } else if (c == 0x2126) {
1258                 return data2126;
1259             } else {
1260                 return 0;
1261             }
1262         }
1263 
1264         /**
1265          * Returns a non-zero value for each of the Greek combining diacritics
1266          * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
1267          * plus some perispomeni look-alikes.
1268          */
getDiacriticData(int c)1269         private static final int getDiacriticData(int c) {
1270             switch (c) {
1271             case '\u0300':  // varia
1272             case '\u0301':  // tonos = oxia
1273             case '\u0342':  // perispomeni
1274             case '\u0302':  // circumflex can look like perispomeni
1275             case '\u0303':  // tilde can look like perispomeni
1276             case '\u0311':  // inverted breve can look like perispomeni
1277                 return HAS_ACCENT;
1278             case '\u0308':  // dialytika = diaeresis
1279                 return HAS_COMBINING_DIALYTIKA;
1280             case '\u0344':  // dialytika tonos
1281                 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
1282             case '\u0345':  // ypogegrammeni = iota subscript
1283                 return HAS_YPOGEGRAMMENI;
1284             case '\u0304':  // macron
1285             case '\u0306':  // breve
1286             case '\u0313':  // comma above
1287             case '\u0314':  // reversed comma above
1288             case '\u0343':  // koronis
1289                 return HAS_OTHER_GREEK_DIACRITIC;
1290             default:
1291                 return 0;
1292             }
1293         }
1294 
isFollowedByCasedLetter(CharSequence s, int i)1295         private static boolean isFollowedByCasedLetter(CharSequence s, int i) {
1296             while (i < s.length()) {
1297                 int c = Character.codePointAt(s, i);
1298                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
1299                 if ((type & UCaseProps.IGNORABLE) != 0) {
1300                     // Case-ignorable, continue with the loop.
1301                     i += Character.charCount(c);
1302                 } else if (type != UCaseProps.NONE) {
1303                     return true;  // Followed by cased letter.
1304                 } else {
1305                     return false;  // Uncased and not case-ignorable.
1306                 }
1307             }
1308             return false;  // Not followed by cased letter.
1309         }
1310 
1311         /**
1312          * Greek string uppercasing with a state machine.
1313          * Probably simpler than a stateless function that has to figure out complex context-before
1314          * for each character.
1315          * TODO: Try to re-consolidate one way or another with the non-Greek function.
1316          *
1317          * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
1318          * @throws IOException
1319          */
toUpper(int options, CharSequence src, A dest, Edits edits)1320         private static <A extends Appendable> A toUpper(int options,
1321                 CharSequence src, A dest, Edits edits) throws IOException {
1322             int state = 0;
1323             for (int i = 0; i < src.length();) {
1324                 int c = Character.codePointAt(src, i);
1325                 int nextIndex = i + Character.charCount(c);
1326                 int nextState = 0;
1327                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
1328                 if ((type & UCaseProps.IGNORABLE) != 0) {
1329                     // c is case-ignorable
1330                     nextState |= (state & AFTER_CASED);
1331                 } else if (type != UCaseProps.NONE) {
1332                     // c is cased
1333                     nextState |= AFTER_CASED;
1334                 }
1335                 int data = getLetterData(c);
1336                 if (data > 0) {
1337                     int upper = data & UPPER_MASK;
1338                     // Add a dialytika to this iota or ypsilon vowel
1339                     // if we removed a tonos from the previous vowel,
1340                     // and that previous vowel did not also have (or gain) a dialytika.
1341                     // Adding one only to the final vowel in a longer sequence
1342                     // (which does not occur in normal writing) would require lookahead.
1343                     // Set the same flag as for preserving an existing dialytika.
1344                     if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
1345                             (upper == 'Ι' || upper == 'Υ')) {
1346                         data |= HAS_DIALYTIKA;
1347                     }
1348                     int numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
1349                     if ((data & HAS_YPOGEGRAMMENI) != 0) {
1350                         numYpogegrammeni = 1;
1351                     }
1352                     // Skip combining diacritics after this Greek letter.
1353                     while (nextIndex < src.length()) {
1354                         int diacriticData = getDiacriticData(src.charAt(nextIndex));
1355                         if (diacriticData != 0) {
1356                             data |= diacriticData;
1357                             if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
1358                                 ++numYpogegrammeni;
1359                             }
1360                             ++nextIndex;
1361                         } else {
1362                             break;  // not a Greek diacritic
1363                         }
1364                     }
1365                     if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
1366                         nextState |= AFTER_VOWEL_WITH_ACCENT;
1367                     }
1368                     // Map according to Greek rules.
1369                     boolean addTonos = false;
1370                     if (upper == 'Η' &&
1371                             (data & HAS_ACCENT) != 0 &&
1372                             numYpogegrammeni == 0 &&
1373                             (state & AFTER_CASED) == 0 &&
1374                             !isFollowedByCasedLetter(src, nextIndex)) {
1375                         // Keep disjunctive "or" with (only) a tonos.
1376                         // We use the same "word boundary" conditions as for the Final_Sigma test.
1377                         if (i == nextIndex) {
1378                             upper = 'Ή';  // Preserve the precomposed form.
1379                         } else {
1380                             addTonos = true;
1381                         }
1382                     } else if ((data & HAS_DIALYTIKA) != 0) {
1383                         // Preserve a vowel with dialytika in precomposed form if it exists.
1384                         if (upper == 'Ι') {
1385                             upper = 'Ϊ';
1386                             data &= ~HAS_EITHER_DIALYTIKA;
1387                         } else if (upper == 'Υ') {
1388                             upper = 'Ϋ';
1389                             data &= ~HAS_EITHER_DIALYTIKA;
1390                         }
1391                     }
1392 
1393                     boolean change;
1394                     if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
1395                         change = true;  // common, simple usage
1396                     } else {
1397                         // Find out first whether we are changing the text.
1398                         change = src.charAt(i) != upper || numYpogegrammeni > 0;
1399                         int i2 = i + 1;
1400                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
1401                             change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
1402                             ++i2;
1403                         }
1404                         if (addTonos) {
1405                             change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
1406                             ++i2;
1407                         }
1408                         int oldLength = nextIndex - i;
1409                         int newLength = (i2 - i) + numYpogegrammeni;
1410                         change |= oldLength != newLength;
1411                         if (change) {
1412                             if (edits != null) {
1413                                 edits.addReplace(oldLength, newLength);
1414                             }
1415                         } else {
1416                             if (edits != null) {
1417                                 edits.addUnchanged(oldLength);
1418                             }
1419                             // Write unchanged text?
1420                             change = (options & OMIT_UNCHANGED_TEXT) == 0;
1421                         }
1422                     }
1423 
1424                     if (change) {
1425                         dest.append((char)upper);
1426                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
1427                             dest.append('\u0308');  // restore or add a dialytika
1428                         }
1429                         if (addTonos) {
1430                             dest.append('\u0301');
1431                         }
1432                         while (numYpogegrammeni > 0) {
1433                             dest.append('Ι');
1434                             --numYpogegrammeni;
1435                         }
1436                     }
1437                 } else {
1438                     c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
1439                     appendResult(c, dest, nextIndex - i, options, edits);
1440                 }
1441                 i = nextIndex;
1442                 state = nextState;
1443             }
1444             return dest;
1445         }
1446     }
1447 }
1448