1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 package ohos.global.icu.impl; 5 6 import java.io.IOException; 7 import java.text.CharacterIterator; 8 import java.util.Locale; 9 10 import ohos.global.icu.lang.UCharacter; 11 import ohos.global.icu.lang.UCharacterCategory; 12 import ohos.global.icu.text.BreakIterator; 13 import ohos.global.icu.text.Edits; 14 import ohos.global.icu.util.ICUUncheckedIOException; 15 import ohos.global.icu.util.ULocale; 16 17 /** 18 * @hide exposed on OHOS 19 */ 20 public final class CaseMapImpl { 21 /** 22 * Implementation of UCaseProps.ContextIterator, iterates over a String. 23 * See ustrcase.c/utf16_caseContextIterator(). 24 * @hide exposed on OHOS 25 */ 26 public static final class StringContextIterator implements UCaseProps.ContextIterator { 27 /** 28 * Constructor. 29 * @param src String to iterate over. 30 */ StringContextIterator(CharSequence src)31 public StringContextIterator(CharSequence src) { 32 this.s=src; 33 limit=src.length(); 34 cpStart=cpLimit=index=0; 35 dir=0; 36 } 37 38 /** 39 * Constructor. 40 * @param src String to iterate over. 41 * @param cpStart Start index of the current code point. 42 * @param cpLimit Limit index of the current code point. 43 */ StringContextIterator(CharSequence src, int cpStart, int cpLimit)44 public StringContextIterator(CharSequence src, int cpStart, int cpLimit) { 45 s = src; 46 index = 0; 47 limit = src.length(); 48 this.cpStart = cpStart; 49 this.cpLimit = cpLimit; 50 dir = 0; 51 } 52 53 /** 54 * Set the iteration limit for nextCaseMapCP() to an index within the string. 55 * If the limit parameter is negative or past the string, then the 56 * string length is restored as the iteration limit. 57 * 58 * <p>This limit does not affect the next() function which always 59 * iterates to the very end of the string. 60 * 61 * @param lim The iteration limit. 
62 */ setLimit(int lim)63 public void setLimit(int lim) { 64 if(0<=lim && lim<=s.length()) { 65 limit=lim; 66 } else { 67 limit=s.length(); 68 } 69 } 70 71 /** 72 * Move to the iteration limit without fetching code points up to there. 73 */ moveToLimit()74 public void moveToLimit() { 75 cpStart=cpLimit=limit; 76 } 77 78 /** 79 * Iterate forward through the string to fetch the next code point 80 * to be case-mapped, and set the context indexes for it. 81 * 82 * <p>When the iteration limit is reached (and -1 is returned), 83 * getCPStart() will be at the iteration limit. 84 * 85 * <p>Iteration with next() does not affect the position for nextCaseMapCP(). 86 * 87 * @return The next code point to be case-mapped, or <0 when the iteration is done. 88 */ nextCaseMapCP()89 public int nextCaseMapCP() { 90 cpStart=cpLimit; 91 if(cpLimit<limit) { 92 int c=Character.codePointAt(s, cpLimit); 93 cpLimit+=Character.charCount(c); 94 return c; 95 } else { 96 return -1; 97 } 98 } 99 setCPStartAndLimit(int s, int l)100 public void setCPStartAndLimit(int s, int l) { 101 cpStart = s; 102 cpLimit = l; 103 dir = 0; 104 } 105 /** 106 * Returns the start of the code point that was last returned 107 * by nextCaseMapCP(). 108 */ getCPStart()109 public int getCPStart() { 110 return cpStart; 111 } 112 113 /** 114 * Returns the limit of the code point that was last returned 115 * by nextCaseMapCP(). 
116 */ getCPLimit()117 public int getCPLimit() { 118 return cpLimit; 119 } 120 getCPLength()121 public int getCPLength() { 122 return cpLimit-cpStart; 123 } 124 125 // implement UCaseProps.ContextIterator 126 // The following code is not used anywhere in this private class 127 @Override reset(int direction)128 public void reset(int direction) { 129 if(direction>0) { 130 /* reset for forward iteration */ 131 dir=1; 132 index=cpLimit; 133 } else if(direction<0) { 134 /* reset for backward iteration */ 135 dir=-1; 136 index=cpStart; 137 } else { 138 // not a valid direction 139 dir=0; 140 index=0; 141 } 142 } 143 144 @Override next()145 public int next() { 146 int c; 147 148 if(dir>0 && index<s.length()) { 149 c=Character.codePointAt(s, index); 150 index+=Character.charCount(c); 151 return c; 152 } else if(dir<0 && index>0) { 153 c=Character.codePointBefore(s, index); 154 index-=Character.charCount(c); 155 return c; 156 } 157 return -1; 158 } 159 160 // variables 161 protected CharSequence s; 162 protected int index, limit, cpStart, cpLimit; 163 protected int dir; // 0=initial state >0=forward <0=backward 164 } 165 166 public static final int TITLECASE_WHOLE_STRING = 0x20; 167 public static final int TITLECASE_SENTENCES = 0x40; 168 169 /** 170 * Bit mask for the titlecasing iterator options bit field. 171 * Currently only 3 out of 8 values are used: 172 * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES. 173 * See stringoptions.h. 174 * @hide draft / provisional / internal are hidden on OHOS 175 */ 176 private static final int TITLECASE_ITERATOR_MASK = 0xe0; 177 178 public static final int TITLECASE_ADJUST_TO_CASED = 0x400; 179 180 /** 181 * Bit mask for the titlecasing index adjustment options bit set. 182 * Currently two bits are defined: 183 * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED. 184 * See stringoptions.h. 
185 * @hide draft / provisional / internal are hidden on OHOS 186 */ 187 private static final int TITLECASE_ADJUSTMENT_MASK = 0x600; 188 addTitleAdjustmentOption(int options, int newOption)189 public static int addTitleAdjustmentOption(int options, int newOption) { 190 int adjOptions = options & TITLECASE_ADJUSTMENT_MASK; 191 if (adjOptions !=0 && adjOptions != newOption) { 192 throw new IllegalArgumentException("multiple titlecasing index adjustment options"); 193 } 194 return options | newOption; 195 } 196 197 private static final int LNS = 198 (1 << UCharacterCategory.UPPERCASE_LETTER) | 199 (1 << UCharacterCategory.LOWERCASE_LETTER) | 200 (1 << UCharacterCategory.TITLECASE_LETTER) | 201 // Not MODIFIER_LETTER: We count only cased modifier letters. 202 (1 << UCharacterCategory.OTHER_LETTER) | 203 204 (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) | 205 (1 << UCharacterCategory.LETTER_NUMBER) | 206 (1 << UCharacterCategory.OTHER_NUMBER) | 207 208 (1 << UCharacterCategory.MATH_SYMBOL) | 209 (1 << UCharacterCategory.CURRENCY_SYMBOL) | 210 (1 << UCharacterCategory.MODIFIER_SYMBOL) | 211 (1 << UCharacterCategory.OTHER_SYMBOL) | 212 213 (1 << UCharacterCategory.PRIVATE_USE); 214 isLNS(int c)215 private static boolean isLNS(int c) { 216 // Letter, number, symbol, 217 // or a private use code point because those are typically used as letters or numbers. 218 // Consider modifier letters only if they are cased. 
219 int gc = UCharacterProperty.INSTANCE.getType(c); 220 return ((1 << gc) & LNS) != 0 || 221 (gc == UCharacterCategory.MODIFIER_LETTER && 222 UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE); 223 } 224 addTitleIteratorOption(int options, int newOption)225 public static int addTitleIteratorOption(int options, int newOption) { 226 int iterOptions = options & TITLECASE_ITERATOR_MASK; 227 if (iterOptions !=0 && iterOptions != newOption) { 228 throw new IllegalArgumentException("multiple titlecasing iterator options"); 229 } 230 return options | newOption; 231 } 232 getTitleBreakIterator( Locale locale, int options, BreakIterator iter)233 public static BreakIterator getTitleBreakIterator( 234 Locale locale, int options, BreakIterator iter) { 235 options &= TITLECASE_ITERATOR_MASK; 236 if (options != 0 && iter != null) { 237 throw new IllegalArgumentException( 238 "titlecasing iterator option together with an explicit iterator"); 239 } 240 if (iter == null) { 241 switch (options) { 242 case 0: 243 iter = BreakIterator.getWordInstance(locale); 244 break; 245 case TITLECASE_WHOLE_STRING: 246 iter = new WholeStringBreakIterator(); 247 break; 248 case TITLECASE_SENTENCES: 249 iter = BreakIterator.getSentenceInstance(locale); 250 break; 251 default: 252 throw new IllegalArgumentException("unknown titlecasing iterator option"); 253 } 254 } 255 return iter; 256 } 257 getTitleBreakIterator( ULocale locale, int options, BreakIterator iter)258 public static BreakIterator getTitleBreakIterator( 259 ULocale locale, int options, BreakIterator iter) { 260 options &= TITLECASE_ITERATOR_MASK; 261 if (options != 0 && iter != null) { 262 throw new IllegalArgumentException( 263 "titlecasing iterator option together with an explicit iterator"); 264 } 265 if (iter == null) { 266 switch (options) { 267 case 0: 268 iter = BreakIterator.getWordInstance(locale); 269 break; 270 case TITLECASE_WHOLE_STRING: 271 iter = new WholeStringBreakIterator(); 272 break; 273 case TITLECASE_SENTENCES: 274 
iter = BreakIterator.getSentenceInstance(locale); 275 break; 276 default: 277 throw new IllegalArgumentException("unknown titlecasing iterator option"); 278 } 279 } 280 return iter; 281 } 282 283 /** 284 * Omit unchanged text when case-mapping with Edits. 285 */ 286 public static final int OMIT_UNCHANGED_TEXT = 0x4000; 287 288 private static final class WholeStringBreakIterator extends BreakIterator { 289 private int length; 290 notImplemented()291 private static void notImplemented() { 292 throw new UnsupportedOperationException("should not occur"); 293 } 294 295 @Override first()296 public int first() { 297 return 0; 298 } 299 300 @Override last()301 public int last() { 302 notImplemented(); 303 return 0; 304 } 305 306 @Override next(int n)307 public int next(int n) { 308 notImplemented(); 309 return 0; 310 } 311 312 @Override next()313 public int next() { 314 return length; 315 } 316 317 @Override previous()318 public int previous() { 319 notImplemented(); 320 return 0; 321 } 322 323 @Override following(int offset)324 public int following(int offset) { 325 notImplemented(); 326 return 0; 327 } 328 329 @Override current()330 public int current() { 331 notImplemented(); 332 return 0; 333 } 334 335 @Override getText()336 public CharacterIterator getText() { 337 notImplemented(); 338 return null; 339 } 340 341 @Override setText(CharacterIterator newText)342 public void setText(CharacterIterator newText) { 343 length = newText.getEndIndex(); 344 } 345 346 @Override setText(CharSequence newText)347 public void setText(CharSequence newText) { 348 length = newText.length(); 349 } 350 351 @Override setText(String newText)352 public void setText(String newText) { 353 length = newText.length(); 354 } 355 } 356 appendCodePoint(Appendable a, int c)357 private static int appendCodePoint(Appendable a, int c) throws IOException { 358 if (c <= Character.MAX_VALUE) { 359 a.append((char)c); 360 return 1; 361 } else { 362 a.append((char)(0xd7c0 + (c >> 10))); 363 
a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff))); 364 return 2; 365 } 366 } 367 368 /** 369 * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. 370 * @throws IOException 371 */ appendResult(int result, Appendable dest, int cpLength, int options, Edits edits)372 private static void appendResult(int result, Appendable dest, 373 int cpLength, int options, Edits edits) throws IOException { 374 // Decode the result. 375 if (result < 0) { 376 // (not) original code point 377 if (edits != null) { 378 edits.addUnchanged(cpLength); 379 } 380 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 381 return; 382 } 383 appendCodePoint(dest, ~result); 384 } else if (result <= UCaseProps.MAX_STRING_LENGTH) { 385 // The mapping has already been appended to result. 386 if (edits != null) { 387 edits.addReplace(cpLength, result); 388 } 389 } else { 390 // Append the single-code point mapping. 391 int length = appendCodePoint(dest, result); 392 if (edits != null) { 393 edits.addReplace(cpLength, length); 394 } 395 } 396 } 397 appendUnchanged(CharSequence src, int start, int length, Appendable dest, int options, Edits edits)398 private static final void appendUnchanged(CharSequence src, int start, int length, 399 Appendable dest, int options, Edits edits) throws IOException { 400 if (length > 0) { 401 if (edits != null) { 402 edits.addUnchanged(length); 403 } 404 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 405 return; 406 } 407 dest.append(src, start, start + length); 408 } 409 } 410 applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits)411 private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) { 412 if (!edits.hasChanges()) { 413 return src.toString(); 414 } 415 StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta()); 416 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { 417 if (ei.hasChange()) { 418 int i = ei.replacementIndex(); 419 
result.append(replacementChars, i, i + ei.newLength());
            } else {
                int i = ei.sourceIndex();
                result.append(src, i, i + ei.oldLength());
            }
        }
        return result.toString();
    }

    // Trie obtained once from UCaseProps; used for the per-char property lookup in the fast paths below.
    private static final Trie2_16 CASE_TRIE = UCaseProps.getTrie();

    /**
     * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
     * caseLocale < 0: Case-folds [srcStart..srcLimit[.
     *
     * <p>Each outer iteration first runs a fast path that handles single UTF-16 code units
     * with a simple delta mapping, and falls through to a slow path for everything else
     * (table entry EXC, props with an exception, surrogates and higher).
     *
     * @param caseLocale a UCaseProps.LOC_* value, or negative for case folding
     * @param options options bit set (OMIT_UNCHANGED_TEXT and, for folding, the fold-case options)
     * @param src the source text
     * @param srcStart start index of the range to map
     * @param srcLimit limit index of the range to map
     * @param iter context iterator to reuse, or null to have one created on first use
     * @param dest the output sink
     * @param edits records the changes, or null
     * @throws IOException if dest throws it
     */
    private static void internalToLower(int caseLocale, int options,
            CharSequence src, int srcStart, int srcLimit, StringContextIterator iter,
            Appendable dest, Edits edits) throws IOException {
        // Pick the Latin fast-path table: the normal one for the root locale, for any locale
        // other than Turkish/Lithuanian, and for folding with default options;
        // otherwise the Turkish/Lithuanian one.
        byte[] latinToLower;
        if (caseLocale == UCaseProps.LOC_ROOT ||
                (caseLocale >= 0 ?
                    !(caseLocale == UCaseProps.LOC_TURKISH || caseLocale == UCaseProps.LOC_LITHUANIAN) :
                    (options & UCaseProps.FOLD_CASE_OPTIONS_MASK) == UCharacter.FOLD_CASE_DEFAULT)) {
            latinToLower = UCaseProps.LatinCase.TO_LOWER_NORMAL;
        } else {
            latinToLower = UCaseProps.LatinCase.TO_LOWER_TR_LT;
        }
        // prev is the start of the pending run of unchanged text that has not been flushed yet.
        int prev = srcStart;
        int srcIndex = srcStart;
        outerLoop:
        for (;;) {
            // fast path for simple cases
            char lead;
            for (;;) {
                if (srcIndex >= srcLimit) {
                    break outerLoop;
                }
                lead = src.charAt(srcIndex);
                int delta;
                if (lead < UCaseProps.LatinCase.LONG_S) {
                    byte d = latinToLower[lead];
                    // EXC: leave the fast path; srcIndex still points at lead.
                    if (d == UCaseProps.LatinCase.EXC) { break; }
                    ++srcIndex;
                    if (d == 0) { continue; }
                    delta = d;
                } else if (lead >= 0xd800) {
                    break;  // surrogate or higher
                } else {
                    int props = CASE_TRIE.getFromU16SingleLead(lead);
                    if (UCaseProps.propsHasException(props)) { break; }
                    ++srcIndex;
                    if (!UCaseProps.isUpperOrTitleFromProps(props) ||
                            (delta = UCaseProps.getDelta(props)) == 0) {
                        continue;
                    }
                }
                lead += delta;
                // srcIndex is already past lead, so the unchanged run ends at srcIndex - 1.
                appendUnchanged(src, prev, srcIndex - 1 - prev, dest, options, edits);
                dest.append(lead);
                if (edits != null) {
                    edits.addReplace(1, 1);
                }
                prev = srcIndex;
            }
            // slow path
            int cpStart = srcIndex++;
            char trail;
            int c;
            // Combine a surrogate pair that lies fully inside the range into one code point.
            if (Character.isHighSurrogate(lead) && srcIndex < srcLimit &&
                    Character.isLowSurrogate(trail = src.charAt(srcIndex))) {
                c = Character.toCodePoint(lead, trail);
                ++srcIndex;
            } else {
                c = lead;
            }
            // We need to append unchanged text before calling the UCaseProps.toFullXyz() methods
            // because they will sometimes append their mapping to dest,
            // and that must be after copying the previous text.
            appendUnchanged(src, prev, cpStart - prev, dest, options, edits);
            prev = cpStart;
            if (caseLocale >= 0) {
                // Create the context iterator on first use, reuse it afterwards.
                if (iter == null) {
                    iter = new StringContextIterator(src, cpStart, srcIndex);
                } else {
                    iter.setCPStartAndLimit(cpStart, srcIndex);
                }
                c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
            } else {
                c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
            }
            // A negative result leaves prev at cpStart, so this code point is flushed later
            // as part of the next run of unchanged text.
            if (c >= 0) {
                appendResult(c, dest, srcIndex - cpStart, options, edits);
                prev = srcIndex;
            }
        }
        // Flush the trailing run of unchanged text.
        appendUnchanged(src, prev, srcIndex - prev, dest, options, edits);
    }

    /**
     * Uppercases all of src into dest.
     * Uses the same fast-path/slow-path structure as internalToLower(),
     * with a context iterator that is created on first use.
     *
     * @param caseLocale a UCaseProps.LOC_* value; LOC_TURKISH selects the Turkish Latin table
     * @param options options bit set (for example OMIT_UNCHANGED_TEXT)
     * @param src the source text
     * @param dest the output sink
     * @param edits records the changes, or null
     * @throws IOException if dest throws it
     */
    private static void internalToUpper(int caseLocale, int options,
            CharSequence src, Appendable dest, Edits edits) throws IOException {
        StringContextIterator iter = null;
        byte[] latinToUpper;
        if (caseLocale == UCaseProps.LOC_TURKISH) {
            latinToUpper = UCaseProps.LatinCase.TO_UPPER_TR;
        } else {
            latinToUpper = UCaseProps.LatinCase.TO_UPPER_NORMAL;
        }
        // prev is the start of the pending run of unchanged text that has not been flushed yet.
        int prev = 0;
        int srcIndex = 0;
        int srcLength = src.length();
        outerLoop:
        for (;;) {
            // fast path for simple cases
            char lead;
            for (;;) {
                if (srcIndex >= srcLength) {
                    break outerLoop;
                }
                lead = src.charAt(srcIndex);
                int delta;
                if (lead < UCaseProps.LatinCase.LONG_S) {
                    byte d = latinToUpper[lead];
                    // EXC: leave the fast path; srcIndex still points at lead.
                    if (d == UCaseProps.LatinCase.EXC) { break; }
                    ++srcIndex;
                    if (d == 0) { continue; }
                    delta = d;
                } else if (lead >= 0xd800) {
                    break;  // surrogate or higher
                } else {
                    int props = CASE_TRIE.getFromU16SingleLead(lead);
                    if (UCaseProps.propsHasException(props)) { break; }
                    ++srcIndex;
                    if (UCaseProps.getTypeFromProps(props) != UCaseProps.LOWER ||
                            (delta = UCaseProps.getDelta(props)) == 0) {
                        continue;
                    }
                }
                lead += delta;
                // srcIndex is already past lead, so the unchanged run ends at srcIndex - 1.
                appendUnchanged(src, prev, srcIndex - 1 - prev, dest, options, edits);
                dest.append(lead);
                if (edits != null) {
                    edits.addReplace(1, 1);
                }
                prev = srcIndex;
            }
            // slow path
            int cpStart = srcIndex++;
            char trail;
            int c;
            // Combine a surrogate pair into one code point.
            if (Character.isHighSurrogate(lead) && srcIndex < srcLength &&
                    Character.isLowSurrogate(trail = src.charAt(srcIndex))) {
                c = Character.toCodePoint(lead, trail);
                ++srcIndex;
            } else {
                c = lead;
            }
            // Create the context iterator on first use, reuse it afterwards.
            if (iter == null) {
                iter = new StringContextIterator(src, cpStart, srcIndex);
            } else {
                iter.setCPStartAndLimit(cpStart, srcIndex);
            }
            // We need to append unchanged text before calling UCaseProps.toFullUpper()
            // because it will sometimes append its mapping to dest,
            // and that must be after copying the previous text.
            appendUnchanged(src, prev, cpStart - prev, dest, options, edits);
            prev = cpStart;
            c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
            // A negative result leaves prev at cpStart, so this code point is flushed later
            // as part of the next run of unchanged text.
            if (c >= 0) {
                appendResult(c, dest, srcIndex - cpStart, options, edits);
                prev = srcIndex;
            }
        }
        // Flush the trailing run of unchanged text.
        appendUnchanged(src, prev, srcIndex - prev, dest, options, edits);
    }

    /**
     * Lowercases src and returns the result as a String.
     *
     * <p>For sources of at most 100 chars (and when the caller did not already request
     * OMIT_UNCHANGED_TEXT) only the changed text is collected and then merged with src;
     * otherwise the whole result is written into a StringBuilder.
     *
     * @param caseLocale a UCaseProps.LOC_* value
     * @param options options bit set
     * @param src the source text
     * @return the lowercased string
     */
    public static String toLower(int caseLocale, int options, CharSequence src) {
        if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
            if (src.length() == 0) {
                return src.toString();
            }
            // Collect and apply only changes.
            // Good if no or few changes. Bad (slow) if many changes.
            Edits edits = new Edits();
            StringBuilder replacementChars = toLower(
                    caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
            return applyEdits(src, replacementChars, edits);
        } else {
            return toLower(caseLocale, options, src,
                    new StringBuilder(src.length()), null).toString();
        }
    }

    /**
     * Lowercases src into dest.
     *
     * @param caseLocale a UCaseProps.LOC_* value
     * @param options options bit set
     * @param src the source text
     * @param dest the output sink
     * @param edits reset and then filled with the changes, or null
     * @return dest
     * @throws ICUUncheckedIOException wrapping any IOException thrown by dest
     */
    public static <A extends Appendable> A toLower(int caseLocale, int options,
            CharSequence src, A dest, Edits edits) {
        try {
            if (edits != null) {
                edits.reset();
            }
            internalToLower(caseLocale, options, src, 0, src.length(), null, dest, edits);
            return dest;
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * Uppercases src and returns the result as a String.
     *
     * <p>For sources of at most 100 chars (and when the caller did not already request
     * OMIT_UNCHANGED_TEXT) only the changed text is collected and then merged with src;
     * otherwise the whole result is written into a StringBuilder.
     *
     * @param caseLocale a UCaseProps.LOC_* value
     * @param options options bit set
     * @param src the source text
     * @return the uppercased string
     */
    public static String toUpper(int caseLocale, int options, CharSequence src) {
        if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
            if (src.length() == 0) {
                return src.toString();
            }
            // Collect and apply only changes.
            // Good if no or few changes. Bad (slow) if many changes.
Edits edits = new Edits();
            StringBuilder replacementChars = toUpper(
                    caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
            return applyEdits(src, replacementChars, edits);
        } else {
            return toUpper(caseLocale, options, src,
                    new StringBuilder(src.length()), null).toString();
        }
    }

    /**
     * Uppercases src into dest.
     * The Greek case locale is delegated to GreekUpper.toUpper(); all others use internalToUpper().
     *
     * @param caseLocale a UCaseProps.LOC_* value
     * @param options options bit set
     * @param src the source text
     * @param dest the output sink
     * @param edits reset and then filled with the changes, or null
     * @return dest
     * @throws ICUUncheckedIOException wrapping any IOException thrown by dest
     */
    public static <A extends Appendable> A toUpper(int caseLocale, int options,
            CharSequence src, A dest, Edits edits) {
        try {
            if (edits != null) {
                edits.reset();
            }
            if (caseLocale == UCaseProps.LOC_GREEK) {
                return GreekUpper.toUpper(options, src, dest, edits);
            }
            internalToUpper(caseLocale, options, src, dest, edits);
            return dest;
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * Titlecases src and returns the result as a String.
     *
     * <p>For sources of at most 100 chars (and when the caller did not already request
     * OMIT_UNCHANGED_TEXT) only the changed text is collected and then merged with src;
     * otherwise the whole result is written into a StringBuilder.
     *
     * @param caseLocale a UCaseProps.LOC_* value
     * @param options options bit set
     * @param iter break iterator that delivers the titlecasing segment boundaries
     * @param src the source text
     * @return the titlecased string
     */
    public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
        if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
            if (src.length() == 0) {
                return src.toString();
            }
            // Collect and apply only changes.
            // Good if no or few changes. Bad (slow) if many changes.
            Edits edits = new Edits();
            StringBuilder replacementChars = toTitle(
                    caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
                    new StringBuilder(), edits);
            return applyEdits(src, replacementChars, edits);
        } else {
            return toTitle(caseLocale, options, iter, src,
                    new StringBuilder(src.length()), null).toString();
        }
    }

    /**
     * Titlecases src into dest, one break-iterator segment at a time.
     *
     * @param caseLocale a UCaseProps.LOC_* value; LOC_DUTCH enables the IJ special case
     * @param options options bit set; TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED,
     *        TITLECASE_NO_LOWERCASE and OMIT_UNCHANGED_TEXT are examined
     * @param titleIter break iterator that delivers the segment boundaries; it is used without
     *        a null check. NOTE(review): setText() is not called here, so the caller presumably
     *        has already set the iterator's text to src; confirm against the callers.
     * @param src the source text
     * @param dest the output sink
     * @param edits reset and then filled with the changes, or null
     * @return dest
     * @throws ICUUncheckedIOException wrapping any IOException thrown by dest
     */
    public static <A extends Appendable> A toTitle(
            int caseLocale, int options, BreakIterator titleIter,
            CharSequence src, A dest, Edits edits) {
        try {
            if (edits != null) {
                edits.reset();
            }

            /* set up local variables */
            StringContextIterator iter = new StringContextIterator(src);
            int srcLength = src.length();
            int prev=0;
            boolean isFirstIndex=true;

            /* titlecasing loop */
            while(prev<srcLength) {
                /* find next index where to titlecase */
                int index;
                if(isFirstIndex) {
                    isFirstIndex=false;
                    index=titleIter.first();
                } else {
                    index=titleIter.next();
                }
                // Clamp an exhausted or out-of-range boundary to the end of the source.
                if(index==BreakIterator.DONE || index>srcLength) {
                    index=srcLength;
                }

                /*
                 * Segment [prev..index[ into 3 parts:
                 * a) skipped characters (copy as-is) [prev..titleStart[
                 * b) first letter (titlecase)        [titleStart..titleLimit[
                 * c) subsequent characters (lowercase) [titleLimit..index[
                 */
                if(prev<index) {
                    // Find and copy skipped characters [prev..titleStart[
                    int titleStart=prev;
                    iter.setLimit(index);
                    int c=iter.nextCaseMapCP();
                    if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
                        // Adjust the titlecasing index to the next cased character,
                        // or to the next letter/number/symbol/private use.
                        // Stop with titleStart<titleLimit<=index
                        // if there is a character to be titlecased,
                        // or else stop with titleStart==titleLimit==index.
                        boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0;
                        // Empty loop body: the condition itself advances iter past skipped code points.
                        while ((toCased ?
                                    UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) :
                                    !CaseMapImpl.isLNS(c)) &&
                                (c=iter.nextCaseMapCP())>=0) {}
                        // If c<0 then we have only uncased characters in [prev..index[
                        // and stopped with titleStart==titleLimit==index.
                        titleStart=iter.getCPStart();
                        if (prev < titleStart) {
                            appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
                        }
                    }

                    if(titleStart<index) {
                        int titleLimit=iter.getCPLimit();
                        // titlecase c which is from [titleStart..titleLimit[
                        c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
                        appendResult(c, dest, iter.getCPLength(), options, edits);

                        // Special case Dutch IJ titlecasing
                        if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
                            char c1 = src.charAt(titleStart);
                            if ((c1 == 'i' || c1 == 'I')) {
                                char c2 = src.charAt(titleStart+1);
                                if (c2 == 'j') {
                                    // Uppercase the j and consume it as part of the titlecased prefix.
                                    dest.append('J');
                                    if (edits != null) {
                                        edits.addReplace(1, 1);
                                    }
                                    c = iter.nextCaseMapCP();
                                    titleLimit++;
                                    assert c == c2;
                                    assert titleLimit == iter.getCPLimit();
                                } else if (c2 == 'J') {
                                    // Keep the capital J from getting lowercased.
                                    appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
                                    c = iter.nextCaseMapCP();
                                    titleLimit++;
                                    assert c == c2;
                                    assert titleLimit == iter.getCPLimit();
                                }
                            }
                        }

                        // lowercase [titleLimit..index[
                        if(titleLimit<index) {
                            if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
                                // Normal operation: Lowercase the rest of the word.
                                internalToLower(caseLocale, options,
                                        src, titleLimit, index, iter, dest, edits);
                            } else {
                                // Optionally just copy the rest of the word unchanged.
                                appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
                            }
                            // The rest of the segment was handled without nextCaseMapCP(),
                            // so move the iterator to the segment limit explicitly.
                            iter.moveToLimit();
                        }
                    }
                }

                prev=index;
            }
            return dest;
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    /**
     * Case-folds src and returns the result as a String.
     *
     * <p>For sources of at most 100 chars (and when the caller did not already request
     * OMIT_UNCHANGED_TEXT) only the changed text is collected and then merged with src;
     * otherwise the whole result is written into a StringBuilder.
     *
     * @param options options bit set, including the fold-case options
     * @param src the source text
     * @return the case-folded string
     */
    public static String fold(int options, CharSequence src) {
        if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
            if (src.length() == 0) {
                return src.toString();
            }
            // Collect and apply only changes.
            // Good if no or few changes. Bad (slow) if many changes.
            Edits edits = new Edits();
            StringBuilder replacementChars = fold(
                    options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
            return applyEdits(src, replacementChars, edits);
        } else {
            return fold(options, src, new StringBuilder(src.length()), null).toString();
        }
    }

    /**
     * Case-folds src into dest.
     *
     * @param options options bit set, including the fold-case options
     * @param src the source text
     * @param dest the output sink
     * @param edits reset and then filled with the changes, or null
     * @return dest
     * @throws ICUUncheckedIOException wrapping any IOException thrown by dest
     */
    public static <A extends Appendable> A fold(int options,
            CharSequence src, A dest, Edits edits) {
        try {
            if (edits != null) {
                edits.reset();
            }
            // A negative caseLocale makes internalToLower() case-fold instead of lowercase.
            internalToLower(-1, options, src, 0, src.length(), null, dest, edits);
            return dest;
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

    private static final class GreekUpper {
        // Data bits.
        private static final int UPPER_MASK = 0x3ff;
        private static final int HAS_VOWEL = 0x1000;
        private static final int HAS_YPOGEGRAMMENI = 0x2000;
        private static final int HAS_ACCENT = 0x4000;
        private static final int HAS_DIALYTIKA = 0x8000;
        // Further bits during data building and processing, not stored in the data map.
824 private static final int HAS_COMBINING_DIALYTIKA = 0x10000; 825 private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000; 826 827 private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; 828 private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = 829 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; 830 private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; 831 832 // State bits. 833 private static final int AFTER_CASED = 1; 834 private static final int AFTER_VOWEL_WITH_ACCENT = 2; 835 836 // Data generated by prototype code, see 837 // http://site.icu-project.org/design/case/greek-upper 838 // TODO: Move this data into ucase.icu. 839 private static final char[] data0370 = { 840 // U+0370..03FF 841 0x0370, // Ͱ 842 0x0370, // ͱ 843 0x0372, // Ͳ 844 0x0372, // ͳ 845 0, 846 0, 847 0x0376, // Ͷ 848 0x0376, // ͷ 849 0, 850 0, 851 0x037A, // ͺ 852 0x03FD, // ͻ 853 0x03FE, // ͼ 854 0x03FF, // ͽ 855 0, 856 0x037F, // Ϳ 857 0, 858 0, 859 0, 860 0, 861 0, 862 0, 863 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά 864 0, 865 0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ 866 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή 867 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί 868 0, 869 0x039F | HAS_VOWEL | HAS_ACCENT, // Ό 870 0, 871 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ 872 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ 873 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ 874 0x0391 | HAS_VOWEL, // Α 875 0x0392, // Β 876 0x0393, // Γ 877 0x0394, // Δ 878 0x0395 | HAS_VOWEL, // Ε 879 0x0396, // Ζ 880 0x0397 | HAS_VOWEL, // Η 881 0x0398, // Θ 882 0x0399 | HAS_VOWEL, // Ι 883 0x039A, // Κ 884 0x039B, // Λ 885 0x039C, // Μ 886 0x039D, // Ν 887 0x039E, // Ξ 888 0x039F | HAS_VOWEL, // Ο 889 0x03A0, // Π 890 0x03A1, // Ρ 891 0, 892 0x03A3, // Σ 893 0x03A4, // Τ 894 0x03A5 | HAS_VOWEL, // Υ 895 0x03A6, // Φ 896 0x03A7, // Χ 897 0x03A8, // Ψ 898 0x03A9 | HAS_VOWEL, // Ω 899 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // Ϊ 900 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // Ϋ 901 0x0391 | 
HAS_VOWEL | HAS_ACCENT, // ά 902 0x0395 | HAS_VOWEL | HAS_ACCENT, // έ 903 0x0397 | HAS_VOWEL | HAS_ACCENT, // ή 904 0x0399 | HAS_VOWEL | HAS_ACCENT, // ί 905 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ 906 0x0391 | HAS_VOWEL, // α 907 0x0392, // β 908 0x0393, // γ 909 0x0394, // δ 910 0x0395 | HAS_VOWEL, // ε 911 0x0396, // ζ 912 0x0397 | HAS_VOWEL, // η 913 0x0398, // θ 914 0x0399 | HAS_VOWEL, // ι 915 0x039A, // κ 916 0x039B, // λ 917 0x039C, // μ 918 0x039D, // ν 919 0x039E, // ξ 920 0x039F | HAS_VOWEL, // ο 921 0x03A0, // π 922 0x03A1, // ρ 923 0x03A3, // ς 924 0x03A3, // σ 925 0x03A4, // τ 926 0x03A5 | HAS_VOWEL, // υ 927 0x03A6, // φ 928 0x03A7, // χ 929 0x03A8, // ψ 930 0x03A9 | HAS_VOWEL, // ω 931 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // ϊ 932 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // ϋ 933 0x039F | HAS_VOWEL | HAS_ACCENT, // ό 934 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ 935 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ 936 0x03CF, // Ϗ 937 0x0392, // ϐ 938 0x0398, // ϑ 939 0x03D2, // ϒ 940 0x03D2 | HAS_ACCENT, // ϓ 941 0x03D2 | HAS_DIALYTIKA, // ϔ 942 0x03A6, // ϕ 943 0x03A0, // ϖ 944 0x03CF, // ϗ 945 0x03D8, // Ϙ 946 0x03D8, // ϙ 947 0x03DA, // Ϛ 948 0x03DA, // ϛ 949 0x03DC, // Ϝ 950 0x03DC, // ϝ 951 0x03DE, // Ϟ 952 0x03DE, // ϟ 953 0x03E0, // Ϡ 954 0x03E0, // ϡ 955 0, 956 0, 957 0, 958 0, 959 0, 960 0, 961 0, 962 0, 963 0, 964 0, 965 0, 966 0, 967 0, 968 0, 969 0x039A, // ϰ 970 0x03A1, // ϱ 971 0x03F9, // ϲ 972 0x037F, // ϳ 973 0x03F4, // ϴ 974 0x0395 | HAS_VOWEL, // ϵ 975 0, 976 0x03F7, // Ϸ 977 0x03F7, // ϸ 978 0x03F9, // Ϲ 979 0x03FA, // Ϻ 980 0x03FA, // ϻ 981 0x03FC, // ϼ 982 0x03FD, // Ͻ 983 0x03FE, // Ͼ 984 0x03FF, // Ͽ 985 }; 986 987 private static final char[] data1F00 = { 988 // U+1F00..1FFF 989 0x0391 | HAS_VOWEL, // ἀ 990 0x0391 | HAS_VOWEL, // ἁ 991 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἂ 992 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἃ 993 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἄ 994 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἅ 995 0x0391 | HAS_VOWEL | 
HAS_ACCENT, // ἆ 996 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἇ 997 0x0391 | HAS_VOWEL, // Ἀ 998 0x0391 | HAS_VOWEL, // Ἁ 999 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἂ 1000 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἃ 1001 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἄ 1002 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἅ 1003 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἆ 1004 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἇ 1005 0x0395 | HAS_VOWEL, // ἐ 1006 0x0395 | HAS_VOWEL, // ἑ 1007 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἒ 1008 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἓ 1009 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἔ 1010 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἕ 1011 0, 1012 0, 1013 0x0395 | HAS_VOWEL, // Ἐ 1014 0x0395 | HAS_VOWEL, // Ἑ 1015 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἒ 1016 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἓ 1017 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἔ 1018 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἕ 1019 0, 1020 0, 1021 0x0397 | HAS_VOWEL, // ἠ 1022 0x0397 | HAS_VOWEL, // ἡ 1023 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἢ 1024 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἣ 1025 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἤ 1026 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἥ 1027 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἦ 1028 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἧ 1029 0x0397 | HAS_VOWEL, // Ἠ 1030 0x0397 | HAS_VOWEL, // Ἡ 1031 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἢ 1032 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἣ 1033 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἤ 1034 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἥ 1035 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἦ 1036 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἧ 1037 0x0399 | HAS_VOWEL, // ἰ 1038 0x0399 | HAS_VOWEL, // ἱ 1039 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἲ 1040 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἳ 1041 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἴ 1042 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἵ 1043 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἶ 1044 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἷ 1045 0x0399 | HAS_VOWEL, // Ἰ 1046 0x0399 | HAS_VOWEL, // Ἱ 1047 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἲ 1048 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἳ 1049 0x0399 | HAS_VOWEL | 
HAS_ACCENT, // Ἴ 1050 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἵ 1051 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἶ 1052 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἷ 1053 0x039F | HAS_VOWEL, // ὀ 1054 0x039F | HAS_VOWEL, // ὁ 1055 0x039F | HAS_VOWEL | HAS_ACCENT, // ὂ 1056 0x039F | HAS_VOWEL | HAS_ACCENT, // ὃ 1057 0x039F | HAS_VOWEL | HAS_ACCENT, // ὄ 1058 0x039F | HAS_VOWEL | HAS_ACCENT, // ὅ 1059 0, 1060 0, 1061 0x039F | HAS_VOWEL, // Ὀ 1062 0x039F | HAS_VOWEL, // Ὁ 1063 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὂ 1064 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὃ 1065 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὄ 1066 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὅ 1067 0, 1068 0, 1069 0x03A5 | HAS_VOWEL, // ὐ 1070 0x03A5 | HAS_VOWEL, // ὑ 1071 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὒ 1072 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὓ 1073 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὔ 1074 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὕ 1075 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὖ 1076 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὗ 1077 0, 1078 0x03A5 | HAS_VOWEL, // Ὑ 1079 0, 1080 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὓ 1081 0, 1082 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὕ 1083 0, 1084 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὗ 1085 0x03A9 | HAS_VOWEL, // ὠ 1086 0x03A9 | HAS_VOWEL, // ὡ 1087 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὢ 1088 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὣ 1089 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὤ 1090 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὥ 1091 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὦ 1092 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὧ 1093 0x03A9 | HAS_VOWEL, // Ὠ 1094 0x03A9 | HAS_VOWEL, // Ὡ 1095 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὢ 1096 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὣ 1097 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὤ 1098 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὥ 1099 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὦ 1100 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὧ 1101 0x0391 | HAS_VOWEL | HAS_ACCENT, // ὰ 1102 0x0391 | HAS_VOWEL | HAS_ACCENT, // ά 1103 0x0395 | HAS_VOWEL | HAS_ACCENT, // ὲ 1104 0x0395 | HAS_VOWEL | HAS_ACCENT, // έ 1105 0x0397 | HAS_VOWEL | HAS_ACCENT, // 
ὴ 1106 0x0397 | HAS_VOWEL | HAS_ACCENT, // ή 1107 0x0399 | HAS_VOWEL | HAS_ACCENT, // ὶ 1108 0x0399 | HAS_VOWEL | HAS_ACCENT, // ί 1109 0x039F | HAS_VOWEL | HAS_ACCENT, // ὸ 1110 0x039F | HAS_VOWEL | HAS_ACCENT, // ό 1111 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὺ 1112 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ 1113 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὼ 1114 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ 1115 0, 1116 0, 1117 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾀ 1118 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾁ 1119 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾂ 1120 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾃ 1121 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾄ 1122 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾅ 1123 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾆ 1124 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾇ 1125 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾈ 1126 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾉ 1127 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾊ 1128 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾋ 1129 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾌ 1130 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾍ 1131 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾎ 1132 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾏ 1133 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾐ 1134 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾑ 1135 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾒ 1136 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾓ 1137 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾔ 1138 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾕ 1139 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾖ 1140 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾗ 1141 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾘ 1142 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾙ 1143 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 
ᾚ 1144 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾛ 1145 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾜ 1146 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾝ 1147 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾞ 1148 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾟ 1149 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾠ 1150 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾡ 1151 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾢ 1152 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾣ 1153 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾤ 1154 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾥ 1155 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾦ 1156 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾧ 1157 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾨ 1158 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾩ 1159 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾪ 1160 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾫ 1161 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾬ 1162 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾭ 1163 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾮ 1164 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾯ 1165 0x0391 | HAS_VOWEL, // ᾰ 1166 0x0391 | HAS_VOWEL, // ᾱ 1167 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾲ 1168 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾳ 1169 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾴ 1170 0, 1171 0x0391 | HAS_VOWEL | HAS_ACCENT, // ᾶ 1172 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾷ 1173 0x0391 | HAS_VOWEL, // Ᾰ 1174 0x0391 | HAS_VOWEL, // Ᾱ 1175 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ὰ 1176 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά 1177 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾼ 1178 0, 1179 0x0399 | HAS_VOWEL, // ι 1180 0, 1181 0, 1182 0, 1183 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῂ 1184 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῃ 1185 
0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῄ 1186 0, 1187 0x0397 | HAS_VOWEL | HAS_ACCENT, // ῆ 1188 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῇ 1189 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ὲ 1190 0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ 1191 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ὴ 1192 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή 1193 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῌ 1194 0, 1195 0, 1196 0, 1197 0x0399 | HAS_VOWEL, // ῐ 1198 0x0399 | HAS_VOWEL, // ῑ 1199 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῒ 1200 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ 1201 0, 1202 0, 1203 0x0399 | HAS_VOWEL | HAS_ACCENT, // ῖ 1204 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῗ 1205 0x0399 | HAS_VOWEL, // Ῐ 1206 0x0399 | HAS_VOWEL, // Ῑ 1207 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ὶ 1208 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί 1209 0, 1210 0, 1211 0, 1212 0, 1213 0x03A5 | HAS_VOWEL, // ῠ 1214 0x03A5 | HAS_VOWEL, // ῡ 1215 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῢ 1216 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ 1217 0x03A1, // ῤ 1218 0x03A1, // ῥ 1219 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ῦ 1220 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῧ 1221 0x03A5 | HAS_VOWEL, // Ῠ 1222 0x03A5 | HAS_VOWEL, // Ῡ 1223 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὺ 1224 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ 1225 0x03A1, // Ῥ 1226 0, 1227 0, 1228 0, 1229 0, 1230 0, 1231 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῲ 1232 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῳ 1233 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῴ 1234 0, 1235 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ῶ 1236 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῷ 1237 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὸ 1238 0x039F | HAS_VOWEL | HAS_ACCENT, // Ό 1239 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὼ 1240 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ 1241 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῼ 1242 0, 1243 0, 1244 0, 1245 }; 1246 1247 // U+2126 Ohm sign 1248 private 
static final char data2126 = 0x03A9 | HAS_VOWEL;  // U+2126 maps to capital omega (U+03A9), flagged as a vowel

        /**
         * Looks up the Greek letter data for a code point.
         *
         * <p>Only U+0370..U+03FF, U+1F00..U+1FFF and U+2126 have table entries;
         * every other code point yields 0.
         *
         * @param c the code point to look up
         * @return the table entry for {@code c}, or 0 if {@code c} is outside the covered ranges
         */
        private static final int getLetterData(int c) {
            if (0x370 <= c && c <= 0x3ff) {
                return data0370[c - 0x370];
            }
            if (0x1f00 <= c && c <= 0x1fff) {
                return data1F00[c - 0x1f00];
            }
            if (c == 0x2126) {
                return data2126;
            }
            return 0;
        }

        /**
         * Classifies a combining mark that may follow a Greek letter.
         *
         * <p>A non-zero result is produced for the Greek combining diacritics
         * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
         * and for a few marks that can look like a perispomeni.
         *
         * @param c the code point to classify
         * @return a combination of {@code HAS_*} flags, or 0 if {@code c} is not one of these marks
         */
        private static final int getDiacriticData(int c) {
            switch (c) {
            case 0x0345:  // ypogegrammeni = iota subscript
                return HAS_YPOGEGRAMMENI;
            case 0x0344:  // dialytika tonos
                return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
            case 0x0308:  // dialytika = diaeresis
                return HAS_COMBINING_DIALYTIKA;
            case 0x0300:  // varia
            case 0x0301:  // tonos = oxia
            case 0x0302:  // circumflex can look like perispomeni
            case 0x0303:  // tilde can look like perispomeni
            case 0x0311:  // inverted breve can look like perispomeni
            case 0x0342:  // perispomeni
                return HAS_ACCENT;
            case 0x0304:  // macron
            case 0x0306:  // breve
            case 0x0313:  // comma above
            case 0x0314:  // reversed comma above
            case 0x0343:  // koronis
                return HAS_OTHER_GREEK_DIACRITIC;
            default:
                return 0;
            }
        }

        /**
         * Tests whether the text starting at index {@code i} continues the current word
         * with a cased letter, skipping over any case-ignorable code points.
         *
         * @param s the text to inspect
         * @param i the index at which to start looking
         * @return true if the first code point at or after {@code i} that is not
         *         case-ignorable is cased; false if it is uncased or the text ends first
         */
        private static boolean isFollowedByCasedLetter(CharSequence s, int i) {
            int pos = i;
            while (pos < s.length()) {
                int cp = Character.codePointAt(s, pos);
                int props = UCaseProps.INSTANCE.getTypeOrIgnorable(cp);
                if ((props & UCaseProps.IGNORABLE) == 0) {
                    // First code point that is not case-ignorable decides the answer:
                    // cased means "followed by a cased letter", uncased means it is not.
                    return props != UCaseProps.NONE;
                }
                pos += Character.charCount(cp);
            }
            return false;  // Reached the end of the text without finding a cased letter.
        }

        /**
         * Uppercases a string according to Greek rules, using a small state machine
         * that carries context from one letter to the next (simpler than working out
         * the context-before separately for every character).
         * TODO: Try to re-consolidate one way or another with the non-Greek function.
         *
         * <p>For each Greek letter the following combining diacritics are consumed as well.
         * The base letter is replaced by its uppercase form from the letter tables, a
         * dialytika is kept (or added to an iota/ypsilon that follows a vowel which carried
         * an accent but no dialytika), each ypogegrammeni becomes a trailing capital iota,
         * and a tonos is retained only on a stand-alone accented eta (the disjunctive "or").
         * All other code points go through the regular full uppercase mapping.
         *
         * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
         *
         * @param options case-mapping option bits; {@code OMIT_UNCHANGED_TEXT} is honored here
         * @param src the text to uppercase
         * @param dest the sink that receives the result
         * @param edits records changed/unchanged spans; may be null
         * @return {@code dest}
         * @throws IOException if thrown while appending to {@code dest}
         */
        private static <A extends Appendable> A toUpper(int options,
                CharSequence src, A dest, Edits edits) throws IOException {
            final boolean omitUnchanged = (options & OMIT_UNCHANGED_TEXT) != 0;
            int state = 0;  // AFTER_* flags describing what preceded the current code point
            for (int i = 0; i < src.length();) {
                int c = Character.codePointAt(src, i);
                int nextIndex = i + Character.charCount(c);

                // Work out the context flags that the next code point will see.
                int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
                int nextState;
                if ((type & UCaseProps.IGNORABLE) != 0) {
                    // c is case-ignorable: it is transparent for the "after cased" context.
                    nextState = state & AFTER_CASED;
                } else {
                    // c is either cased or neither cased nor ignorable.
                    nextState = (type != UCaseProps.NONE) ? AFTER_CASED : 0;
                }

                int data = getLetterData(c);
                if (data <= 0) {
                    // Not a Greek letter we have data for: use the generic mapping.
                    int result = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
                    appendResult(result, dest, nextIndex - i, options, edits);
                } else {
                    int upper = data & UPPER_MASK;
                    boolean iotaOrYpsilon = upper == 'Ι' || upper == 'Υ';  // U+0399, U+03A5
                    // Add a dialytika to this iota or ypsilon vowel
                    // if we removed a tonos from the previous vowel,
                    // and that previous vowel did not also have (or gain) a dialytika.
                    // Adding one only to the final vowel in a longer sequence
                    // (which does not occur in normal writing) would require lookahead.
                    // Set the same flag as for preserving an existing dialytika.
                    if (iotaOrYpsilon && (data & HAS_VOWEL) != 0 &&
                            (state & AFTER_VOWEL_WITH_ACCENT) != 0) {
                        data |= HAS_DIALYTIKA;
                    }
                    // Each ypogegrammeni is mapped to a trailing, spacing, capital iota.
                    int numYpogegrammeni = (data & HAS_YPOGEGRAMMENI) != 0 ? 1 : 0;
                    // Consume the combining diacritics after this Greek letter,
                    // stopping at the first code unit that is not a Greek diacritic.
                    for (; nextIndex < src.length(); ++nextIndex) {
                        int marks = getDiacriticData(src.charAt(nextIndex));
                        if (marks == 0) {
                            break;
                        }
                        data |= marks;
                        if ((marks & HAS_YPOGEGRAMMENI) != 0) {
                            ++numYpogegrammeni;
                        }
                    }
                    if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
                        nextState |= AFTER_VOWEL_WITH_ACCENT;
                    }

                    // Map according to Greek rules.
                    // A stand-alone accented eta is the disjunctive "or" and keeps (only) a tonos.
                    // The "word boundary" conditions are the same as for the Final_Sigma test.
                    boolean standaloneAccentedEta =
                            upper == 'Η' &&  // U+0397
                            (data & HAS_ACCENT) != 0 &&
                            numYpogegrammeni == 0 &&
                            (state & AFTER_CASED) == 0 &&
                            !isFollowedByCasedLetter(src, nextIndex);
                    boolean addTonos = false;
                    if (standaloneAccentedEta) {
                        // NOTE(review): nextIndex starts at i + charCount(c) and only grows,
                        // so this comparison is never true here and the tonos is always
                        // emitted as a combining U+0301. Left as is to match the C++ versions.
                        if (nextIndex == i) {
                            upper = 'Ή';  // U+0389: preserve the precomposed form.
                        } else {
                            addTonos = true;
                        }
                    } else if ((data & HAS_DIALYTIKA) != 0 && iotaOrYpsilon) {
                        // Preserve a vowel with dialytika in precomposed form if it exists.
                        upper = (upper == 'Ι') ? 'Ϊ' : 'Ϋ';  // U+03AA, U+03AB
                        data &= ~HAS_EITHER_DIALYTIKA;
                    }

                    boolean write;
                    if (edits == null && !omitUnchanged) {
                        write = true;  // common, simple usage
                    } else {
                        // Find out first whether we are changing the text:
                        // compare the source against what would be written.
                        boolean differs = src.charAt(i) != upper || numYpogegrammeni > 0;
                        int expected = i + 1;
                        if ((data & HAS_EITHER_DIALYTIKA) != 0) {
                            differs |= expected >= nextIndex || src.charAt(expected) != 0x308;
                            ++expected;
                        }
                        if (addTonos) {
                            differs |= expected >= nextIndex || src.charAt(expected) != 0x301;
                            ++expected;
                        }
                        int oldLength = nextIndex - i;
                        int newLength = (expected - i) + numYpogegrammeni;
                        differs |= oldLength != newLength;
                        if (edits != null) {
                            if (differs) {
                                edits.addReplace(oldLength, newLength);
                            } else {
                                edits.addUnchanged(oldLength);
                            }
                        }
                        // Unchanged text is written only when the caller did not ask to omit it.
                        write = differs || !omitUnchanged;
                    }

                    if (write) {
                        dest.append((char)upper);
                        if ((data & HAS_EITHER_DIALYTIKA) != 0) {
                            dest.append('\u0308');  // restore or add a dialytika
                        }
                        if (addTonos) {
                            dest.append('\u0301');
                        }
                        for (int k = 0; k < numYpogegrammeni; ++k) {
                            dest.append('Ι');  // U+0399
                        }
                    }
                }
                i = nextIndex;
                state = nextState;
            }
            return dest;
        }
    }
}