• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 1996-2014, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10 
11 
12 package ohos.global.icu.dev.test.lang;
13 
14 
15 import java.io.BufferedReader;
16 import java.util.ArrayList;
17 import java.util.Collections;
18 import java.util.List;
19 import java.util.Locale;
20 
21 import org.junit.Test;
22 import org.junit.runner.RunWith;
23 import org.junit.runners.JUnit4;
24 
25 import ohos.global.icu.dev.test.TestFmwk;
26 import ohos.global.icu.dev.test.TestUtil;
27 import ohos.global.icu.impl.Utility;
28 import ohos.global.icu.lang.UCharacter;
29 import ohos.global.icu.lang.UProperty;
30 import ohos.global.icu.text.BreakIterator;
31 import ohos.global.icu.text.CaseMap;
32 import ohos.global.icu.text.Edits;
33 import ohos.global.icu.text.RuleBasedBreakIterator;
34 import ohos.global.icu.text.UTF16;
35 import ohos.global.icu.util.ULocale;
36 
37 
38 
39 /**
40 * <p>Testing character casing</p>
41 * <p>Mostly following the test cases in strcase.cpp for ICU</p>
42 * @author Syn Wee Quek
43 * @since march 14 2002
44 */
45 
46 @RunWith(JUnit4.class)
47 public final class UCharacterCaseTest extends TestFmwk
48 {
49     // constructor -----------------------------------------------------------
50 
51     /**
52      * Constructor
53      */
UCharacterCaseTest()54     public UCharacterCaseTest()
55     {
56     }
57 
58     // public methods --------------------------------------------------------
59 
60     /**
61      * Testing the uppercase and lowercase function of UCharacter
62      */
63     @Test
TestCharacter()64     public void TestCharacter()
65     {
66         for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {
67             if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&
68                 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {
69                 errln("FAIL isLowerCase test for \\u" +
70                       hex(CHARACTER_LOWER_[i]));
71                 break;
72             }
73             if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&
74                 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||
75                   UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {
76                 errln("FAIL isUpperCase test for \\u" +
77                       hex(CHARACTER_UPPER_[i]));
78                 break;
79             }
80             if (CHARACTER_LOWER_[i] !=
81                 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||
82                 (CHARACTER_UPPER_[i] !=
83                 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&
84                 CHARACTER_UPPER_[i] !=
85                 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {
86                 errln("FAIL case conversion test for \\u" +
87                       hex(CHARACTER_UPPER_[i]) +
88                       " to \\u" + hex(CHARACTER_LOWER_[i]));
89                 break;
90             }
91             if (CHARACTER_LOWER_[i] !=
92                 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {
93                 errln("FAIL lower case conversion test for \\u" +
94                       hex(CHARACTER_LOWER_[i]));
95                 break;
96             }
97             if (CHARACTER_UPPER_[i] !=
98                 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&
99                 CHARACTER_UPPER_[i] !=
100                 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {
101                 errln("FAIL upper case conversion test for \\u" +
102                       hex(CHARACTER_UPPER_[i]));
103                 break;
104             }
105             logln("Ok    \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +
106                   hex(CHARACTER_LOWER_[i]));
107         }
108     }
109 
110     @Test
TestFolding()111     public void TestFolding()
112     {
113         // test simple case folding
114         for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {
115             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=
116                 FOLDING_SIMPLE_[i + 1]) {
117                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
118                       ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));
119             }
120             if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
121                                     UCharacter.FOLD_CASE_DEFAULT) !=
122                                                       FOLDING_SIMPLE_[i + 1]) {
123                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
124                       ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
125                       + hex(FOLDING_SIMPLE_[i + 1]));
126             }
127             if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=
128                 FOLDING_SIMPLE_[i + 2]) {
129                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
130                       ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));
131             }
132             if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
133                                     UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=
134                                     FOLDING_SIMPLE_[i + 2]) {
135                 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
136                       ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
137                       + hex(FOLDING_SIMPLE_[i + 2]));
138             }
139         }
140 
141         // Test full string case folding with default option and separate
142         // buffers
143         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {
144             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
145                   ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +
146                   " should be " + prettify(FOLDING_DEFAULT_[0]));
147         }
148 
149         if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {
150                     errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
151                           ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))
152                           + " should be " + prettify(FOLDING_DEFAULT_[0]));
153                 }
154 
155         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
156                             UCharacter.foldCase(FOLDING_MIXED_[0], false))) {
157             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
158                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))
159                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
160         }
161 
162         if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
163                                     UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
164             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
165                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
166                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
167         }
168 
169         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {
170            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
171                  ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))
172                  + " should be " + prettify(FOLDING_DEFAULT_[1]));
173         }
174 
175         if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {
176             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
177                          ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))
178                          + " should be " + prettify(FOLDING_DEFAULT_[1]));
179         }
180 
181         // alternate handling for dotted I/dotless i (U+0130, U+0131)
182         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
183                         UCharacter.foldCase(FOLDING_MIXED_[1], false))) {
184             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
185                   ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))
186                   + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
187         }
188 
189         if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
190                                 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
191             errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
192                   ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
193                   + " should be "
194                   + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
195         }
196     }
197 
198     @Test
TestInvalidCodePointFolding()199     public void TestInvalidCodePointFolding() {
200         int[] invalidCodePoints = {
201                 0xD800, // lead surrogate
202                 0xDFFF, // trail surrogate
203                 0xFDD0, // noncharacter
204                 0xFFFF, // noncharacter
205                 0x110000, // out of range
206                 -1 // negative
207         };
208         for (int cp : invalidCodePoints) {
209             assertEquals("Invalid code points should be echoed back",
210                     cp, UCharacter.foldCase(cp, true));
211             assertEquals("Invalid code points should be echoed back",
212                     cp, UCharacter.foldCase(cp, false));
213             assertEquals("Invalid code points should be echoed back",
214                     cp, UCharacter.foldCase(cp, UCharacter.FOLD_CASE_DEFAULT));
215             assertEquals("Invalid code points should be echoed back",
216                     cp, UCharacter.foldCase(cp, UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I));
217         }
218     }
219 
220     /**
221      * Testing the strings case mapping methods
222      */
223     @Test
TestUpper()224     public void TestUpper()
225     {
226         // uppercase with root locale and in the same buffer
227         if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {
228             errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +
229                   UPPER_ROOT_ + " instead got " +
230                   UCharacter.toUpperCase(UPPER_BEFORE_));
231         }
232 
233         // uppercase with turkish locale and separate buffers
234         if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,
235                                                          UPPER_BEFORE_))) {
236             errln("Fail " + UPPER_BEFORE_ +
237                   " after turkish-sensitive uppercase should be " +
238                   UPPER_TURKISH_ + " instead of " +
239                   UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));
240         }
241 
242         // uppercase a short string with root locale
243         if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {
244             errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +
245                   "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
246         }
247 
248         if (!SHARED_UPPERCASE_TOPKAP_.equals(
249                        UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {
250             errln("toUpper failed: expected \"" +
251                   SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +
252                   UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");
253         }
254 
255         if (!SHARED_UPPERCASE_TURKISH_.equals(
256                   UCharacter.toUpperCase(TURKISH_LOCALE_,
257                                          SHARED_LOWERCASE_TOPKAP_))) {
258             errln("toUpper failed: expected \"" +
259                   SHARED_UPPERCASE_TURKISH_ + "\", got \"" +
260                   UCharacter.toUpperCase(TURKISH_LOCALE_,
261                                      SHARED_LOWERCASE_TOPKAP_) + "\".");
262         }
263 
264         if (!SHARED_UPPERCASE_GERMAN_.equals(
265                 UCharacter.toUpperCase(GERMAN_LOCALE_,
266                                        SHARED_LOWERCASE_GERMAN_))) {
267             errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_
268                   + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,
269                                         SHARED_LOWERCASE_GERMAN_) + "\".");
270         }
271 
272         if (!SHARED_UPPERCASE_GREEK_.equals(
273                 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {
274             errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ +
275                   "\", got \"" + UCharacter.toUpperCase(
276                                         SHARED_LOWERCASE_GREEK_) + "\".");
277         }
278     }
279 
280     @Test
TestLower()281     public void TestLower()
282     {
283         if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {
284             errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +
285                   LOWER_ROOT_ + " instead of " +
286                   UCharacter.toLowerCase(LOWER_BEFORE_));
287         }
288 
289         // lowercase with turkish locale
290         if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,
291                                                           LOWER_BEFORE_))) {
292             errln("Fail " + LOWER_BEFORE_ +
293                   " after turkish-sensitive lowercase should be " +
294                   LOWER_TURKISH_ + " instead of " +
295                   UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));
296         }
297         if (!SHARED_LOWERCASE_ISTANBUL_.equals(
298                      UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {
299             errln("1. toLower failed: expected \"" +
300                   SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +
301               UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");
302         }
303 
304         if (!SHARED_LOWERCASE_TURKISH_.equals(
305                 UCharacter.toLowerCase(TURKISH_LOCALE_,
306                                        SHARED_UPPERCASE_ISTANBUL_))) {
307             errln("2. toLower failed: expected \"" +
308                   SHARED_LOWERCASE_TURKISH_ + "\", got \"" +
309                   UCharacter.toLowerCase(TURKISH_LOCALE_,
310                                 SHARED_UPPERCASE_ISTANBUL_) + "\".");
311         }
312         if (!SHARED_LOWERCASE_GREEK_.equals(
313                 UCharacter.toLowerCase(GREEK_LOCALE_,
314                                        SHARED_UPPERCASE_GREEK_))) {
315             errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +
316                   "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,
317                                         SHARED_UPPERCASE_GREEK_) + "\".");
318         }
319     }
320 
321     @Test
TestTitleRegression()322     public void TestTitleRegression() throws java.io.IOException {
323         boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE);
324         assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable);
325         assertEquals("Titlecase check",
326                 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
327                 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
328     }
329 
330     @Test
TestTitle()331     public void TestTitle()
332     {
333          try{
334             for (int i = 0; i < TITLE_DATA_.length;) {
335                 String test = TITLE_DATA_[i++];
336                 String expected = TITLE_DATA_[i++];
337                 ULocale locale = new ULocale(TITLE_DATA_[i++]);
338                 int breakType = Integer.parseInt(TITLE_DATA_[i++]);
339                 String optionsString = TITLE_DATA_[i++];
340                 BreakIterator iter =
341                     breakType >= 0 ?
342                         BreakIterator.getBreakInstance(locale, breakType) :
343                         breakType == -2 ?
344                             // Open a trivial break iterator that only delivers { 0, length }
345                             // or even just { 0 } as boundaries.
346                             new RuleBasedBreakIterator(".*;") :
347                             null;
348                 int options = 0;
349                 if (optionsString.indexOf('L') >= 0) {
350                     options |= UCharacter.TITLECASE_NO_LOWERCASE;
351                 }
352                 if (optionsString.indexOf('A') >= 0) {
353                     options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
354                 }
355                 String result = UCharacter.toTitleCase(locale, test, iter, options);
356                 if (!expected.equals(result)) {
357                     errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +
358                           prettify(expected) + " but got " +
359                           prettify(result));
360                 }
361                 if (options == 0) {
362                     result = UCharacter.toTitleCase(locale, test, iter);
363                     if (!expected.equals(result)) {
364                         errln("titlecasing for " + prettify(test) + " should be " +
365                               prettify(expected) + " but got " +
366                               prettify(result));
367                     }
368                 }
369             }
370          }catch(Exception ex){
371             warnln("Could not find data for BreakIterators");
372          }
373     }
374 
375     // Not a @Test. See ICU4C intltest strcase.cpp TestCasingImpl().
TestCasingImpl(String input, String output, CaseMap.Title toTitle, Locale locale)376     void TestCasingImpl(String input, String output, CaseMap.Title toTitle, Locale locale) {
377         String result = toTitle.apply(locale, null, input, new StringBuilder(), null).toString();
378         assertEquals("toTitle(" + input + ')', output, result);
379     }
380 
381     @Test
TestTitleOptions()382     public void TestTitleOptions() {
383         Locale root = Locale.ROOT;
384         // New options in ICU 60.
385         TestCasingImpl("ʻcAt! ʻeTc.", "ʻCat! ʻetc.",
386                 CaseMap.toTitle().wholeString(), root);
387         TestCasingImpl("a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCaT. A ʻdOg! ʻETc.",
388                 CaseMap.toTitle().sentences().noLowercase(), root);
389         TestCasingImpl("49eRs", "49ers",
390                 CaseMap.toTitle().wholeString(), root);
391         TestCasingImpl("«丰(aBc)»", "«丰(abc)»",
392                 CaseMap.toTitle().wholeString(), root);
393         TestCasingImpl("49eRs", "49Ers",
394                 CaseMap.toTitle().wholeString().adjustToCased(), root);
395         TestCasingImpl("«丰(aBc)»", "«丰(Abc)»",
396                 CaseMap.toTitle().wholeString().adjustToCased(), root);
397         TestCasingImpl(" john. Smith", " John. Smith",
398                 CaseMap.toTitle().wholeString().noLowercase(), root);
399         TestCasingImpl(" john. Smith", " john. smith",
400                 CaseMap.toTitle().wholeString().noBreakAdjustment(), root);
401         TestCasingImpl("«ijs»", "«IJs»",
402                 CaseMap.toTitle().wholeString(), new Locale("nl", "BE"));
403         TestCasingImpl("«ijs»", "«İjs»",
404                 CaseMap.toTitle().wholeString(), new Locale("tr", "DE"));
405 
406         // Test conflicting settings.
407         // If & when we add more options, then the ORed combinations may become
408         // indistinguishable from valid values.
409         try {
410             CaseMap.toTitle().noBreakAdjustment().adjustToCased().
411                     apply(root, null, "", new StringBuilder(), null);
412             fail("CaseMap.toTitle(multiple adjustment options) " +
413                     "did not throw an IllegalArgumentException");
414         } catch(IllegalArgumentException expected) {
415         }
416         try {
417             CaseMap.toTitle().wholeString().sentences().
418                     apply(root, null, "", new StringBuilder(), null);
419             fail("CaseMap.toTitle(multiple iterator options) " +
420                     "did not throw an IllegalArgumentException");
421         } catch(IllegalArgumentException expected) {
422         }
423         BreakIterator iter = BreakIterator.getCharacterInstance(root);
424         try {
425             CaseMap.toTitle().wholeString().apply(root, iter, "", new StringBuilder(), null);
426             fail("CaseMap.toTitle(iterator option + iterator) " +
427                     "did not throw an IllegalArgumentException");
428         } catch(IllegalArgumentException expected) {
429         }
430     }
431 
432     @Test
TestLithuanianTitle()433     public void TestLithuanianTitle() {
434         ULocale LOC_LITHUANIAN = new ULocale("lt");
435 
436         assertEquals("Lithuanian titlecase check in Lithuanian",
437                 "\u0058\u0069\u0307\u0308",
438                 UCharacter.toTitleCase(LOC_LITHUANIAN, "\u0058\u0049\u0308", null));
439 
440         assertEquals("Lithuanian titlecase check in Lithuanian",
441                 "\u0058\u0069\u0307\u0308",
442                 UCharacter.toTitleCase(LITHUANIAN_LOCALE_, "\u0058\u0049\u0308", null));
443     }
444 
445     @Test
TestDutchTitle()446     public void TestDutchTitle() {
447         ULocale LOC_DUTCH = new ULocale("nl");
448         int options = 0;
449         options |= UCharacter.TITLECASE_NO_LOWERCASE;
450         BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);
451 
452         assertEquals("Dutch titlecase check in English",
453                 "Ijssel Igloo Ijmuiden",
454                 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));
455 
456         assertEquals("Dutch titlecase check in Dutch",
457                 "IJssel Igloo IJmuiden",
458                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));
459 
460         // Also check the behavior using Java Locale
461         assertEquals("Dutch titlecase check in English (Java Locale)",
462                 "Ijssel Igloo Ijmuiden",
463                 UCharacter.toTitleCase(Locale.ENGLISH, "ijssel igloo IJMUIDEN", null));
464 
465         assertEquals("Dutch titlecase check in Dutch (Java Locale)",
466                 "IJssel Igloo IJmuiden",
467                 UCharacter.toTitleCase(DUTCH_LOCALE_, "ijssel igloo IJMUIDEN", null));
468 
469         iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
470         assertEquals("Dutch titlecase check in Dutch with nolowercase option",
471                 "IJssel Igloo IJMUIdEN IPoD IJenough",
472                 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
473     }
474 
475     @Test
TestSpecial()476     public void TestSpecial()
477     {
478         for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {
479             int    j      = i * 3;
480             Locale locale = SPECIAL_LOCALES_[i];
481             String str    = SPECIAL_DATA_[j];
482             if (locale != null) {
483                 if (!SPECIAL_DATA_[j + 1].equals(
484                      UCharacter.toLowerCase(locale, str))) {
485                     errln("error lowercasing special characters " +
486                         hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])
487                         + " for locale " + locale.toString() + " but got " +
488                         hex(UCharacter.toLowerCase(locale, str)));
489                 }
490                 if (!SPECIAL_DATA_[j + 2].equals(
491                      UCharacter.toUpperCase(locale, str))) {
492                     errln("error uppercasing special characters " +
493                         hex(str) + " expected " + SPECIAL_DATA_[j + 2]
494                         + " for locale " + locale.toString() + " but got " +
495                         hex(UCharacter.toUpperCase(locale, str)));
496                 }
497             }
498             else {
499                 String lower = UCharacter.toLowerCase(str);
500                 if (!SPECIAL_DATA_[j + 1].equals(lower)) {
501                     errln("error lowercasing special characters " +
502                         hex(str) + " expected " + SPECIAL_DATA_[j + 1] +
503                         " but got " + hex(lower));
504                 }
505                 String upper = UCharacter.toUpperCase(str);
506                 if (!SPECIAL_DATA_[j + 2].equals(upper)) {
507                     errln("error uppercasing special characters " +
508                         hex(str) + " expected " + SPECIAL_DATA_[j + 2] +
509                         " but got " + hex(upper));
510                 }
511             }
512         }
513 
514         // turkish & azerbaijani dotless i & dotted I
515         // remove dot above if there was a capital I before and there are no
516         // more accents above
517         if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(
518                                         TURKISH_LOCALE_, SPECIAL_DOTTED_))) {
519             errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +
520                   "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +
521                   "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,
522                                                          SPECIAL_DOTTED_));
523         }
524         if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(
525                                              GERMAN_LOCALE_, SPECIAL_DOTTED_))) {
526             errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +
527                   "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +
528                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
529                                                          SPECIAL_DOTTED_));
530         }
531 
532         // lithuanian dot above in uppercasing
533         if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(
534              UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
535             errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +
536                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +
537                   "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,
538                                                          SPECIAL_DOT_ABOVE_));
539         }
540         if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(
541                                         GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
542             errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +
543                   "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +
544                   "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,
545                                                          SPECIAL_DOT_ABOVE_));
546         }
547 
548         // lithuanian adds dot above to i in lowercasing if there are more
549         // above accents
550         if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(
551             UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
552                                    SPECIAL_DOT_ABOVE_UPPER_))) {
553             errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
554                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +
555                   "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
556                                                    SPECIAL_DOT_ABOVE_UPPER_));
557         }
558         if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(
559             UCharacter.toLowerCase(GERMAN_LOCALE_,
560                                    SPECIAL_DOT_ABOVE_UPPER_))) {
561             errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
562                   "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +
563                   "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
564                                                    SPECIAL_DOT_ABOVE_UPPER_));
565         }
566     }
567 
568     /**
569      * Tests for case mapping in the file SpecialCasing.txt
570      * This method reads in SpecialCasing.txt file for testing purposes.
571      * A default path is provided relative to the src path, however the user
572      * could set a system property to change the directory path.<br>
573      * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest
574      */
575     @Test
TestSpecialCasingTxt()576     public void TestSpecialCasingTxt()
577     {
578         try
579         {
580             // reading in the SpecialCasing file
581             BufferedReader input = TestUtil.getDataReader(
582                                                   "unicode/SpecialCasing.txt");
583             while (true)
584             {
585                 String s = input.readLine();
586                 if (s == null) {
587                     break;
588                 }
589                 if (s.length() == 0 || s.charAt(0) == '#') {
590                     continue;
591                 }
592 
593                 String chstr[] = getUnicodeStrings(s);
594                 StringBuffer strbuffer   = new StringBuffer(chstr[0]);
595                 StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
596                 StringBuffer upperbuffer = new StringBuffer(chstr[3]);
597                 Locale locale = null;
598                 for (int i = 4; i < chstr.length; i ++) {
599                     String condition = chstr[i];
600                     if (Character.isLowerCase(chstr[i].charAt(0))) {
601                         // specified locale
602                         locale = new Locale(chstr[i], "");
603                     }
604                     else if (condition.compareToIgnoreCase("Not_Before_Dot")
605                                                       == 0) {
606                         // turns I into dotless i
607                     }
608                     else if (condition.compareToIgnoreCase(
609                                                       "More_Above") == 0) {
610                             strbuffer.append((char)0x300);
611                             lowerbuffer.append((char)0x300);
612                             upperbuffer.append((char)0x300);
613                     }
614                     else if (condition.compareToIgnoreCase(
615                                                 "After_Soft_Dotted") == 0) {
616                             strbuffer.insert(0, 'i');
617                             lowerbuffer.insert(0, 'i');
618                             String lang = "";
619                             if (locale != null) {
620                                 lang = locale.getLanguage();
621                             }
622                             if (lang.equals("tr") || lang.equals("az")) {
623                                 // this is to be removed when 4.0 data comes out
624                                 // and upperbuffer.insert uncommented
625                                 // see jitterbug 2344
626                                 chstr[i] = "After_I";
627                                 strbuffer.deleteCharAt(0);
628                                 lowerbuffer.deleteCharAt(0);
629                                 i --;
630                                 continue;
631                                 // upperbuffer.insert(0, '\u0130');
632                             }
633                             else {
634                                 upperbuffer.insert(0, 'I');
635                             }
636                     }
637                     else if (condition.compareToIgnoreCase(
638                                                       "Final_Sigma") == 0) {
639                             strbuffer.insert(0, 'c');
640                             lowerbuffer.insert(0, 'c');
641                             upperbuffer.insert(0, 'C');
642                     }
643                     else if (condition.compareToIgnoreCase("After_I") == 0) {
644                             strbuffer.insert(0, 'I');
645                             lowerbuffer.insert(0, 'i');
646                             String lang = "";
647                             if (locale != null) {
648                                 lang = locale.getLanguage();
649                             }
650                             if (lang.equals("tr") || lang.equals("az")) {
651                                 upperbuffer.insert(0, 'I');
652                             }
653                     }
654                 }
655                 chstr[0] = strbuffer.toString();
656                 chstr[1] = lowerbuffer.toString();
657                 chstr[3] = upperbuffer.toString();
658                 if (locale == null) {
659                     if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {
660                         errln(s);
661                         errln("Fail: toLowerCase for character " +
662                               Utility.escape(chstr[0]) + ", expected "
663                               + Utility.escape(chstr[1]) + " but resulted in " +
664                               Utility.escape(UCharacter.toLowerCase(chstr[0])));
665                     }
666                     if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {
667                         errln(s);
668                         errln("Fail: toUpperCase for character " +
669                               Utility.escape(chstr[0]) + ", expected "
670                               + Utility.escape(chstr[3]) + " but resulted in " +
671                               Utility.escape(UCharacter.toUpperCase(chstr[0])));
672                     }
673                 }
674                 else {
675                     if (!UCharacter.toLowerCase(locale, chstr[0]).equals(
676                                                                    chstr[1])) {
677                         errln(s);
678                         errln("Fail: toLowerCase for character " +
679                               Utility.escape(chstr[0]) + ", expected "
680                               + Utility.escape(chstr[1]) + " but resulted in " +
681                               Utility.escape(UCharacter.toLowerCase(locale,
682                                                                     chstr[0])));
683                     }
684                     if (!UCharacter.toUpperCase(locale, chstr[0]).equals(
685                                                                    chstr[3])) {
686                         errln(s);
687                         errln("Fail: toUpperCase for character " +
688                               Utility.escape(chstr[0]) + ", expected "
689                               + Utility.escape(chstr[3]) + " but resulted in " +
690                               Utility.escape(UCharacter.toUpperCase(locale,
691                                                                     chstr[0])));
692                     }
693                 }
694             }
695             input.close();
696         }
697         catch (Exception e)
698         {
699           e.printStackTrace();
700         }
701     }
702 
703     @Test
TestUpperLower()704     public void TestUpperLower()
705     {
706         int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,
707                         0x01c9, 0x000c};
708         int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,
709                         0x01c9, 0x000c};
710         String upperTest = "abcdefg123hij.?:klmno";
711         String lowerTest = "ABCDEFG123HIJ.?:KLMNO";
712 
713         // Checks LetterLike Symbols which were previously a source of
714         // confusion [Bertrand A. D. 02/04/98]
715         for (int i = 0x2100; i < 0x2138; i ++) {
716             /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
717             if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {
718                 if (i != UCharacter.toLowerCase(i)) { // itself
719                     errln("Failed case conversion with itself: \\u"
720                             + Utility.hex(i, 4));
721                 }
722                 if (i != UCharacter.toUpperCase(i)) {
723                     errln("Failed case conversion with itself: \\u"
724                             + Utility.hex(i, 4));
725                 }
726             }
727         }
728         for (int i = 0; i < upper.length; i ++) {
729             if (UCharacter.toLowerCase(upper[i]) != lower[i]) {
730                 errln("FAILED UCharacter.tolower() for \\u"
731                         + Utility.hex(upper[i], 4)
732                         + " Expected \\u" + Utility.hex(lower[i], 4)
733                         + " Got \\u"
734                         + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));
735             }
736         }
737         logln("testing upper lower");
738         for (int i = 0; i < upperTest.length(); i ++) {
739             logln("testing to upper to lower");
740             if (UCharacter.isLetter(upperTest.charAt(i)) &&
741                 !UCharacter.isLowerCase(upperTest.charAt(i))) {
742                 errln("Failed isLowerCase test at \\u"
743                         + Utility.hex(upperTest.charAt(i), 4));
744             }
745             else if (UCharacter.isLetter(lowerTest.charAt(i))
746                      && !UCharacter.isUpperCase(lowerTest.charAt(i))) {
747                 errln("Failed isUpperCase test at \\u"
748                       + Utility.hex(lowerTest.charAt(i), 4));
749             }
750             else if (upperTest.charAt(i)
751                             != UCharacter.toLowerCase(lowerTest.charAt(i))) {
752                 errln("Failed case conversion from \\u"
753                         + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"
754                         + Utility.hex(upperTest.charAt(i), 4));
755             }
756             else if (lowerTest.charAt(i)
757                     != UCharacter.toUpperCase(upperTest.charAt(i))) {
758                 errln("Failed case conversion : \\u"
759                         + Utility.hex(upperTest.charAt(i), 4) + " To \\u"
760                         + Utility.hex(lowerTest.charAt(i), 4));
761             }
762             else if (upperTest.charAt(i)
763                     != UCharacter.toLowerCase(upperTest.charAt(i))) {
764                 errln("Failed case conversion with itself: \\u"
765                         + Utility.hex(upperTest.charAt(i)));
766             }
767             else if (lowerTest.charAt(i)
768                     != UCharacter.toUpperCase(lowerTest.charAt(i))) {
769                 errln("Failed case conversion with itself: \\u"
770                         + Utility.hex(lowerTest.charAt(i)));
771             }
772         }
773         logln("done testing upper Lower");
774     }
775 
assertGreekUpper(String s, String expected)776     private void assertGreekUpper(String s, String expected) {
777         assertEquals("toUpper/Greek(" + s + ')', expected, UCharacter.toUpperCase(GREEK_LOCALE_, s));
778     }
779 
780     @Test
TestGreekUpper()781     public void TestGreekUpper() {
782         // http://bugs.icu-project.org/trac/ticket/5456
783         assertGreekUpper("άδικος, κείμενο, ίριδα", "ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
784         // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
785         // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
786         assertGreekUpper("Πατάτα", "ΠΑΤΑΤΑ");
787         assertGreekUpper("Αέρας, Μυστήριο, Ωραίο", "ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
788         assertGreekUpper("Μαΐου, Πόρος, Ρύθμιση", "ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
789         assertGreekUpper("ΰ, Τηρώ, Μάιος", "Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
790         assertGreekUpper("άυλος", "ΑΫΛΟΣ");
791         assertGreekUpper("ΑΫΛΟΣ", "ΑΫΛΟΣ");
792         assertGreekUpper("Άκλιτα ρήματα ή άκλιτες μετοχές", "ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
793         // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
794         assertGreekUpper("Επειδή η αναγνώριση της αξιοπρέπειας", "ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
795         assertGreekUpper("νομικού ή διεθνούς", "ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
796         // http://unicode.org/udhr/d/udhr_ell_polytonic.html
797         assertGreekUpper("Ἐπειδὴ ἡ ἀναγνώριση", "ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
798         assertGreekUpper("νομικοῦ ἢ διεθνοῦς", "ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
799         // From Google bug report
800         assertGreekUpper("Νέο, Δημιουργία", "ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
801         // http://crbug.com/234797
802         assertGreekUpper("Ελάτε να φάτε τα καλύτερα παϊδάκια!", "ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
803         assertGreekUpper("Μαΐου, τρόλεϊ", "ΜΑΪΟΥ, ΤΡΟΛΕΪ");
804         assertGreekUpper("Το ένα ή το άλλο.", "ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
805         // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
806         assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ");
807         assertGreekUpper("ή.", "Ή.");
808     }
809 
810     private static final class EditChange {
811         private boolean change;
812         private int oldLength, newLength;
EditChange(boolean change, int oldLength, int newLength)813         EditChange(boolean change, int oldLength, int newLength) {
814             this.change = change;
815             this.oldLength = oldLength;
816             this.newLength = newLength;
817         }
818     }
819 
printOneEdit(Edits.Iterator ei)820     private static String printOneEdit(Edits.Iterator ei) {
821         if (ei.hasChange()) {
822             return "" + ei.oldLength() + "->" + ei.newLength();
823         } else {
824             return "" + ei.oldLength() + "=" + ei.newLength();
825         }
826     }
827 
828     /**
829      * Maps indexes according to the expected edits.
830      * A destination index can occur multiple times when there are source deletions.
831      * Map according to the last occurrence, normally in a non-empty destination span.
832      * Simplest is to search from the back.
833      */
srcIndexFromDest( EditChange expected[], int srcLength, int destLength, int index)834     private static int srcIndexFromDest(
835             EditChange expected[], int srcLength, int destLength, int index) {
836         int srcIndex = srcLength;
837         int destIndex = destLength;
838         int i = expected.length;
839         while (index < destIndex && i > 0) {
840             --i;
841             int prevSrcIndex = srcIndex - expected[i].oldLength;
842             int prevDestIndex = destIndex - expected[i].newLength;
843             if (index == prevDestIndex) {
844                 return prevSrcIndex;
845             } else if (index > prevDestIndex) {
846                 if (expected[i].change) {
847                     // In a change span, map to its end.
848                     return srcIndex;
849                 } else {
850                     // In an unchanged span, offset within it.
851                     return prevSrcIndex + (index - prevDestIndex);
852                 }
853             }
854             srcIndex = prevSrcIndex;
855             destIndex = prevDestIndex;
856         }
857         // index is outside the string.
858         return srcIndex;
859     }
860 
destIndexFromSrc( EditChange expected[], int srcLength, int destLength, int index)861     private static int destIndexFromSrc(
862             EditChange expected[], int srcLength, int destLength, int index) {
863         int srcIndex = srcLength;
864         int destIndex = destLength;
865         int i = expected.length;
866         while (index < srcIndex && i > 0) {
867             --i;
868             int prevSrcIndex = srcIndex - expected[i].oldLength;
869             int prevDestIndex = destIndex - expected[i].newLength;
870             if (index == prevSrcIndex) {
871                 return prevDestIndex;
872             } else if (index > prevSrcIndex) {
873                 if (expected[i].change) {
874                     // In a change span, map to its end.
875                     return destIndex;
876                 } else {
877                     // In an unchanged span, offset within it.
878                     return prevDestIndex + (index - prevSrcIndex);
879                 }
880             }
881             srcIndex = prevSrcIndex;
882             destIndex = prevDestIndex;
883         }
884         // index is outside the string.
885         return destIndex;
886     }
887 
checkEqualEdits(String name, Edits e1, Edits e2)888     private void checkEqualEdits(String name, Edits e1, Edits e2) {
889         Edits.Iterator ei1 = e1.getFineIterator();
890         Edits.Iterator ei2 = e2.getFineIterator();
891         for (int i = 0;; ++i) {
892             boolean ei1HasNext = ei1.next();
893             boolean ei2HasNext = ei2.next();
894             assertEquals(name + " next()[" + i + "]", ei1HasNext, ei2HasNext);
895             assertEquals(name + " edit[" + i + "]", printOneEdit(ei1), printOneEdit(ei2));
896             if (!ei1HasNext || !ei2HasNext) {
897                 break;
898             }
899         }
900     }
901 
checkEditsIter( String name, Edits.Iterator ei1, Edits.Iterator ei2, EditChange[] expected, boolean withUnchanged)902     private static void checkEditsIter(
903             String name, Edits.Iterator ei1, Edits.Iterator ei2,  // two equal iterators
904             EditChange[] expected, boolean withUnchanged) {
905         assertFalse(name, ei2.findSourceIndex(-1));
906         assertFalse(name, ei2.findDestinationIndex(-1));
907 
908         int expSrcIndex = 0;
909         int expDestIndex = 0;
910         int expReplIndex = 0;
911         for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
912             EditChange expect = expected[expIndex];
913             String msg = name + ' ' + expIndex;
914             if (withUnchanged || expect.change) {
915                 assertTrue(msg, ei1.next());
916                 assertEquals(msg, expect.change, ei1.hasChange());
917                 assertEquals(msg, expect.oldLength, ei1.oldLength());
918                 assertEquals(msg, expect.newLength, ei1.newLength());
919                 assertEquals(msg, expSrcIndex, ei1.sourceIndex());
920                 assertEquals(msg, expDestIndex, ei1.destinationIndex());
921                 assertEquals(msg, expReplIndex, ei1.replacementIndex());
922             }
923 
924             if (expect.oldLength > 0) {
925                 assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
926                 assertEquals(msg, expect.change, ei2.hasChange());
927                 assertEquals(msg, expect.oldLength, ei2.oldLength());
928                 assertEquals(msg, expect.newLength, ei2.newLength());
929                 assertEquals(msg, expSrcIndex, ei2.sourceIndex());
930                 assertEquals(msg, expDestIndex, ei2.destinationIndex());
931                 assertEquals(msg, expReplIndex, ei2.replacementIndex());
932                 if (!withUnchanged) {
933                     // For some iterators, move past the current range
934                     // so that findSourceIndex() has to look before the current index.
935                     ei2.next();
936                     ei2.next();
937                 }
938             }
939 
940             if (expect.newLength > 0) {
941                 assertTrue(msg, ei2.findDestinationIndex(expDestIndex));
942                 assertEquals(msg, expect.change, ei2.hasChange());
943                 assertEquals(msg, expect.oldLength, ei2.oldLength());
944                 assertEquals(msg, expect.newLength, ei2.newLength());
945                 assertEquals(msg, expSrcIndex, ei2.sourceIndex());
946                 assertEquals(msg, expDestIndex, ei2.destinationIndex());
947                 assertEquals(msg, expReplIndex, ei2.replacementIndex());
948                 if (!withUnchanged) {
949                     // For some iterators, move past the current range
950                     // so that findSourceIndex() has to look before the current index.
951                     ei2.next();
952                     ei2.next();
953                 }
954             }
955 
956             expSrcIndex += expect.oldLength;
957             expDestIndex += expect.newLength;
958             if (expect.change) {
959                 expReplIndex += expect.newLength;
960             }
961         }
962         String msg = name + " end";
963         assertFalse(msg, ei1.next());
964         assertFalse(msg, ei1.hasChange());
965         assertEquals(msg, 0, ei1.oldLength());
966         assertEquals(msg, 0, ei1.newLength());
967         assertEquals(msg, expSrcIndex, ei1.sourceIndex());
968         assertEquals(msg, expDestIndex, ei1.destinationIndex());
969         assertEquals(msg, expReplIndex, ei1.replacementIndex());
970 
971         assertFalse(name, ei2.findSourceIndex(expSrcIndex));
972         assertFalse(name, ei2.findDestinationIndex(expDestIndex));
973 
974         // Check mapping of all indexes against a simple implementation
975         // that works on the expected changes.
976         // Iterate once forward, once backward, to cover more runtime conditions.
977         int srcLength = expSrcIndex;
978         int destLength = expDestIndex;
979         List<Integer> srcIndexes = new ArrayList<>();
980         List<Integer> destIndexes = new ArrayList<>();
981         srcIndexes.add(-1);
982         destIndexes.add(-1);
983         int srcIndex = 0;
984         int destIndex = 0;
985         for (int i = 0; i < expected.length; ++i) {
986             if (expected[i].oldLength > 0) {
987                 srcIndexes.add(srcIndex);
988                 if (expected[i].oldLength > 1) {
989                     srcIndexes.add(srcIndex + 1);
990                     if (expected[i].oldLength > 2) {
991                         srcIndexes.add(srcIndex + expected[i].oldLength - 1);
992                     }
993                 }
994             }
995             if (expected[i].newLength > 0) {
996                 destIndexes.add(destIndex);
997                 if (expected[i].newLength > 1) {
998                     destIndexes.add(destIndex + 1);
999                     if (expected[i].newLength > 2) {
1000                         destIndexes.add(destIndex + expected[i].newLength - 1);
1001                     }
1002                 }
1003             }
1004             srcIndex += expected[i].oldLength;
1005             destIndex += expected[i].newLength;
1006         }
1007         srcIndexes.add(srcLength);
1008         destIndexes.add(destLength);
1009         srcIndexes.add(srcLength + 1);
1010         destIndexes.add(destLength + 1);
1011         Collections.reverse(destIndexes);
1012         // Zig-zag across the indexes to stress next() <-> previous().
1013         for (int i = 0; i < srcIndexes.size(); ++i) {
1014             for (int j : ZIG_ZAG) {
1015                 if ((i + j) < srcIndexes.size()) {
1016                     int si = srcIndexes.get(i + j);
1017                     assertEquals(name + " destIndexFromSrc(" + si + "):",
1018                             destIndexFromSrc(expected, srcLength, destLength, si),
1019                             ei2.destinationIndexFromSourceIndex(si));
1020                 }
1021             }
1022         }
1023         for (int i = 0; i < destIndexes.size(); ++i) {
1024             for (int j : ZIG_ZAG) {
1025                 if ((i + j) < destIndexes.size()) {
1026                     int di = destIndexes.get(i + j);
1027                     assertEquals(name + " srcIndexFromDest(" + di + "):",
1028                             srcIndexFromDest(expected, srcLength, destLength, di),
1029                             ei2.sourceIndexFromDestinationIndex(di));
1030                 }
1031             }
1032         }
1033     }
1034 
1035     private static final int[] ZIG_ZAG = { 0, 1, 2, 3, 2, 1 };
1036 
1037     @Test
TestEdits()1038     public void TestEdits() {
1039         Edits edits = new Edits();
1040         assertFalse("new Edits hasChanges", edits.hasChanges());
1041         assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
1042         assertEquals("new Edits", 0, edits.lengthDelta());
1043         edits.addUnchanged(1);  // multiple unchanged ranges are combined
1044         edits.addUnchanged(10000);  // too long, and they are split
1045         edits.addReplace(0, 0);
1046         edits.addUnchanged(2);
1047         assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
1048         assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
1049         assertEquals("unchanged 10003", 0, edits.lengthDelta());
1050         edits.addReplace(2, 1);  // multiple short equal-lengths edits are compressed
1051         edits.addUnchanged(0);
1052         edits.addReplace(2, 1);
1053         edits.addReplace(2, 1);
1054         edits.addReplace(0, 10);
1055         edits.addReplace(100, 0);
1056         edits.addReplace(3000, 4000);  // variable-length encoding
1057         edits.addReplace(100000, 100000);
1058         assertTrue("some edits hasChanges", edits.hasChanges());
1059         assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
1060         assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
1061 
1062         EditChange[] coarseExpectedChanges = new EditChange[] {
1063                 new EditChange(false, 10003, 10003),
1064                 new EditChange(true, 103106, 104013)
1065         };
1066         checkEditsIter("coarse",
1067                 edits.getCoarseIterator(), edits.getCoarseIterator(),
1068                 coarseExpectedChanges, true);
1069         checkEditsIter("coarse changes",
1070                 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
1071                 coarseExpectedChanges, false);
1072 
1073         EditChange[] fineExpectedChanges = new EditChange[] {
1074                 new EditChange(false, 10003, 10003),
1075                 new EditChange(true, 2, 1),
1076                 new EditChange(true, 2, 1),
1077                 new EditChange(true, 2, 1),
1078                 new EditChange(true, 0, 10),
1079                 new EditChange(true, 100, 0),
1080                 new EditChange(true, 3000, 4000),
1081                 new EditChange(true, 100000, 100000)
1082         };
1083         checkEditsIter("fine",
1084                 edits.getFineIterator(), edits.getFineIterator(),
1085                 fineExpectedChanges, true);
1086         checkEditsIter("fine changes",
1087                 edits.getFineChangesIterator(), edits.getFineChangesIterator(),
1088                 fineExpectedChanges, false);
1089 
1090         edits.reset();
1091         assertFalse("reset hasChanges", edits.hasChanges());
1092         assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
1093         assertEquals("reset", 0, edits.lengthDelta());
1094         Edits.Iterator ei = edits.getCoarseChangesIterator();
1095         assertFalse("reset then iterator", ei.next());
1096     }
1097 
1098     @Test
TestEditsFindFwdBwd()1099     public void TestEditsFindFwdBwd() {
1100         // Some users need index mappings to be efficient when they are out of order.
1101         // The most interesting failure case for this test is it taking a very long time.
1102         Edits e = new Edits();
1103         int N = 200000;
1104         for (int i = 0; i < N; ++i) {
1105             e.addUnchanged(1);
1106             e.addReplace(3, 1);
1107         }
1108         Edits.Iterator iter = e.getFineIterator();
1109         for (int i = 0; i <= N; i += 2) {
1110             assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i));
1111             assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1));
1112         }
1113         for (int i = N; i >= 0; i -= 2) {
1114             assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1));
1115             assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i));
1116         }
1117     }
1118 
1119     @Test
TestMergeEdits()1120     public void TestMergeEdits() {
1121         Edits ab = new Edits(), bc = new Edits(), ac = new Edits(), expected_ac = new Edits();
1122 
1123         // Simple: Two parallel non-changes.
1124         ab.addUnchanged(2);
1125         bc.addUnchanged(2);
1126         expected_ac.addUnchanged(2);
1127 
1128         // Simple: Two aligned changes.
1129         ab.addReplace(3, 2);
1130         bc.addReplace(2, 1);
1131         expected_ac.addReplace(3, 1);
1132 
1133         // Unequal non-changes.
1134         ab.addUnchanged(5);
1135         bc.addUnchanged(3);
1136         expected_ac.addUnchanged(3);
1137         // ab ahead by 2
1138 
1139         // Overlapping changes accumulate until they share a boundary.
1140         ab.addReplace(4, 3);
1141         bc.addReplace(3, 2);
1142         ab.addReplace(4, 3);
1143         bc.addReplace(3, 2);
1144         ab.addReplace(4, 3);
1145         bc.addReplace(3, 2);
1146         bc.addUnchanged(4);
1147         expected_ac.addReplace(14, 8);
1148         // bc ahead by 2
1149 
1150         // Balance out intermediate-string lengths.
1151         ab.addUnchanged(2);
1152         expected_ac.addUnchanged(2);
1153 
1154         // Insert something and delete it: Should disappear.
1155         ab.addReplace(0, 5);
1156         ab.addReplace(0, 2);
1157         bc.addReplace(7, 0);
1158 
1159         // Parallel change to make a new boundary.
1160         ab.addReplace(1, 2);
1161         bc.addReplace(2, 3);
1162         expected_ac.addReplace(1, 3);
1163 
1164         // Multiple ab deletions should remain separate at the boundary.
1165         ab.addReplace(1, 0);
1166         ab.addReplace(2, 0);
1167         ab.addReplace(3, 0);
1168         expected_ac.addReplace(1, 0);
1169         expected_ac.addReplace(2, 0);
1170         expected_ac.addReplace(3, 0);
1171 
1172         // Unequal non-changes can be split for another boundary.
1173         ab.addUnchanged(2);
1174         bc.addUnchanged(1);
1175         expected_ac.addUnchanged(1);
1176         // ab ahead by 1
1177 
1178         // Multiple bc insertions should create a boundary and remain separate.
1179         bc.addReplace(0, 4);
1180         bc.addReplace(0, 5);
1181         bc.addReplace(0, 6);
1182         expected_ac.addReplace(0, 4);
1183         expected_ac.addReplace(0, 5);
1184         expected_ac.addReplace(0, 6);
1185         // ab ahead by 1
1186 
1187         // Multiple ab deletions in the middle of a bc change are merged.
1188         bc.addReplace(2, 2);
1189         // bc ahead by 1
1190         ab.addReplace(1, 0);
1191         ab.addReplace(2, 0);
1192         ab.addReplace(3, 0);
1193         ab.addReplace(4, 1);
1194         expected_ac.addReplace(11, 2);
1195 
1196         // Multiple bc insertions in the middle of an ab change are merged.
1197         ab.addReplace(5, 6);
1198         bc.addReplace(3, 3);
1199         // ab ahead by 3
1200         bc.addReplace(0, 4);
1201         bc.addReplace(0, 5);
1202         bc.addReplace(0, 6);
1203         bc.addReplace(3, 7);
1204         expected_ac.addReplace(5, 25);
1205 
1206         // Delete around a deletion.
1207         ab.addReplace(4, 4);
1208         ab.addReplace(3, 0);
1209         ab.addUnchanged(2);
1210         bc.addReplace(2, 2);
1211         bc.addReplace(4, 0);
1212         expected_ac.addReplace(9, 2);
1213 
1214         // Insert into an insertion.
1215         ab.addReplace(0, 2);
1216         bc.addReplace(1, 1);
1217         bc.addReplace(0, 8);
1218         bc.addUnchanged(4);
1219         expected_ac.addReplace(0, 10);
1220         // bc ahead by 3
1221 
1222         // Balance out intermediate-string lengths.
1223         ab.addUnchanged(3);
1224         expected_ac.addUnchanged(3);
1225 
1226         // Deletions meet insertions.
1227         // Output order is arbitrary in principle, but we expect insertions first
1228         // and want to keep it that way.
1229         ab.addReplace(2, 0);
1230         ab.addReplace(4, 0);
1231         ab.addReplace(6, 0);
1232         bc.addReplace(0, 1);
1233         bc.addReplace(0, 3);
1234         bc.addReplace(0, 5);
1235         expected_ac.addReplace(0, 1);
1236         expected_ac.addReplace(0, 3);
1237         expected_ac.addReplace(0, 5);
1238         expected_ac.addReplace(2, 0);
1239         expected_ac.addReplace(4, 0);
1240         expected_ac.addReplace(6, 0);
1241 
1242         // End with a non-change, so that further edits are never reordered.
1243         ab.addUnchanged(1);
1244         bc.addUnchanged(1);
1245         expected_ac.addUnchanged(1);
1246 
1247         ac.mergeAndAppend(ab, bc);
1248         checkEqualEdits("ab+bc", expected_ac, ac);
1249 
1250         // Append more Edits.
1251         Edits ab2 = new Edits(), bc2 = new Edits();
1252         ab2.addUnchanged(5);
1253         bc2.addReplace(1, 2);
1254         bc2.addUnchanged(4);
1255         expected_ac.addReplace(1, 2);
1256         expected_ac.addUnchanged(4);
1257         ac.mergeAndAppend(ab2, bc2);
1258         checkEqualEdits("ab2+bc2", expected_ac, ac);
1259 
1260         // Append empty edits.
1261         Edits empty = new Edits();
1262         ac.mergeAndAppend(empty, empty);
1263         checkEqualEdits("empty+empty", expected_ac, ac);
1264 
1265         // Error: Append more edits with mismatched intermediate-string lengths.
1266         Edits mismatch = new Edits();
1267         mismatch.addReplace(1, 1);
1268         try {
1269             ac.mergeAndAppend(ab2, mismatch);
1270             fail("ab2+mismatch did not yield IllegalArgumentException");
1271         } catch (IllegalArgumentException expected) {
1272         }
1273         try {
1274             ac.mergeAndAppend(mismatch, bc2);
1275             fail("mismatch+bc2 did not yield IllegalArgumentException");
1276         } catch (IllegalArgumentException expected) {
1277         }
1278     }
1279 
1280     @Test
TestCaseMapWithEdits()1281     public void TestCaseMapWithEdits() {
1282         StringBuilder sb = new StringBuilder();
1283         Edits edits = new Edits();
1284 
1285         sb = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", sb, edits);
1286         assertEquals("toLower(Istanbul)", "ıb", sb.toString());
1287         EditChange[] lowerExpectedChanges = new EditChange[] {
1288                 new EditChange(true, 1, 1),
1289                 new EditChange(false, 4, 4),
1290                 new EditChange(true, 1, 1),
1291                 new EditChange(false, 2, 2)
1292         };
1293         checkEditsIter("toLower(Istanbul)",
1294                 edits.getFineIterator(), edits.getFineIterator(),
1295                 lowerExpectedChanges, true);
1296 
1297         sb.delete(0, sb.length());
1298         edits.reset();
1299         sb = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", sb, edits);
1300         assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", sb.toString());
1301         EditChange[] upperExpectedChanges = new EditChange[] {
1302                 new EditChange(false, 1, 1),
1303                 new EditChange(true, 1, 1),
1304                 new EditChange(true, 1, 1),
1305                 new EditChange(true, 1, 1),
1306                 new EditChange(true, 1, 1),
1307                 new EditChange(true, 1, 1)
1308         };
1309         checkEditsIter("toUpper(Πατάτα)",
1310                 edits.getFineIterator(), edits.getFineIterator(),
1311                 upperExpectedChanges, true);
1312 
1313         sb.delete(0, sb.length());
1314         edits.reset();
1315         sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
1316                 DUTCH_LOCALE_, null, "IjssEL IglOo", sb, edits);
1317         assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString());
1318         EditChange[] titleExpectedChanges = new EditChange[] {
1319                 new EditChange(false, 1, 1),
1320                 new EditChange(true, 1, 1),
1321                 new EditChange(false, 10, 10)
1322         };
1323         checkEditsIter("toTitle(IjssEL IglOo)",
1324                 edits.getFineIterator(), edits.getFineIterator(),
1325                 titleExpectedChanges, true);
1326 
1327         sb.delete(0, sb.length());
1328         edits.reset();
1329         sb = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", sb, edits);
1330         assertEquals("fold(IßtanBul)", "ıssb", sb.toString());
1331         EditChange[] foldExpectedChanges = new EditChange[] {
1332                 new EditChange(true, 1, 1),
1333                 new EditChange(true, 1, 2),
1334                 new EditChange(false, 3, 3),
1335                 new EditChange(true, 1, 1),
1336                 new EditChange(false, 2, 2)
1337         };
1338         checkEditsIter("fold(IßtanBul)",
1339                 edits.getFineIterator(), edits.getFineIterator(),
1340                 foldExpectedChanges, true);
1341     }
1342 
1343     @Test
TestCaseMapToString()1344     public void TestCaseMapToString() {
1345         // String apply(..., CharSequence)
1346         // Omit unchanged text.
1347         assertEquals("toLower(Istanbul)", "ıb",
1348                 CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul"));
1349         assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ",
1350                 CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα"));
1351         assertEquals("toTitle(IjssEL IglOo)", "J",
1352                 CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
1353                         DUTCH_LOCALE_, null, "IjssEL IglOo"));
1354         assertEquals("fold(IßtanBul)", "ıssb",
1355                 CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul"));
1356 
1357         // Return the whole result string.
1358         assertEquals("toLower(Istanbul)", "ıstanbul",
1359                 CaseMap.toLower().apply(TURKISH_LOCALE_, "IstanBul"));
1360         assertEquals("toUpper(Πατάτα)", "ΠΑΤΑΤΑ",
1361                 CaseMap.toUpper().apply(GREEK_LOCALE_, "Πατάτα"));
1362         assertEquals("toTitle(IjssEL IglOo)", "IJssEL IglOo",
1363                 CaseMap.toTitle().noBreakAdjustment().noLowercase().apply(
1364                         DUTCH_LOCALE_, null, "IjssEL IglOo"));
1365         assertEquals("fold(IßtanBul)", "ısstanbul",
1366                 CaseMap.fold().turkic().apply("IßtanBul"));
1367     }
1368 
1369     @Test
TestCaseMapEditsIteratorDocs()1370     public void TestCaseMapEditsIteratorDocs() {
1371         String input = "abcßDeF";
1372         // output: "abcssdef"
1373 
1374         StringBuilder sb = new StringBuilder();
1375         Edits edits = new Edits();
1376         CaseMap.fold().apply(input, sb, edits);
1377 
1378         String[] fineIteratorExpected = {
1379                 "{ src[0..3] ≡ dest[0..3] (no-change) }",
1380                 "{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1381                 "{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1382                 "{ src[5..6] ≡ dest[6..7] (no-change) }",
1383                 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1384         };
1385         String[] fineChangesIteratorExpected = {
1386                 "{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1387                 "{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1388                 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1389         };
1390         String[] coarseIteratorExpected = {
1391                 "{ src[0..3] ≡ dest[0..3] (no-change) }",
1392                 "{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1393                 "{ src[5..6] ≡ dest[6..7] (no-change) }",
1394                 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1395         };
1396         String[] coarseChangesIteratorExpected = {
1397                 "{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1398                 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1399         };
1400 
1401         // Expected destination indices when source index is queried
1402         int[] expectedDestFineEditIndices = {0, 0, 0, 3, 5, 6, 7};
1403         int[] expectedDestCoarseEditIndices = {0, 0, 0, 3, 3, 6, 7};
1404         int[] expectedDestFineStringIndices = {0, 1, 2, 3, 5, 6, 7};
1405         int[] expectedDestCoarseStringIndices = {0, 1, 2, 3, 6, 6, 7};
1406 
1407         // Expected source indices when destination index is queried
1408         int[] expectedSrcFineEditIndices = { 0, 0, 0, 3, 3, 4, 5, 6 };
1409         int[] expectedSrcCoarseEditIndices = { 0, 0, 0, 3, 3, 3, 5, 6 };
1410         int[] expectedSrcFineStringIndices = { 0, 1, 2, 3, 4, 4, 5, 6 };
1411         int[] expectedSrcCoarseStringIndices = { 0, 1, 2, 3, 5, 5, 5, 6 };
1412 
1413         // Demonstrate the iterator next() method:
1414         Edits.Iterator fineIterator = edits.getFineIterator();
1415         int i = 0;
1416         while (fineIterator.next()) {
1417             String expected = fineIteratorExpected[i++];
1418             String actual = fineIterator.toString();
1419             assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length()));
1420         }
1421         Edits.Iterator fineChangesIterator = edits.getFineChangesIterator();
1422         i = 0;
1423         while (fineChangesIterator.next()) {
1424             String expected = fineChangesIteratorExpected[i++];
1425             String actual = fineChangesIterator.toString();
1426             assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length()));
1427         }
1428         Edits.Iterator coarseIterator = edits.getCoarseIterator();
1429         i = 0;
1430         while (coarseIterator.next()) {
1431             String expected = coarseIteratorExpected[i++];
1432             String actual = coarseIterator.toString();
1433             assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length()));
1434         }
1435         Edits.Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
1436         i = 0;
1437         while (coarseChangesIterator.next()) {
1438             String expected = coarseChangesIteratorExpected[i++];
1439             String actual = coarseChangesIterator.toString();
1440             assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length()));
1441         }
1442 
1443         // Demonstrate the iterator indexing methods:
1444         // fineIterator should have the same behavior as fineChangesIterator, and
1445         // coarseIterator should have the same behavior as coarseChangesIterator.
1446         for (int srcIndex=0; srcIndex<input.length(); srcIndex++) {
1447             fineIterator.findSourceIndex(srcIndex);
1448             fineChangesIterator.findSourceIndex(srcIndex);
1449             coarseIterator.findSourceIndex(srcIndex);
1450             coarseChangesIterator.findSourceIndex(srcIndex);
1451 
1452             assertEquals("Source index: " + srcIndex,
1453                     expectedDestFineEditIndices[srcIndex],
1454                     fineIterator.destinationIndex());
1455             assertEquals("Source index: " + srcIndex,
1456                     expectedDestFineEditIndices[srcIndex],
1457                     fineChangesIterator.destinationIndex());
1458             assertEquals("Source index: " + srcIndex,
1459                     expectedDestCoarseEditIndices[srcIndex],
1460                     coarseIterator.destinationIndex());
1461             assertEquals("Source index: " + srcIndex,
1462                     expectedDestCoarseEditIndices[srcIndex],
1463                     coarseChangesIterator.destinationIndex());
1464 
1465             assertEquals("Source index: " + srcIndex,
1466                     expectedDestFineStringIndices[srcIndex],
1467                     fineIterator.destinationIndexFromSourceIndex(srcIndex));
1468             assertEquals("Source index: " + srcIndex,
1469                     expectedDestFineStringIndices[srcIndex],
1470                     fineChangesIterator.destinationIndexFromSourceIndex(srcIndex));
1471             assertEquals("Source index: " + srcIndex,
1472                     expectedDestCoarseStringIndices[srcIndex],
1473                     coarseIterator.destinationIndexFromSourceIndex(srcIndex));
1474             assertEquals("Source index: " + srcIndex,
1475                     expectedDestCoarseStringIndices[srcIndex],
1476                     coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex));
1477         }
1478         for (int destIndex=0; destIndex<input.length(); destIndex++) {
1479             fineIterator.findDestinationIndex(destIndex);
1480             fineChangesIterator.findDestinationIndex(destIndex);
1481             coarseIterator.findDestinationIndex(destIndex);
1482             coarseChangesIterator.findDestinationIndex(destIndex);
1483 
1484             assertEquals("Destination index: " + destIndex,
1485                     expectedSrcFineEditIndices[destIndex],
1486                     fineIterator.sourceIndex());
1487             assertEquals("Destination index: " + destIndex,
1488                     expectedSrcFineEditIndices[destIndex],
1489                     fineChangesIterator.sourceIndex());
1490             assertEquals("Destination index: " + destIndex,
1491                     expectedSrcCoarseEditIndices[destIndex],
1492                     coarseIterator.sourceIndex());
1493             assertEquals("Destination index: " + destIndex,
1494                     expectedSrcCoarseEditIndices[destIndex],
1495                     coarseChangesIterator.sourceIndex());
1496 
1497             assertEquals("Destination index: " + destIndex,
1498                     expectedSrcFineStringIndices[destIndex],
1499                     fineIterator.sourceIndexFromDestinationIndex(destIndex));
1500             assertEquals("Destination index: " + destIndex,
1501                     expectedSrcFineStringIndices[destIndex],
1502                     fineChangesIterator.sourceIndexFromDestinationIndex(destIndex));
1503             assertEquals("Destination index: " + destIndex,
1504                     expectedSrcCoarseStringIndices[destIndex],
1505                     coarseIterator.sourceIndexFromDestinationIndex(destIndex));
1506             assertEquals("Destination index: " + destIndex,
1507                     expectedSrcCoarseStringIndices[destIndex],
1508                     coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex));
1509         }
1510     }
1511 
1512     @Test
TestCaseMapGreekExtended()1513     public void TestCaseMapGreekExtended() {
1514         // Ticket 13851
1515         String s = "\u1F80\u1F88\u1FFC";
1516         String result = CaseMap.toLower().apply(Locale.ROOT,  s);
1517         assertEquals("lower", "\u1F80\u1F80\u1FF3", result);
1518         result = CaseMap.toTitle().apply(Locale.ROOT, null, s);
1519         assertEquals("title", "\u1F88\u1F80\u1FF3", result);
1520     }
1521 
1522     @Test
TestFoldBug20316()1523     public void TestFoldBug20316() {
1524         String s = "廬ᾒ뻪ᣃइ垚Ⴡₓ렞체ꖲ갹ݖ䕷꾬쯎㊅ᦘᰄ㸜䡏遁럢豑黾奯㸀⊻줮끎蒹衤劔뽳趧熶撒쫃窩겨ཇ脌쵐嫑⟑겭㋋濜隣ᳰ봢ℼ櫩靛㉃炔鋳" +
1525                 "оे⳨ᦧྃ깢粣ᑤꇪ찃̹鵄ዤꛛᰙ⡝捣쯋톐蕩栭쥀뎊ᄯ৻恳〬昴껤룩列潱ᑮ煃鶖안꽊鹭宪帐❖ा쥈잔";
1526         String result = CaseMap.fold().apply(s);
1527         assertTrue("廬ᾒ...->廬ἢι...", result.startsWith("廬ἢι"));
1528         s = "儊ẖ깸ᝓ恷ᇁ䜄쌼ꇸჃ䗑䘬䒥㈴槁蛚紆洔㖣믏亝醣黹Ά嶨䖕篕舀ꖧ₭ଯᒗ✧ԗ墖쁳㽎苊澎긁⾆⒞蠻왃囨ᡠ邏꾭⪐턣搤穳≠톲絋砖ሷ⠆" +
1529                 "瞏惢鵶剕듘ᅤ♟Ԡⴠ⊡鹔ጙ갑⣚堟ᣗ✸㕇絮䠎瘗⟡놥擢ꉭ佱ྪ飹痵⿑⨴츿璿僖㯷넴鋰膄釚겼ナ黪差";
1530         result = CaseMap.fold().apply(s);
1531         assertTrue("儊ẖ...->儊h\u0331...", result.startsWith("儊h\u0331"));
1532     }
1533 
1534     // private data members - test data --------------------------------------
1535 
    // Locales (language[, country]) shared by the locale-sensitive case mapping tests.
    private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
    private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
    private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
    private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
    private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
    // Language only, no country.
    private static final Locale DUTCH_LOCALE_ = new Locale("nl");
1542 
1543     private static final int CHARACTER_UPPER_[] =
1544                       {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
1545                        0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,
1546                        0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,
1547                        0x01c4, 0x01c8, 0x000c, 0x0000};
1548     private static final int CHARACTER_LOWER_[] =
1549                       {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
1550                        0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,
1551                        0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,
1552                        0x01c6, 0x01c9, 0x000c, 0x0000};
1553 
1554     /*
1555      * CaseFolding.txt says about i and its cousins:
1556      *   0049; C; 0069; # LATIN CAPITAL LETTER I
1557      *   0049; T; 0131; # LATIN CAPITAL LETTER I
1558      *
1559      *   0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1560      *   0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1561      * That's all.
1562      * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
1563      */
1564     private static final int FOLDING_SIMPLE_[] = {
1565         // input, default, exclude special i
1566         0x61,   0x61,  0x61,
1567         0x49,   0x69,  0x131,
1568         0x130,  0x130, 0x69,
1569         0x131,  0x131, 0x131,
1570         0xdf,   0xdf,  0xdf,
1571         0xfb03, 0xfb03, 0xfb03,
1572         0x1040e,0x10436,0x10436,
1573         0x5ffff,0x5ffff,0x5ffff
1574     };
1575     private static final String FOLDING_MIXED_[] =
1576                           {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
1577                            "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};
1578     private static final String FOLDING_DEFAULT_[] =
1579          {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
1580           "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};
1581     private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =
1582          {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
1583           "ass\u03bcffi\uD801\uDC34i\u0131"};
    /**
     * "IESUS CHRISTOS"
     */
    private static final String SHARED_UPPERCASE_GREEK_ =
        "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
    /**
     * "iesus christos"
     */
    private static final String SHARED_LOWERCASE_GREEK_ =
        "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
    // Decodes to "istanbul, not constantınople!" (the i in "constantınople" is dotless, U+0131).
    private static final String SHARED_LOWERCASE_TURKISH_ =
        "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
    // Decodes to "TOPKAPI PALACE, İSTANBUL" (the I of "İSTANBUL" is dotted, U+0130).
    private static final String SHARED_UPPERCASE_TURKISH_ =
        "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
    // Starts with capital I with dot above (U+0130).
    private static final String SHARED_UPPERCASE_ISTANBUL_ =
                                          "\u0130STANBUL, NOT CONSTANTINOPLE!";
    // Starts with i followed by combining dot above (U+0307).
    private static final String SHARED_LOWERCASE_ISTANBUL_ =
                                          "i\u0307stanbul, not constantinople!";
    // "topkapı palace, istanbul" with a dotless i (U+0131) at the end of the first word.
    private static final String SHARED_LOWERCASE_TOPKAP_ =
                                          "topkap\u0131 palace, istanbul";
    private static final String SHARED_UPPERCASE_TOPKAP_ =
                                          "TOPKAPI PALACE, ISTANBUL";
    // "Süßmayrstraße": despite the name this literal is mixed case (it starts with a capital S).
    private static final String SHARED_LOWERCASE_GERMAN_ =
                                          "S\u00FC\u00DFmayrstra\u00DFe";
    // "SÜSSMAYRSTRASSE": each sharp s of the string above appears here as "SS".
    private static final String SHARED_UPPERCASE_GERMAN_ =
                                          "S\u00DCSSMAYRSTRASSE";
1610 
    // Uppercasing data. NOTE(review): by their names, UPPER_BEFORE_ is the input and
    // UPPER_ROOT_ / UPPER_TURKISH_ the expected results per locale - confirm against the tests.
    // The two result strings differ only in their third character (U+0049 vs. U+0130).
    private static final String UPPER_BEFORE_ =
         "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
    private static final String UPPER_ROOT_ =
         "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
    private static final String UPPER_TURKISH_ =
         "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
    // Sharp s followed by "a", and the three-character string "SSA".
    private static final String UPPER_MINI_ = "\u00df\u0061";
    private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";

    // Lowercasing data, laid out like the uppercasing data above.
    // The two result strings differ only in their third character (U+0069 vs. U+0131).
    private static final String LOWER_BEFORE_ =
                      "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
    private static final String LOWER_ROOT_ =
                      "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
    private static final String LOWER_TURKISH_ =
                      "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";
1626 
1627     /**
1628      * each item is an array with input string, result string, locale ID, break iterator, options
1629      * the break iterator is specified as an int, same as in BreakIterator.KIND_*:
1630      * 0=KIND_CHARACTER  1=KIND_WORD  2=KIND_LINE  3=KIND_SENTENCE  4=KIND_TITLE  -1=default (NULL=words)  -2=no breaks (.*)
1631      * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT
1632      * see ICU4C source/test/testdata/casing.txt
1633      */
1634     private static final String TITLE_DATA_[] = {
1635         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
1636         "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
1637         "",
1638         "0",
1639         "",
1640 
1641         "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
1642         "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
1643         "",
1644         "1",
1645         "",
1646 
1647         "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
1648         "",
1649         "-1",
1650         "",
1651 
1652         " tHe QUIcK bRoWn", " The Quick Brown",
1653         "",
1654         "4",
1655         "",
1656 
1657         "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
1658         "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
1659         "",
1660         "0",
1661         "",
1662 
1663         "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j
1664         "",
1665         "-1",
1666         "",
1667 
1668         "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'",  "'Oh Don't Titlecase After Letter+'",
1669         "",
1670         "-1",
1671         "",
1672 
1673         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
1674         "A \u02bbCat. A \u02bbDog! \u02bbEtc.",
1675         "",
1676         "-1",
1677         "", // default
1678 
1679         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
1680         "A \u02bbcat. A \u02bbdog! \u02bbetc.",
1681         "",
1682         "-1",
1683         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
1684 
1685         "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
1686         "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",
1687         "",
1688         "3",
1689         "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
1690 
1691 
1692         "\u02bbcAt! \u02bbeTc.",
1693         "\u02bbCat! \u02bbetc.",
1694         "",
1695         "-2",
1696         "", // -2=Trivial break iterator
1697 
1698         "\u02bbcAt! \u02bbeTc.",
1699         "\u02bbcat! \u02bbetc.",
1700         "",
1701         "-2",
1702         "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
1703 
1704         "\u02bbcAt! \u02bbeTc.",
1705         "\u02bbCAt! \u02bbeTc.",
1706         "",
1707         "-2",
1708         "L", // U_TITLECASE_NO_LOWERCASE
1709 
1710         "\u02bbcAt! \u02bbeTc.",
1711         "\u02bbcAt! \u02bbeTc.",
1712         "",
1713         "-2",
1714         "AL", // Both options
1715 
1716         // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
1717         // when TITLECASE_NO_LOWERCASE encounters a single-letter word
1718         "a b c",
1719         "A B C",
1720         "",
1721         "1",
1722         "L" // U_TITLECASE_NO_LOWERCASE
1723     };
1724 
1725 
1726     /**
1727      * <p>basic string, lower string, upper string, title string</p>
1728      */
1729     private static final String SPECIAL_DATA_[] = {
1730         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
1731         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
1732         UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
1733         "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +
1734                          UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
1735         "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +
1736                               UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
1737         "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +
1738                               UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
1739         // sigmas followed/preceded by cased letters
1740         "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
1741         "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
1742         "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "
1743     };
1744     private static final Locale SPECIAL_LOCALES_[] = {
1745         null,
1746         ENGLISH_LOCALE_,
1747         null,
1748     };
1749 
    // Capital I variants separated by spaces: plain I, I with dot above (U+0130), and I followed
    // by combining marks including combining dot above (U+0307).
    // NOTE(review): by their names, the two strings after it are the expected lowercase
    // results for Turkish and German - confirm against the test that reads them.
    private static final String SPECIAL_DOTTED_ =
            "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
    private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =
            "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
    private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =
            "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
    // Lowercase letters (and one space) followed by combining dot above (U+0307).
    // NOTE(review): by their names, the two strings after it are the expected uppercase
    // results for Lithuanian and German - confirm against the test that reads them.
    private static final String SPECIAL_DOT_ABOVE_ =
            "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
    private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =
            "A\u0307 \u0307 I J\u0327 J\u0301\u0307";
    private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
            "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
    // Uppercase I/J/I-with-ogonek forms, with and without accents.
    // NOTE(review): by their names, the two strings after it are the expected lowercase
    // results for Lithuanian and German - confirm against the test that reads them.
    private static final String SPECIAL_DOT_ABOVE_UPPER_ =
            "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
    private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
            "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
    private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
            "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";
1768 
1769     // private methods -------------------------------------------------------
1770 
1771     /**
1772      * Converting the hex numbers represented between ';' to Unicode strings
1773      * @param str string to break up into Unicode strings
1774      * @return array of Unicode strings ending with a null
1775      */
getUnicodeStrings(String str)1776     private String[] getUnicodeStrings(String str)
1777     {
1778         List<String> v = new ArrayList<>(10);
1779         int start = 0;
1780         for (int casecount = 4; casecount > 0; casecount --) {
1781             int end = str.indexOf("; ", start);
1782             String casestr = str.substring(start, end);
1783             StringBuffer buffer = new StringBuffer();
1784             int spaceoffset = 0;
1785             while (spaceoffset < casestr.length()) {
1786                 int nextspace = casestr.indexOf(' ', spaceoffset);
1787                 if (nextspace == -1) {
1788                     nextspace = casestr.length();
1789                 }
1790                 buffer.append((char)Integer.parseInt(
1791                                      casestr.substring(spaceoffset, nextspace),
1792                                                       16));
1793                 spaceoffset = nextspace + 1;
1794             }
1795             start = end + 2;
1796             v.add(buffer.toString());
1797         }
1798         int comments = str.indexOf(" #", start);
1799         if (comments != -1 && comments != start) {
1800             if (str.charAt(comments - 1) == ';') {
1801                 comments --;
1802             }
1803             String conditions = str.substring(start, comments);
1804             int offset = 0;
1805             while (offset < conditions.length()) {
1806                 int spaceoffset = conditions.indexOf(' ', offset);
1807                 if (spaceoffset == -1) {
1808                     spaceoffset = conditions.length();
1809                 }
1810                 v.add(conditions.substring(offset, spaceoffset));
1811                 offset = spaceoffset + 1;
1812             }
1813         }
1814         int size = v.size();
1815         String result[] = new String[size];
1816         for (int i = 0; i < size; i ++) {
1817             result[i] = v.get(i);
1818         }
1819         return result;
1820     }
1821 }
1822