1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2014, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 12 package ohos.global.icu.dev.test.lang; 13 14 15 import java.io.BufferedReader; 16 import java.util.ArrayList; 17 import java.util.Collections; 18 import java.util.List; 19 import java.util.Locale; 20 21 import org.junit.Test; 22 import org.junit.runner.RunWith; 23 import org.junit.runners.JUnit4; 24 25 import ohos.global.icu.dev.test.TestFmwk; 26 import ohos.global.icu.dev.test.TestUtil; 27 import ohos.global.icu.impl.Utility; 28 import ohos.global.icu.lang.UCharacter; 29 import ohos.global.icu.lang.UProperty; 30 import ohos.global.icu.text.BreakIterator; 31 import ohos.global.icu.text.CaseMap; 32 import ohos.global.icu.text.Edits; 33 import ohos.global.icu.text.RuleBasedBreakIterator; 34 import ohos.global.icu.text.UTF16; 35 import ohos.global.icu.util.ULocale; 36 37 38 39 /** 40 * <p>Testing character casing</p> 41 * <p>Mostly following the test cases in strcase.cpp for ICU</p> 42 * @author Syn Wee Quek 43 * @since march 14 2002 44 */ 45 46 @RunWith(JUnit4.class) 47 public final class UCharacterCaseTest extends TestFmwk 48 { 49 // constructor ----------------------------------------------------------- 50 51 /** 52 * Constructor 53 */ UCharacterCaseTest()54 public UCharacterCaseTest() 55 { 56 } 57 58 // public methods -------------------------------------------------------- 59 60 /** 61 * Testing the uppercase and lowercase function of UCharacter 62 */ 63 @Test TestCharacter()64 public void TestCharacter() 65 { 66 for (int i = 0; i < CHARACTER_LOWER_.length; i ++) { 67 if (UCharacter.isLetter(CHARACTER_LOWER_[i]) 
&& 68 !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) { 69 errln("FAIL isLowerCase test for \\u" + 70 hex(CHARACTER_LOWER_[i])); 71 break; 72 } 73 if (UCharacter.isLetter(CHARACTER_UPPER_[i]) && 74 !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) || 75 UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) { 76 errln("FAIL isUpperCase test for \\u" + 77 hex(CHARACTER_UPPER_[i])); 78 break; 79 } 80 if (CHARACTER_LOWER_[i] != 81 UCharacter.toLowerCase(CHARACTER_UPPER_[i]) || 82 (CHARACTER_UPPER_[i] != 83 UCharacter.toUpperCase(CHARACTER_LOWER_[i]) && 84 CHARACTER_UPPER_[i] != 85 UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) { 86 errln("FAIL case conversion test for \\u" + 87 hex(CHARACTER_UPPER_[i]) + 88 " to \\u" + hex(CHARACTER_LOWER_[i])); 89 break; 90 } 91 if (CHARACTER_LOWER_[i] != 92 UCharacter.toLowerCase(CHARACTER_LOWER_[i])) { 93 errln("FAIL lower case conversion test for \\u" + 94 hex(CHARACTER_LOWER_[i])); 95 break; 96 } 97 if (CHARACTER_UPPER_[i] != 98 UCharacter.toUpperCase(CHARACTER_UPPER_[i]) && 99 CHARACTER_UPPER_[i] != 100 UCharacter.toTitleCase(CHARACTER_UPPER_[i])) { 101 errln("FAIL upper case conversion test for \\u" + 102 hex(CHARACTER_UPPER_[i])); 103 break; 104 } 105 logln("Ok \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" + 106 hex(CHARACTER_LOWER_[i])); 107 } 108 } 109 110 @Test TestFolding()111 public void TestFolding() 112 { 113 // test simple case folding 114 for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) { 115 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) != 116 FOLDING_SIMPLE_[i + 1]) { 117 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) + 118 ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1])); 119 } 120 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], 121 UCharacter.FOLD_CASE_DEFAULT) != 122 FOLDING_SIMPLE_[i + 1]) { 123 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) + 124 ", UCharacter.FOLD_CASE_DEFAULT) should be \\u" 125 + hex(FOLDING_SIMPLE_[i + 1])); 126 } 127 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) != 128 
FOLDING_SIMPLE_[i + 2]) { 129 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) + 130 ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2])); 131 } 132 if (UCharacter.foldCase(FOLDING_SIMPLE_[i], 133 UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) != 134 FOLDING_SIMPLE_[i + 2]) { 135 errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) + 136 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u" 137 + hex(FOLDING_SIMPLE_[i + 2])); 138 } 139 } 140 141 // Test full string case folding with default option and separate 142 // buffers 143 if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) { 144 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) + 145 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) + 146 " should be " + prettify(FOLDING_DEFAULT_[0])); 147 } 148 149 if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) { 150 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) + 151 ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT)) 152 + " should be " + prettify(FOLDING_DEFAULT_[0])); 153 } 154 155 if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals( 156 UCharacter.foldCase(FOLDING_MIXED_[0], false))) { 157 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) + 158 ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false)) 159 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0])); 160 } 161 162 if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals( 163 UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) { 164 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) + 165 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)) 166 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0])); 167 } 168 169 if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) { 170 errln("FAIL: foldCase(" 
+ prettify(FOLDING_MIXED_[1]) + 171 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true)) 172 + " should be " + prettify(FOLDING_DEFAULT_[1])); 173 } 174 175 if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) { 176 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) + 177 ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT)) 178 + " should be " + prettify(FOLDING_DEFAULT_[1])); 179 } 180 181 // alternate handling for dotted I/dotless i (U+0130, U+0131) 182 if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals( 183 UCharacter.foldCase(FOLDING_MIXED_[1], false))) { 184 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) + 185 ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false)) 186 + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1])); 187 } 188 189 if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals( 190 UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) { 191 errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) + 192 ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)) 193 + " should be " 194 + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1])); 195 } 196 } 197 198 @Test TestInvalidCodePointFolding()199 public void TestInvalidCodePointFolding() { 200 int[] invalidCodePoints = { 201 0xD800, // lead surrogate 202 0xDFFF, // trail surrogate 203 0xFDD0, // noncharacter 204 0xFFFF, // noncharacter 205 0x110000, // out of range 206 -1 // negative 207 }; 208 for (int cp : invalidCodePoints) { 209 assertEquals("Invalid code points should be echoed back", 210 cp, UCharacter.foldCase(cp, true)); 211 assertEquals("Invalid code points should be echoed back", 212 cp, UCharacter.foldCase(cp, false)); 213 assertEquals("Invalid code points should be echoed back", 214 cp, UCharacter.foldCase(cp, UCharacter.FOLD_CASE_DEFAULT)); 215 
assertEquals("Invalid code points should be echoed back", 216 cp, UCharacter.foldCase(cp, UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)); 217 } 218 } 219 220 /** 221 * Testing the strings case mapping methods 222 */ 223 @Test TestUpper()224 public void TestUpper() 225 { 226 // uppercase with root locale and in the same buffer 227 if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) { 228 errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " + 229 UPPER_ROOT_ + " instead got " + 230 UCharacter.toUpperCase(UPPER_BEFORE_)); 231 } 232 233 // uppercase with turkish locale and separate buffers 234 if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_, 235 UPPER_BEFORE_))) { 236 errln("Fail " + UPPER_BEFORE_ + 237 " after turkish-sensitive uppercase should be " + 238 UPPER_TURKISH_ + " instead of " + 239 UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_)); 240 } 241 242 // uppercase a short string with root locale 243 if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) { 244 errln("error in toUpper(root locale)=\"" + UPPER_MINI_ + 245 "\" expected \"" + UPPER_MINI_UPPER_ + "\""); 246 } 247 248 if (!SHARED_UPPERCASE_TOPKAP_.equals( 249 UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) { 250 errln("toUpper failed: expected \"" + 251 SHARED_UPPERCASE_TOPKAP_ + "\", got \"" + 252 UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\"."); 253 } 254 255 if (!SHARED_UPPERCASE_TURKISH_.equals( 256 UCharacter.toUpperCase(TURKISH_LOCALE_, 257 SHARED_LOWERCASE_TOPKAP_))) { 258 errln("toUpper failed: expected \"" + 259 SHARED_UPPERCASE_TURKISH_ + "\", got \"" + 260 UCharacter.toUpperCase(TURKISH_LOCALE_, 261 SHARED_LOWERCASE_TOPKAP_) + "\"."); 262 } 263 264 if (!SHARED_UPPERCASE_GERMAN_.equals( 265 UCharacter.toUpperCase(GERMAN_LOCALE_, 266 SHARED_LOWERCASE_GERMAN_))) { 267 errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_ 268 + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_, 269 SHARED_LOWERCASE_GERMAN_) + "\"."); 270 
} 271 272 if (!SHARED_UPPERCASE_GREEK_.equals( 273 UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) { 274 errln("toLower failed: expected \"" + SHARED_UPPERCASE_GREEK_ + 275 "\", got \"" + UCharacter.toUpperCase( 276 SHARED_LOWERCASE_GREEK_) + "\"."); 277 } 278 } 279 280 @Test TestLower()281 public void TestLower() 282 { 283 if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) { 284 errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " + 285 LOWER_ROOT_ + " instead of " + 286 UCharacter.toLowerCase(LOWER_BEFORE_)); 287 } 288 289 // lowercase with turkish locale 290 if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_, 291 LOWER_BEFORE_))) { 292 errln("Fail " + LOWER_BEFORE_ + 293 " after turkish-sensitive lowercase should be " + 294 LOWER_TURKISH_ + " instead of " + 295 UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_)); 296 } 297 if (!SHARED_LOWERCASE_ISTANBUL_.equals( 298 UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) { 299 errln("1. toLower failed: expected \"" + 300 SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" + 301 UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\"."); 302 } 303 304 if (!SHARED_LOWERCASE_TURKISH_.equals( 305 UCharacter.toLowerCase(TURKISH_LOCALE_, 306 SHARED_UPPERCASE_ISTANBUL_))) { 307 errln("2. 
toLower failed: expected \"" + 308 SHARED_LOWERCASE_TURKISH_ + "\", got \"" + 309 UCharacter.toLowerCase(TURKISH_LOCALE_, 310 SHARED_UPPERCASE_ISTANBUL_) + "\"."); 311 } 312 if (!SHARED_LOWERCASE_GREEK_.equals( 313 UCharacter.toLowerCase(GREEK_LOCALE_, 314 SHARED_UPPERCASE_GREEK_))) { 315 errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ + 316 "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_, 317 SHARED_UPPERCASE_GREEK_) + "\"."); 318 } 319 } 320 321 @Test TestTitleRegression()322 public void TestTitleRegression() throws java.io.IOException { 323 boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE); 324 assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable); 325 assertEquals("Titlecase check", 326 "The Quick Brown Fox Can't Jump Over The Lazy Dogs.", 327 UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null)); 328 } 329 330 @Test TestTitle()331 public void TestTitle() 332 { 333 try{ 334 for (int i = 0; i < TITLE_DATA_.length;) { 335 String test = TITLE_DATA_[i++]; 336 String expected = TITLE_DATA_[i++]; 337 ULocale locale = new ULocale(TITLE_DATA_[i++]); 338 int breakType = Integer.parseInt(TITLE_DATA_[i++]); 339 String optionsString = TITLE_DATA_[i++]; 340 BreakIterator iter = 341 breakType >= 0 ? 342 BreakIterator.getBreakInstance(locale, breakType) : 343 breakType == -2 ? 344 // Open a trivial break iterator that only delivers { 0, length } 345 // or even just { 0 } as boundaries. 
346 new RuleBasedBreakIterator(".*;") : 347 null; 348 int options = 0; 349 if (optionsString.indexOf('L') >= 0) { 350 options |= UCharacter.TITLECASE_NO_LOWERCASE; 351 } 352 if (optionsString.indexOf('A') >= 0) { 353 options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT; 354 } 355 String result = UCharacter.toTitleCase(locale, test, iter, options); 356 if (!expected.equals(result)) { 357 errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " + 358 prettify(expected) + " but got " + 359 prettify(result)); 360 } 361 if (options == 0) { 362 result = UCharacter.toTitleCase(locale, test, iter); 363 if (!expected.equals(result)) { 364 errln("titlecasing for " + prettify(test) + " should be " + 365 prettify(expected) + " but got " + 366 prettify(result)); 367 } 368 } 369 } 370 }catch(Exception ex){ 371 warnln("Could not find data for BreakIterators"); 372 } 373 } 374 375 // Not a @Test. See ICU4C intltest strcase.cpp TestCasingImpl(). TestCasingImpl(String input, String output, CaseMap.Title toTitle, Locale locale)376 void TestCasingImpl(String input, String output, CaseMap.Title toTitle, Locale locale) { 377 String result = toTitle.apply(locale, null, input, new StringBuilder(), null).toString(); 378 assertEquals("toTitle(" + input + ')', output, result); 379 } 380 381 @Test TestTitleOptions()382 public void TestTitleOptions() { 383 Locale root = Locale.ROOT; 384 // New options in ICU 60. 385 TestCasingImpl("ʻcAt! ʻeTc.", "ʻCat! ʻetc.", 386 CaseMap.toTitle().wholeString(), root); 387 TestCasingImpl("a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCaT. A ʻdOg! 
ʻETc.", 388 CaseMap.toTitle().sentences().noLowercase(), root); 389 TestCasingImpl("49eRs", "49ers", 390 CaseMap.toTitle().wholeString(), root); 391 TestCasingImpl("«丰(aBc)»", "«丰(abc)»", 392 CaseMap.toTitle().wholeString(), root); 393 TestCasingImpl("49eRs", "49Ers", 394 CaseMap.toTitle().wholeString().adjustToCased(), root); 395 TestCasingImpl("«丰(aBc)»", "«丰(Abc)»", 396 CaseMap.toTitle().wholeString().adjustToCased(), root); 397 TestCasingImpl(" john. Smith", " John. Smith", 398 CaseMap.toTitle().wholeString().noLowercase(), root); 399 TestCasingImpl(" john. Smith", " john. smith", 400 CaseMap.toTitle().wholeString().noBreakAdjustment(), root); 401 TestCasingImpl("«ijs»", "«IJs»", 402 CaseMap.toTitle().wholeString(), new Locale("nl", "BE")); 403 TestCasingImpl("«ijs»", "«İjs»", 404 CaseMap.toTitle().wholeString(), new Locale("tr", "DE")); 405 406 // Test conflicting settings. 407 // If & when we add more options, then the ORed combinations may become 408 // indistinguishable from valid values. 409 try { 410 CaseMap.toTitle().noBreakAdjustment().adjustToCased(). 411 apply(root, null, "", new StringBuilder(), null); 412 fail("CaseMap.toTitle(multiple adjustment options) " + 413 "did not throw an IllegalArgumentException"); 414 } catch(IllegalArgumentException expected) { 415 } 416 try { 417 CaseMap.toTitle().wholeString().sentences(). 
418 apply(root, null, "", new StringBuilder(), null); 419 fail("CaseMap.toTitle(multiple iterator options) " + 420 "did not throw an IllegalArgumentException"); 421 } catch(IllegalArgumentException expected) { 422 } 423 BreakIterator iter = BreakIterator.getCharacterInstance(root); 424 try { 425 CaseMap.toTitle().wholeString().apply(root, iter, "", new StringBuilder(), null); 426 fail("CaseMap.toTitle(iterator option + iterator) " + 427 "did not throw an IllegalArgumentException"); 428 } catch(IllegalArgumentException expected) { 429 } 430 } 431 432 @Test TestLithuanianTitle()433 public void TestLithuanianTitle() { 434 ULocale LOC_LITHUANIAN = new ULocale("lt"); 435 436 assertEquals("Lithuanian titlecase check in Lithuanian", 437 "\u0058\u0069\u0307\u0308", 438 UCharacter.toTitleCase(LOC_LITHUANIAN, "\u0058\u0049\u0308", null)); 439 440 assertEquals("Lithuanian titlecase check in Lithuanian", 441 "\u0058\u0069\u0307\u0308", 442 UCharacter.toTitleCase(LITHUANIAN_LOCALE_, "\u0058\u0049\u0308", null)); 443 } 444 445 @Test TestDutchTitle()446 public void TestDutchTitle() { 447 ULocale LOC_DUTCH = new ULocale("nl"); 448 int options = 0; 449 options |= UCharacter.TITLECASE_NO_LOWERCASE; 450 BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH); 451 452 assertEquals("Dutch titlecase check in English", 453 "Ijssel Igloo Ijmuiden", 454 UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null)); 455 456 assertEquals("Dutch titlecase check in Dutch", 457 "IJssel Igloo IJmuiden", 458 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null)); 459 460 // Also check the behavior using Java Locale 461 assertEquals("Dutch titlecase check in English (Java Locale)", 462 "Ijssel Igloo Ijmuiden", 463 UCharacter.toTitleCase(Locale.ENGLISH, "ijssel igloo IJMUIDEN", null)); 464 465 assertEquals("Dutch titlecase check in Dutch (Java Locale)", 466 "IJssel Igloo IJmuiden", 467 UCharacter.toTitleCase(DUTCH_LOCALE_, "ijssel igloo IJMUIDEN", null)); 468 469 
iter.setText("ijssel igloo IjMUIdEN iPoD ijenough"); 470 assertEquals("Dutch titlecase check in Dutch with nolowercase option", 471 "IJssel Igloo IJMUIdEN IPoD IJenough", 472 UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options)); 473 } 474 475 @Test TestSpecial()476 public void TestSpecial() 477 { 478 for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) { 479 int j = i * 3; 480 Locale locale = SPECIAL_LOCALES_[i]; 481 String str = SPECIAL_DATA_[j]; 482 if (locale != null) { 483 if (!SPECIAL_DATA_[j + 1].equals( 484 UCharacter.toLowerCase(locale, str))) { 485 errln("error lowercasing special characters " + 486 hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1]) 487 + " for locale " + locale.toString() + " but got " + 488 hex(UCharacter.toLowerCase(locale, str))); 489 } 490 if (!SPECIAL_DATA_[j + 2].equals( 491 UCharacter.toUpperCase(locale, str))) { 492 errln("error uppercasing special characters " + 493 hex(str) + " expected " + SPECIAL_DATA_[j + 2] 494 + " for locale " + locale.toString() + " but got " + 495 hex(UCharacter.toUpperCase(locale, str))); 496 } 497 } 498 else { 499 String lower = UCharacter.toLowerCase(str); 500 if (!SPECIAL_DATA_[j + 1].equals(lower)) { 501 errln("error lowercasing special characters " + 502 hex(str) + " expected " + SPECIAL_DATA_[j + 1] + 503 " but got " + hex(lower)); 504 } 505 String upper = UCharacter.toUpperCase(str); 506 if (!SPECIAL_DATA_[j + 2].equals(upper)) { 507 errln("error uppercasing special characters " + 508 hex(str) + " expected " + SPECIAL_DATA_[j + 2] + 509 " but got " + hex(upper)); 510 } 511 } 512 } 513 514 // turkish & azerbaijani dotless i & dotted I 515 // remove dot above if there was a capital I before and there are no 516 // more accents above 517 if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase( 518 TURKISH_LOCALE_, SPECIAL_DOTTED_))) { 519 errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ + 520 "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ + 521 "\" 
but got " + UCharacter.toLowerCase(TURKISH_LOCALE_, 522 SPECIAL_DOTTED_)); 523 } 524 if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase( 525 GERMAN_LOCALE_, SPECIAL_DOTTED_))) { 526 errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ + 527 "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ + 528 "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_, 529 SPECIAL_DOTTED_)); 530 } 531 532 // lithuanian dot above in uppercasing 533 if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals( 534 UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) { 535 errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ + 536 "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ + 537 "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_, 538 SPECIAL_DOT_ABOVE_)); 539 } 540 if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase( 541 GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) { 542 errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ + 543 "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ + 544 "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_, 545 SPECIAL_DOT_ABOVE_)); 546 } 547 548 // lithuanian adds dot above to i in lowercasing if there are more 549 // above accents 550 if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals( 551 UCharacter.toLowerCase(LITHUANIAN_LOCALE_, 552 SPECIAL_DOT_ABOVE_UPPER_))) { 553 errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ + 554 "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ + 555 "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_, 556 SPECIAL_DOT_ABOVE_UPPER_)); 557 } 558 if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals( 559 UCharacter.toLowerCase(GERMAN_LOCALE_, 560 SPECIAL_DOT_ABOVE_UPPER_))) { 561 errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ + 562 "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ + 563 "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_, 564 SPECIAL_DOT_ABOVE_UPPER_)); 565 } 566 } 567 568 /** 569 * Tests for case mapping in the file 
SpecialCasing.txt 570 * This method reads in SpecialCasing.txt file for testing purposes. 571 * A default path is provided relative to the src path, however the user 572 * could set a system property to change the directory path.<br> 573 * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest 574 */ 575 @Test TestSpecialCasingTxt()576 public void TestSpecialCasingTxt() 577 { 578 try 579 { 580 // reading in the SpecialCasing file 581 BufferedReader input = TestUtil.getDataReader( 582 "unicode/SpecialCasing.txt"); 583 while (true) 584 { 585 String s = input.readLine(); 586 if (s == null) { 587 break; 588 } 589 if (s.length() == 0 || s.charAt(0) == '#') { 590 continue; 591 } 592 593 String chstr[] = getUnicodeStrings(s); 594 StringBuffer strbuffer = new StringBuffer(chstr[0]); 595 StringBuffer lowerbuffer = new StringBuffer(chstr[1]); 596 StringBuffer upperbuffer = new StringBuffer(chstr[3]); 597 Locale locale = null; 598 for (int i = 4; i < chstr.length; i ++) { 599 String condition = chstr[i]; 600 if (Character.isLowerCase(chstr[i].charAt(0))) { 601 // specified locale 602 locale = new Locale(chstr[i], ""); 603 } 604 else if (condition.compareToIgnoreCase("Not_Before_Dot") 605 == 0) { 606 // turns I into dotless i 607 } 608 else if (condition.compareToIgnoreCase( 609 "More_Above") == 0) { 610 strbuffer.append((char)0x300); 611 lowerbuffer.append((char)0x300); 612 upperbuffer.append((char)0x300); 613 } 614 else if (condition.compareToIgnoreCase( 615 "After_Soft_Dotted") == 0) { 616 strbuffer.insert(0, 'i'); 617 lowerbuffer.insert(0, 'i'); 618 String lang = ""; 619 if (locale != null) { 620 lang = locale.getLanguage(); 621 } 622 if (lang.equals("tr") || lang.equals("az")) { 623 // this is to be removed when 4.0 data comes out 624 // and upperbuffer.insert uncommented 625 // see jitterbug 2344 626 chstr[i] = "After_I"; 627 strbuffer.deleteCharAt(0); 628 lowerbuffer.deleteCharAt(0); 629 i --; 630 continue; 631 // upperbuffer.insert(0, '\u0130'); 
632 } 633 else { 634 upperbuffer.insert(0, 'I'); 635 } 636 } 637 else if (condition.compareToIgnoreCase( 638 "Final_Sigma") == 0) { 639 strbuffer.insert(0, 'c'); 640 lowerbuffer.insert(0, 'c'); 641 upperbuffer.insert(0, 'C'); 642 } 643 else if (condition.compareToIgnoreCase("After_I") == 0) { 644 strbuffer.insert(0, 'I'); 645 lowerbuffer.insert(0, 'i'); 646 String lang = ""; 647 if (locale != null) { 648 lang = locale.getLanguage(); 649 } 650 if (lang.equals("tr") || lang.equals("az")) { 651 upperbuffer.insert(0, 'I'); 652 } 653 } 654 } 655 chstr[0] = strbuffer.toString(); 656 chstr[1] = lowerbuffer.toString(); 657 chstr[3] = upperbuffer.toString(); 658 if (locale == null) { 659 if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) { 660 errln(s); 661 errln("Fail: toLowerCase for character " + 662 Utility.escape(chstr[0]) + ", expected " 663 + Utility.escape(chstr[1]) + " but resulted in " + 664 Utility.escape(UCharacter.toLowerCase(chstr[0]))); 665 } 666 if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) { 667 errln(s); 668 errln("Fail: toUpperCase for character " + 669 Utility.escape(chstr[0]) + ", expected " 670 + Utility.escape(chstr[3]) + " but resulted in " + 671 Utility.escape(UCharacter.toUpperCase(chstr[0]))); 672 } 673 } 674 else { 675 if (!UCharacter.toLowerCase(locale, chstr[0]).equals( 676 chstr[1])) { 677 errln(s); 678 errln("Fail: toLowerCase for character " + 679 Utility.escape(chstr[0]) + ", expected " 680 + Utility.escape(chstr[1]) + " but resulted in " + 681 Utility.escape(UCharacter.toLowerCase(locale, 682 chstr[0]))); 683 } 684 if (!UCharacter.toUpperCase(locale, chstr[0]).equals( 685 chstr[3])) { 686 errln(s); 687 errln("Fail: toUpperCase for character " + 688 Utility.escape(chstr[0]) + ", expected " 689 + Utility.escape(chstr[3]) + " but resulted in " + 690 Utility.escape(UCharacter.toUpperCase(locale, 691 chstr[0]))); 692 } 693 } 694 } 695 input.close(); 696 } 697 catch (Exception e) 698 { 699 e.printStackTrace(); 700 } 701 } 702 703 
@Test TestUpperLower()704 public void TestUpperLower() 705 { 706 int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 707 0x01c9, 0x000c}; 708 int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 709 0x01c9, 0x000c}; 710 String upperTest = "abcdefg123hij.?:klmno"; 711 String lowerTest = "ABCDEFG123HIJ.?:KLMNO"; 712 713 // Checks LetterLike Symbols which were previously a source of 714 // confusion [Bertrand A. D. 02/04/98] 715 for (int i = 0x2100; i < 0x2138; i ++) { 716 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */ 717 if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) { 718 if (i != UCharacter.toLowerCase(i)) { // itself 719 errln("Failed case conversion with itself: \\u" 720 + Utility.hex(i, 4)); 721 } 722 if (i != UCharacter.toUpperCase(i)) { 723 errln("Failed case conversion with itself: \\u" 724 + Utility.hex(i, 4)); 725 } 726 } 727 } 728 for (int i = 0; i < upper.length; i ++) { 729 if (UCharacter.toLowerCase(upper[i]) != lower[i]) { 730 errln("FAILED UCharacter.tolower() for \\u" 731 + Utility.hex(upper[i], 4) 732 + " Expected \\u" + Utility.hex(lower[i], 4) 733 + " Got \\u" 734 + Utility.hex(UCharacter.toLowerCase(upper[i]), 4)); 735 } 736 } 737 logln("testing upper lower"); 738 for (int i = 0; i < upperTest.length(); i ++) { 739 logln("testing to upper to lower"); 740 if (UCharacter.isLetter(upperTest.charAt(i)) && 741 !UCharacter.isLowerCase(upperTest.charAt(i))) { 742 errln("Failed isLowerCase test at \\u" 743 + Utility.hex(upperTest.charAt(i), 4)); 744 } 745 else if (UCharacter.isLetter(lowerTest.charAt(i)) 746 && !UCharacter.isUpperCase(lowerTest.charAt(i))) { 747 errln("Failed isUpperCase test at \\u" 748 + Utility.hex(lowerTest.charAt(i), 4)); 749 } 750 else if (upperTest.charAt(i) 751 != UCharacter.toLowerCase(lowerTest.charAt(i))) { 752 errln("Failed case conversion from \\u" 753 + Utility.hex(lowerTest.charAt(i), 4) + " To \\u" 754 + 
Utility.hex(upperTest.charAt(i), 4)); 755 } 756 else if (lowerTest.charAt(i) 757 != UCharacter.toUpperCase(upperTest.charAt(i))) { 758 errln("Failed case conversion : \\u" 759 + Utility.hex(upperTest.charAt(i), 4) + " To \\u" 760 + Utility.hex(lowerTest.charAt(i), 4)); 761 } 762 else if (upperTest.charAt(i) 763 != UCharacter.toLowerCase(upperTest.charAt(i))) { 764 errln("Failed case conversion with itself: \\u" 765 + Utility.hex(upperTest.charAt(i))); 766 } 767 else if (lowerTest.charAt(i) 768 != UCharacter.toUpperCase(lowerTest.charAt(i))) { 769 errln("Failed case conversion with itself: \\u" 770 + Utility.hex(lowerTest.charAt(i))); 771 } 772 } 773 logln("done testing upper Lower"); 774 } 775 assertGreekUpper(String s, String expected)776 private void assertGreekUpper(String s, String expected) { 777 assertEquals("toUpper/Greek(" + s + ')', expected, UCharacter.toUpperCase(GREEK_LOCALE_, s)); 778 } 779 780 @Test TestGreekUpper()781 public void TestGreekUpper() { 782 // http://bugs.icu-project.org/trac/ticket/5456 783 assertGreekUpper("άδικος, κείμενο, ίριδα", "ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ"); 784 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039 785 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893 786 assertGreekUpper("Πατάτα", "ΠΑΤΑΤΑ"); 787 assertGreekUpper("Αέρας, Μυστήριο, Ωραίο", "ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ"); 788 assertGreekUpper("Μαΐου, Πόρος, Ρύθμιση", "ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ"); 789 assertGreekUpper("ΰ, Τηρώ, Μάιος", "Ϋ, ΤΗΡΩ, ΜΑΪΟΣ"); 790 assertGreekUpper("άυλος", "ΑΫΛΟΣ"); 791 assertGreekUpper("ΑΫΛΟΣ", "ΑΫΛΟΣ"); 792 assertGreekUpper("Άκλιτα ρήματα ή άκλιτες μετοχές", "ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ"); 793 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html 794 assertGreekUpper("Επειδή η αναγνώριση της αξιοπρέπειας", "ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ"); 795 assertGreekUpper("νομικού ή διεθνούς", "ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ"); 796 // http://unicode.org/udhr/d/udhr_ell_polytonic.html 797 assertGreekUpper("Ἐπειδὴ ἡ ἀναγνώριση", "ΕΠΕΙΔΗ Η 
ΑΝΑΓΝΩΡΙΣΗ"); 798 assertGreekUpper("νομικοῦ ἢ διεθνοῦς", "ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ"); 799 // From Google bug report 800 assertGreekUpper("Νέο, Δημιουργία", "ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ"); 801 // http://crbug.com/234797 802 assertGreekUpper("Ελάτε να φάτε τα καλύτερα παϊδάκια!", "ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!"); 803 assertGreekUpper("Μαΐου, τρόλεϊ", "ΜΑΪΟΥ, ΤΡΟΛΕΪ"); 804 assertGreekUpper("Το ένα ή το άλλο.", "ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ."); 805 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/ 806 assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ"); 807 assertGreekUpper("ή.", "Ή."); 808 } 809 810 private static final class EditChange { 811 private boolean change; 812 private int oldLength, newLength; EditChange(boolean change, int oldLength, int newLength)813 EditChange(boolean change, int oldLength, int newLength) { 814 this.change = change; 815 this.oldLength = oldLength; 816 this.newLength = newLength; 817 } 818 } 819 printOneEdit(Edits.Iterator ei)820 private static String printOneEdit(Edits.Iterator ei) { 821 if (ei.hasChange()) { 822 return "" + ei.oldLength() + "->" + ei.newLength(); 823 } else { 824 return "" + ei.oldLength() + "=" + ei.newLength(); 825 } 826 } 827 828 /** 829 * Maps indexes according to the expected edits. 830 * A destination index can occur multiple times when there are source deletions. 831 * Map according to the last occurrence, normally in a non-empty destination span. 832 * Simplest is to search from the back. 
833 */ srcIndexFromDest( EditChange expected[], int srcLength, int destLength, int index)834 private static int srcIndexFromDest( 835 EditChange expected[], int srcLength, int destLength, int index) { 836 int srcIndex = srcLength; 837 int destIndex = destLength; 838 int i = expected.length; 839 while (index < destIndex && i > 0) { 840 --i; 841 int prevSrcIndex = srcIndex - expected[i].oldLength; 842 int prevDestIndex = destIndex - expected[i].newLength; 843 if (index == prevDestIndex) { 844 return prevSrcIndex; 845 } else if (index > prevDestIndex) { 846 if (expected[i].change) { 847 // In a change span, map to its end. 848 return srcIndex; 849 } else { 850 // In an unchanged span, offset within it. 851 return prevSrcIndex + (index - prevDestIndex); 852 } 853 } 854 srcIndex = prevSrcIndex; 855 destIndex = prevDestIndex; 856 } 857 // index is outside the string. 858 return srcIndex; 859 } 860 destIndexFromSrc( EditChange expected[], int srcLength, int destLength, int index)861 private static int destIndexFromSrc( 862 EditChange expected[], int srcLength, int destLength, int index) { 863 int srcIndex = srcLength; 864 int destIndex = destLength; 865 int i = expected.length; 866 while (index < srcIndex && i > 0) { 867 --i; 868 int prevSrcIndex = srcIndex - expected[i].oldLength; 869 int prevDestIndex = destIndex - expected[i].newLength; 870 if (index == prevSrcIndex) { 871 return prevDestIndex; 872 } else if (index > prevSrcIndex) { 873 if (expected[i].change) { 874 // In a change span, map to its end. 875 return destIndex; 876 } else { 877 // In an unchanged span, offset within it. 878 return prevDestIndex + (index - prevSrcIndex); 879 } 880 } 881 srcIndex = prevSrcIndex; 882 destIndex = prevDestIndex; 883 } 884 // index is outside the string. 
885 return destIndex; 886 } 887 checkEqualEdits(String name, Edits e1, Edits e2)888 private void checkEqualEdits(String name, Edits e1, Edits e2) { 889 Edits.Iterator ei1 = e1.getFineIterator(); 890 Edits.Iterator ei2 = e2.getFineIterator(); 891 for (int i = 0;; ++i) { 892 boolean ei1HasNext = ei1.next(); 893 boolean ei2HasNext = ei2.next(); 894 assertEquals(name + " next()[" + i + "]", ei1HasNext, ei2HasNext); 895 assertEquals(name + " edit[" + i + "]", printOneEdit(ei1), printOneEdit(ei2)); 896 if (!ei1HasNext || !ei2HasNext) { 897 break; 898 } 899 } 900 } 901 checkEditsIter( String name, Edits.Iterator ei1, Edits.Iterator ei2, EditChange[] expected, boolean withUnchanged)902 private static void checkEditsIter( 903 String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators 904 EditChange[] expected, boolean withUnchanged) { 905 assertFalse(name, ei2.findSourceIndex(-1)); 906 assertFalse(name, ei2.findDestinationIndex(-1)); 907 908 int expSrcIndex = 0; 909 int expDestIndex = 0; 910 int expReplIndex = 0; 911 for (int expIndex = 0; expIndex < expected.length; ++expIndex) { 912 EditChange expect = expected[expIndex]; 913 String msg = name + ' ' + expIndex; 914 if (withUnchanged || expect.change) { 915 assertTrue(msg, ei1.next()); 916 assertEquals(msg, expect.change, ei1.hasChange()); 917 assertEquals(msg, expect.oldLength, ei1.oldLength()); 918 assertEquals(msg, expect.newLength, ei1.newLength()); 919 assertEquals(msg, expSrcIndex, ei1.sourceIndex()); 920 assertEquals(msg, expDestIndex, ei1.destinationIndex()); 921 assertEquals(msg, expReplIndex, ei1.replacementIndex()); 922 } 923 924 if (expect.oldLength > 0) { 925 assertTrue(msg, ei2.findSourceIndex(expSrcIndex)); 926 assertEquals(msg, expect.change, ei2.hasChange()); 927 assertEquals(msg, expect.oldLength, ei2.oldLength()); 928 assertEquals(msg, expect.newLength, ei2.newLength()); 929 assertEquals(msg, expSrcIndex, ei2.sourceIndex()); 930 assertEquals(msg, expDestIndex, ei2.destinationIndex()); 
931 assertEquals(msg, expReplIndex, ei2.replacementIndex()); 932 if (!withUnchanged) { 933 // For some iterators, move past the current range 934 // so that findSourceIndex() has to look before the current index. 935 ei2.next(); 936 ei2.next(); 937 } 938 } 939 940 if (expect.newLength > 0) { 941 assertTrue(msg, ei2.findDestinationIndex(expDestIndex)); 942 assertEquals(msg, expect.change, ei2.hasChange()); 943 assertEquals(msg, expect.oldLength, ei2.oldLength()); 944 assertEquals(msg, expect.newLength, ei2.newLength()); 945 assertEquals(msg, expSrcIndex, ei2.sourceIndex()); 946 assertEquals(msg, expDestIndex, ei2.destinationIndex()); 947 assertEquals(msg, expReplIndex, ei2.replacementIndex()); 948 if (!withUnchanged) { 949 // For some iterators, move past the current range 950 // so that findSourceIndex() has to look before the current index. 951 ei2.next(); 952 ei2.next(); 953 } 954 } 955 956 expSrcIndex += expect.oldLength; 957 expDestIndex += expect.newLength; 958 if (expect.change) { 959 expReplIndex += expect.newLength; 960 } 961 } 962 String msg = name + " end"; 963 assertFalse(msg, ei1.next()); 964 assertFalse(msg, ei1.hasChange()); 965 assertEquals(msg, 0, ei1.oldLength()); 966 assertEquals(msg, 0, ei1.newLength()); 967 assertEquals(msg, expSrcIndex, ei1.sourceIndex()); 968 assertEquals(msg, expDestIndex, ei1.destinationIndex()); 969 assertEquals(msg, expReplIndex, ei1.replacementIndex()); 970 971 assertFalse(name, ei2.findSourceIndex(expSrcIndex)); 972 assertFalse(name, ei2.findDestinationIndex(expDestIndex)); 973 974 // Check mapping of all indexes against a simple implementation 975 // that works on the expected changes. 976 // Iterate once forward, once backward, to cover more runtime conditions. 
977 int srcLength = expSrcIndex; 978 int destLength = expDestIndex; 979 List<Integer> srcIndexes = new ArrayList<>(); 980 List<Integer> destIndexes = new ArrayList<>(); 981 srcIndexes.add(-1); 982 destIndexes.add(-1); 983 int srcIndex = 0; 984 int destIndex = 0; 985 for (int i = 0; i < expected.length; ++i) { 986 if (expected[i].oldLength > 0) { 987 srcIndexes.add(srcIndex); 988 if (expected[i].oldLength > 1) { 989 srcIndexes.add(srcIndex + 1); 990 if (expected[i].oldLength > 2) { 991 srcIndexes.add(srcIndex + expected[i].oldLength - 1); 992 } 993 } 994 } 995 if (expected[i].newLength > 0) { 996 destIndexes.add(destIndex); 997 if (expected[i].newLength > 1) { 998 destIndexes.add(destIndex + 1); 999 if (expected[i].newLength > 2) { 1000 destIndexes.add(destIndex + expected[i].newLength - 1); 1001 } 1002 } 1003 } 1004 srcIndex += expected[i].oldLength; 1005 destIndex += expected[i].newLength; 1006 } 1007 srcIndexes.add(srcLength); 1008 destIndexes.add(destLength); 1009 srcIndexes.add(srcLength + 1); 1010 destIndexes.add(destLength + 1); 1011 Collections.reverse(destIndexes); 1012 // Zig-zag across the indexes to stress next() <-> previous(). 
1013 for (int i = 0; i < srcIndexes.size(); ++i) { 1014 for (int j : ZIG_ZAG) { 1015 if ((i + j) < srcIndexes.size()) { 1016 int si = srcIndexes.get(i + j); 1017 assertEquals(name + " destIndexFromSrc(" + si + "):", 1018 destIndexFromSrc(expected, srcLength, destLength, si), 1019 ei2.destinationIndexFromSourceIndex(si)); 1020 } 1021 } 1022 } 1023 for (int i = 0; i < destIndexes.size(); ++i) { 1024 for (int j : ZIG_ZAG) { 1025 if ((i + j) < destIndexes.size()) { 1026 int di = destIndexes.get(i + j); 1027 assertEquals(name + " srcIndexFromDest(" + di + "):", 1028 srcIndexFromDest(expected, srcLength, destLength, di), 1029 ei2.sourceIndexFromDestinationIndex(di)); 1030 } 1031 } 1032 } 1033 } 1034 1035 private static final int[] ZIG_ZAG = { 0, 1, 2, 3, 2, 1 }; 1036 1037 @Test TestEdits()1038 public void TestEdits() { 1039 Edits edits = new Edits(); 1040 assertFalse("new Edits hasChanges", edits.hasChanges()); 1041 assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges()); 1042 assertEquals("new Edits", 0, edits.lengthDelta()); 1043 edits.addUnchanged(1); // multiple unchanged ranges are combined 1044 edits.addUnchanged(10000); // too long, and they are split 1045 edits.addReplace(0, 0); 1046 edits.addUnchanged(2); 1047 assertFalse("unchanged 10003 hasChanges", edits.hasChanges()); 1048 assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges()); 1049 assertEquals("unchanged 10003", 0, edits.lengthDelta()); 1050 edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed 1051 edits.addUnchanged(0); 1052 edits.addReplace(2, 1); 1053 edits.addReplace(2, 1); 1054 edits.addReplace(0, 10); 1055 edits.addReplace(100, 0); 1056 edits.addReplace(3000, 4000); // variable-length encoding 1057 edits.addReplace(100000, 100000); 1058 assertTrue("some edits hasChanges", edits.hasChanges()); 1059 assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges()); 1060 assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta()); 
1061 1062 EditChange[] coarseExpectedChanges = new EditChange[] { 1063 new EditChange(false, 10003, 10003), 1064 new EditChange(true, 103106, 104013) 1065 }; 1066 checkEditsIter("coarse", 1067 edits.getCoarseIterator(), edits.getCoarseIterator(), 1068 coarseExpectedChanges, true); 1069 checkEditsIter("coarse changes", 1070 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), 1071 coarseExpectedChanges, false); 1072 1073 EditChange[] fineExpectedChanges = new EditChange[] { 1074 new EditChange(false, 10003, 10003), 1075 new EditChange(true, 2, 1), 1076 new EditChange(true, 2, 1), 1077 new EditChange(true, 2, 1), 1078 new EditChange(true, 0, 10), 1079 new EditChange(true, 100, 0), 1080 new EditChange(true, 3000, 4000), 1081 new EditChange(true, 100000, 100000) 1082 }; 1083 checkEditsIter("fine", 1084 edits.getFineIterator(), edits.getFineIterator(), 1085 fineExpectedChanges, true); 1086 checkEditsIter("fine changes", 1087 edits.getFineChangesIterator(), edits.getFineChangesIterator(), 1088 fineExpectedChanges, false); 1089 1090 edits.reset(); 1091 assertFalse("reset hasChanges", edits.hasChanges()); 1092 assertEquals("reset numberOfChanges", 0, edits.numberOfChanges()); 1093 assertEquals("reset", 0, edits.lengthDelta()); 1094 Edits.Iterator ei = edits.getCoarseChangesIterator(); 1095 assertFalse("reset then iterator", ei.next()); 1096 } 1097 1098 @Test TestEditsFindFwdBwd()1099 public void TestEditsFindFwdBwd() { 1100 // Some users need index mappings to be efficient when they are out of order. 1101 // The most interesting failure case for this test is it taking a very long time. 
1102 Edits e = new Edits(); 1103 int N = 200000; 1104 for (int i = 0; i < N; ++i) { 1105 e.addUnchanged(1); 1106 e.addReplace(3, 1); 1107 } 1108 Edits.Iterator iter = e.getFineIterator(); 1109 for (int i = 0; i <= N; i += 2) { 1110 assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i)); 1111 assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1)); 1112 } 1113 for (int i = N; i >= 0; i -= 2) { 1114 assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1)); 1115 assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i)); 1116 } 1117 } 1118 1119 @Test TestMergeEdits()1120 public void TestMergeEdits() { 1121 Edits ab = new Edits(), bc = new Edits(), ac = new Edits(), expected_ac = new Edits(); 1122 1123 // Simple: Two parallel non-changes. 1124 ab.addUnchanged(2); 1125 bc.addUnchanged(2); 1126 expected_ac.addUnchanged(2); 1127 1128 // Simple: Two aligned changes. 1129 ab.addReplace(3, 2); 1130 bc.addReplace(2, 1); 1131 expected_ac.addReplace(3, 1); 1132 1133 // Unequal non-changes. 1134 ab.addUnchanged(5); 1135 bc.addUnchanged(3); 1136 expected_ac.addUnchanged(3); 1137 // ab ahead by 2 1138 1139 // Overlapping changes accumulate until they share a boundary. 1140 ab.addReplace(4, 3); 1141 bc.addReplace(3, 2); 1142 ab.addReplace(4, 3); 1143 bc.addReplace(3, 2); 1144 ab.addReplace(4, 3); 1145 bc.addReplace(3, 2); 1146 bc.addUnchanged(4); 1147 expected_ac.addReplace(14, 8); 1148 // bc ahead by 2 1149 1150 // Balance out intermediate-string lengths. 1151 ab.addUnchanged(2); 1152 expected_ac.addUnchanged(2); 1153 1154 // Insert something and delete it: Should disappear. 1155 ab.addReplace(0, 5); 1156 ab.addReplace(0, 2); 1157 bc.addReplace(7, 0); 1158 1159 // Parallel change to make a new boundary. 1160 ab.addReplace(1, 2); 1161 bc.addReplace(2, 3); 1162 expected_ac.addReplace(1, 3); 1163 1164 // Multiple ab deletions should remain separate at the boundary. 
1165 ab.addReplace(1, 0); 1166 ab.addReplace(2, 0); 1167 ab.addReplace(3, 0); 1168 expected_ac.addReplace(1, 0); 1169 expected_ac.addReplace(2, 0); 1170 expected_ac.addReplace(3, 0); 1171 1172 // Unequal non-changes can be split for another boundary. 1173 ab.addUnchanged(2); 1174 bc.addUnchanged(1); 1175 expected_ac.addUnchanged(1); 1176 // ab ahead by 1 1177 1178 // Multiple bc insertions should create a boundary and remain separate. 1179 bc.addReplace(0, 4); 1180 bc.addReplace(0, 5); 1181 bc.addReplace(0, 6); 1182 expected_ac.addReplace(0, 4); 1183 expected_ac.addReplace(0, 5); 1184 expected_ac.addReplace(0, 6); 1185 // ab ahead by 1 1186 1187 // Multiple ab deletions in the middle of a bc change are merged. 1188 bc.addReplace(2, 2); 1189 // bc ahead by 1 1190 ab.addReplace(1, 0); 1191 ab.addReplace(2, 0); 1192 ab.addReplace(3, 0); 1193 ab.addReplace(4, 1); 1194 expected_ac.addReplace(11, 2); 1195 1196 // Multiple bc insertions in the middle of an ab change are merged. 1197 ab.addReplace(5, 6); 1198 bc.addReplace(3, 3); 1199 // ab ahead by 3 1200 bc.addReplace(0, 4); 1201 bc.addReplace(0, 5); 1202 bc.addReplace(0, 6); 1203 bc.addReplace(3, 7); 1204 expected_ac.addReplace(5, 25); 1205 1206 // Delete around a deletion. 1207 ab.addReplace(4, 4); 1208 ab.addReplace(3, 0); 1209 ab.addUnchanged(2); 1210 bc.addReplace(2, 2); 1211 bc.addReplace(4, 0); 1212 expected_ac.addReplace(9, 2); 1213 1214 // Insert into an insertion. 1215 ab.addReplace(0, 2); 1216 bc.addReplace(1, 1); 1217 bc.addReplace(0, 8); 1218 bc.addUnchanged(4); 1219 expected_ac.addReplace(0, 10); 1220 // bc ahead by 3 1221 1222 // Balance out intermediate-string lengths. 1223 ab.addUnchanged(3); 1224 expected_ac.addUnchanged(3); 1225 1226 // Deletions meet insertions. 1227 // Output order is arbitrary in principle, but we expect insertions first 1228 // and want to keep it that way. 
1229 ab.addReplace(2, 0); 1230 ab.addReplace(4, 0); 1231 ab.addReplace(6, 0); 1232 bc.addReplace(0, 1); 1233 bc.addReplace(0, 3); 1234 bc.addReplace(0, 5); 1235 expected_ac.addReplace(0, 1); 1236 expected_ac.addReplace(0, 3); 1237 expected_ac.addReplace(0, 5); 1238 expected_ac.addReplace(2, 0); 1239 expected_ac.addReplace(4, 0); 1240 expected_ac.addReplace(6, 0); 1241 1242 // End with a non-change, so that further edits are never reordered. 1243 ab.addUnchanged(1); 1244 bc.addUnchanged(1); 1245 expected_ac.addUnchanged(1); 1246 1247 ac.mergeAndAppend(ab, bc); 1248 checkEqualEdits("ab+bc", expected_ac, ac); 1249 1250 // Append more Edits. 1251 Edits ab2 = new Edits(), bc2 = new Edits(); 1252 ab2.addUnchanged(5); 1253 bc2.addReplace(1, 2); 1254 bc2.addUnchanged(4); 1255 expected_ac.addReplace(1, 2); 1256 expected_ac.addUnchanged(4); 1257 ac.mergeAndAppend(ab2, bc2); 1258 checkEqualEdits("ab2+bc2", expected_ac, ac); 1259 1260 // Append empty edits. 1261 Edits empty = new Edits(); 1262 ac.mergeAndAppend(empty, empty); 1263 checkEqualEdits("empty+empty", expected_ac, ac); 1264 1265 // Error: Append more edits with mismatched intermediate-string lengths. 
1266 Edits mismatch = new Edits(); 1267 mismatch.addReplace(1, 1); 1268 try { 1269 ac.mergeAndAppend(ab2, mismatch); 1270 fail("ab2+mismatch did not yield IllegalArgumentException"); 1271 } catch (IllegalArgumentException expected) { 1272 } 1273 try { 1274 ac.mergeAndAppend(mismatch, bc2); 1275 fail("mismatch+bc2 did not yield IllegalArgumentException"); 1276 } catch (IllegalArgumentException expected) { 1277 } 1278 } 1279 1280 @Test TestCaseMapWithEdits()1281 public void TestCaseMapWithEdits() { 1282 StringBuilder sb = new StringBuilder(); 1283 Edits edits = new Edits(); 1284 1285 sb = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", sb, edits); 1286 assertEquals("toLower(Istanbul)", "ıb", sb.toString()); 1287 EditChange[] lowerExpectedChanges = new EditChange[] { 1288 new EditChange(true, 1, 1), 1289 new EditChange(false, 4, 4), 1290 new EditChange(true, 1, 1), 1291 new EditChange(false, 2, 2) 1292 }; 1293 checkEditsIter("toLower(Istanbul)", 1294 edits.getFineIterator(), edits.getFineIterator(), 1295 lowerExpectedChanges, true); 1296 1297 sb.delete(0, sb.length()); 1298 edits.reset(); 1299 sb = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", sb, edits); 1300 assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", sb.toString()); 1301 EditChange[] upperExpectedChanges = new EditChange[] { 1302 new EditChange(false, 1, 1), 1303 new EditChange(true, 1, 1), 1304 new EditChange(true, 1, 1), 1305 new EditChange(true, 1, 1), 1306 new EditChange(true, 1, 1), 1307 new EditChange(true, 1, 1) 1308 }; 1309 checkEditsIter("toUpper(Πατάτα)", 1310 edits.getFineIterator(), edits.getFineIterator(), 1311 upperExpectedChanges, true); 1312 1313 sb.delete(0, sb.length()); 1314 edits.reset(); 1315 sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply( 1316 DUTCH_LOCALE_, null, "IjssEL IglOo", sb, edits); 1317 assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString()); 1318 EditChange[] titleExpectedChanges = new EditChange[] { 
1319 new EditChange(false, 1, 1), 1320 new EditChange(true, 1, 1), 1321 new EditChange(false, 10, 10) 1322 }; 1323 checkEditsIter("toTitle(IjssEL IglOo)", 1324 edits.getFineIterator(), edits.getFineIterator(), 1325 titleExpectedChanges, true); 1326 1327 sb.delete(0, sb.length()); 1328 edits.reset(); 1329 sb = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", sb, edits); 1330 assertEquals("fold(IßtanBul)", "ıssb", sb.toString()); 1331 EditChange[] foldExpectedChanges = new EditChange[] { 1332 new EditChange(true, 1, 1), 1333 new EditChange(true, 1, 2), 1334 new EditChange(false, 3, 3), 1335 new EditChange(true, 1, 1), 1336 new EditChange(false, 2, 2) 1337 }; 1338 checkEditsIter("fold(IßtanBul)", 1339 edits.getFineIterator(), edits.getFineIterator(), 1340 foldExpectedChanges, true); 1341 } 1342 1343 @Test TestCaseMapToString()1344 public void TestCaseMapToString() { 1345 // String apply(..., CharSequence) 1346 // Omit unchanged text. 1347 assertEquals("toLower(Istanbul)", "ıb", 1348 CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul")); 1349 assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", 1350 CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα")); 1351 assertEquals("toTitle(IjssEL IglOo)", "J", 1352 CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply( 1353 DUTCH_LOCALE_, null, "IjssEL IglOo")); 1354 assertEquals("fold(IßtanBul)", "ıssb", 1355 CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul")); 1356 1357 // Return the whole result string. 
1358 assertEquals("toLower(Istanbul)", "ıstanbul", 1359 CaseMap.toLower().apply(TURKISH_LOCALE_, "IstanBul")); 1360 assertEquals("toUpper(Πατάτα)", "ΠΑΤΑΤΑ", 1361 CaseMap.toUpper().apply(GREEK_LOCALE_, "Πατάτα")); 1362 assertEquals("toTitle(IjssEL IglOo)", "IJssEL IglOo", 1363 CaseMap.toTitle().noBreakAdjustment().noLowercase().apply( 1364 DUTCH_LOCALE_, null, "IjssEL IglOo")); 1365 assertEquals("fold(IßtanBul)", "ısstanbul", 1366 CaseMap.fold().turkic().apply("IßtanBul")); 1367 } 1368 1369 @Test TestCaseMapEditsIteratorDocs()1370 public void TestCaseMapEditsIteratorDocs() { 1371 String input = "abcßDeF"; 1372 // output: "abcssdef" 1373 1374 StringBuilder sb = new StringBuilder(); 1375 Edits edits = new Edits(); 1376 CaseMap.fold().apply(input, sb, edits); 1377 1378 String[] fineIteratorExpected = { 1379 "{ src[0..3] ≡ dest[0..3] (no-change) }", 1380 "{ src[3..4] ⇝ dest[3..5], repl[0..2] }", 1381 "{ src[4..5] ⇝ dest[5..6], repl[2..3] }", 1382 "{ src[5..6] ≡ dest[6..7] (no-change) }", 1383 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }", 1384 }; 1385 String[] fineChangesIteratorExpected = { 1386 "{ src[3..4] ⇝ dest[3..5], repl[0..2] }", 1387 "{ src[4..5] ⇝ dest[5..6], repl[2..3] }", 1388 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }", 1389 }; 1390 String[] coarseIteratorExpected = { 1391 "{ src[0..3] ≡ dest[0..3] (no-change) }", 1392 "{ src[3..5] ⇝ dest[3..6], repl[0..3] }", 1393 "{ src[5..6] ≡ dest[6..7] (no-change) }", 1394 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }", 1395 }; 1396 String[] coarseChangesIteratorExpected = { 1397 "{ src[3..5] ⇝ dest[3..6], repl[0..3] }", 1398 "{ src[6..7] ⇝ dest[7..8], repl[3..4] }", 1399 }; 1400 1401 // Expected destination indices when source index is queried 1402 int[] expectedDestFineEditIndices = {0, 0, 0, 3, 5, 6, 7}; 1403 int[] expectedDestCoarseEditIndices = {0, 0, 0, 3, 3, 6, 7}; 1404 int[] expectedDestFineStringIndices = {0, 1, 2, 3, 5, 6, 7}; 1405 int[] expectedDestCoarseStringIndices = {0, 1, 2, 3, 6, 6, 7}; 1406 1407 // Expected source 
indices when destination index is queried 1408 int[] expectedSrcFineEditIndices = { 0, 0, 0, 3, 3, 4, 5, 6 }; 1409 int[] expectedSrcCoarseEditIndices = { 0, 0, 0, 3, 3, 3, 5, 6 }; 1410 int[] expectedSrcFineStringIndices = { 0, 1, 2, 3, 4, 4, 5, 6 }; 1411 int[] expectedSrcCoarseStringIndices = { 0, 1, 2, 3, 5, 5, 5, 6 }; 1412 1413 // Demonstrate the iterator next() method: 1414 Edits.Iterator fineIterator = edits.getFineIterator(); 1415 int i = 0; 1416 while (fineIterator.next()) { 1417 String expected = fineIteratorExpected[i++]; 1418 String actual = fineIterator.toString(); 1419 assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length())); 1420 } 1421 Edits.Iterator fineChangesIterator = edits.getFineChangesIterator(); 1422 i = 0; 1423 while (fineChangesIterator.next()) { 1424 String expected = fineChangesIteratorExpected[i++]; 1425 String actual = fineChangesIterator.toString(); 1426 assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length())); 1427 } 1428 Edits.Iterator coarseIterator = edits.getCoarseIterator(); 1429 i = 0; 1430 while (coarseIterator.next()) { 1431 String expected = coarseIteratorExpected[i++]; 1432 String actual = coarseIterator.toString(); 1433 assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length())); 1434 } 1435 Edits.Iterator coarseChangesIterator = edits.getCoarseChangesIterator(); 1436 i = 0; 1437 while (coarseChangesIterator.next()) { 1438 String expected = coarseChangesIteratorExpected[i++]; 1439 String actual = coarseChangesIterator.toString(); 1440 assertEquals("Iteration #" + i, expected, actual.substring(actual.length() - expected.length())); 1441 } 1442 1443 // Demonstrate the iterator indexing methods: 1444 // fineIterator should have the same behavior as fineChangesIterator, and 1445 // coarseIterator should have the same behavior as coarseChangesIterator. 
1446 for (int srcIndex=0; srcIndex<input.length(); srcIndex++) { 1447 fineIterator.findSourceIndex(srcIndex); 1448 fineChangesIterator.findSourceIndex(srcIndex); 1449 coarseIterator.findSourceIndex(srcIndex); 1450 coarseChangesIterator.findSourceIndex(srcIndex); 1451 1452 assertEquals("Source index: " + srcIndex, 1453 expectedDestFineEditIndices[srcIndex], 1454 fineIterator.destinationIndex()); 1455 assertEquals("Source index: " + srcIndex, 1456 expectedDestFineEditIndices[srcIndex], 1457 fineChangesIterator.destinationIndex()); 1458 assertEquals("Source index: " + srcIndex, 1459 expectedDestCoarseEditIndices[srcIndex], 1460 coarseIterator.destinationIndex()); 1461 assertEquals("Source index: " + srcIndex, 1462 expectedDestCoarseEditIndices[srcIndex], 1463 coarseChangesIterator.destinationIndex()); 1464 1465 assertEquals("Source index: " + srcIndex, 1466 expectedDestFineStringIndices[srcIndex], 1467 fineIterator.destinationIndexFromSourceIndex(srcIndex)); 1468 assertEquals("Source index: " + srcIndex, 1469 expectedDestFineStringIndices[srcIndex], 1470 fineChangesIterator.destinationIndexFromSourceIndex(srcIndex)); 1471 assertEquals("Source index: " + srcIndex, 1472 expectedDestCoarseStringIndices[srcIndex], 1473 coarseIterator.destinationIndexFromSourceIndex(srcIndex)); 1474 assertEquals("Source index: " + srcIndex, 1475 expectedDestCoarseStringIndices[srcIndex], 1476 coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex)); 1477 } 1478 for (int destIndex=0; destIndex<input.length(); destIndex++) { 1479 fineIterator.findDestinationIndex(destIndex); 1480 fineChangesIterator.findDestinationIndex(destIndex); 1481 coarseIterator.findDestinationIndex(destIndex); 1482 coarseChangesIterator.findDestinationIndex(destIndex); 1483 1484 assertEquals("Destination index: " + destIndex, 1485 expectedSrcFineEditIndices[destIndex], 1486 fineIterator.sourceIndex()); 1487 assertEquals("Destination index: " + destIndex, 1488 expectedSrcFineEditIndices[destIndex], 1489 
fineChangesIterator.sourceIndex()); 1490 assertEquals("Destination index: " + destIndex, 1491 expectedSrcCoarseEditIndices[destIndex], 1492 coarseIterator.sourceIndex()); 1493 assertEquals("Destination index: " + destIndex, 1494 expectedSrcCoarseEditIndices[destIndex], 1495 coarseChangesIterator.sourceIndex()); 1496 1497 assertEquals("Destination index: " + destIndex, 1498 expectedSrcFineStringIndices[destIndex], 1499 fineIterator.sourceIndexFromDestinationIndex(destIndex)); 1500 assertEquals("Destination index: " + destIndex, 1501 expectedSrcFineStringIndices[destIndex], 1502 fineChangesIterator.sourceIndexFromDestinationIndex(destIndex)); 1503 assertEquals("Destination index: " + destIndex, 1504 expectedSrcCoarseStringIndices[destIndex], 1505 coarseIterator.sourceIndexFromDestinationIndex(destIndex)); 1506 assertEquals("Destination index: " + destIndex, 1507 expectedSrcCoarseStringIndices[destIndex], 1508 coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex)); 1509 } 1510 } 1511 1512 @Test TestCaseMapGreekExtended()1513 public void TestCaseMapGreekExtended() { 1514 // Ticket 13851 1515 String s = "\u1F80\u1F88\u1FFC"; 1516 String result = CaseMap.toLower().apply(Locale.ROOT, s); 1517 assertEquals("lower", "\u1F80\u1F80\u1FF3", result); 1518 result = CaseMap.toTitle().apply(Locale.ROOT, null, s); 1519 assertEquals("title", "\u1F88\u1F80\u1FF3", result); 1520 } 1521 1522 @Test TestFoldBug20316()1523 public void TestFoldBug20316() { 1524 String s = "廬ᾒ뻪ᣃइ垚Ⴡₓ렞체ꖲ갹ݖ䕷꾬쯎㊅ᦘᰄ㸜䡏遁럢豑黾奯㸀⊻줮끎蒹衤劔뽳趧熶撒쫃窩겨ཇ脌쵐嫑⟑겭㋋濜隣ᳰ봢ℼ櫩靛㉃炔鋳" + 1525 "оे⳨ᦧྃ깢粣ᑤꇪ찃̹鵄ዤꛛᰙ⡝捣쯋톐蕩栭쥀뎊ᄯ৻恳〬昴껤룩列潱ᑮ煃鶖안꽊鹭宪帐❖ा쥈잔"; 1526 String result = CaseMap.fold().apply(s); 1527 assertTrue("廬ᾒ...->廬ἢι...", result.startsWith("廬ἢι")); 1528 s = "儊ẖ깸ᝓ恷ᇁ䜄쌼ꇸჃ䗑䘬䒥㈴槁蛚紆洔㖣믏亝醣黹Ά嶨䖕篕舀ꖧ₭ଯᒗ✧ԗ墖쁳㽎苊澎긁⾆⒞蠻왃囨ᡠ邏꾭⪐턣搤穳≠톲絋砖ሷ⠆" + 1529 "瞏惢鵶剕듘ᅤ♟Ԡⴠ⊡鹔ጙ갑⣚堟ᣗ✸㕇絮䠎瘗⟡놥擢ꉭ佱ྪ飹痵⿑⨴츿璿僖㯷넴鋰膄釚겼ナ黪差"; 1530 result = CaseMap.fold().apply(s); 1531 assertTrue("儊ẖ...->儊h\u0331...", result.startsWith("儊h\u0331")); 1532 } 1533 1534 // private data members - test data 
-------------------------------------- 1535 1536 private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR"); 1537 private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE"); 1538 private static final Locale GREEK_LOCALE_ = new Locale("el", "GR"); 1539 private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US"); 1540 private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT"); 1541 private static final Locale DUTCH_LOCALE_ = new Locale("nl"); 1542 1543 private static final int CHARACTER_UPPER_[] = 1544 {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 1545 0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e, 1546 0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f, 1547 0x01c4, 0x01c8, 0x000c, 0x0000}; 1548 private static final int CHARACTER_LOWER_[] = 1549 {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 1550 0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e, 1551 0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f, 1552 0x01c6, 0x01c9, 0x000c, 0x0000}; 1553 1554 /* 1555 * CaseFolding.txt says about i and its cousins: 1556 * 0049; C; 0069; # LATIN CAPITAL LETTER I 1557 * 0049; T; 0131; # LATIN CAPITAL LETTER I 1558 * 1559 * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 1560 * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 1561 * That's all. 1562 * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings. 
1563 */ 1564 private static final int FOLDING_SIMPLE_[] = { 1565 // input, default, exclude special i 1566 0x61, 0x61, 0x61, 1567 0x49, 0x69, 0x131, 1568 0x130, 0x130, 0x69, 1569 0x131, 0x131, 0x131, 1570 0xdf, 0xdf, 0xdf, 1571 0xfb03, 0xfb03, 0xfb03, 1572 0x1040e,0x10436,0x10436, 1573 0x5ffff,0x5ffff,0x5ffff 1574 }; 1575 private static final String FOLDING_MIXED_[] = 1576 {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff", 1577 "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"}; 1578 private static final String FOLDING_DEFAULT_[] = 1579 {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff", 1580 "ass\u03bcffi\uD801\uDC34i\u0307\u0131"}; 1581 private static final String FOLDING_EXCLUDE_SPECIAL_I_[] = 1582 {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff", 1583 "ass\u03bcffi\uD801\uDC34i\u0131"}; 1584 /** 1585 * "IESUS CHRISTOS" 1586 */ 1587 private static final String SHARED_UPPERCASE_GREEK_ = 1588 "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3"; 1589 /** 1590 * "iesus christos" 1591 */ 1592 private static final String SHARED_LOWERCASE_GREEK_ = 1593 "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2"; 1594 private static final String SHARED_LOWERCASE_TURKISH_ = 1595 "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021"; 1596 private static final String SHARED_UPPERCASE_TURKISH_ = 1597 "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c"; 1598 private static final String SHARED_UPPERCASE_ISTANBUL_ = 1599 "\u0130STANBUL, NOT CONSTANTINOPLE!"; 1600 private static final String SHARED_LOWERCASE_ISTANBUL_ = 1601 "i\u0307stanbul, not constantinople!"; 1602 private static final String SHARED_LOWERCASE_TOPKAP_ = 1603 
"topkap\u0131 palace, istanbul"; 1604 private static final String SHARED_UPPERCASE_TOPKAP_ = 1605 "TOPKAPI PALACE, ISTANBUL"; 1606 private static final String SHARED_LOWERCASE_GERMAN_ = 1607 "S\u00FC\u00DFmayrstra\u00DFe"; 1608 private static final String SHARED_UPPERCASE_GERMAN_ = 1609 "S\u00DCSSMAYRSTRASSE"; 1610 1611 private static final String UPPER_BEFORE_ = 1612 "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff"; 1613 private static final String UPPER_ROOT_ = 1614 "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff"; 1615 private static final String UPPER_TURKISH_ = 1616 "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff"; 1617 private static final String UPPER_MINI_ = "\u00df\u0061"; 1618 private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041"; 1619 1620 private static final String LOWER_BEFORE_ = 1621 "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff"; 1622 private static final String LOWER_ROOT_ = 1623 "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff"; 1624 private static final String LOWER_TURKISH_ = 1625 "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff"; 1626 1627 /** 1628 * each item is an array with input string, result string, locale ID, break iterator, options 1629 * the break iterator is specified as an int, same as in BreakIterator.KIND_*: 1630 * 0=KIND_CHARACTER 1=KIND_WORD 2=KIND_LINE 3=KIND_SENTENCE 4=KIND_TITLE -1=default (NULL=words) -2=no breaks (.*) 1631 * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I L=U_TITLECASE_NO_LOWERCASE A=U_TITLECASE_NO_BREAK_ADJUSTMENT 1632 * see ICU4C source/test/testdata/casing.txt 1633 */ 1634 private static final String TITLE_DATA_[] = { 1635 "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff", 1636 "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff", 1637 "", 1638 "0", 
1639 "", 1640 1641 "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff", 1642 "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff", 1643 "", 1644 "1", 1645 "", 1646 1647 "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933 1648 "", 1649 "-1", 1650 "", 1651 1652 " tHe QUIcK bRoWn", " The Quick Brown", 1653 "", 1654 "4", 1655 "", 1656 1657 "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc", 1658 "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER 1659 "", 1660 "0", 1661 "", 1662 1663 "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j 1664 "", 1665 "-1", 1666 "", 1667 1668 "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", 1669 "", 1670 "-1", 1671 "", 1672 1673 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.", 1674 "A \u02bbCat. A \u02bbDog! \u02bbEtc.", 1675 "", 1676 "-1", 1677 "", // default 1678 1679 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.", 1680 "A \u02bbcat. A \u02bbdog! \u02bbetc.", 1681 "", 1682 "-1", 1683 "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT 1684 1685 "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.", 1686 "A \u02bbCaT. A \u02bbdOg! \u02bbETc.", 1687 "", 1688 "3", 1689 "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE 1690 1691 1692 "\u02bbcAt! \u02bbeTc.", 1693 "\u02bbCat! \u02bbetc.", 1694 "", 1695 "-2", 1696 "", // -2=Trivial break iterator 1697 1698 "\u02bbcAt! \u02bbeTc.", 1699 "\u02bbcat! \u02bbetc.", 1700 "", 1701 "-2", 1702 "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT 1703 1704 "\u02bbcAt! \u02bbeTc.", 1705 "\u02bbCAt! \u02bbeTc.", 1706 "", 1707 "-2", 1708 "L", // U_TITLECASE_NO_LOWERCASE 1709 1710 "\u02bbcAt! \u02bbeTc.", 1711 "\u02bbcAt! 
\u02bbeTc.", 1712 "", 1713 "-2", 1714 "AL", // Both options 1715 1716 // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError 1717 // when TITLECASE_NO_LOWERCASE encounters a single-letter word 1718 "a b c", 1719 "A B C", 1720 "", 1721 "1", 1722 "L" // U_TITLECASE_NO_LOWERCASE 1723 }; 1724 1725 1726 /** 1727 * <p>basic string, lower string, upper string, title string</p> 1728 */ 1729 private static final String SPECIAL_DATA_[] = { 1730 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414), 1731 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C), 1732 UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414), 1733 "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + 1734 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414), 1735 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + 1736 UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C), 1737 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + 1738 UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414), 1739 // sigmas followed/preceded by cased letters 1740 "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ", 1741 "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ", 1742 "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 " 1743 }; 1744 private static final Locale SPECIAL_LOCALES_[] = { 1745 null, 1746 ENGLISH_LOCALE_, 1747 null, 1748 }; 1749 1750 private static final String SPECIAL_DOTTED_ = 1751 "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301"; 1752 private static final String SPECIAL_DOTTED_LOWER_TURKISH_ = 1753 "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301"; 1754 private static final String SPECIAL_DOTTED_LOWER_GERMAN_ = 1755 "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301"; 1756 private static final String SPECIAL_DOT_ABOVE_ = 1757 "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307"; 1758 private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ = 1759 "A\u0307 \u0307 I J\u0327 J\u0301\u0307"; 
// Dot-above casing fixtures: inputs and expected case-mapped results.
// NOTE(review): the locale in each name presumably matches the locale the
// corresponding test passes to the case-mapping call -- confirm against callers.
private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
    "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
private static final String SPECIAL_DOT_ABOVE_UPPER_ =
    "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
    "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
    "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";

// private methods -------------------------------------------------------

/**
 * Parses one data line made of four "; "-terminated fields of
 * space-separated hexadecimal code points, optionally followed by
 * space-separated condition tokens and a " #" comment.
 * <p>Each of the four fields is converted to the string formed by its
 * code points (supplementary code points become surrogate pairs). If a
 * " #" marker is found after the fourth field and there is text between
 * the field and the marker, that text (minus one trailing ';') is split
 * on single spaces and each token is appended to the result. When no
 * " #" marker is present, any text after the fourth field is ignored.</p>
 * <p>NOTE(review): this layout resembles the Unicode SpecialCasing.txt
 * format; confirm against the caller that feeds this method.</p>
 * @param str line to break up into Unicode strings
 * @return array holding the four converted strings followed by any
 *         condition tokens; its length is 4 plus the number of tokens
 *         (there is no terminating null element)
 * @throws StringIndexOutOfBoundsException if str has fewer than four
 *         "; " separators
 * @throws NumberFormatException if a field token is not a hexadecimal
 *         number (an empty token caused by a doubled space included)
 * @throws IllegalArgumentException if a parsed value is not a valid
 *         Unicode code point
 */
private String[] getUnicodeStrings(String str)
{
    List<String> v = new ArrayList<>(10);
    int start = 0;
    // lower, title, upper, ... : exactly four code point fields are expected
    for (int casecount = 4; casecount > 0; casecount --) {
        int end = str.indexOf("; ", start);
        String casestr = str.substring(start, end);
        StringBuilder buffer = new StringBuilder();
        int spaceoffset = 0;
        while (spaceoffset < casestr.length()) {
            int nextspace = casestr.indexOf(' ', spaceoffset);
            if (nextspace == -1) {
                nextspace = casestr.length();
            }
            // appendCodePoint keeps values above U+FFFF intact; a (char)
            // cast would silently drop their high bits
            buffer.appendCodePoint(Integer.parseInt(
                                       casestr.substring(spaceoffset, nextspace),
                                       16));
            spaceoffset = nextspace + 1;
        }
        start = end + 2; // skip the "; " separator
        v.add(buffer.toString());
    }
    int comments = str.indexOf(" #", start);
    if (comments != -1 && comments != start) {
        // the condition list may itself be terminated by ';' before " #"
        if (str.charAt(comments - 1) == ';') {
            comments --;
        }
        String conditions = str.substring(start, comments);
        int offset = 0;
        while (offset < conditions.length()) {
            int spaceoffset = conditions.indexOf(' ', offset);
            if (spaceoffset == -1) {
                spaceoffset = conditions.length();
            }
            v.add(conditions.substring(offset, spaceoffset));
            offset = spaceoffset + 1;
        }
    }
    return v.toArray(new String[0]);
}
}