1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2012, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.dev.test.translit; 11 12 import java.util.ArrayList; 13 import java.util.Enumeration; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.List; 18 import java.util.Locale; 19 20 import org.junit.Test; 21 import org.junit.runner.RunWith; 22 import org.junit.runners.JUnit4; 23 24 import ohos.global.icu.dev.test.TestFmwk; 25 import ohos.global.icu.dev.test.TestUtil; 26 import ohos.global.icu.impl.Utility; 27 import ohos.global.icu.impl.UtilityExtensions; 28 import ohos.global.icu.lang.CharSequences; 29 import ohos.global.icu.lang.UCharacter; 30 import ohos.global.icu.lang.UScript; 31 import ohos.global.icu.text.Replaceable; 32 import ohos.global.icu.text.ReplaceableString; 33 import ohos.global.icu.text.StringTransform; 34 import ohos.global.icu.text.Transliterator; 35 import ohos.global.icu.text.UTF16; 36 import ohos.global.icu.text.UnicodeFilter; 37 import ohos.global.icu.text.UnicodeSet; 38 import ohos.global.icu.text.UnicodeSetIterator; 39 import ohos.global.icu.util.CaseInsensitiveString; 40 import ohos.global.icu.util.ULocale; 41 42 43 /*********************************************************************** 44 45 HOW TO USE THIS TEST FILE 46 -or- 47 How I developed on two platforms 48 without losing (too much of) my mind 49 50 51 1. Add new tests by copying/pasting/changing existing tests. On Java, 52 any public void method named Test...() taking no parameters becomes 53 a test. On C++, you need to modify the header and add a line to 54 the runIndexedTest() dispatch method. 55 56 2. Make liberal use of the expect() method; it is your friend. 57 58 3. The tests in this file exactly match those in a sister file on the 59 other side. The two files are: 60 61 icu4j: src/ohos.global.icu.dev.test/translit/TransliteratorTest.java 62 icu4c: source/test/intltest/transtst.cpp 63 64 ==> THIS IS THE IMPORTANT PART <== 65 66 When you add a test in this file, add it in transtst.cpp too. 67 Give it the same name and put it in the same relative place. This 68 makes maintenance a lot simpler for any poor soul who ends up 69 trying to synchronize the tests between icu4j and icu4c. 70 71 4. If you MUST enter a test that is NOT paralleled in the sister file, 72 then add it in the special non-mirrored section. These are 73 labeled 74 75 "icu4j ONLY" 76 77 or 78 79 "icu4c ONLY" 80 81 Make sure you document the reason the test is here and not there. 82 83 84 Thank you. 85 The Management 86 ***********************************************************************/ 87 88 /** 89 * @test 90 * @summary General test of Transliterator 91 */ 92 93 @RunWith(JUnit4.class) 94 public class TransliteratorTest extends TestFmwk { 95 @Test TestHangul()96 public void TestHangul() { 97 98 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 99 Transliterator hl = lh.getInverse(); 100 101 assertTransform("Transform", "\uCE20", lh, "ch"); 102 103 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 104 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 105 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 106 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 107 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 108 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 109 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 110 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja"); 111 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha"); 112 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga"); 113 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 114 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 115 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 116 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 117 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 118 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 119 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 120 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 121 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 122 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 123 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 124 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 125 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 126 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 127 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 128 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 129 130 } 131 132 @Test TestChinese()133 public void TestChinese() { 134 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 135 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 136 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 137 } 138 139 @Test TestRegistry()140 public void TestRegistry() { 141 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 142 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 143 checkRegistry("foo1", "[:letter:] a > b;"); 144 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 145 String id = (String) e.nextElement(); 146 checkRegistry(id); 147 } 148 // Need to remove these test-specific transliterators in order not to interfere with other tests. 149 Transliterator.unregister("foo3"); 150 Transliterator.unregister("foo2"); 151 Transliterator.unregister("foo1"); 152 } 153 checkRegistry(String id, String rules)154 private void checkRegistry (String id, String rules) { 155 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 156 Transliterator.registerInstance(foo); 157 checkRegistry(id); 158 } 159 checkRegistry(String id)160 private void checkRegistry(String id) { 161 Transliterator fie = Transliterator.getInstance(id); 162 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 163 fie.setFilter(fae); 164 Transliterator foe = Transliterator.getInstance(id); 165 UnicodeFilter fee = foe.getFilter(); 166 if (fae.equals(fee)) { 167 errln("Changed what is in registry for " + id); 168 } 169 } 170 171 @Test TestInstantiationError()172 public void TestInstantiationError() { 173 try { 174 String ID = "<Not a valid Transliterator ID>"; 175 Transliterator t = Transliterator.getInstance(ID); 176 errln("FAIL: " + ID + " returned " + t); 177 } catch (IllegalArgumentException ex) { 178 logln("OK: Bogus ID handled properly"); 179 } 180 } 181 182 @Test TestSimpleRules()183 public void TestSimpleRules() { 184 /* Example: rules 1. ab>x|y 185 * 2. yc>z 186 * 187 * []|eabcd start - no match, copy e to tranlated buffer 188 * [e]|abcd match rule 1 - copy output & adjust cursor 189 * [ex|y]cd match rule 2 - copy output & adjust cursor 190 * [exz]|d no match, copy d to transliterated buffer 191 * [exzd]| done 192 */ 193 expect("ab>x|y;" + 194 "yc>z", 195 "eabcd", "exzd"); 196 197 /* Another set of rules: 198 * 1. ab>x|yzacw 199 * 2. za>q 200 * 3. qc>r 201 * 4. cw>n 202 * 203 * []|ab Rule 1 204 * [x|yzacw] No match 205 * [xy|zacw] Rule 2 206 * [xyq|cw] Rule 4 207 * [xyqn]| Done 208 */ 209 expect("ab>x|yzacw;" + 210 "za>q;" + 211 "qc>r;" + 212 "cw>n", 213 "ab", "xyqn"); 214 215 /* Test categories 216 */ 217 Transliterator t = Transliterator.createFromRules("<ID>", 218 "$dummy=\uE100;" + 219 "$vowel=[aeiouAEIOU];" + 220 "$lu=[:Lu:];" + 221 "$vowel } $lu > '!';" + 222 "$vowel > '&';" + 223 "'!' { $lu > '^';" + 224 "$lu > '*';" + 225 "a>ERROR", 226 Transliterator.FORWARD); 227 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 228 } 229 230 /** 231 * Test inline set syntax and set variable syntax. 232 */ 233 @Test TestInlineSet()234 public void TestInlineSet() { 235 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 236 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 237 238 expect("$digit = [0-9];" + 239 "$alpha = [a-zA-Z];" + 240 "$alphanumeric = [$digit $alpha];" + // *** 241 "$special = [^$alphanumeric];" + // *** 242 "$alphanumeric > '-';" + 243 "$special > '*';", 244 245 "thx-1138", "---*----"); 246 } 247 248 /** 249 * Create some inverses and confirm that they work. We have to be 250 * careful how we do this, since the inverses will not be true 251 * inverses -- we can't throw any random string at the composition 252 * of the transliterators and expect the identity function. F x 253 * F' != I. However, if we are careful about the input, we will 254 * get the expected results. 255 */ 256 @Test TestRuleBasedInverse()257 public void TestRuleBasedInverse() { 258 String RULES = 259 "abc>zyx;" + 260 "ab>yz;" + 261 "bc>zx;" + 262 "ca>xy;" + 263 "a>x;" + 264 "b>y;" + 265 "c>z;" + 266 267 "abc<zyx;" + 268 "ab<yz;" + 269 "bc<zx;" + 270 "ca<xy;" + 271 "a<x;" + 272 "b<y;" + 273 "c<z;" + 274 275 ""; 276 277 String[] DATA = { 278 // Careful here -- random strings will not work. If we keep 279 // the left side to the domain and the right side to the range 280 // we will be okay though (left, abc; right xyz). 281 "a", "x", 282 "abcacab", "zyxxxyy", 283 "caccb", "xyzzy", 284 }; 285 286 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 287 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 288 for (int i=0; i<DATA.length; i+=2) { 289 expect(fwd, DATA[i], DATA[i+1]); 290 expect(rev, DATA[i+1], DATA[i]); 291 } 292 } 293 294 /** 295 * Basic test of keyboard. 296 */ 297 @Test TestKeyboard()298 public void TestKeyboard() { 299 Transliterator t = Transliterator.createFromRules("<ID>", 300 "psch>Y;" 301 +"ps>y;" 302 +"ch>x;" 303 +"a>A;", Transliterator.FORWARD); 304 String DATA[] = { 305 // insertion, buffer 306 "a", "A", 307 "p", "Ap", 308 "s", "Aps", 309 "c", "Apsc", 310 "a", "AycA", 311 "psch", "AycAY", 312 null, "AycAY", // null means finishKeyboardTransliteration 313 }; 314 315 keyboardAux(t, DATA); 316 } 317 318 /** 319 * Basic test of keyboard with cursor. 320 */ 321 @Test TestKeyboard2()322 public void TestKeyboard2() { 323 Transliterator t = Transliterator.createFromRules("<ID>", 324 "ych>Y;" 325 +"ps>|y;" 326 +"ch>x;" 327 +"a>A;", Transliterator.FORWARD); 328 String DATA[] = { 329 // insertion, buffer 330 "a", "A", 331 "p", "Ap", 332 "s", "Aps", // modified for rollback - "Ay", 333 "c", "Apsc", // modified for rollback - "Ayc", 334 "a", "AycA", 335 "p", "AycAp", 336 "s", "AycAps", // modified for rollback - "AycAy", 337 "c", "AycApsc", // modified for rollback - "AycAyc", 338 "h", "AycAY", 339 null, "AycAY", // null means finishKeyboardTransliteration 340 }; 341 342 keyboardAux(t, DATA); 343 } 344 345 /** 346 * Test keyboard transliteration with back-replacement. 347 */ 348 @Test TestKeyboard3()349 public void TestKeyboard3() { 350 // We want th>z but t>y. Furthermore, during keyboard 351 // transliteration we want t>y then yh>z if t, then h are 352 // typed. 353 String RULES = 354 "t>|y;" + 355 "yh>z;" + 356 ""; 357 358 String[] DATA = { 359 // Column 1: characters to add to buffer (as if typed) 360 // Column 2: expected appearance of buffer after 361 // keyboard xliteration. 362 "a", "a", 363 "b", "ab", 364 "t", "abt", // modified for rollback - "aby", 365 "c", "abyc", 366 "t", "abyct", // modified for rollback - "abycy", 367 "h", "abycz", 368 null, "abycz", // null means finishKeyboardTransliteration 369 }; 370 371 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 372 keyboardAux(t, DATA); 373 } 374 keyboardAux(Transliterator t, String[] DATA)375 private void keyboardAux(Transliterator t, String[] DATA) { 376 Transliterator.Position index = new Transliterator.Position(); 377 ReplaceableString s = new ReplaceableString(); 378 for (int i=0; i<DATA.length; i+=2) { 379 StringBuffer log; 380 if (DATA[i] != null) { 381 log = new StringBuffer(s.toString() + " + " 382 + DATA[i] 383 + " -> "); 384 t.transliterate(s, index, DATA[i]); 385 } else { 386 log = new StringBuffer(s.toString() + " => "); 387 t.finishTransliteration(s, index); 388 } 389 UtilityExtensions.formatInput(log, s, index); 390 if (s.toString().equals(DATA[i+1])) { 391 logln(log.toString()); 392 } else { 393 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 394 } 395 } 396 } 397 398 // Latin-Arabic has been temporarily removed until it can be 399 // done correctly. 400 401 // public void TestArabic() { 402 // String DATA[] = { 403 // "Arabic", 404 // "\u062a\u062a\u0645\u062a\u0639 "+ 405 // "\u0627\u0644\u0644\u063a\u0629 "+ 406 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 407 // "\u0628\u0628\u0646\u0638\u0645 "+ 408 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 409 // "\u062c\u0645\u064a\u0644\u0629" 410 // }; 411 412 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 413 // for (int i=0; i<DATA.length; i+=2) { 414 // expect(t, DATA[i], DATA[i+1]); 415 // } 416 // } 417 418 /** 419 * Compose the Kana transliterator forward and reverse and try 420 * some strings that should come out unchanged. 421 */ 422 @Test TestCompoundKana()423 public void TestCompoundKana() { 424 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 425 expect(t, "aaaaa", "aaaaa"); 426 } 427 428 /** 429 * Compose the hex transliterators forward and reverse. 430 */ 431 @Test TestCompoundHex()432 public void TestCompoundHex() { 433 Transliterator a = Transliterator.getInstance("Any-Hex"); 434 Transliterator b = Transliterator.getInstance("Hex-Any"); 435 // Transliterator[] trans = { a, b }; 436 // Transliterator ab = Transliterator.getInstance(trans); 437 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 438 439 // Do some basic tests of b 440 expect(b, "\\u0030\\u0031", "01"); 441 442 String s = "abcde"; 443 expect(ab, s, s); 444 445 // trans = new Transliterator[] { b, a }; 446 // Transliterator ba = Transliterator.getInstance(trans); 447 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 448 ReplaceableString str = new ReplaceableString(s); 449 a.transliterate(str); 450 expect(ba, str.toString(), str.toString()); 451 } 452 453 /** 454 * Do some basic tests of filtering. 455 */ 456 @Test TestFiltering()457 public void TestFiltering() { 458 459 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 460 tempTrans.setFilter(new UnicodeSet("[a]")); 461 String tempResult = tempTrans.transform("xa"); 462 assertEquals("context should not be filtered ", "xb", tempResult); 463 464 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 465 tempResult = tempTrans.transform("xa"); 466 assertEquals("context should not be filtered ", "xb", tempResult); 467 468 Transliterator hex = Transliterator.getInstance("Any-Hex"); 469 hex.setFilter(new UnicodeFilter() { 470 @Override 471 public boolean contains(int c) { 472 return c != 'c'; 473 } 474 @Override 475 public String toPattern(boolean escapeUnprintable) { 476 return ""; 477 } 478 @Override 479 public boolean matchesIndexValue(int v) { 480 return false; 481 } 482 @Override 483 public void addMatchSetTo(UnicodeSet toUnionTo) {} 484 }); 485 String s = "abcde"; 486 String out = hex.transliterate(s); 487 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 488 if (out.equals(exp)) { 489 logln("Ok: \"" + exp + "\""); 490 } else { 491 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 492 } 493 } 494 495 /** 496 * Test anchors 497 */ 498 @Test TestAnchors()499 public void TestAnchors() { 500 expect("^ab > 01 ;" + 501 " ab > |8 ;" + 502 " b > k ;" + 503 " 8x$ > 45 ;" + 504 " 8x > 77 ;", 505 506 "ababbabxabx", 507 "018k7745"); 508 expect("$s = [z$] ;" + 509 "$s{ab > 01 ;" + 510 " ab > |8 ;" + 511 " b > k ;" + 512 " 8x}$s > 45 ;" + 513 " 8x > 77 ;", 514 515 "abzababbabxzabxabx", 516 "01z018k45z01x45"); 517 } 518 519 /** 520 * Test pattern quoting and escape mechanisms. 521 */ 522 @Test TestPatternQuoting()523 public void TestPatternQuoting() { 524 // Array of 3n items 525 // Each item is <rules>, <input>, <expected output> 526 String[] DATA = { 527 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 528 }; 529 530 for (int i=0; i<DATA.length; i+=3) { 531 logln("Pattern: " + Utility.escape(DATA[i])); 532 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 533 expect(t, DATA[i+1], DATA[i+2]); 534 } 535 } 536 537 @Test TestVariableNames()538 public void TestVariableNames() { 539 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 540 if (gl == null) { 541 errln("FAIL: null Transliterator returned."); 542 } 543 } 544 545 /** 546 * Regression test for bugs found in Greek transliteration. 547 */ 548 @Test TestJ277()549 public void TestJ277() { 550 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 551 552 char sigma = (char)0x3C3; 553 char upsilon = (char)0x3C5; 554 char nu = (char)0x3BD; 555 // not used char PHI = (char)0x3A6; 556 char alpha = (char)0x3B1; 557 // not used char omega = (char)0x3C9; 558 // not used char omicron = (char)0x3BF; 559 // not used char epsilon = (char)0x3B5; 560 561 // sigma upsilon nu -> syn 562 StringBuffer buf = new StringBuffer(); 563 buf.append(sigma).append(upsilon).append(nu); 564 String syn = buf.toString(); 565 expect(gl, syn, "syn"); 566 567 // sigma alpha upsilon nu -> saun 568 buf.setLength(0); 569 buf.append(sigma).append(alpha).append(upsilon).append(nu); 570 String sayn = buf.toString(); 571 expect(gl, sayn, "saun"); 572 573 // Again, using a smaller rule set 574 String rules = 575 "$alpha = \u03B1;" + 576 "$nu = \u03BD;" + 577 "$sigma = \u03C3;" + 578 "$ypsilon = \u03C5;" + 579 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 580 "s <> $sigma;" + 581 "a <> $alpha;" + 582 "u <> $vowel { $ypsilon;" + 583 "y <> $ypsilon;" + 584 "n <> $nu;"; 585 Transliterator mini = Transliterator.createFromRules 586 ("mini", rules, Transliterator.REVERSE); 587 expect(mini, syn, "syn"); 588 expect(mini, sayn, "saun"); 589 590 //| // Transliterate the Greek locale data 591 //| Locale el("el"); 592 //| DateFormatSymbols syms(el, status); 593 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 594 //| int32_t i, count; 595 //| const UnicodeString* data = syms.getMonths(count); 596 //| for (i=0; i<count; ++i) { 597 //| if (data[i].length() == 0) { 598 //| continue; 599 //| } 600 //| UnicodeString out(data[i]); 601 //| gl->transliterate(out); 602 //| bool_t ok = TRUE; 603 //| if (data[i].length() >= 2 && out.length() >= 2 && 604 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 605 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 606 //| ok = FALSE; 607 //| } 608 //| } 609 //| if (ok) { 610 //| logln(prettify(data[i] + " -> " + out)); 611 //| } else { 612 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 613 //| } 614 //| } 615 } 616 617 // /** 618 // * Prefix, suffix support in hex transliterators 619 // */ 620 // public void TestJ243() { 621 // // Test default Hex-Any, which should handle 622 // // \\u, \\U, u+, and U+ 623 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 624 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 625 // 626 // // Try a custom Hex-Any 627 // // \\uXXXX and &#xXXXX; 628 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 629 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 630 // "abcd5fx0123"); 631 // 632 // // Try custom Any-Hex (default is tested elsewhere) 633 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 634 // expect(hex3, "012", "012"); 635 // } 636 637 @Test TestJ329()638 public void TestJ329() { 639 640 Object[] DATA = { 641 Boolean.FALSE, "a > b; c > d", 642 Boolean.TRUE, "a > b; no operator; c > d", 643 }; 644 645 for (int i=0; i<DATA.length; i+=2) { 646 String err = null; 647 try { 648 Transliterator.createFromRules("<ID>", 649 (String) DATA[i+1], 650 Transliterator.FORWARD); 651 } catch (IllegalArgumentException e) { 652 err = e.getMessage(); 653 } 654 boolean gotError = (err != null); 655 String desc = (String) DATA[i+1] + 656 (gotError ? (" -> error: " + err) : " -> no error"); 657 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 658 logln("Ok: " + desc); 659 } else { 660 errln("FAIL: " + desc); 661 } 662 } 663 } 664 665 /** 666 * Test segments and segment references. 667 */ 668 @Test TestSegments()669 public void TestSegments() { 670 // Array of 3n items 671 // Each item is <rules>, <input>, <expected output> 672 String[] DATA = { 673 "([a-z]) '.' ([0-9]) > $2 '-' $1", 674 "abc.123.xyz.456", 675 "ab1-c23.xy4-z56", 676 }; 677 678 for (int i=0; i<DATA.length; i+=3) { 679 logln("Pattern: " + Utility.escape(DATA[i])); 680 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 681 expect(t, DATA[i+1], DATA[i+2]); 682 } 683 } 684 685 /** 686 * Test cursor positioning outside of the key 687 */ 688 @Test TestCursorOffset()689 public void TestCursorOffset() { 690 // Array of 3n items 691 // Each item is <rules>, <input>, <expected output> 692 String[] DATA = { 693 "pre {alpha} post > | @ ALPHA ;" + 694 "eALPHA > beta ;" + 695 "pre {beta} post > BETA @@ | ;" + 696 "post > xyz", 697 698 "prealphapost prebetapost", 699 "prbetaxyz preBETApost", 700 }; 701 702 for (int i=0; i<DATA.length; i+=3) { 703 logln("Pattern: " + Utility.escape(DATA[i])); 704 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 705 expect(t, DATA[i+1], DATA[i+2]); 706 } 707 } 708 709 /** 710 * Test zero length and > 1 char length variable values. Test 711 * use of variable refs in UnicodeSets. 712 */ 713 @Test TestArbitraryVariableValues()714 public void TestArbitraryVariableValues() { 715 // Array of 3n items 716 // Each item is <rules>, <input>, <expected output> 717 String[] DATA = { 718 "$abe = ab;" + 719 "$pat = x[yY]z;" + 720 "$ll = 'a-z';" + 721 "$llZ = [$ll];" + 722 "$llY = [$ll$pat];" + 723 "$emp = ;" + 724 725 "$abe > ABE;" + 726 "$pat > END;" + 727 "$llZ > 1;" + 728 "$llY > 2;" + 729 "7$emp 8 > 9;" + 730 "", 731 732 "ab xYzxyz stY78", 733 "ABE ENDEND 1129", 734 }; 735 736 for (int i=0; i<DATA.length; i+=3) { 737 logln("Pattern: " + Utility.escape(DATA[i])); 738 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 739 expect(t, DATA[i+1], DATA[i+2]); 740 } 741 } 742 743 /** 744 * Confirm that the contextStart, contextLimit, start, and limit 745 * behave correctly. 746 */ 747 @Test TestPositionHandling()748 public void TestPositionHandling() { 749 // Array of 3n items 750 // Each item is <rules>, <input>, <expected output> 751 String[] DATA = { 752 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 753 "xtat txtb", // pos 0,9,0,9 754 "xTTaSS TTxUUb", 755 756 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 757 "xtat txtb", // pos 2,9,3,8 758 "xtaSS TTxUUb", 759 760 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 761 "xtat txtb", // pos 3,8,3,8 762 "xtaTT TTxTTb", 763 }; 764 765 // Array of 4n positions -- these go with the DATA array 766 // They are: contextStart, contextLimit, start, limit 767 int[] POS = { 768 0, 9, 0, 9, 769 2, 9, 3, 8, 770 3, 8, 3, 8, 771 }; 772 773 int n = DATA.length/3; 774 for (int i=0; i<n; i++) { 775 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 776 Transliterator.Position pos = new Transliterator.Position( 777 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 778 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 779 t.transliterate(rsource, pos); 780 t.finishTransliteration(rsource, pos); 781 String result = rsource.toString(); 782 String exp = DATA[3*i+2]; 783 expectAux(Utility.escape(DATA[3*i]), 784 DATA[3*i+1], 785 result, 786 result.equals(exp), 787 exp); 788 } 789 } 790 791 /** 792 * Test the Hiragana-Katakana transliterator. 793 */ 794 @Test TestHiraganaKatakana()795 public void TestHiraganaKatakana() { 796 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 797 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 798 799 // Array of 3n items 800 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 801 String[] DATA = { 802 "both", 803 "\u3042\u3090\u3099\u3092\u3050", 804 "\u30A2\u30F8\u30F2\u30B0", 805 806 "kh", 807 "\u307C\u3051\u3060\u3042\u3093\u30FC", 808 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 809 }; 810 811 for (int i=0; i<DATA.length; i+=3) { 812 switch (DATA[i].charAt(0)) { 813 case 'h': // Hiragana-Katakana 814 expect(hk, DATA[i+1], DATA[i+2]); 815 break; 816 case 'k': // Katakana-Hiragana 817 expect(kh, DATA[i+2], DATA[i+1]); 818 break; 819 case 'b': // both 820 expect(hk, DATA[i+1], DATA[i+2]); 821 expect(kh, DATA[i+2], DATA[i+1]); 822 break; 823 } 824 } 825 826 } 827 828 @Test TestCopyJ476()829 public void TestCopyJ476() { 830 // This is a C++-only copy constructor test 831 } 832 833 /** 834 * Test inter-Indic transliterators. These are composed. 835 */ 836 @Test TestInterIndic()837 public void TestInterIndic() { 838 String ID = "Devanagari-Gujarati"; 839 Transliterator dg = Transliterator.getInstance(ID); 840 if (dg == null) { 841 errln("FAIL: getInstance(" + ID + ") returned null"); 842 return; 843 } 844 String id = dg.getID(); 845 if (!id.equals(ID)) { 846 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 847 } 848 String dev = "\u0901\u090B\u0925"; 849 String guj = "\u0A81\u0A8B\u0AA5"; 850 expect(dg, dev, guj); 851 } 852 853 /** 854 * Test filter syntax in IDs. (J23) 855 */ 856 @Test TestFilterIDs()857 public void TestFilterIDs() { 858 String[] DATA = { 859 "[aeiou]Any-Hex", // ID 860 "[aeiou]Hex-Any", // expected inverse ID 861 "quizzical", // src 862 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 863 864 "[aeiou]Any-Hex;[^5]Hex-Any", 865 "[^5]Any-Hex;[aeiou]Hex-Any", 866 "quizzical", 867 "q\\u0075izzical", 868 869 "[abc]Null", 870 "[abc]Null", 871 "xyz", 872 "xyz", 873 }; 874 875 for (int i=0; i<DATA.length; i+=4) { 876 String ID = DATA[i]; 877 Transliterator t = Transliterator.getInstance(ID); 878 expect(t, DATA[i+2], DATA[i+3]); 879 880 // Check the ID 881 if (!ID.equals(t.getID())) { 882 errln("FAIL: getInstance(" + ID + ").getID() => " + 883 t.getID()); 884 } 885 886 // Check the inverse 887 String uID = DATA[i+1]; 888 Transliterator u = t.getInverse(); 889 if (u == null) { 890 errln("FAIL: " + ID + ".getInverse() returned NULL"); 891 } else if (!u.getID().equals(uID)) { 892 errln("FAIL: " + ID + ".getInverse().getID() => " + 893 u.getID() + ", expected " + uID); 894 } 895 } 896 } 897 898 /** 899 * Test the case mapping transliterators. 900 */ 901 @Test TestCaseMap()902 public void TestCaseMap() { 903 Transliterator toUpper = 904 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 905 Transliterator toLower = 906 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 907 Transliterator toTitle = 908 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 909 910 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 911 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 912 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 913 "the quick brown foX jumped over the lazY dogs."); 914 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 915 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 916 } 917 918 /** 919 * Test the name mapping transliterators. 920 */ 921 @Test TestNameMap()922 public void TestNameMap() { 923 Transliterator uni2name = 924 Transliterator.getInstance("Any-Name[^abc]"); 925 Transliterator name2uni = 926 Transliterator.getInstance("Name-Any"); 927 928 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 929 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 930 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 931 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 932 933 // round trip 934 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 935 936 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 937 expect(t, s, s); 938 } 939 940 /** 941 * Test liberalized ID syntax. 1006c 942 */ 943 @Test TestLiberalizedID()944 public void TestLiberalizedID() { 945 // Some test cases have an expected getID() value of NULL. This 946 // means I have disabled the test case for now. This stuff is 947 // still under development, and I haven't decided whether to make 948 // getID() return canonical case yet. It will all get rewritten 949 // with the move to Source-Target/Variant IDs anyway. [aliu] 950 String DATA[] = { 951 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 952 " Null ", "Null", "whitespace", 953 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 954 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 955 }; 956 957 for (int i=0; i<DATA.length; i+=3) { 958 try { 959 Transliterator t = Transliterator.getInstance(DATA[i]); 960 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 961 logln("Ok: " + DATA[i+2] + 962 " create ID \"" + DATA[i] + "\" => \"" + 963 t.getID() + "\""); 964 } else { 965 errln("FAIL: " + DATA[i+2] + 966 " create ID \"" + DATA[i] + "\" => \"" + 967 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 968 } 969 } catch (IllegalArgumentException e) { 970 errln("FAIL: " + DATA[i+2] + 971 " create ID \"" + DATA[i] + "\""); 972 } 973 } 974 } 975 976 @Test TestCreateInstance()977 public void TestCreateInstance() { 978 String FORWARD = "F"; 979 String REVERSE = "R"; 980 String DATA[] = { 981 // Column 1: id 982 // Column 2: direction 983 // Column 3: expected ID, or "" if expect failure 984 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 985 986 // JB#2689: bad compound causes crash 987 "InvalidSource-InvalidTarget", FORWARD, "", 988 "InvalidSource-InvalidTarget", REVERSE, "", 989 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 990 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 991 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 992 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 993 994 null 995 }; 996 997 for (int i=0; DATA[i]!=null; i+=3) { 998 String id=DATA[i]; 999 int dir = (DATA[i+1]==FORWARD)? 1000 Transliterator.FORWARD:Transliterator.REVERSE; 1001 String expID=DATA[i+2]; 1002 Exception e = null; 1003 Transliterator t; 1004 try { 1005 t = Transliterator.getInstance(id,dir); 1006 } catch (Exception e1) { 1007 e = e1; 1008 t = null; 1009 } 1010 String newID = (t!=null)?t.getID():""; 1011 boolean ok = (newID.equals(expID)); 1012 if (t==null) { 1013 newID = e.getMessage(); 1014 } 1015 if (ok) { 1016 logln("Ok: createInstance(" + 1017 id + "," + DATA[i+1] + ") => " + newID); 1018 } else { 1019 errln("FAIL: createInstance(" + 1020 id + "," + DATA[i+1] + ") => " + newID + 1021 ", expected " + expID); 1022 } 1023 } 1024 } 1025 1026 /** 1027 * Test the normalization transliterator. 1028 */ 1029 @Test TestNormalizationTransliterator()1030 public void TestNormalizationTransliterator() { 1031 // THE FOLLOWING TWO TABLES ARE COPIED FROM ohos.global.icu.dev.test.normalizer.BasicTest 1032 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1033 String[][] CANON = { 1034 // Input Decomposed Composed 1035 {"cat", "cat", "cat" }, 1036 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1037 1038 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1039 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1040 1041 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1042 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1043 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1044 1045 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1046 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1047 1048 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1049 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1050 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1051 1052 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1053 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1054 1055 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1056 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1057 1058 {"Henry IV", "Henry IV", "Henry IV" }, 1059 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1060 1061 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1062 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1063 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1064 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1065 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1066 1067 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1068 }; 1069 1070 String[][] COMPAT = { 1071 // Input Decomposed Composed 1072 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1073 1074 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1075 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1076 1077 {"Henry IV", "Henry IV", "Henry IV" }, 1078 {"Henry \u2163", "Henry IV", "Henry IV" }, 1079 1080 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1081 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1082 1083 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1084 }; 1085 1086 Transliterator NFD = Transliterator.getInstance("NFD"); 1087 Transliterator NFC = Transliterator.getInstance("NFC"); 1088 for (int i=0; i<CANON.length; ++i) { 1089 String in = CANON[i][0]; 1090 String expd = CANON[i][1]; 1091 String expc = CANON[i][2]; 1092 expect(NFD, in, expd); 1093 expect(NFC, in, expc); 1094 } 1095 1096 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1097 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1098 for (int i=0; i<COMPAT.length; ++i) { 1099 String in = COMPAT[i][0]; 1100 String expkd = COMPAT[i][1]; 1101 String expkc = COMPAT[i][2]; 1102 expect(NFKD, in, expkd); 1103 expect(NFKC, in, expkc); 1104 } 1105 1106 Transliterator t = Transliterator.getInstance("NFD; [x]Remove"); 1107 expect(t, "\u010dx", "c\u030C"); 1108 } 1109 1110 /** 1111 * Test compound RBT rules. 1112 */ 1113 @Test TestCompoundRBT()1114 public void TestCompoundRBT() { 1115 // Careful with spacing and ';' here: Phrase this exactly 1116 // as toRules() is going to return it. If toRules() changes 1117 // with regard to spacing or ';', then adjust this string. 1118 String rule = "::Hex-Any;\n" + 1119 "::Any-Lower;\n" + 1120 "a > '.A.';\n" + 1121 "b > '.B.';\n" + 1122 "::[^t]Any-Upper;"; 1123 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1124 if (t == null) { 1125 errln("FAIL: createFromRules failed"); 1126 return; 1127 } 1128 expect(t, "\u0043at in the hat, bat on the mat", 1129 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1130 String r = t.toRules(true); 1131 if (r.equals(rule)) { 1132 logln("OK: toRules() => " + r); 1133 } else { 1134 errln("FAIL: toRules() => " + r + 1135 ", expected " + rule); 1136 } 1137 1138 // Now test toRules 1139 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1140 if (t == null) { 1141 errln("FAIL: createInstance failed"); 1142 return; 1143 } 1144 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1145 r = t.toRules(true); 1146 if (!r.equals(exp)) { 1147 errln("FAIL: toRules() => " + r + 1148 ", expected " + exp); 1149 } else { 1150 logln("OK: toRules() => " + r); 1151 } 1152 1153 // Round trip the result of toRules 1154 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1155 if (t == null) { 1156 errln("FAIL: createFromRules #2 failed"); 1157 return; 1158 } else { 1159 logln("OK: createFromRules(" + r + ") succeeded"); 1160 } 1161 1162 // Test toRules again 1163 r = t.toRules(true); 1164 if (!r.equals(exp)) { 1165 errln("FAIL: toRules() => " + r + 1166 ", expected " + exp); 1167 } else { 1168 logln("OK: toRules() => " + r); 1169 } 1170 1171 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform 1172 // to what the regenerated ID will look like. 1173 String id = "Upper(Lower);(NFKC)"; 1174 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1175 if (t == null) { 1176 errln("FAIL: createInstance #2 failed"); 1177 return; 1178 } 1179 if (t.getID().equals(id)) { 1180 logln("OK: created " + id); 1181 } else { 1182 errln("FAIL: createInstance(" + id + 1183 ").getID() => " + t.getID()); 1184 } 1185 1186 Transliterator u = t.getInverse(); 1187 if (u == null) { 1188 errln("FAIL: createInverse failed"); 1189 return; 1190 } 1191 exp = "NFKC();Lower(Upper)"; 1192 if (u.getID().equals(exp)) { 1193 logln("OK: createInverse(" + id + ") => " + 1194 u.getID()); 1195 } else { 1196 errln("FAIL: createInverse(" + id + ") => " + 1197 u.getID()); 1198 } 1199 } 1200 1201 /** 1202 * Compound filter semantics were orginially not implemented 1203 * correctly. Originally, each component filter f(i) is replaced by 1204 * f'(i) = f(i) && g, where g is the filter for the compound 1205 * transliterator. 1206 * 1207 * From Mark: 1208 * 1209 * Suppose and I have a transliterator X. Internally X is 1210 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1211 * 1212 * The compound should convert all greek characters (through latin) to 1213 * cyrillic, then lowercase the result. The filter should say "don't 1214 * touch 'A' in the original". But because an intermediate result 1215 * happens to go through "A", the Greek Alpha gets hung up. 1216 */ 1217 @Test TestCompoundFilter()1218 public void TestCompoundFilter() { 1219 Transliterator t = Transliterator.getInstance 1220 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1221 t.setFilter(new UnicodeSet("[^A]")); 1222 1223 // Only the 'A' at index 1 should remain unchanged 1224 expect(t, 1225 CharsToUnicodeString("BA\\u039A\\u0391"), 1226 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1227 } 1228 1229 /** 1230 * Test the "Remove" transliterator. 1231 */ 1232 @Test TestRemove()1233 public void TestRemove() { 1234 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1235 expect(t, "The quick brown fox.", 1236 "Th qck brwn fx."); 1237 } 1238 1239 @Test TestToRules()1240 public void TestToRules() { 1241 String RBT = "rbt"; 1242 String SET = "set"; 1243 String[] DATA = { 1244 RBT, 1245 "$a=\\u4E61; [$a] > A;", 1246 "[\\u4E61] > A;", 1247 1248 RBT, 1249 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1250 "[[:Zs:][:Zl:]]{a} > A;", 1251 1252 SET, 1253 "[[:Zs:][:Zl:]]", 1254 "[[:Zs:][:Zl:]]", 1255 1256 SET, 1257 "[:Ps:]", 1258 "[:Ps:]", 1259 1260 SET, 1261 "[:L:]", 1262 "[:L:]", 1263 1264 SET, 1265 "[[:L:]-[A]]", 1266 "[[:L:]-[A]]", 1267 1268 SET, 1269 "[~[:Lu:][:Ll:]]", 1270 "[~[:Lu:][:Ll:]]", 1271 1272 SET, 1273 "[~[a-z]]", 1274 "[~[a-z]]", 1275 1276 RBT, 1277 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1278 "[^[:Zs:]]{a} > A;", 1279 1280 RBT, 1281 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1282 "[[a-z]-[:Zs:]]{a} > A;", 1283 1284 RBT, 1285 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1286 "[[:Zs:]&[a-z]]{a} > A;", 1287 1288 RBT, 1289 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1290 "[x[:Zs:]]{a} > A;", 1291 1292 RBT, 1293 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1294 "$macron = \\u0304 ;"+ 1295 "$evowel = [aeiouyAEIOUY] ;"+ 1296 "$iotasub = \\u0345 ;"+ 1297 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1298 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1299 1300 RBT, 1301 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1302 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1303 }; 1304 1305 for (int d=0; d < DATA.length; d+=3) { 1306 if (DATA[d] == RBT) { 1307 // Transliterator test 1308 Transliterator t = Transliterator.createFromRules("ID", 1309 DATA[d+1], Transliterator.FORWARD); 1310 if (t == null) { 1311 errln("FAIL: createFromRules failed"); 1312 return; 1313 } 1314 String rules, escapedRules; 1315 rules = t.toRules(false); 1316 escapedRules = t.toRules(true); 1317 String expRules = Utility.unescape(DATA[d+2]); 1318 String expEscapedRules = DATA[d+2]; 1319 if (rules.equals(expRules)) { 1320 logln("Ok: " + DATA[d+1] + 1321 " => " + Utility.escape(rules)); 1322 } else { 1323 errln("FAIL: " + DATA[d+1] + 1324 " => " + Utility.escape(rules + ", exp " + expRules)); 1325 } 1326 if (escapedRules.equals(expEscapedRules)) { 1327 logln("Ok: " + DATA[d+1] + 1328 " => " + escapedRules); 1329 } else { 1330 errln("FAIL: " + DATA[d+1] + 1331 " => " + escapedRules + ", exp " + expEscapedRules); 1332 } 1333 1334 } else { 1335 // UnicodeSet test 1336 String pat = DATA[d+1]; 1337 String expToPat = DATA[d+2]; 1338 UnicodeSet set = new UnicodeSet(pat); 1339 1340 // Adjust spacing etc. as necessary. 1341 String toPat; 1342 toPat = set.toPattern(true); 1343 if (expToPat.equals(toPat)) { 1344 logln("Ok: " + pat + 1345 " => " + toPat); 1346 } else { 1347 errln("FAIL: " + pat + 1348 " => " + Utility.escape(toPat) + 1349 ", exp " + Utility.escape(pat)); 1350 } 1351 } 1352 } 1353 } 1354 1355 @Test TestContext()1356 public void TestContext() { 1357 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1358 1359 expect("de > x; {d}e > y;", 1360 "de", 1361 "ye", 1362 pos); 1363 1364 expect("ab{c} > z;", 1365 "xadabdabcy", 1366 "xadabdabzy"); 1367 } 1368 CharsToUnicodeString(String s)1369 static final String CharsToUnicodeString(String s) { 1370 return Utility.unescape(s); 1371 } 1372 1373 @Test TestSupplemental()1374 public void TestSupplemental() { 1375 1376 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1377 "a > $a; $s > i;"), 1378 CharsToUnicodeString("ab\\U0001030Fx"), 1379 CharsToUnicodeString("\\U00010300bix")); 1380 1381 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1382 "$b=[A-Z\\U00010400-\\U0001044D];" + 1383 "($a)($b) > $2 $1;"), 1384 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1385 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1386 1387 // k|ax\\U00010300xm 1388 1389 // k|a\\U00010400\\U00010300xm 1390 // ky|\\U00010400\\U00010300xm 1391 // ky\\U00010400|\\U00010300xm 1392 1393 // ky\\U00010400|\\U00010300\\U00010400m 1394 // ky\\U00010400y|\\U00010400m 1395 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1396 "$a {x} > | @ \\U00010400;" + 1397 "{$a} [^\\u0000-\\uFFFF] > y;"), 1398 CharsToUnicodeString("kax\\U00010300xm"), 1399 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1400 1401 expect(Transliterator.getInstance("Any-Name"), 1402 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1403 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1404 1405 expect(Transliterator.getInstance("Name-Any"), 1406 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1407 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1408 1409 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1410 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1411 "U+10330U+10FF00U+E0061U+00A0"); 1412 1413 expect(Transliterator.getInstance("Any-Hex/C"), 1414 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1415 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1416 1417 expect(Transliterator.getInstance("Any-Hex/Perl"), 1418 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1419 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1420 1421 expect(Transliterator.getInstance("Any-Hex/Java"), 1422 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1423 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1424 1425 expect(Transliterator.getInstance("Any-Hex/XML"), 1426 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1427 "𐌰􏼀󠁡 "); 1428 1429 expect(Transliterator.getInstance("Any-Hex/XML10"), 1430 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1431 "𐌰􏼀󠁡 "); 1432 1433 expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1434 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1435 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1436 } 1437 1438 @Test TestQuantifier()1439 public void TestQuantifier() { 1440 1441 // Make sure @ in a quantified anteContext works 1442 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1443 "AAAAAb", 1444 "aaa(aac)"); 1445 1446 // Make sure @ in a quantified postContext works 1447 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1448 "baaaaa", 1449 "caa(aaa)"); 1450 1451 // Make sure @ in a quantified postContext with seg ref works 1452 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1453 "baaaaa", 1454 "baa(aaa)"); 1455 1456 // Make sure @ past ante context doesn't enter ante context 1457 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1458 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1459 "xxxab", 1460 "xxx(ac)", 1461 pos); 1462 1463 // Make sure @ past post context doesn't pass limit 1464 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1465 expect("{b} a+ > c @@ |; x > y; a > A;", 1466 "baxx", 1467 "caxx", 1468 pos2); 1469 1470 // Make sure @ past post context doesn't enter post context 1471 expect("{b} a+ > c @@ |; x > y; a > A;", 1472 "baxx", 1473 "cayy"); 1474 1475 expect("(ab)? c > d;", 1476 "c abc ababc", 1477 "d d abd"); 1478 1479 // NOTE: The (ab)+ when referenced just yields a single "ab", 1480 // not the full sequence of them. This accords with perl behavior. 1481 expect("(ab)+ {x} > '(' $1 ')';", 1482 "x abx ababxy", 1483 "x ab(ab) abab(ab)y"); 1484 1485 expect("b+ > x;", 1486 "ac abc abbc abbbc", 1487 "ac axc axc axc"); 1488 1489 expect("[abc]+ > x;", 1490 "qac abrc abbcs abtbbc", 1491 "qx xrx xs xtx"); 1492 1493 expect("q{(ab)+} > x;", 1494 "qa qab qaba qababc qaba", 1495 "qa qx qxa qxc qxa"); 1496 1497 expect("q(ab)* > x;", 1498 "qa qab qaba qababc", 1499 "xa x xa xc"); 1500 1501 // NOTE: The (ab)+ when referenced just yields a single "ab", 1502 // not the full sequence of them. This accords with perl behavior. 1503 expect("q(ab)* > '(' $1 ')';", 1504 "qa qab qaba qababc", 1505 "()a (ab) (ab)a (ab)c"); 1506 1507 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1508 // quoted string 1509 expect("'ab'+ > x;", 1510 "bb ab ababb", 1511 "bb x xb"); 1512 1513 // $foo+ and $foo* -- the quantifier should apply to the entire 1514 // variable reference 1515 expect("$var = ab; $var+ > x;", 1516 "bb ab ababb", 1517 "bb x xb"); 1518 } 1519 1520 static class TestFact implements Transliterator.Factory { 1521 static class NameableNullTrans extends Transliterator { NameableNullTrans(String id)1522 public NameableNullTrans(String id) { 1523 super(id, null); 1524 } 1525 @Override handleTransliterate(Replaceable text, Position offsets, boolean incremental)1526 protected void handleTransliterate(Replaceable text, 1527 Position offsets, boolean incremental) { 1528 offsets.start = offsets.limit; 1529 } 1530 } 1531 String id; TestFact(String theID)1532 public TestFact(String theID) { 1533 id = theID; 1534 } 1535 @Override getInstance(String ignoredID)1536 public Transliterator getInstance(String ignoredID) { 1537 return new NameableNullTrans(id); 1538 } 1539 } 1540 1541 @Test TestSTV()1542 public void TestSTV() { 1543 Enumeration es = Transliterator.getAvailableSources(); 1544 for (int i=0; es.hasMoreElements(); ++i) { 1545 String source = (String) es.nextElement(); 1546 logln("" + i + ": " + source); 1547 if (source.length() == 0) { 1548 errln("FAIL: empty source"); 1549 continue; 1550 } 1551 Enumeration et = Transliterator.getAvailableTargets(source); 1552 for (int j=0; et.hasMoreElements(); ++j) { 1553 String target = (String) et.nextElement(); 1554 logln(" " + j + ": " + target); 1555 if (target.length() == 0) { 1556 errln("FAIL: empty target"); 1557 continue; 1558 } 1559 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1560 for (int k=0; ev.hasMoreElements(); ++k) { 1561 String variant = (String) ev.nextElement(); 1562 if (variant.length() == 0) { 1563 logln(" " + k + ": <empty>"); 1564 } else { 1565 logln(" " + k + ": " + variant); 1566 } 1567 } 1568 } 1569 } 1570 1571 // Test registration 1572 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1573 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1574 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1575 for (int i=0; i<3; ++i) { 1576 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1577 try { 1578 Transliterator t = Transliterator.getInstance(IDS[i]); 1579 if (t.getID().equals(IDS[i])) { 1580 logln("Ok: Registration/creation succeeded for ID " + 1581 IDS[i]); 1582 } else { 1583 errln("FAIL: Registration of ID " + 1584 IDS[i] + " creates ID " + t.getID()); 1585 } 1586 Transliterator.unregister(IDS[i]); 1587 try { 1588 t = Transliterator.getInstance(IDS[i]); 1589 errln("FAIL: Unregistration failed for ID " + 1590 IDS[i] + "; still receiving ID " + t.getID()); 1591 } catch (IllegalArgumentException e2) { 1592 // Good; this is what we expect 1593 logln("Ok; Unregistered " + IDS[i]); 1594 } 1595 } catch (IllegalArgumentException e) { 1596 errln("FAIL: Registration/creation failed for ID " + 1597 IDS[i]); 1598 } finally { 1599 Transliterator.unregister(IDS[i]); 1600 } 1601 } 1602 1603 // Make sure getAvailable API reflects removal 1604 for (Enumeration e = Transliterator.getAvailableIDs(); 1605 e.hasMoreElements(); ) { 1606 String id = (String) e.nextElement(); 1607 for (int i=0; i<3; ++i) { 1608 if (id.equals(FULL_IDS[i])) { 1609 errln("FAIL: unregister(" + id + ") failed"); 1610 } 1611 } 1612 } 1613 for (Enumeration e = Transliterator.getAvailableTargets("Any"); 1614 e.hasMoreElements(); ) { 1615 String t = (String) e.nextElement(); 1616 if (t.equals(IDS[0])) { 1617 errln("FAIL: unregister(Any-" + t + ") failed"); 1618 } 1619 } 1620 for (Enumeration e = Transliterator.getAvailableSources(); 1621 e.hasMoreElements(); ) { 1622 String s = (String) e.nextElement(); 1623 for (int i=0; i<3; ++i) { 1624 if (SOURCES[i] == null) continue; 1625 if (s.equals(SOURCES[i])) { 1626 errln("FAIL: unregister(" + s + "-*) failed"); 1627 } 1628 } 1629 } 1630 } 1631 1632 /** 1633 * Test inverse of Greek-Latin; Title() 1634 */ 1635 @Test TestCompoundInverse()1636 public void TestCompoundInverse() { 1637 Transliterator t = Transliterator.getInstance 1638 ("Greek-Latin; Title()", Transliterator.REVERSE); 1639 if (t == null) { 1640 errln("FAIL: createInstance"); 1641 return; 1642 } 1643 String exp = "(Title);Latin-Greek"; 1644 if (t.getID().equals(exp)) { 1645 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1646 t.getID()); 1647 } else { 1648 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1649 t.getID() + "\", expected \"" + exp + "\""); 1650 } 1651 } 1652 1653 /** 1654 * Test NFD chaining with RBT 1655 */ 1656 @Test TestNFDChainRBT()1657 public void TestNFDChainRBT() { 1658 Transliterator t = Transliterator.createFromRules( 1659 "TEST", "::NFD; aa > Q; a > q;", 1660 Transliterator.FORWARD); 1661 logln(t.toRules(true)); 1662 expect(t, "aa", "Q"); 1663 } 1664 1665 /** 1666 * Inverse of "Null" should be "Null". (J21) 1667 */ 1668 @Test TestNullInverse()1669 public void TestNullInverse() { 1670 Transliterator t = Transliterator.getInstance("Null"); 1671 Transliterator u = t.getInverse(); 1672 if (!u.getID().equals("Null")) { 1673 errln("FAIL: Inverse of Null should be Null"); 1674 } 1675 } 1676 1677 /** 1678 * Check ID of inverse of alias. (J22) 1679 */ 1680 @Test TestAliasInverseID()1681 public void TestAliasInverseID() { 1682 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1683 Transliterator t = Transliterator.getInstance(ID); 1684 Transliterator u = t.getInverse(); 1685 String exp = "Hangul-Latin"; 1686 String got = u.getID(); 1687 if (!got.equals(exp)) { 1688 errln("FAIL: Inverse of " + ID + " is " + got + 1689 ", expected " + exp); 1690 } 1691 } 1692 1693 /** 1694 * Test IDs of inverses of compound transliterators. (J20) 1695 */ 1696 @Test TestCompoundInverseID()1697 public void TestCompoundInverseID() { 1698 String ID = "Latin-Jamo;NFC(NFD)"; 1699 Transliterator t = Transliterator.getInstance(ID); 1700 Transliterator u = t.getInverse(); 1701 String exp = "NFD(NFC);Jamo-Latin"; 1702 String got = u.getID(); 1703 if (!got.equals(exp)) { 1704 errln("FAIL: Inverse of " + ID + " is " + got + 1705 ", expected " + exp); 1706 } 1707 } 1708 1709 /** 1710 * Test undefined variable. 1711 */ 1712 @Test TestUndefinedVariable()1713 public void TestUndefinedVariable() { 1714 String rule = "$initial } a <> \u1161;"; 1715 try { 1716 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1717 } catch (IllegalArgumentException e) { 1718 logln("OK: Got exception for " + rule + ", as expected: " + 1719 e.getMessage()); 1720 return; 1721 } 1722 errln("Fail: bogus rule " + rule + " compiled without error"); 1723 } 1724 1725 /** 1726 * Test empty context. 1727 */ 1728 @Test TestEmptyContext()1729 public void TestEmptyContext() { 1730 expect(" { a } > b;", "xay a ", "xby b "); 1731 } 1732 1733 /** 1734 * Test compound filter ID syntax 1735 */ 1736 @Test TestCompoundFilterID()1737 public void TestCompoundFilterID() { 1738 String[] DATA = { 1739 // Col. 1 = ID or rule set (latter must start with #) 1740 1741 // = columns > 1 are null if expect col. 1 to be illegal = 1742 1743 // Col. 2 = direction, "F..." or "R..." 1744 // Col. 3 = source string 1745 // Col. 4 = exp result 1746 1747 "[abc]; [abc]", null, null, null, // multiple filters 1748 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1749 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1750 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1751 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1752 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1753 }; 1754 1755 for (int i=0; i<DATA.length; i+=4) { 1756 String id = DATA[i]; 1757 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1758 Transliterator.REVERSE : Transliterator.FORWARD; 1759 String source = DATA[i+2]; 1760 String exp = DATA[i+3]; 1761 boolean expOk = (DATA[i+1] != null); 1762 Transliterator t = null; 1763 IllegalArgumentException e = null; 1764 try { 1765 if (id.charAt(0) == '#') { 1766 t = Transliterator.createFromRules("ID", id, direction); 1767 } else { 1768 t = Transliterator.getInstance(id, direction); 1769 } 1770 } catch (IllegalArgumentException ee) { 1771 e = ee; 1772 } 1773 boolean ok = (t != null && e == null); 1774 if (ok == expOk) { 1775 logln("Ok: " + id + " => " + t + 1776 (e != null ? (", " + e.getMessage()) : "")); 1777 if (source != null) { 1778 expect(t, source, exp); 1779 } 1780 } else { 1781 errln("FAIL: " + id + " => " + t + 1782 (e != null ? (", " + e.getMessage()) : "")); 1783 } 1784 } 1785 } 1786 1787 /** 1788 * Test new property set syntax 1789 */ 1790 @Test TestPropertySet()1791 public void TestPropertySet() { 1792 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1793 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1794 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1795 } 1796 1797 /** 1798 * Test various failure points of the new 2.0 engine. 1799 */ 1800 @Test TestNewEngine()1801 public void TestNewEngine() { 1802 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1803 // Katakana should be untouched 1804 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1805 1806 if (true) { 1807 // This test will only work if Transliterator.ROLLBACK is 1808 // true. Otherwise, this test will fail, revealing a 1809 // limitation of global filters in incremental mode. 1810 1811 Transliterator a = 1812 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1813 Transliterator A = 1814 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1815 1816 //Transliterator array[] = new Transliterator[] { 1817 // a, 1818 // Transliterator.getInstance("NFD"), 1819 // A }; 1820 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1821 1822 try { 1823 Transliterator.registerInstance(a); 1824 Transliterator.registerInstance(A); 1825 1826 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1827 expect(t, "aAaA", "bAbA"); 1828 1829 Transliterator[] u = t.getElements(); 1830 assertTrue("getElements().length", u.length == 3); 1831 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1832 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1833 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1834 1835 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1836 t.setFilter(new UnicodeSet("[:Ll:]")); 1837 expect(t, "aAaA", "bAbA"); 1838 } finally { 1839 Transliterator.unregister("a_to_A"); 1840 Transliterator.unregister("A_to_b"); 1841 } 1842 } 1843 1844 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1845 "a", 1846 "ax"); 1847 1848 String gr = 1849 "$ddot = \u0308 ;" + 1850 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1851 "$rough = \u0314 ;" + 1852 "($lcgvowel+ $ddot?) $rough > h | $1 ;" + 1853 "\u03b1 <> a ;" + 1854 "$rough <> h ;"; 1855 1856 expect(gr, "\u03B1\u0314", "ha"); 1857 } 1858 1859 /** 1860 * Test quantified segment behavior. We want: 1861 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1862 */ 1863 @Test TestQuantifiedSegment()1864 public void TestQuantifiedSegment() { 1865 // The normal case 1866 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1867 1868 // The tricky case; the quantifier is around the segment 1869 expect("([abc])+ > x $1 x;", "cba", "xax"); 1870 1871 // Tricky case in reverse direction 1872 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1873 1874 // Check post-context segment 1875 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1876 1877 // Test toRule/toPattern for non-quantified segment. 1878 // Careful with spacing here. 1879 String r = "([a-c]){q} > x $1 x;"; 1880 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1881 String rr = t.toRules(true); 1882 if (!r.equals(rr)) { 1883 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1884 } else { 1885 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1886 } 1887 1888 // Test toRule/toPattern for quantified segment. 1889 // Careful with spacing here. 1890 r = "([a-c])+{q} > x $1 x;"; 1891 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1892 rr = t.toRules(true); 1893 if (!r.equals(rr)) { 1894 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1895 } else { 1896 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1897 } 1898 } 1899 1900 //====================================================================== 1901 // Ram's tests 1902 //====================================================================== 1903 /* this test performs test of rules in ISO 15915 */ 1904 @Test TestDevanagariLatinRT()1905 public void TestDevanagariLatinRT(){ 1906 String[] source = { 1907 "bh\u0101rata", 1908 "kra", 1909 "k\u1E63a", 1910 "khra", 1911 "gra", 1912 "\u1E45ra", 1913 "cra", 1914 "chra", 1915 "j\u00F1a", 1916 "jhra", 1917 "\u00F1ra", 1918 "\u1E6Dya", 1919 "\u1E6Dhra", 1920 "\u1E0Dya", 1921 //"r\u0323ya", // \u095c is not valid in Devanagari 1922 "\u1E0Dhya", 1923 "\u1E5Bhra", 1924 "\u1E47ra", 1925 "tta", 1926 "thra", 1927 "dda", 1928 "dhra", 1929 "nna", 1930 "pra", 1931 "phra", 1932 "bra", 1933 "bhra", 1934 "mra", 1935 "\u1E49ra", 1936 //"l\u0331ra", 1937 "yra", 1938 "\u1E8Fra", 1939 //"l-", 1940 "vra", 1941 "\u015Bra", 1942 "\u1E63ra", 1943 "sra", 1944 "hma", 1945 "\u1E6D\u1E6Da", 1946 "\u1E6D\u1E6Dha", 1947 "\u1E6Dh\u1E6Dha", 1948 "\u1E0D\u1E0Da", 1949 "\u1E0D\u1E0Dha", 1950 "\u1E6Dya", 1951 "\u1E6Dhya", 1952 "\u1E0Dya", 1953 "\u1E0Dhya", 1954 // Not roundtrippable -- 1955 // \u0939\u094d\u094d\u092E - hma 1956 // \u0939\u094d\u092E - hma 1957 // CharsToUnicodeString("hma"), 1958 "hya", 1959 "\u015Br\u0325", 1960 "\u015Bca", 1961 "\u0115", 1962 "san\u0304j\u012Bb s\u0113nagupta", 1963 "\u0101nand vaddir\u0101ju", 1964 }; 1965 String[] expected = { 1966 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 1967 "\u0915\u094D\u0930", /* kra */ 1968 "\u0915\u094D\u0937", /* ks\u0323a */ 1969 "\u0916\u094D\u0930", /* khra */ 1970 "\u0917\u094D\u0930", /* gra */ 1971 "\u0919\u094D\u0930", /* n\u0307ra */ 1972 "\u091A\u094D\u0930", /* cra */ 1973 "\u091B\u094D\u0930", /* chra */ 1974 "\u091C\u094D\u091E", /* jn\u0303a */ 1975 "\u091D\u094D\u0930", /* jhra */ 1976 "\u091E\u094D\u0930", /* n\u0303ra */ 1977 "\u091F\u094D\u092F", /* t\u0323ya */ 1978 "\u0920\u094D\u0930", /* t\u0323hra */ 1979 "\u0921\u094D\u092F", /* d\u0323ya */ 1980 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 1981 "\u0922\u094D\u092F", /* d\u0323hya */ 1982 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 1983 "\u0923\u094D\u0930", /* n\u0323ra */ 1984 "\u0924\u094D\u0924", /* tta */ 1985 "\u0925\u094D\u0930", /* thra */ 1986 "\u0926\u094D\u0926", /* dda */ 1987 "\u0927\u094D\u0930", /* dhra */ 1988 "\u0928\u094D\u0928", /* nna */ 1989 "\u092A\u094D\u0930", /* pra */ 1990 "\u092B\u094D\u0930", /* phra */ 1991 "\u092C\u094D\u0930", /* bra */ 1992 "\u092D\u094D\u0930", /* bhra */ 1993 "\u092E\u094D\u0930", /* mra */ 1994 "\u0929\u094D\u0930", /* n\u0331ra */ 1995 //"\u0934\u094D\u0930", /* l\u0331ra */ 1996 "\u092F\u094D\u0930", /* yra */ 1997 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 1998 //"l-", 1999 "\u0935\u094D\u0930", /* vra */ 2000 "\u0936\u094D\u0930", /* s\u0301ra */ 2001 "\u0937\u094D\u0930", /* s\u0323ra */ 2002 "\u0938\u094D\u0930", /* sra */ 2003 "\u0939\u094d\u092E", /* hma */ 2004 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 2005 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 2006 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 2007 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 2008 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 2009 "\u091F\u094D\u092F", /* t\u0323ya */ 2010 "\u0920\u094D\u092F", /* t\u0323hya */ 2011 "\u0921\u094D\u092F", /* d\u0323ya */ 2012 "\u0922\u094D\u092F", /* d\u0323hya */ 2013 // "hma", /* hma */ 2014 "\u0939\u094D\u092F", /* hya */ 2015 "\u0936\u0943", /* s\u0301r\u0325a */ 2016 "\u0936\u094D\u091A", /* s\u0301ca */ 2017 "\u090d", /* e\u0306 */ 2018 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2019 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2020 }; 2021 2022 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2023 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2024 2025 for(int i= 0; i<source.length; i++){ 2026 expect(latinToDev,(source[i]),(expected[i])); 2027 expect(devToLatin,(expected[i]),(source[i])); 2028 } 2029 2030 } 2031 @Test TestTeluguLatinRT()2032 public void TestTeluguLatinRT(){ 2033 String[] source = { 2034 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2035 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2036 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2037 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2038 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2039 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2040 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2041 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2042 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2043 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2044 }; 2045 2046 String[] expected = { 2047 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2048 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2049 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2050 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2051 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2052 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2053 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2054 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2055 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2056 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2057 }; 2058 2059 2060 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2061 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2062 2063 for(int i= 0; i<source.length; i++){ 2064 expect(latinToDev,(source[i]),(expected[i])); 2065 expect(devToLatin,(expected[i]),(source[i])); 2066 } 2067 } 2068 2069 @Test TestSanskritLatinRT()2070 public void TestSanskritLatinRT(){ 2071 int MAX_LEN =15; 2072 String[] source = { 2073 "rmk\u1E63\u0113t", 2074 "\u015Br\u012Bmad", 2075 "bhagavadg\u012Bt\u0101", 2076 "adhy\u0101ya", 2077 "arjuna", 2078 "vi\u1E63\u0101da", 2079 "y\u014Dga", 2080 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2081 "uv\u0101cr\u0325", 2082 "dharmak\u1E63\u0113tr\u0113", 2083 "kuruk\u1E63\u0113tr\u0113", 2084 "samav\u0113t\u0101", 2085 "yuyutsava\u1E25", 2086 "m\u0101mak\u0101\u1E25", 2087 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2088 "kimakurvata", 2089 "san\u0304java", 2090 }; 2091 String[] expected = { 2092 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2093 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2094 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2095 "\u0905\u0927\u094d\u092f\u093e\u092f", 2096 "\u0905\u0930\u094d\u091c\u0941\u0928", 2097 "\u0935\u093f\u0937\u093e\u0926", 2098 "\u092f\u094b\u0917", 2099 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2100 "\u0909\u0935\u093E\u091A\u0943", 2101 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2102 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2103 "\u0938\u092e\u0935\u0947\u0924\u093e", 2104 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2105 "\u092e\u093e\u092e\u0915\u093e\u0903", 2106 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2107 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2108 "\u0938\u0902\u091c\u0935", 2109 }; 2110 2111 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2112 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2113 for(int i= 0; i<MAX_LEN; i++){ 2114 expect(latinToDev,(source[i]),(expected[i])); 2115 expect(devToLatin,(expected[i]),(source[i])); 2116 } 2117 } 2118 2119 @Test TestCompoundLatinRT()2120 public void TestCompoundLatinRT(){ 2121 int MAX_LEN =15; 2122 String[] source = { 2123 "rmk\u1E63\u0113t", 2124 "\u015Br\u012Bmad", 2125 "bhagavadg\u012Bt\u0101", 2126 "adhy\u0101ya", 2127 "arjuna", 2128 "vi\u1E63\u0101da", 2129 "y\u014Dga", 2130 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2131 "uv\u0101cr\u0325", 2132 "dharmak\u1E63\u0113tr\u0113", 2133 "kuruk\u1E63\u0113tr\u0113", 2134 "samav\u0113t\u0101", 2135 "yuyutsava\u1E25", 2136 "m\u0101mak\u0101\u1E25", 2137 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2138 "kimakurvata", 2139 "san\u0304java" 2140 }; 2141 String[] expected = { 2142 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2143 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2144 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2145 "\u0905\u0927\u094d\u092f\u093e\u092f", 2146 "\u0905\u0930\u094d\u091c\u0941\u0928", 2147 "\u0935\u093f\u0937\u093e\u0926", 2148 "\u092f\u094b\u0917", 2149 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2150 "\u0909\u0935\u093E\u091A\u0943", 2151 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2152 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2153 "\u0938\u092e\u0935\u0947\u0924\u093e", 2154 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2155 "\u092e\u093e\u092e\u0915\u093e\u0903", 2156 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2157 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2158 "\u0938\u0902\u091c\u0935" 2159 }; 2160 2161 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2162 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2163 for(int i= 0; i<MAX_LEN; i++){ 2164 expect(latinToDevToLatin,(source[i]),(source[i])); 2165 expect(devToLatinToDev,(expected[i]),(expected[i])); 2166 } 2167 } 2168 /** 2169 * Test Gurmukhi-Devanagari Tippi and Bindi 2170 */ 2171 @Test TestGurmukhiDevanagari()2172 public void TestGurmukhiDevanagari(){ 2173 // the rule says: 2174 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2175 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2176 2177 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2178 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2179 2180 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2181 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2182 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2183 StringBuffer src = new StringBuffer(" \u0902"); 2184 StringBuffer expect = new StringBuffer(" \u0A02"); 2185 while(vIter.next()){ 2186 src.setCharAt(0,(char) vIter.codepoint); 2187 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2188 expect(trans,src.toString(),expect.toString()); 2189 } 2190 2191 expect.setCharAt(1,'\u0A70'); 2192 while(nvIter.next()){ 2193 //src.setCharAt(0,(char) nvIter.codepoint); 2194 src.setCharAt(0,(char)nvIter.codepoint); 2195 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2196 expect(trans,src.toString(),expect.toString()); 2197 } 2198 } 2199 /** 2200 * Test instantiation from a locale. 2201 */ 2202 @Test TestLocaleInstantiation()2203 public void TestLocaleInstantiation() { 2204 Transliterator t; 2205 try{ 2206 t = Transliterator.getInstance("te_IN-Latin"); 2207 //expect(t, "\u0430", "a"); 2208 }catch(IllegalArgumentException ex){ 2209 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2210 } 2211 try{ 2212 t = Transliterator.getInstance("ru_RU-Latin"); 2213 expect(t, "\u0430", "a"); 2214 }catch(IllegalArgumentException ex){ 2215 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2216 } 2217 try{ 2218 t = Transliterator.getInstance("en-el"); 2219 expect(t, "a", "\u03B1"); 2220 }catch(IllegalArgumentException ex){ 2221 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2222 } 2223 } 2224 2225 /** 2226 * Test title case handling of accent (should ignore accents) 2227 */ 2228 @Test TestTitleAccents()2229 public void TestTitleAccents() { 2230 Transliterator t = Transliterator.getInstance("Title"); 2231 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2232 } 2233 2234 /** 2235 * Basic test of a locale resource based rule. 2236 */ 2237 @Test TestLocaleResource()2238 public void TestLocaleResource() { 2239 String DATA[] = { 2240 // id from to 2241 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2242 "Latin-el", "b", "\u03bc\u03c0", 2243 "Latin-Greek", "b", "\u03B2", 2244 "Greek-Latin/UNGEGN", "\u03B2", "v", 2245 "el-Latin", "\u03B2", "v", 2246 "Greek-Latin", "\u03B2", "b", 2247 }; 2248 for (int i=0; i<DATA.length; i+=3) { 2249 Transliterator t = Transliterator.getInstance(DATA[i]); 2250 expect(t, DATA[i+1], DATA[i+2]); 2251 } 2252 } 2253 2254 /** 2255 * Make sure parse errors reference the right line. 2256 */ 2257 @Test TestParseError()2258 public void TestParseError() { 2259 String rule = 2260 "a > b;\n" + 2261 "# more stuff\n" + 2262 "d << b;"; 2263 try { 2264 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2265 if(t!=null){ 2266 errln("FAIL: Did not get expected exception"); 2267 } 2268 } catch (IllegalArgumentException e) { 2269 String err = e.getMessage(); 2270 if (err.indexOf("d << b") >= 0) { 2271 logln("Ok: " + err); 2272 } else { 2273 errln("FAIL: " + err); 2274 } 2275 return; 2276 } 2277 errln("FAIL: no syntax error"); 2278 } 2279 2280 /** 2281 * Make sure sets on output are disallowed. 2282 */ 2283 @Test TestOutputSet()2284 public void TestOutputSet() { 2285 String rule = "$set = [a-cm-n]; b > $set;"; 2286 Transliterator t = null; 2287 try { 2288 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2289 if(t!=null){ 2290 errln("FAIL: Did not get the expected exception"); 2291 } 2292 } catch (IllegalArgumentException e) { 2293 logln("Ok: " + e.getMessage()); 2294 return; 2295 } 2296 errln("FAIL: No syntax error"); 2297 } 2298 2299 /** 2300 * Test the use variable range pragma, making sure that use of 2301 * variable range characters is detected and flagged as an error. 2302 */ 2303 @Test TestVariableRange()2304 public void TestVariableRange() { 2305 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2306 try { 2307 Transliterator t = 2308 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2309 if(t!=null){ 2310 errln("FAIL: Did not get the expected exception"); 2311 } 2312 } catch (IllegalArgumentException e) { 2313 logln("Ok: " + e.getMessage()); 2314 return; 2315 } 2316 errln("FAIL: No syntax error"); 2317 } 2318 2319 /** 2320 * Test invalid post context error handling 2321 */ 2322 @Test TestInvalidPostContext()2323 public void TestInvalidPostContext() { 2324 try { 2325 Transliterator t = 2326 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2327 if(t!=null){ 2328 errln("FAIL: Did not get the expected exception"); 2329 } 2330 } catch (IllegalArgumentException e) { 2331 String msg = e.getMessage(); 2332 if (msg.indexOf("a}b{c") >= 0) { 2333 logln("Ok: " + msg); 2334 } else { 2335 errln("FAIL: " + msg); 2336 } 2337 return; 2338 } 2339 errln("FAIL: No syntax error"); 2340 } 2341 2342 /** 2343 * Test ID form variants 2344 */ 2345 @Test TestIDForms()2346 public void TestIDForms() { 2347 String DATA[] = { 2348 "NFC", null, "NFD", 2349 "nfd", null, "NFC", // make sure case is ignored 2350 "Any-NFKD", null, "Any-NFKC", 2351 "Null", null, "Null", 2352 "-nfkc", "nfkc", "NFKD", 2353 "-nfkc/", "nfkc", "NFKD", 2354 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2355 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2356 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2357 "Source-", null, null, 2358 "Source/Variant-", null, null, 2359 "Source-/Variant", null, null, 2360 "/Variant", null, null, 2361 "/Variant-", null, null, 2362 "-/Variant", null, null, 2363 "-/", null, null, 2364 "-", null, null, 2365 "/", null, null, 2366 }; 2367 2368 for (int i=0; i<DATA.length; i+=3) { 2369 String ID = DATA[i]; 2370 String expID = DATA[i+1]; 2371 String expInvID = DATA[i+2]; 2372 boolean expValid = (expInvID != null); 2373 if (expID == null) { 2374 expID = ID; 2375 } 2376 try { 2377 Transliterator t = 2378 Transliterator.getInstance(ID); 2379 Transliterator u = t.getInverse(); 2380 if (t.getID().equals(expID) && 2381 u.getID().equals(expInvID)) { 2382 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2383 } else { 2384 errln("FAIL: getInstance(" + ID + ") => " + 2385 t.getID() + " x getInverse() => " + u.getID() + 2386 ", expected " + expInvID); 2387 } 2388 } catch (IllegalArgumentException e) { 2389 if (!expValid) { 2390 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2391 } else { 2392 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2393 } 2394 } 2395 } 2396 } 2397 checkRules(String label, Transliterator t2, String testRulesForward)2398 void checkRules(String label, Transliterator t2, String testRulesForward) { 2399 String rules2 = t2.toRules(true); 2400 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2401 rules2 = TestUtility.replace(rules2, " ", ""); 2402 rules2 = TestUtility.replace(rules2, "\n", ""); 2403 rules2 = TestUtility.replace(rules2, "\r", ""); 2404 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2405 2406 if (!rules2.equals(testRulesForward)) { 2407 errln(label); 2408 logln("GENERATED RULES: " + rules2); 2409 logln("SHOULD BE: " + testRulesForward); 2410 } 2411 } 2412 2413 /** 2414 * Mark's toRules test. 2415 */ 2416 @Test TestToRulesMark()2417 public void TestToRulesMark() { 2418 2419 String testRules = 2420 "::[[:Latin:][:Mark:]];" 2421 + "::NFKD (NFC);" 2422 + "::Lower (Lower);" 2423 + "a <> \\u03B1;" // alpha 2424 + "::NFKC (NFD);" 2425 + "::Upper (Lower);" 2426 + "::Lower ();" 2427 + "::([[:Greek:][:Mark:]]);" 2428 ; 2429 String testRulesForward = 2430 "::[[:Latin:][:Mark:]];" 2431 + "::NFKD(NFC);" 2432 + "::Lower(Lower);" 2433 + "a > \\u03B1;" 2434 + "::NFKC(NFD);" 2435 + "::Upper (Lower);" 2436 + "::Lower ();" 2437 ; 2438 String testRulesBackward = 2439 "::[[:Greek:][:Mark:]];" 2440 + "::Lower (Upper);" 2441 + "::NFD(NFKC);" 2442 + "\\u03B1 > a;" 2443 + "::Lower(Lower);" 2444 + "::NFC(NFKD);" 2445 ; 2446 String source = "\u00E1"; // a-acute 2447 String target = "\u03AC"; // alpha-acute 2448 2449 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2450 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2451 2452 expect(t2, source, target); 2453 expect(t3, target, source); 2454 2455 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2456 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2457 } 2458 2459 /** 2460 * Test Escape and Unescape transliterators. 2461 */ 2462 @Test TestEscape()2463 public void TestEscape() { 2464 expect(Transliterator.getInstance("Hex-Any"), 2465 "\\x{40}\\U000000312Q", 2466 "@12Q"); 2467 expect(Transliterator.getInstance("Any-Hex/C"), 2468 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2469 "\\u0041\\U0010BEEF\\uFEED"); 2470 expect(Transliterator.getInstance("Any-Hex/Java"), 2471 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2472 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2473 expect(Transliterator.getInstance("Any-Hex/Perl"), 2474 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2475 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2476 } 2477 2478 /** 2479 * Make sure display names of variants look reasonable. 2480 */ 2481 @Test TestDisplayName()2482 public void TestDisplayName() { 2483 String DATA[] = { 2484 // ID, forward name, reverse name 2485 // Update the text as necessary -- the important thing is 2486 // not the text itself, but how various cases are handled. 2487 2488 // Basic test 2489 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 2490 2491 // Variants 2492 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 2493 2494 // Target-only IDs 2495 "NFC", "Any to NFC", "Any to NFD", 2496 }; 2497 2498 Locale US = Locale.US; 2499 2500 for (int i=0; i<DATA.length; i+=3) { 2501 String name = Transliterator.getDisplayName(DATA[i], US); 2502 if (!name.equals(DATA[i+1])) { 2503 errln("FAIL: " + DATA[i] + ".getDisplayName() => " + 2504 name + ", expected " + DATA[i+1]); 2505 } else { 2506 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name); 2507 } 2508 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE); 2509 name = Transliterator.getDisplayName(t.getID(), US); 2510 if (!name.equals(DATA[i+2])) { 2511 errln("FAIL: " + t.getID() + ".getDisplayName() => " + 2512 name + ", expected " + DATA[i+2]); 2513 } else { 2514 logln("Ok: " + t.getID() + ".getDisplayName() => " + name); 2515 } 2516 2517 // Cover getDisplayName(String) 2518 ULocale save = ULocale.getDefault(); 2519 ULocale.setDefault(ULocale.US); 2520 String name2 = Transliterator.getDisplayName(t.getID()); 2521 if (!name.equals(name2)) 2522 errln("FAIL: getDisplayName with default locale failed"); 2523 ULocale.setDefault(save); 2524 } 2525 } 2526 2527 /** 2528 * Test anchor masking 2529 */ 2530 @Test TestAnchorMasking()2531 public void TestAnchorMasking() { 2532 String rule = "^a > Q; a > q;"; 2533 try { 2534 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2535 if(t==null){ 2536 errln("FAIL: Did not get the expected exception"); 2537 } 2538 } catch (IllegalArgumentException e) { 2539 errln("FAIL: " + rule + " => " + e); 2540 } 2541 } 2542 2543 /** 2544 * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java 2545 * during ICU4J modularization to remove dependency of tests on Transliterator. 2546 */ 2547 @Test TestScriptAllCodepoints()2548 public void TestScriptAllCodepoints(){ 2549 int code; 2550 HashSet scriptIdsChecked = new HashSet(); 2551 HashSet scriptAbbrsChecked = new HashSet(); 2552 for( int i =0; i <= 0x10ffff; i++){ 2553 code = UScript.getScript(i); 2554 if(code==UScript.INVALID_CODE){ 2555 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 2556 } 2557 String id =UScript.getName(code); 2558 String abbr = UScript.getShortName(code); 2559 if (!scriptIdsChecked.contains(id)) { 2560 scriptIdsChecked.add(id); 2561 String newId ="[:"+id+":];NFD"; 2562 try{ 2563 Transliterator t = Transliterator.getInstance(newId); 2564 if(t==null){ 2565 errln("Failed to create transliterator for "+hex(i)+ 2566 " script code: " +id); 2567 } 2568 }catch(Exception e){ 2569 errln("Failed to create transliterator for "+hex(i) 2570 +" script code: " +id 2571 + " Exception: "+e.getMessage()); 2572 } 2573 } 2574 if (!scriptAbbrsChecked.contains(abbr)) { 2575 scriptAbbrsChecked.add(abbr); 2576 String newAbbrId ="[:"+abbr+":];NFD"; 2577 try{ 2578 Transliterator t = Transliterator.getInstance(newAbbrId); 2579 if(t==null){ 2580 errln("Failed to create transliterator for "+hex(i)+ 2581 " script code: " +abbr); 2582 } 2583 }catch(Exception e){ 2584 errln("Failed to create transliterator for "+hex(i) 2585 +" script code: " +abbr 2586 + " Exception: "+e.getMessage()); 2587 } 2588 } 2589 } 2590 } 2591 2592 2593 static final String[][] registerRules = { 2594 {"Any-Dev1", "x > X; y > Y;"}, 2595 {"Any-Dev2", "XY > Z"}, 2596 {"Greek-Latin/FAKE", 2597 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+ 2598 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+ 2599 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+ 2600 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;" 2601 }, 2602 }; 2603 2604 static final String DESERET_DEE = UTF16.valueOf(0x10414); 2605 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2606 2607 static final String[][] testCases = { 2608 2609 // NORMALIZATION 2610 // should add more test cases 2611 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2612 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2613 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2614 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2615 2616 // mp -> b BUG 2617 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2618 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2619 2620 // check for devanagari bug 2621 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2622 2623 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2624 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2625 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2626 //TODO: enable this test once Titlecase works right 2627 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2628 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2629 2630 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2631 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2632 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2633 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2634 2635 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2636 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2637 2638 // FORMS OF S 2639 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2640 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2641 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2642 {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2643 2644 // Tatiana bug 2645 // Upper: TAT\u02B9\u00C2NA 2646 // Lower: tat\u02B9\u00E2na 2647 // Title: Tat\u02B9\u00E2na 2648 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2649 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2650 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2651 }; 2652 2653 @Test TestSpecialCases()2654 public void TestSpecialCases() { 2655 2656 for (int i = 0; i < registerRules.length; ++i) { 2657 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2658 registerRules[i][1], Transliterator.FORWARD); 2659 DummyFactory.add(registerRules[i][0], t); 2660 } 2661 for (int i = 0; i < testCases.length; ++i) { 2662 String name = testCases[i][0]; 2663 Transliterator t = Transliterator.getInstance(name); 2664 String id = t.getID(); 2665 String source = testCases[i][1]; 2666 String target = null; 2667 2668 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2669 2670 if (testCases[i].length > 2) target = testCases[i][2]; 2671 else if (id.equalsIgnoreCase("NFD")) target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFD); 2672 else if (id.equalsIgnoreCase("NFC")) target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFC); 2673 else if (id.equalsIgnoreCase("NFKD")) target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFKD); 2674 else if (id.equalsIgnoreCase("NFKC")) target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFKC); 2675 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2676 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2677 2678 expect(t, source, target); 2679 } 2680 for (int i = 0; i < registerRules.length; ++i) { 2681 Transliterator.unregister(registerRules[i][0]); 2682 } 2683 } 2684 2685 // seems like there should be an easier way to just register an instance of a transliterator 2686 2687 static class DummyFactory implements Transliterator.Factory { 2688 static DummyFactory singleton = new DummyFactory(); 2689 static HashMap m = new HashMap(); 2690 2691 // Since Transliterators are immutable, we don't have to clone on set & get add(String ID, Transliterator t)2692 static void add(String ID, Transliterator t) { 2693 m.put(ID, t); 2694 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2695 Transliterator.registerFactory(ID, singleton); 2696 } 2697 @Override getInstance(String ID)2698 public Transliterator getInstance(String ID) { 2699 return (Transliterator) m.get(ID); 2700 } 2701 } 2702 2703 @Test TestCasing()2704 public void TestCasing() { 2705 Transliterator toLower = Transliterator.getInstance("lower"); 2706 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2707 Transliterator toUpper = Transliterator.getInstance("upper"); 2708 Transliterator toTitle = Transliterator.getInstance("title"); 2709 for (int i = 0; i < 0x600; ++i) { 2710 String s = UTF16.valueOf(i); 2711 2712 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2713 assertEquals("Lowercase", lower, toLower.transform(s)); 2714 2715 String casefold = UCharacter.foldCase(s, true); 2716 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2717 2718 if (i != 0x0345) { 2719 // ICU 60 changes the default titlecasing index adjustment. 2720 // For word breaks it is mostly the same as before, 2721 // but it is different for the iota subscript (the only cased combining mark). 2722 // This should be ok because the iota subscript is not supposed to appear 2723 // at the start of a word. 2724 // The title Transliterator is far below feature parity with the 2725 // UCharacter and CaseMap titlecasing functions. 2726 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2727 assertEquals("Title", title, toTitle.transform(s)); 2728 } 2729 2730 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2731 assertEquals("Upper", upper, toUpper.transform(s)); 2732 } 2733 } 2734 2735 @Test TestSurrogateCasing()2736 public void TestSurrogateCasing () { 2737 // check that casing handles surrogates 2738 // titlecase is currently defective 2739 int dee = UTF16.charAt(DESERET_dee,0); 2740 int DEE = UCharacter.toTitleCase(dee); 2741 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2742 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2743 } 2744 2745 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) { 2746 errln("Fails uppercase of surrogates"); 2747 } 2748 2749 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2750 errln("Fails lowercase of surrogates"); 2751 } 2752 } 2753 2754 2755 @Test TestFunction()2756 public void TestFunction() { 2757 // Careful with spacing and ';' here: Phrase this exactly 2758 // as toRules() is going to return it. If toRules() changes 2759 // with regard to spacing or ';', then adjust this string. 2760 String rule = 2761 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2762 2763 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2764 if (t == null) { 2765 errln("FAIL: createFromRules failed"); 2766 return; 2767 } 2768 2769 String r = t.toRules(true); 2770 if (r.equals(rule)) { 2771 logln("OK: toRules() => " + r); 2772 } else { 2773 errln("FAIL: toRules() => " + r + 2774 ", expected " + rule); 2775 } 2776 2777 expect(t, "The Quick Brown Fox", 2778 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2779 rule = 2780 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2781 2782 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2783 if (t == null) { 2784 errln("FAIL: createFromRules failed"); 2785 return; 2786 } 2787 2788 r = t.toRules(true); 2789 if (r.equals(rule)) { 2790 logln("OK: toRules() => " + r); 2791 } else { 2792 errln("FAIL: toRules() => " + r + 2793 ", expected " + rule); 2794 } 2795 2796 expect(t, "\u0301", 2797 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2798 } 2799 2800 @Test TestInvalidBackRef()2801 public void TestInvalidBackRef() { 2802 String rule = ". > $1;"; 2803 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2804 try { 2805 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2806 if (t != null) { 2807 errln("FAIL: createFromRules should have returned NULL"); 2808 } 2809 errln("FAIL: Ok: . > $1; => no error"); 2810 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2811 if (t2 != null) { 2812 errln("FAIL: createFromRules should have returned NULL"); 2813 } 2814 errln("FAIL: Ok: . > $1; => no error"); 2815 } catch (IllegalArgumentException e) { 2816 logln("Ok: . > $1; => " + e.getMessage()); 2817 } 2818 } 2819 2820 @Test TestMulticharStringSet()2821 public void TestMulticharStringSet() { 2822 // Basic testing 2823 String rule = 2824 " [{aa}] > x;" + 2825 " a > y;" + 2826 " [b{bc}] > z;" + 2827 "[{gd}] { e > q;" + 2828 " e } [{fg}] > r;" ; 2829 2830 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2831 if (t == null) { 2832 errln("FAIL: createFromRules failed"); 2833 return; 2834 } 2835 2836 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2837 "y x yz z d gd de gdq gdqfg ddrfg"); 2838 2839 // Overlapped string test. Make sure that when multiple 2840 // strings can match that the longest one is matched. 2841 rule = 2842 " [a {ab} {abc}] > x;" + 2843 " b > y;" + 2844 " c > z;" + 2845 " q [t {st} {rst}] { e > p;" ; 2846 2847 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2848 if (t == null) { 2849 errln("FAIL: createFromRules failed"); 2850 return; 2851 } 2852 2853 expect(t, "a ab abc qte qste qrste", 2854 "x x x qtp qstp qrstp"); 2855 } 2856 2857 /** 2858 * Test that user-registered transliterators can be used under function 2859 * syntax. 2860 */ 2861 @Test TestUserFunction()2862 public void TestUserFunction() { 2863 Transliterator t; 2864 2865 // There's no need to register inverses if we don't use them 2866 TestUserFunctionFactory.add("Any-gif", 2867 Transliterator.createFromRules("gif", 2868 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2869 Transliterator.FORWARD)); 2870 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2871 2872 TestUserFunctionFactory.add("Any-RemoveCurly", 2873 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2874 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2875 2876 logln("Trying &hex"); 2877 t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD); 2878 logln("Registering"); 2879 TestUserFunctionFactory.add("Any-hex2", t); 2880 t = Transliterator.getInstance("Any-hex2"); 2881 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2882 2883 logln("Trying &gif"); 2884 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2885 logln("Registering"); 2886 TestUserFunctionFactory.add("Any-gif2", t); 2887 t = Transliterator.getInstance("Any-gif2"); 2888 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2889 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2890 2891 // Test that filters are allowed after & 2892 t = Transliterator.createFromRules("test", 2893 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2894 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2895 2896 // Unregister our test stuff 2897 TestUserFunctionFactory.unregister(); 2898 } 2899 2900 static class TestUserFunctionFactory implements Transliterator.Factory { 2901 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 2902 static HashMap m = new HashMap(); 2903 add(String ID, Transliterator t)2904 static void add(String ID, Transliterator t) { 2905 m.put(new CaseInsensitiveString(ID), t); 2906 Transliterator.registerFactory(ID, singleton); 2907 } 2908 2909 @Override getInstance(String ID)2910 public Transliterator getInstance(String ID) { 2911 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 2912 } 2913 unregister()2914 static void unregister() { 2915 Iterator ids = m.keySet().iterator(); 2916 while (ids.hasNext()) { 2917 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 2918 Transliterator.unregister(id.getString()); 2919 ids.remove(); // removes pair from m 2920 } 2921 } 2922 } 2923 2924 /** 2925 * Test the Any-X transliterators. 2926 */ 2927 @Test TestAnyX()2928 public void TestAnyX() { 2929 Transliterator anyLatin = 2930 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 2931 2932 expect(anyLatin, 2933 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 2934 "greek:abkABK hiragana:abuku cyrillic:abc"); 2935 } 2936 2937 /** 2938 * Test Any-X transliterators with sample letters from all scripts. 2939 */ 2940 @Test TestAny()2941 public void TestAny() { 2942 UnicodeSet alphabetic = new UnicodeSet("[:alphabetic:]").freeze(); 2943 StringBuffer testString = new StringBuffer(); 2944 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 2945 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 2946 int count = 5; 2947 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 2948 testString.append(it.getString()); 2949 if (--count < 0) break; 2950 } 2951 } 2952 logln("Sample set for Any-Latin: " + testString); 2953 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 2954 String result = anyLatin.transliterate(testString.toString()); 2955 logln("Sample result for Any-Latin: " + result); 2956 } 2957 2958 2959 /** 2960 * Test the source and target set API. These are only implemented 2961 * for RBT and CompoundTransliterator at this time. 2962 */ 2963 @Test TestSourceTargetSet()2964 public void TestSourceTargetSet() { 2965 // Rules 2966 String r = 2967 "a > b; " + 2968 "r [x{lu}] > q;"; 2969 2970 // Expected source 2971 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]"); 2972 2973 // Expected target 2974 UnicodeSet expTrg = new UnicodeSet("[bq]"); 2975 2976 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 2977 UnicodeSet src = t.getSourceSet(); 2978 UnicodeSet trg = t.getTargetSet(); 2979 2980 if (src.equals(expSrc) && trg.equals(expTrg)) { 2981 logln("Ok: " + r + " => source = " + src.toPattern(true) + 2982 ", target = " + trg.toPattern(true)); 2983 } else { 2984 errln("FAIL: " + r + " => source = " + src.toPattern(true) + 2985 ", expected " + expSrc.toPattern(true) + 2986 "; target = " + trg.toPattern(true) + 2987 ", expected " + expTrg.toPattern(true)); 2988 } 2989 } 2990 2991 @Test TestSourceTargetSetFilter()2992 public void TestSourceTargetSetFilter() { 2993 String[][] tests = { 2994 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE 2995 {"[] Latin-Greek", null, "[\']"}, 2996 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"}, 2997 {"[] Any-Latin"}, 2998 {"[] casefold"}, 2999 {"[] NFKD;"}, 3000 {"[] NFKC;"}, 3001 {"[] hex"}, 3002 {"[] lower"}, 3003 {"[] null"}, 3004 {"[] remove"}, 3005 {"[] title"}, 3006 {"[] upper"}, 3007 }; 3008 UnicodeSet expectedSource = UnicodeSet.EMPTY; 3009 for (String[] testPair : tests) { 3010 String test = testPair[0]; 3011 Transliterator t0; 3012 try { 3013 t0 = Transliterator.getInstance(test); 3014 } catch (Exception e) { 3015 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD); 3016 } 3017 Transliterator t1; 3018 try { 3019 t1 = t0.getInverse(); 3020 } catch (Exception e) { 3021 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE); 3022 } 3023 int targetIndex = 0; 3024 for (Transliterator t : new Transliterator[]{t0, t1}) { 3025 boolean ok; 3026 UnicodeSet source = t.getSourceSet(); 3027 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t"; 3028 targetIndex++; 3029 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource 3030 : testPair[targetIndex] == null ? expectedSource 3031 : testPair[targetIndex].length() == 0 ? expectedSource 3032 : new UnicodeSet(testPair[targetIndex]); 3033 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source); 3034 if (!ok) { // for debugging 3035 source = t.getSourceSet(); 3036 } 3037 UnicodeSet target = t.getTargetSet(); 3038 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target); 3039 if (!ok) { // for debugging 3040 target = t.getTargetSet(); 3041 } 3042 } 3043 } 3044 } 3045 isAtomic(String s, String t, Transliterator trans)3046 static boolean isAtomic(String s, String t, Transliterator trans) { 3047 for (int i = 1; i < s.length(); ++i) { 3048 if (!CharSequences.onCharacterBoundary(s, i)) { 3049 continue; 3050 } 3051 String q = trans.transform(s.substring(0,i)); 3052 if (t.startsWith(q)) { 3053 String r = trans.transform(s.substring(i)); 3054 if (t.length() == q.length() + r.length() && t.endsWith(r)) { 3055 return false; 3056 } 3057 } 3058 } 3059 return true; 3060 // // make sure that every part is different 3061 // if (s.codePointCount(0, s.length()) > 1) { 3062 // int[] codePoints = It.codePoints(s); 3063 // for (int k = 0; k < codePoints.length; ++k) { 3064 // int pos = indexOf(t,codePoints[k]); 3065 // if (pos >= 0) { 3066 // int x; 3067 // } 3068 // } 3069 // if (s.contains("\u00C0")) { 3070 // logln("\u00C0"); 3071 // } 3072 // } 3073 } 3074 addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3075 static void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) { 3076 expectedSource.addAll(s); 3077 if (t.length() > 0) { 3078 expectedTarget.addAll(t); 3079 } 3080 } 3081 3082 // private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) { 3083 // disorderedMarks.add(s); 3084 // for (int j = 1; j < s.length(); ++j) { 3085 // if (CharSequences.onCharacterBoundary(s, j)) { 3086 // String shorter = s.substring(0,j); 3087 // disorderedMarks.add(shorter); 3088 // disorderedMarks.add(nfc.normalize(shorter) + s.substring(j)); 3089 // } 3090 // } 3091 // } 3092 3093 @Test TestCharUtils()3094 public void TestCharUtils() { 3095 String[][] startTests = { 3096 {"1", "a", "ab"}, 3097 {"0", "a", "xb"}, 3098 {"0", "\uD800", "\uD800\uDC01"}, 3099 {"1", "\uD800a", "\uD800b"}, 3100 {"0", "\uD800\uDC00", "\uD800\uDC01"}, 3101 }; 3102 for (String[] row : startTests) { 3103 int actual = findSharedStartLength(row[1], row[2]); 3104 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")", 3105 Integer.parseInt(row[0]), 3106 actual); 3107 } 3108 String[][] endTests = { 3109 {"0", "\uDC00", "\uD801\uDC00"}, 3110 {"1", "a", "ba"}, 3111 {"0", "a", "bx"}, 3112 {"1", "a\uDC00", "b\uDC00"}, 3113 {"0", "\uD800\uDC00", "\uD801\uDC00"}, 3114 }; 3115 for (String[] row : endTests) { 3116 int actual = findSharedEndLength(row[1], row[2]); 3117 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")", 3118 Integer.parseInt(row[0]), 3119 actual); 3120 } 3121 } 3122 3123 /** 3124 * @param s 3125 * @param t 3126 * @return 3127 */ 3128 // TODO make generally available findSharedStartLength(CharSequence s, CharSequence t)3129 private static int findSharedStartLength(CharSequence s, CharSequence t) { 3130 int min = Math.min(s.length(), t.length()); 3131 int i; 3132 char sch, tch; 3133 for (i = 0; i < min; ++i) { 3134 sch = s.charAt(i); 3135 tch = t.charAt(i); 3136 if (sch != tch) { 3137 break; 3138 } 3139 } 3140 return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1; 3141 } 3142 3143 /** 3144 * @param s 3145 * @param t 3146 * @return 3147 */ 3148 // TODO make generally available findSharedEndLength(CharSequence s, CharSequence t)3149 private static int findSharedEndLength(CharSequence s, CharSequence t) { 3150 int slength = s.length(); 3151 int tlength = t.length(); 3152 int min = Math.min(slength, tlength); 3153 int i; 3154 char sch, tch; 3155 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho' 3156 for (i = 0; i < min; ++i) { 3157 sch = s.charAt(slength - i - 1); 3158 tch = t.charAt(tlength - i - 1); 3159 if (sch != tch) { 3160 break; 3161 } 3162 } 3163 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1; 3164 } 3165 3166 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK} 3167 assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3168 static void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) { 3169 boolean haveError = false; 3170 if (!actual.containsAll(empirical)) { 3171 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual); 3172 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing)); 3173 haveError = true; 3174 } 3175 if (!empirical.containsAll(actual)) { 3176 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical); 3177 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra)); 3178 haveError = true; 3179 } 3180 if (!haveError) { 3181 logln("OK " + message + ' ' + toPattern(empirical)); 3182 } 3183 } 3184 toPattern(UnicodeSet missing)3185 private static String toPattern(UnicodeSet missing) { 3186 String result = missing.toPattern(false); 3187 if (result.length() < 200) { 3188 return result; 3189 } 3190 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026"; 3191 } 3192 3193 3194 /** 3195 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3196 */ 3197 @Test TestPatternWhitespace()3198 public void TestPatternWhitespace() { 3199 // Rules 3200 String r = "a > \u200E b;"; 3201 3202 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3203 3204 expect(t, "a", "b"); 3205 3206 // UnicodeSet 3207 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3208 3209 if (set.contains(0x200E)) { 3210 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3211 } 3212 } 3213 3214 @Test TestAlternateSyntax()3215 public void TestAlternateSyntax() { 3216 // U+2206 == & 3217 // U+2190 == < 3218 // U+2192 == > 3219 // U+2194 == <> 3220 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3221 "abc", 3222 "xbz"); 3223 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3224 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3225 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3226 } 3227 3228 @Test TestPositionAPI()3229 public void TestPositionAPI() { 3230 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3231 Transliterator.Position b = new Transliterator.Position(a); 3232 Transliterator.Position c = new Transliterator.Position(); 3233 c.set(a); 3234 // Call the toString() API: 3235 if (a.equals(b) && a.equals(c)) { 3236 logln("Ok: " + a + " == " + b + " == " + c); 3237 } else { 3238 errln("FAIL: " + a + " != " + b + " != " + c); 3239 } 3240 } 3241 3242 //====================================================================== 3243 // New tests for the ::BEGIN/::END syntax 3244 //====================================================================== 3245 3246 private static final String[] BEGIN_END_RULES = new String[] { 3247 // [0] 3248 "abc > xy;" 3249 + "aba > z;", 3250 3251 // [1] 3252 /* 3253 "::BEGIN;" 3254 + "abc > xy;" 3255 + "::END;" 3256 + "::BEGIN;" 3257 + "aba > z;" 3258 + "::END;", 3259 */ 3260 "", // test case commented out below, this is here to keep from messing up the indexes 3261 3262 // [2] 3263 /* 3264 "abc > xy;" 3265 + "::BEGIN;" 3266 + "aba > z;" 3267 + "::END;", 3268 */ 3269 "", // test case commented out below, this is here to keep from messing up the indexes 3270 3271 // [3] 3272 /* 3273 "::BEGIN;" 3274 + "abc > xy;" 3275 + "::END;" 3276 + "aba > z;", 3277 */ 3278 "", // test case commented out below, this is here to keep from messing up the indexes 3279 3280 // [4] 3281 "abc > xy;" 3282 + "::Null;" 3283 + "aba > z;", 3284 3285 // [5] 3286 "::Upper;" 3287 + "ABC > xy;" 3288 + "AB > x;" 3289 + "C > z;" 3290 + "::Upper;" 3291 + "XYZ > p;" 3292 + "XY > q;" 3293 + "Z > r;" 3294 + "::Upper;", 3295 3296 // [6] 3297 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3298 + "$delim = [\\-$ws];" 3299 + "$ws $delim* > ' ';" 3300 + "'-' $delim* > '-';", 3301 3302 // [7] 3303 "::Null;" 3304 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3305 + "$delim = [\\-$ws];" 3306 + "$ws $delim* > ' ';" 3307 + "'-' $delim* > '-';", 3308 3309 // [8] 3310 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3311 + "$delim = [\\-$ws];" 3312 + "$ws $delim* > ' ';" 3313 + "'-' $delim* > '-';" 3314 + "::Null;", 3315 3316 // [9] 3317 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3318 + "$delim = [\\-$ws];" 3319 + "::Null;" 3320 + "$ws $delim* > ' ';" 3321 + "'-' $delim* > '-';", 3322 3323 // [10] 3324 /* 3325 "::BEGIN;" 3326 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3327 + "$delim = [\\-$ws];" 3328 + "::END;" 3329 + "$ws $delim* > ' ';" 3330 + "'-' $delim* > '-';", 3331 */ 3332 "", // test case commented out below, this is here to keep from messing up the indexes 3333 3334 // [11] 3335 /* 3336 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3337 + "$delim = [\\-$ws];" 3338 + "::BEGIN;" 3339 + "$ws $delim* > ' ';" 3340 + "'-' $delim* > '-';" 3341 + "::END;", 3342 */ 3343 "", // test case commented out below, this is here to keep from messing up the indexes 3344 3345 // [12] 3346 /* 3347 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3348 + "$delim = [\\-$ws];" 3349 + "$ab = [ab];" 3350 + "::BEGIN;" 3351 + "$ws $delim* > ' ';" 3352 + "'-' $delim* > '-';" 3353 + "::END;" 3354 + "::BEGIN;" 3355 + "$ab { ' ' } $ab > '-';" 3356 + "c { ' ' > ;" 3357 + "::END;" 3358 + "::BEGIN;" 3359 + "'a-a' > a\\%|a;" 3360 + "::END;", 3361 */ 3362 "", // test case commented out below, this is here to keep from messing up the indexes 3363 3364 // [13] 3365 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3366 + "$delim = [\\-$ws];" 3367 + "$ab = [ab];" 3368 + "::Null;" 3369 + "$ws $delim* > ' ';" 3370 + "'-' $delim* > '-';" 3371 + "::Null;" 3372 + "$ab { ' ' } $ab > '-';" 3373 + "c { ' ' > ;" 3374 + "::Null;" 3375 + "'a-a' > a\\%|a;", 3376 3377 // [14] 3378 /* 3379 "::[abc];" 3380 + "::BEGIN;" 3381 + "abc > xy;" 3382 + "::END;" 3383 + "::BEGIN;" 3384 + "aba > yz;" 3385 + "::END;" 3386 + "::Upper;", 3387 */ 3388 "", // test case commented out below, this is here to keep from messing up the indexes 3389 3390 // [15] 3391 "::[abc];" 3392 + "abc > xy;" 3393 + "::Null;" 3394 + "aba > yz;" 3395 + "::Upper;", 3396 3397 // [16] 3398 /* 3399 "::[abc];" 3400 + "::BEGIN;" 3401 + "abc <> xy;" 3402 + "::END;" 3403 + "::BEGIN;" 3404 + "aba <> yz;" 3405 + "::END;" 3406 + "::Upper(Lower);" 3407 + "::([XYZ]);", 3408 */ 3409 "", // test case commented out below, this is here to keep from messing up the indexes 3410 3411 // [17] 3412 "::[abc];" 3413 + "abc <> xy;" 3414 + "::Null;" 3415 + "aba <> yz;" 3416 + "::Upper(Lower);" 3417 + "::([XYZ]);" 3418 }; 3419 3420 /* 3421 (This entire test is commented out below and will need some heavy revision when we re-add 3422 the ::BEGIN/::END stuff) 3423 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3424 // [7] 3425 "::BEGIN;" 3426 + "abc > xy;" 3427 + "::BEGIN;" 3428 + "aba > z;" 3429 + "::END;" 3430 + "::END;", 3431 3432 // [8] 3433 "abc > xy;" 3434 + " aba > z;" 3435 + "::END;", 3436 3437 // [9] 3438 "::BEGIN;" 3439 + "::Upper;" 3440 + "::END;" 3441 }; 3442 */ 3443 3444 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3445 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3446 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 3447 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3448 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3449 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3450 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3451 3452 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3453 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3454 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3455 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3456 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3457 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3458 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3459 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3460 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3461 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3462 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3463 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3464 3465 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3466 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3467 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3468 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3469 }; 3470 3471 @Test TestBeginEnd()3472 public void TestBeginEnd() { 3473 // run through the list of test cases above 3474 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3475 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3476 } 3477 3478 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3479 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3480 Transliterator.REVERSE); 3481 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3482 3483 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3484 // that all of them cause errors 3485 /* 3486 (commented out until we have the real ::BEGIN/::END stuff in place 3487 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3488 try { 3489 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3490 Transliterator.FORWARD); 3491 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3492 } 3493 catch (IllegalArgumentException e) { 3494 // this is supposed to happen; do nothing here 3495 } 3496 } 3497 */ 3498 } 3499 3500 @Test TestBeginEndToRules()3501 public void TestBeginEndToRules() { 3502 // run through the same list of test cases we used above, but this time, instead of just 3503 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3504 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3505 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3506 // to (i.e., does the same thing as) the original rule set 3507 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3508 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3509 Transliterator.FORWARD); 3510 String rules = t.toRules(false); 3511 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3512 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3513 } 3514 3515 // do the same thing for the reversible test case 3516 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3517 Transliterator.REVERSE); 3518 String rules = reversed.toRules(false); 3519 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3520 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3521 } 3522 3523 @Test TestRegisterAlias()3524 public void TestRegisterAlias() { 3525 String longID = "Lower;[aeiou]Upper"; 3526 String shortID = "Any-CapVowels"; 3527 String reallyShortID = "CapVowels"; 3528 3529 Transliterator.registerAlias(shortID, longID); 3530 3531 Transliterator t1 = Transliterator.getInstance(longID); 3532 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3533 3534 if (!t1.getID().equals(longID)) 3535 errln("Transliterator instantiated with long ID doesn't have long ID"); 3536 if (!t2.getID().equals(reallyShortID)) 3537 errln("Transliterator instantiated with short ID doesn't have short ID"); 3538 3539 if (!t1.toRules(true).equals(t2.toRules(true))) 3540 errln("Alias transliterators aren't the same"); 3541 3542 Transliterator.unregister(shortID); 3543 3544 try { 3545 t1 = Transliterator.getInstance(shortID); 3546 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3547 } 3548 catch (IllegalArgumentException e) { 3549 } 3550 3551 // try the same thing again, but this time with something other than 3552 // an instance of CompoundTransliterator 3553 String realID = "Latin-Greek"; 3554 String fakeID = "Latin-dlgkjdflkjdl"; 3555 Transliterator.registerAlias(fakeID, realID); 3556 3557 t1 = Transliterator.getInstance(realID); 3558 t2 = Transliterator.getInstance(fakeID); 3559 3560 if (!t1.toRules(true).equals(t2.toRules(true))) 3561 errln("Alias transliterators aren't the same"); 3562 3563 Transliterator.unregister(fakeID); 3564 } 3565 3566 /** 3567 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 3568 */ 3569 @Test TestHalfwidthFullwidth()3570 public void TestHalfwidthFullwidth() { 3571 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3572 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3573 3574 // Array of 3n items 3575 // Each item is 3576 // "hf"|"fh"|"both", 3577 // <Halfwidth>, 3578 // <Fullwidth> 3579 String[] DATA = { 3580 "both", 3581 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3582 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3583 }; 3584 3585 for (int i=0; i<DATA.length; i+=3) { 3586 switch (DATA[i].charAt(0)) { 3587 case 'h': // Halfwidth-Fullwidth only 3588 expect(hf, DATA[i+1], DATA[i+2]); 3589 break; 3590 case 'f': // Fullwidth-Halfwidth only 3591 expect(fh, DATA[i+2], DATA[i+1]); 3592 break; 3593 case 'b': // both directions 3594 expect(hf, DATA[i+1], DATA[i+2]); 3595 expect(fh, DATA[i+2], DATA[i+1]); 3596 break; 3597 } 3598 } 3599 3600 } 3601 3602 /** 3603 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3604 * TODO: confirm that the expected results are correct. 3605 * For now, test just confirms that C++ and Java give identical results. 3606 */ 3607 @Test TestThai()3608 public void TestThai() { 3609 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3610 String thaiText = 3611 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3612 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3613 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3614 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3615 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3616 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3617 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3618 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3619 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3620 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3621 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3622 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3623 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3624 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3625 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3626 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3627 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3628 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3629 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3630 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3631 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3632 "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3633 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3634 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3635 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3636 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3637 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3638 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3639 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3640 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3641 3642 String latinText = 3643 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3644 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3645 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3646 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3647 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3648 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3649 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3650 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" + 3651 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3652 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3653 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3654 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3655 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3656 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3657 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3658 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3659 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3660 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3661 3662 expect(tr, thaiText, latinText); 3663 } 3664 3665 3666 //====================================================================== 3667 // These tests are not mirrored (yet) in icu4c at 3668 // source/test/intltest/transtst.cpp 3669 //====================================================================== 3670 3671 /** 3672 * Improve code coverage. 3673 */ 3674 @Test TestCoverage()3675 public void TestCoverage() { 3676 // NullTransliterator 3677 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3678 expect(t, "a", "a"); 3679 3680 // Source, target set 3681 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3682 t.setFilter(new UnicodeSet("[A-Z]")); 3683 logln("source = " + t.getSourceSet()); 3684 logln("target = " + t.getTargetSet()); 3685 3686 t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD); 3687 logln("source = " + t.getSourceSet()); 3688 logln("target = " + t.getTargetSet()); 3689 } 3690 /* 3691 * Test case for threading problem in NormalizationTransliterator 3692 * reported by ticket#5160 3693 */ 3694 @Test TestT5160()3695 public void TestT5160() { 3696 final String[] testData = { 3697 "a", 3698 "b", 3699 "\u09BE", 3700 "A\u0301", 3701 }; 3702 final String[] expected = { 3703 "a", 3704 "b", 3705 "\u09BE", 3706 "\u00C1", 3707 }; 3708 Transliterator translit = Transliterator.getInstance("NFC"); 3709 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 3710 for (int i = 0; i < tasks.length; i++) { 3711 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 3712 } 3713 TestUtil.runUntilDone(tasks); 3714 3715 for (int i = 0; i < tasks.length; i++) { 3716 if (tasks[i].getErrorMessage() != null) { 3717 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 3718 break; 3719 } 3720 } 3721 } 3722 3723 static class NormTranslitTask implements Runnable { 3724 Transliterator translit; 3725 String testData; 3726 String expectedData; 3727 String errorMsg; 3728 NormTranslitTask(Transliterator translit, String testData, String expectedData)3729 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 3730 this.translit = translit; 3731 this.testData = testData; 3732 this.expectedData = expectedData; 3733 } 3734 3735 @Override run()3736 public void run() { 3737 errorMsg = null; 3738 StringBuffer inBuf = new StringBuffer(testData); 3739 StringBuffer expectedBuf = new StringBuffer(expectedData); 3740 3741 for(int i = 0; i < 1000; i++) { 3742 String in = inBuf.toString(); 3743 String out = translit.transliterate(in); 3744 String expected = expectedBuf.toString(); 3745 if (!out.equals(expected)) { 3746 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 3747 break; 3748 } 3749 inBuf.append(testData); 3750 expectedBuf.append(expectedData); 3751 } 3752 } 3753 getErrorMessage()3754 public String getErrorMessage() { 3755 return errorMsg; 3756 } 3757 } 3758 3759 //====================================================================== 3760 // Support methods 3761 //====================================================================== expect(String rules, String source, String expectedResult, Transliterator.Position pos)3762 static void expect(String rules, 3763 String source, 3764 String expectedResult, 3765 Transliterator.Position pos) { 3766 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 3767 expect(t, source, expectedResult, pos); 3768 } 3769 expect(String rules, String source, String expectedResult)3770 static void expect(String rules, String source, String expectedResult) { 3771 expect(rules, source, expectedResult, null); 3772 } 3773 expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3774 static void expect(Transliterator t, String source, String expectedResult, 3775 Transliterator reverseTransliterator) { 3776 expect(t, source, expectedResult); 3777 if (reverseTransliterator != null) { 3778 expect(reverseTransliterator, expectedResult, source); 3779 } 3780 } 3781 expect(Transliterator t, String source, String expectedResult)3782 static void expect(Transliterator t, String source, String expectedResult) { 3783 expect(t, source, expectedResult, (Transliterator.Position) null); 3784 } 3785 expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)3786 static void expect(Transliterator t, String source, String expectedResult, 3787 Transliterator.Position pos) { 3788 if (pos == null) { 3789 String result = t.transliterate(source); 3790 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 3791 } 3792 3793 Transliterator.Position index = null; 3794 if (pos == null) { 3795 index = new Transliterator.Position(0, source.length(), 0, source.length()); 3796 } else { 3797 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 3798 pos.start, pos.limit); 3799 } 3800 3801 ReplaceableString rsource = new ReplaceableString(source); 3802 3803 t.finishTransliteration(rsource, index); 3804 // Do it all at once -- below we do it incrementally 3805 3806 if (index.start != index.limit) { 3807 expectAux(t.getID() + ":UNFINISHED", source, 3808 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 3809 return; 3810 } 3811 String result = rsource.toString(); 3812 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 3813 3814 3815 if (pos == null) { 3816 index = new Transliterator.Position(); 3817 } else { 3818 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 3819 pos.start, pos.limit); 3820 } 3821 3822 // Test incremental transliteration -- this result 3823 // must be the same after we finalize (see below). 3824 List<String> v = new ArrayList<String>(); 3825 v.add(source); 3826 rsource.replace(0, rsource.length(), ""); 3827 if (pos != null) { 3828 rsource.replace(0, 0, source); 3829 v.add(UtilityExtensions.formatInput(rsource, index)); 3830 t.transliterate(rsource, index); 3831 v.add(UtilityExtensions.formatInput(rsource, index)); 3832 } else { 3833 for (int i=0; i<source.length(); ++i) { 3834 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 3835 //log.append(source.charAt(i)).append(" -> ")); 3836 t.transliterate(rsource, index, source.charAt(i)); 3837 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 3838 v.add(UtilityExtensions.formatInput(rsource, index) + 3839 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 3840 } 3841 } 3842 3843 // As a final step in keyboard transliteration, we must call 3844 // transliterate to finish off any pending partial matches that 3845 // were waiting for more input. 3846 t.finishTransliteration(rsource, index); 3847 result = rsource.toString(); 3848 //log.append(" => ").append(rsource.toString()); 3849 v.add(result); 3850 3851 String[] results = new String[v.size()]; 3852 v.toArray(results); 3853 expectAux(t.getID() + ":Incremental", results, 3854 result.equals(expectedResult), 3855 expectedResult); 3856 } 3857 3858 static boolean expectAux(String tag, String source, 3859 String result, String expectedResult) { 3860 return expectAux(tag, new String[] {source, result}, 3861 result.equals(expectedResult), 3862 expectedResult); 3863 } 3864 3865 static boolean expectAux(String tag, String source, 3866 String result, boolean pass, 3867 String expectedResult) { 3868 return expectAux(tag, new String[] {source, result}, 3869 pass, 3870 expectedResult); 3871 } 3872 3873 static boolean expectAux(String tag, String source, 3874 boolean pass, 3875 String expectedResult) { 3876 return expectAux(tag, new String[] {source}, 3877 pass, 3878 expectedResult); 3879 } 3880 3881 static boolean expectAux(String tag, String[] results, boolean pass, 3882 String expectedResult) { 3883 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 3884 3885 for (int i = 0; i < results.length; ++i) { 3886 String label; 3887 if (i == 0) { 3888 label = "source: "; 3889 } else if (i == results.length - 1) { 3890 label = "result: "; 3891 } else { 3892 if (!isVerbose() && pass) continue; 3893 label = "interm" + i + ": "; 3894 } 3895 msg(" " + label + results[i], pass ? LOG : ERR, false, true); 3896 } 3897 3898 if (!pass) { 3899 msg( " expected: " + expectedResult, ERR, false, true); 3900 } 3901 3902 return pass; 3903 } 3904 3905 static private void assertTransform(String message, String expected, StringTransform t, String source) { 3906 assertEquals(message + " " + source, expected, t.transform(source)); 3907 } 3908 3909 3910 static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 3911 assertEquals(message + " " +source, expected, t.transform(source)); 3912 assertEquals(message + " " +source2, expected, t.transform(source2)); 3913 assertEquals(message + " " + expected, source, back.transform(expected)); 3914 } 3915 3916 /* 3917 * Tests the method public Enumeration<String> getAvailableTargets(String source) 3918 */ 3919 @Test 3920 public void TestGetAvailableTargets() { 3921 try { 3922 // Tests when if (targets == null) is true 3923 Transliterator.getAvailableTargets(""); 3924 } catch (Exception e) { 3925 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 3926 } 3927 } 3928 3929 /* 3930 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 3931 */ 3932 @Test 3933 public void TestGetAvailableVariants() { 3934 try { 3935 // Tests when if (targets == null) is true 3936 Transliterator.getAvailableVariants("", ""); 3937 } catch (Exception e) { 3938 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 3939 } 3940 } 3941 3942 /* 3943 * Tests the mehtod String nextLine() in RuleBody 3944 */ 3945 @Test 3946 public void TestNextLine() { 3947 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 3948 try{ 3949 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 3950 } catch(Exception e){ 3951 errln("TransliteratorParser.nextLine() was not suppose to return an " + 3952 "exception for a rule of '\\'"); 3953 } 3954 } 3955 3956 /** 3957 * Tests equals and hashCode implementation of Transliterator.Position 3958 */ 3959 @Test 3960 public void TestPositionEquals() { 3961 Transliterator.Position position1 = new Transliterator.Position(1, 0, 0, 0); 3962 Transliterator.Position position2 = new Transliterator.Position(0, 0, 0, 0); 3963 assertNotEquals("2 different positions are not equal", position1, position2); 3964 assertNotEquals("2 different positions have different hash codes", position1.hashCode(), position2.hashCode()); 3965 Transliterator.Position position3 = new Transliterator.Position(1, 0, 0, 0); 3966 assertEquals("2 positions are equal", position1, position3); 3967 assertEquals("2 positions have the same hash codes", position1.hashCode(), position3.hashCode()); 3968 } 3969 } 3970