1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2012, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.translit; 10 11 import java.util.ArrayList; 12 import java.util.Enumeration; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Iterator; 16 import java.util.List; 17 import java.util.Locale; 18 19 import org.junit.Test; 20 import org.junit.runner.RunWith; 21 import org.junit.runners.JUnit4; 22 23 import com.ibm.icu.dev.test.TestFmwk; 24 import com.ibm.icu.dev.test.TestUtil; 25 import com.ibm.icu.dev.test.rbbi.RBBITstUtils; 26 import com.ibm.icu.impl.Utility; 27 import com.ibm.icu.impl.UtilityExtensions; 28 import com.ibm.icu.lang.CharSequences; 29 import com.ibm.icu.lang.UCharacter; 30 import com.ibm.icu.lang.UScript; 31 import com.ibm.icu.text.Replaceable; 32 import com.ibm.icu.text.ReplaceableString; 33 import com.ibm.icu.text.StringTransform; 34 import com.ibm.icu.text.Transliterator; 35 import com.ibm.icu.text.UTF16; 36 import com.ibm.icu.text.UnicodeFilter; 37 import com.ibm.icu.text.UnicodeSet; 38 import com.ibm.icu.text.UnicodeSetIterator; 39 import com.ibm.icu.util.CaseInsensitiveString; 40 import com.ibm.icu.util.ULocale; 41 42 /*********************************************************************** 43 44 HOW TO USE THIS TEST FILE 45 -or- 46 How I developed on two platforms 47 without losing (too much of) my mind 48 49 50 1. Add new tests by copying/pasting/changing existing tests. On Java, 51 any public void method named Test...() taking no parameters becomes 52 a test. On C++, you need to modify the header and add a line to 53 the runIndexedTest() dispatch method. 54 55 2. 
Make liberal use of the expect() method; it is your friend. 56 57 3. The tests in this file exactly match those in a sister file on the 58 other side. The two files are: 59 60 icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java 61 icu4c: source/test/intltest/transtst.cpp 62 63 ==> THIS IS THE IMPORTANT PART <== 64 65 When you add a test in this file, add it in transtst.cpp too. 66 Give it the same name and put it in the same relative place. This 67 makes maintenance a lot simpler for any poor soul who ends up 68 trying to synchronize the tests between icu4j and icu4c. 69 70 4. If you MUST enter a test that is NOT paralleled in the sister file, 71 then add it in the special non-mirrored section. These are 72 labeled 73 74 "icu4j ONLY" 75 76 or 77 78 "icu4c ONLY" 79 80 Make sure you document the reason the test is here and not there. 81 82 83 Thank you. 84 The Management 85 ***********************************************************************/ 86 87 /** 88 * @test 89 * @summary General test of Transliterator 90 */ 91 @RunWith(JUnit4.class) 92 public class TransliteratorTest extends TestFmwk { 93 @Test TestHangul()94 public void TestHangul() { 95 96 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 97 Transliterator hl = lh.getInverse(); 98 99 assertTransform("Transform", "\uCE20", lh, "ch"); 100 101 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 102 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 103 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 104 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 105 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 106 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 107 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 108 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja"); 109 assertTransform("Transform", "\uC548\uD558", lh, 
hl, "anha", "an-ha"); 110 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga"); 111 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 112 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 113 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 114 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 115 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 116 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 117 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 118 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 119 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 120 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 121 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 122 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 123 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 124 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 125 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 126 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 127 128 } 129 130 @Test TestChinese()131 public void TestChinese() { 132 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 133 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 134 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 135 } 136 137 @Test TestRegistry()138 public void TestRegistry() { 139 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 140 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 141 checkRegistry("foo1", "[:letter:] a > b;"); 142 for (Enumeration e = 
Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 143 String id = (String) e.nextElement(); 144 checkRegistry(id); 145 } 146 // Need to remove these test-specific transliterators in order not to interfere with other tests. 147 Transliterator.unregister("foo3"); 148 Transliterator.unregister("foo2"); 149 Transliterator.unregister("foo1"); 150 } 151 checkRegistry(String id, String rules)152 private void checkRegistry (String id, String rules) { 153 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 154 Transliterator.registerInstance(foo); 155 checkRegistry(id); 156 } 157 checkRegistry(String id)158 private void checkRegistry(String id) { 159 Transliterator fie = Transliterator.getInstance(id); 160 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 161 fie.setFilter(fae); 162 Transliterator foe = Transliterator.getInstance(id); 163 UnicodeFilter fee = foe.getFilter(); 164 if (fae.equals(fee)) { 165 errln("Changed what is in registry for " + id); 166 } 167 } 168 169 @Test TestInstantiationError()170 public void TestInstantiationError() { 171 try { 172 String ID = "<Not a valid Transliterator ID>"; 173 Transliterator t = Transliterator.getInstance(ID); 174 errln("FAIL: " + ID + " returned " + t); 175 } catch (IllegalArgumentException ex) { 176 logln("OK: Bogus ID handled properly"); 177 } 178 } 179 180 @Test TestSimpleRules()181 public void TestSimpleRules() { 182 /* Example: rules 1. ab>x|y 183 * 2. yc>z 184 * 185 * []|eabcd start - no match, copy e to translated buffer 186 * [e]|abcd match rule 1 - copy output & adjust cursor 187 * [ex|y]cd match rule 2 - copy output & adjust cursor 188 * [exz]|d no match, copy d to transliterated buffer 189 * [exzd]| done 190 */ 191 expect("ab>x|y;" + 192 "yc>z", 193 "eabcd", "exzd"); 194 195 /* Another set of rules: 196 * 1. ab>x|yzacw 197 * 2. za>q 198 * 3. qc>r 199 * 4. 
cw>n 200 * 201 * []|ab Rule 1 202 * [x|yzacw] No match 203 * [xy|zacw] Rule 2 204 * [xyq|cw] Rule 4 205 * [xyqn]| Done 206 */ 207 expect("ab>x|yzacw;" + 208 "za>q;" + 209 "qc>r;" + 210 "cw>n", 211 "ab", "xyqn"); 212 213 /* Test categories 214 */ 215 Transliterator t = Transliterator.createFromRules("<ID>", 216 "$dummy=\uE100;" + 217 "$vowel=[aeiouAEIOU];" + 218 "$lu=[:Lu:];" + 219 "$vowel } $lu > '!';" + 220 "$vowel > '&';" + 221 "'!' { $lu > '^';" + 222 "$lu > '*';" + 223 "a>ERROR", 224 Transliterator.FORWARD); 225 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 226 } 227 228 /** 229 * Test inline set syntax and set variable syntax. 230 */ 231 @Test TestInlineSet()232 public void TestInlineSet() { 233 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 234 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 235 236 expect("$digit = [0-9];" + 237 "$alpha = [a-zA-Z];" + 238 "$alphanumeric = [$digit $alpha];" + // *** 239 "$special = [^$alphanumeric];" + // *** 240 "$alphanumeric > '-';" + 241 "$special > '*';", 242 243 "thx-1138", "---*----"); 244 } 245 246 /** 247 * Create some inverses and confirm that they work. We have to be 248 * careful how we do this, since the inverses will not be true 249 * inverses -- we can't throw any random string at the composition 250 * of the transliterators and expect the identity function. F x 251 * F' != I. However, if we are careful about the input, we will 252 * get the expected results. 253 */ 254 @Test TestRuleBasedInverse()255 public void TestRuleBasedInverse() { 256 String RULES = 257 "abc>zyx;" + 258 "ab>yz;" + 259 "bc>zx;" + 260 "ca>xy;" + 261 "a>x;" + 262 "b>y;" + 263 "c>z;" + 264 265 "abc<zyx;" + 266 "ab<yz;" + 267 "bc<zx;" + 268 "ca<xy;" + 269 "a<x;" + 270 "b<y;" + 271 "c<z;" + 272 273 ""; 274 275 String[] DATA = { 276 // Careful here -- random strings will not work. If we keep 277 // the left side to the domain and the right side to the range 278 // we will be okay though (left, abc; right xyz). 
279 "a", "x", 280 "abcacab", "zyxxxyy", 281 "caccb", "xyzzy", 282 }; 283 284 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 285 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 286 for (int i=0; i<DATA.length; i+=2) { 287 expect(fwd, DATA[i], DATA[i+1]); 288 expect(rev, DATA[i+1], DATA[i]); 289 } 290 } 291 292 /** 293 * Basic test of keyboard. 294 */ 295 @Test TestKeyboard()296 public void TestKeyboard() { 297 Transliterator t = Transliterator.createFromRules("<ID>", 298 "psch>Y;" 299 +"ps>y;" 300 +"ch>x;" 301 +"a>A;", Transliterator.FORWARD); 302 String DATA[] = { 303 // insertion, buffer 304 "a", "A", 305 "p", "Ap", 306 "s", "Aps", 307 "c", "Apsc", 308 "a", "AycA", 309 "psch", "AycAY", 310 null, "AycAY", // null means finishKeyboardTransliteration 311 }; 312 313 keyboardAux(t, DATA); 314 } 315 316 /** 317 * Basic test of keyboard with cursor. 318 */ 319 @Test TestKeyboard2()320 public void TestKeyboard2() { 321 Transliterator t = Transliterator.createFromRules("<ID>", 322 "ych>Y;" 323 +"ps>|y;" 324 +"ch>x;" 325 +"a>A;", Transliterator.FORWARD); 326 String DATA[] = { 327 // insertion, buffer 328 "a", "A", 329 "p", "Ap", 330 "s", "Aps", // modified for rollback - "Ay", 331 "c", "Apsc", // modified for rollback - "Ayc", 332 "a", "AycA", 333 "p", "AycAp", 334 "s", "AycAps", // modified for rollback - "AycAy", 335 "c", "AycApsc", // modified for rollback - "AycAyc", 336 "h", "AycAY", 337 null, "AycAY", // null means finishKeyboardTransliteration 338 }; 339 340 keyboardAux(t, DATA); 341 } 342 343 /** 344 * Test keyboard transliteration with back-replacement. 345 */ 346 @Test TestKeyboard3()347 public void TestKeyboard3() { 348 // We want th>z but t>y. Furthermore, during keyboard 349 // transliteration we want t>y then yh>z if t, then h are 350 // typed. 
351 String RULES = 352 "t>|y;" + 353 "yh>z;" + 354 ""; 355 356 String[] DATA = { 357 // Column 1: characters to add to buffer (as if typed) 358 // Column 2: expected appearance of buffer after 359 // keyboard xliteration. 360 "a", "a", 361 "b", "ab", 362 "t", "abt", // modified for rollback - "aby", 363 "c", "abyc", 364 "t", "abyct", // modified for rollback - "abycy", 365 "h", "abycz", 366 null, "abycz", // null means finishKeyboardTransliteration 367 }; 368 369 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 370 keyboardAux(t, DATA); 371 } 372 keyboardAux(Transliterator t, String[] DATA)373 private void keyboardAux(Transliterator t, String[] DATA) { 374 Transliterator.Position index = new Transliterator.Position(); 375 ReplaceableString s = new ReplaceableString(); 376 for (int i=0; i<DATA.length; i+=2) { 377 StringBuffer log; 378 if (DATA[i] != null) { 379 log = new StringBuffer(s.toString() + " + " 380 + DATA[i] 381 + " -> "); 382 t.transliterate(s, index, DATA[i]); 383 } else { 384 log = new StringBuffer(s.toString() + " => "); 385 t.finishTransliteration(s, index); 386 } 387 UtilityExtensions.formatInput(log, s, index); 388 if (s.toString().equals(DATA[i+1])) { 389 logln(log.toString()); 390 } else { 391 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 392 } 393 } 394 } 395 396 // Latin-Arabic has been temporarily removed until it can be 397 // done correctly. 
398 399 // public void TestArabic() { 400 // String DATA[] = { 401 // "Arabic", 402 // "\u062a\u062a\u0645\u062a\u0639 "+ 403 // "\u0627\u0644\u0644\u063a\u0629 "+ 404 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 405 // "\u0628\u0628\u0646\u0638\u0645 "+ 406 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 407 // "\u062c\u0645\u064a\u0644\u0629" 408 // }; 409 410 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 411 // for (int i=0; i<DATA.length; i+=2) { 412 // expect(t, DATA[i], DATA[i+1]); 413 // } 414 // } 415 416 /** 417 * Compose the Kana transliterator forward and reverse and try 418 * some strings that should come out unchanged. 419 */ 420 @Test TestCompoundKana()421 public void TestCompoundKana() { 422 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 423 expect(t, "aaaaa", "aaaaa"); 424 } 425 426 /** 427 * Compose the hex transliterators forward and reverse. 428 */ 429 @Test TestCompoundHex()430 public void TestCompoundHex() { 431 Transliterator a = Transliterator.getInstance("Any-Hex"); 432 Transliterator b = Transliterator.getInstance("Hex-Any"); 433 // Transliterator[] trans = { a, b }; 434 // Transliterator ab = Transliterator.getInstance(trans); 435 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 436 437 // Do some basic tests of b 438 expect(b, "\\u0030\\u0031", "01"); 439 440 String s = "abcde"; 441 expect(ab, s, s); 442 443 // trans = new Transliterator[] { b, a }; 444 // Transliterator ba = Transliterator.getInstance(trans); 445 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 446 ReplaceableString str = new ReplaceableString(s); 447 a.transliterate(str); 448 expect(ba, str.toString(), str.toString()); 449 } 450 451 /** 452 * Do some basic tests of filtering. 
453 */ 454 @Test TestFiltering()455 public void TestFiltering() { 456 457 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 458 tempTrans.setFilter(new UnicodeSet("[a]")); 459 String tempResult = tempTrans.transform("xa"); 460 assertEquals("context should not be filtered ", "xb", tempResult); 461 462 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 463 tempResult = tempTrans.transform("xa"); 464 assertEquals("context should not be filtered ", "xb", tempResult); 465 466 Transliterator hex = Transliterator.getInstance("Any-Hex"); 467 hex.setFilter(new UnicodeFilter() { 468 @Override 469 public boolean contains(int c) { 470 return c != 'c'; 471 } 472 @Override 473 public String toPattern(boolean escapeUnprintable) { 474 return ""; 475 } 476 @Override 477 public boolean matchesIndexValue(int v) { 478 return false; 479 } 480 @Override 481 public void addMatchSetTo(UnicodeSet toUnionTo) {} 482 }); 483 String s = "abcde"; 484 String out = hex.transliterate(s); 485 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 486 if (out.equals(exp)) { 487 logln("Ok: \"" + exp + "\""); 488 } else { 489 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 490 } 491 } 492 493 /** 494 * Test anchors 495 */ 496 @Test TestAnchors()497 public void TestAnchors() { 498 expect("^ab > 01 ;" + 499 " ab > |8 ;" + 500 " b > k ;" + 501 " 8x$ > 45 ;" + 502 " 8x > 77 ;", 503 504 "ababbabxabx", 505 "018k7745"); 506 expect("$s = [z$] ;" + 507 "$s{ab > 01 ;" + 508 " ab > |8 ;" + 509 " b > k ;" + 510 " 8x}$s > 45 ;" + 511 " 8x > 77 ;", 512 513 "abzababbabxzabxabx", 514 "01z018k45z01x45"); 515 } 516 517 /** 518 * Test pattern quoting and escape mechanisms. 
519 */ 520 @Test TestPatternQuoting()521 public void TestPatternQuoting() { 522 // Array of 3n items 523 // Each item is <rules>, <input>, <expected output> 524 String[] DATA = { 525 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 526 }; 527 528 for (int i=0; i<DATA.length; i+=3) { 529 logln("Pattern: " + Utility.escape(DATA[i])); 530 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 531 expect(t, DATA[i+1], DATA[i+2]); 532 } 533 } 534 535 @Test TestVariableNames()536 public void TestVariableNames() { 537 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 538 if (gl == null) { 539 errln("FAIL: null Transliterator returned."); 540 } 541 } 542 543 /** 544 * Regression test for bugs found in Greek transliteration. 545 */ 546 @Test TestJ277()547 public void TestJ277() { 548 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 549 550 char sigma = (char)0x3C3; 551 char upsilon = (char)0x3C5; 552 char nu = (char)0x3BD; 553 // not used char PHI = (char)0x3A6; 554 char alpha = (char)0x3B1; 555 // not used char omega = (char)0x3C9; 556 // not used char omicron = (char)0x3BF; 557 // not used char epsilon = (char)0x3B5; 558 559 // sigma upsilon nu -> syn 560 StringBuffer buf = new StringBuffer(); 561 buf.append(sigma).append(upsilon).append(nu); 562 String syn = buf.toString(); 563 expect(gl, syn, "syn"); 564 565 // sigma alpha upsilon nu -> saun 566 buf.setLength(0); 567 buf.append(sigma).append(alpha).append(upsilon).append(nu); 568 String sayn = buf.toString(); 569 expect(gl, sayn, "saun"); 570 571 // Again, using a smaller rule set 572 String rules = 573 "$alpha = \u03B1;" + 574 "$nu = \u03BD;" + 575 "$sigma = \u03C3;" + 576 "$ypsilon = \u03C5;" + 577 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 578 "s <> $sigma;" + 579 "a <> $alpha;" + 580 "u <> $vowel { $ypsilon;" + 581 "y <> $ypsilon;" + 582 "n <> $nu;"; 583 Transliterator mini = 
Transliterator.createFromRules 584 ("mini", rules, Transliterator.REVERSE); 585 expect(mini, syn, "syn"); 586 expect(mini, sayn, "saun"); 587 588 //| // Transliterate the Greek locale data 589 //| Locale el("el"); 590 //| DateFormatSymbols syms(el, status); 591 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 592 //| int32_t i, count; 593 //| const UnicodeString* data = syms.getMonths(count); 594 //| for (i=0; i<count; ++i) { 595 //| if (data[i].length() == 0) { 596 //| continue; 597 //| } 598 //| UnicodeString out(data[i]); 599 //| gl->transliterate(out); 600 //| bool_t ok = true; 601 //| if (data[i].length() >= 2 && out.length() >= 2 && 602 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 603 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 604 //| ok = false; 605 //| } 606 //| } 607 //| if (ok) { 608 //| logln(prettify(data[i] + " -> " + out)); 609 //| } else { 610 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 611 //| } 612 //| } 613 } 614 615 // /** 616 // * Prefix, suffix support in hex transliterators 617 // */ 618 // public void TestJ243() { 619 // // Test default Hex-Any, which should handle 620 // // \\u, \\U, u+, and U+ 621 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 622 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 623 // 624 // // Try a custom Hex-Any 625 // // \\uXXXX and &#xXXXX; 626 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 627 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 628 // "abcd5fx0123"); 629 // 630 // // Try custom Any-Hex (default is tested elsewhere) 631 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 632 // expect(hex3, "012", "012"); 633 // } 634 635 @Test TestJ329()636 public void TestJ329() { 637 638 Object[] DATA = { 639 Boolean.FALSE, "a > b; c > d", 640 Boolean.TRUE, "a > b; no operator; c > d", 
641 }; 642 643 for (int i=0; i<DATA.length; i+=2) { 644 String err = null; 645 try { 646 Transliterator.createFromRules("<ID>", 647 (String) DATA[i+1], 648 Transliterator.FORWARD); 649 } catch (IllegalArgumentException e) { 650 err = e.getMessage(); 651 } 652 boolean gotError = (err != null); 653 String desc = (String) DATA[i+1] + 654 (gotError ? (" -> error: " + err) : " -> no error"); 655 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 656 logln("Ok: " + desc); 657 } else { 658 errln("FAIL: " + desc); 659 } 660 } 661 } 662 663 /** 664 * Test segments and segment references. 665 */ 666 @Test TestSegments()667 public void TestSegments() { 668 // Array of 3n items 669 // Each item is <rules>, <input>, <expected output> 670 String[] DATA = { 671 "([a-z]) '.' ([0-9]) > $2 '-' $1", 672 "abc.123.xyz.456", 673 "ab1-c23.xy4-z56", 674 }; 675 676 for (int i=0; i<DATA.length; i+=3) { 677 logln("Pattern: " + Utility.escape(DATA[i])); 678 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 679 expect(t, DATA[i+1], DATA[i+2]); 680 } 681 } 682 683 /** 684 * Test cursor positioning outside of the key 685 */ 686 @Test TestCursorOffset()687 public void TestCursorOffset() { 688 // Array of 3n items 689 // Each item is <rules>, <input>, <expected output> 690 String[] DATA = { 691 "pre {alpha} post > | @ ALPHA ;" + 692 "eALPHA > beta ;" + 693 "pre {beta} post > BETA @@ | ;" + 694 "post > xyz", 695 696 "prealphapost prebetapost", 697 "prbetaxyz preBETApost", 698 }; 699 700 for (int i=0; i<DATA.length; i+=3) { 701 logln("Pattern: " + Utility.escape(DATA[i])); 702 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 703 expect(t, DATA[i+1], DATA[i+2]); 704 } 705 } 706 707 /** 708 * Test zero length and > 1 char length variable values. Test 709 * use of variable refs in UnicodeSets. 
710 */ 711 @Test TestArbitraryVariableValues()712 public void TestArbitraryVariableValues() { 713 // Array of 3n items 714 // Each item is <rules>, <input>, <expected output> 715 String[] DATA = { 716 "$abe = ab;" + 717 "$pat = x[yY]z;" + 718 "$ll = 'a-z';" + 719 "$llZ = [$ll];" + 720 "$llY = [$ll$pat];" + 721 "$emp = ;" + 722 723 "$abe > ABE;" + 724 "$pat > END;" + 725 "$llZ > 1;" + 726 "$llY > 2;" + 727 "7$emp 8 > 9;" + 728 "", 729 730 "ab xYzxyz stY78", 731 "ABE ENDEND 1129", 732 }; 733 734 for (int i=0; i<DATA.length; i+=3) { 735 logln("Pattern: " + Utility.escape(DATA[i])); 736 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 737 expect(t, DATA[i+1], DATA[i+2]); 738 } 739 } 740 741 /** 742 * Confirm that the contextStart, contextLimit, start, and limit 743 * behave correctly. 744 */ 745 @Test TestPositionHandling()746 public void TestPositionHandling() { 747 // Array of 3n items 748 // Each item is <rules>, <input>, <expected output> 749 String[] DATA = { 750 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 751 "xtat txtb", // pos 0,9,0,9 752 "xTTaSS TTxUUb", 753 754 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 755 "xtat txtb", // pos 2,9,3,8 756 "xtaSS TTxUUb", 757 758 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 759 "xtat txtb", // pos 3,8,3,8 760 "xtaTT TTxTTb", 761 }; 762 763 // Array of 4n positions -- these go with the DATA array 764 // They are: contextStart, contextLimit, start, limit 765 int[] POS = { 766 0, 9, 0, 9, 767 2, 9, 3, 8, 768 3, 8, 3, 8, 769 }; 770 771 int n = DATA.length/3; 772 for (int i=0; i<n; i++) { 773 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 774 Transliterator.Position pos = new Transliterator.Position( 775 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 776 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 777 t.transliterate(rsource, pos); 778 t.finishTransliteration(rsource, pos); 779 String result = rsource.toString(); 780 String exp = 
DATA[3*i+2]; 781 expectAux(Utility.escape(DATA[3*i]), 782 DATA[3*i+1], 783 result, 784 result.equals(exp), 785 exp); 786 } 787 } 788 789 /** 790 * Test the Hiragana-Katakana transliterator. 791 */ 792 @Test TestHiraganaKatakana()793 public void TestHiraganaKatakana() { 794 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 795 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 796 797 // Array of 3n items 798 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 799 String[] DATA = { 800 "both", 801 "\u3042\u3090\u3099\u3092\u3050", 802 "\u30A2\u30F8\u30F2\u30B0", 803 804 "kh", 805 "\u307C\u3051\u3060\u3042\u3093\u30FC", 806 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 807 }; 808 809 for (int i=0; i<DATA.length; i+=3) { 810 switch (DATA[i].charAt(0)) { 811 case 'h': // Hiragana-Katakana 812 expect(hk, DATA[i+1], DATA[i+2]); 813 break; 814 case 'k': // Katakana-Hiragana 815 expect(kh, DATA[i+2], DATA[i+1]); 816 break; 817 case 'b': // both 818 expect(hk, DATA[i+1], DATA[i+2]); 819 expect(kh, DATA[i+2], DATA[i+1]); 820 break; 821 } 822 } 823 824 } 825 826 @Test TestCopyJ476()827 public void TestCopyJ476() { 828 // This is a C++-only copy constructor test 829 } 830 831 /** 832 * Test inter-Indic transliterators. These are composed. 833 */ 834 @Test TestInterIndic()835 public void TestInterIndic() { 836 String ID = "Devanagari-Gujarati"; 837 Transliterator dg = Transliterator.getInstance(ID); 838 if (dg == null) { 839 errln("FAIL: getInstance(" + ID + ") returned null"); 840 return; 841 } 842 String id = dg.getID(); 843 if (!id.equals(ID)) { 844 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 845 } 846 String dev = "\u0901\u090B\u0925"; 847 String guj = "\u0A81\u0A8B\u0AA5"; 848 expect(dg, dev, guj); 849 } 850 851 /** 852 * Test filter syntax in IDs. 
(J23) 853 */ 854 @Test TestFilterIDs()855 public void TestFilterIDs() { 856 String[] DATA = { 857 "[aeiou]Any-Hex", // ID 858 "[aeiou]Hex-Any", // expected inverse ID 859 "quizzical", // src 860 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 861 862 "[aeiou]Any-Hex;[^5]Hex-Any", 863 "[^5]Any-Hex;[aeiou]Hex-Any", 864 "quizzical", 865 "q\\u0075izzical", 866 867 "[abc]Null", 868 "[abc]Null", 869 "xyz", 870 "xyz", 871 }; 872 873 for (int i=0; i<DATA.length; i+=4) { 874 String ID = DATA[i]; 875 Transliterator t = Transliterator.getInstance(ID); 876 expect(t, DATA[i+2], DATA[i+3]); 877 878 // Check the ID 879 if (!ID.equals(t.getID())) { 880 errln("FAIL: getInstance(" + ID + ").getID() => " + 881 t.getID()); 882 } 883 884 // Check the inverse 885 String uID = DATA[i+1]; 886 Transliterator u = t.getInverse(); 887 if (u == null) { 888 errln("FAIL: " + ID + ".getInverse() returned NULL"); 889 } else if (!u.getID().equals(uID)) { 890 errln("FAIL: " + ID + ".getInverse().getID() => " + 891 u.getID() + ", expected " + uID); 892 } 893 } 894 } 895 896 /** 897 * Test the case mapping transliterators. 898 */ 899 @Test TestCaseMap()900 public void TestCaseMap() { 901 Transliterator toUpper = 902 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 903 Transliterator toLower = 904 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 905 Transliterator toTitle = 906 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 907 908 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 909 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 910 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 911 "the quick brown foX jumped over the lazY dogs."); 912 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 913 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 914 } 915 916 /** 917 * Test the name mapping transliterators. 
918 */ 919 @Test TestNameMap()920 public void TestNameMap() { 921 Transliterator uni2name = 922 Transliterator.getInstance("Any-Name[^abc]"); 923 Transliterator name2uni = 924 Transliterator.getInstance("Name-Any"); 925 926 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 927 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 928 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 929 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 930 931 // round trip 932 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 933 934 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 935 expect(t, s, s); 936 } 937 938 /** 939 * Test liberalized ID syntax. 1006c 940 */ 941 @Test TestLiberalizedID()942 public void TestLiberalizedID() { 943 // Some test cases have an expected getID() value of NULL. This 944 // means I have disabled the test case for now. This stuff is 945 // still under development, and I haven't decided whether to make 946 // getID() return canonical case yet. It will all get rewritten 947 // with the move to Source-Target/Variant IDs anyway. 
[aliu] 948 String DATA[] = { 949 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 950 " Null ", "Null", "whitespace", 951 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 952 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 953 }; 954 955 for (int i=0; i<DATA.length; i+=3) { 956 try { 957 Transliterator t = Transliterator.getInstance(DATA[i]); 958 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 959 logln("Ok: " + DATA[i+2] + 960 " create ID \"" + DATA[i] + "\" => \"" + 961 t.getID() + "\""); 962 } else { 963 errln("FAIL: " + DATA[i+2] + 964 " create ID \"" + DATA[i] + "\" => \"" + 965 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 966 } 967 } catch (IllegalArgumentException e) { 968 errln("FAIL: " + DATA[i+2] + 969 " create ID \"" + DATA[i] + "\""); 970 } 971 } 972 } 973 974 @Test TestCreateInstance()975 public void TestCreateInstance() { 976 String FORWARD = "F"; 977 String REVERSE = "R"; 978 String DATA[] = { 979 // Column 1: id 980 // Column 2: direction 981 // Column 3: expected ID, or "" if expect failure 982 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 983 984 // JB#2689: bad compound causes crash 985 "InvalidSource-InvalidTarget", FORWARD, "", 986 "InvalidSource-InvalidTarget", REVERSE, "", 987 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 988 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 989 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 990 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 991 992 null 993 }; 994 995 for (int i=0; DATA[i]!=null; i+=3) { 996 String id=DATA[i]; 997 int dir = (DATA[i+1]==FORWARD)? 
998 Transliterator.FORWARD:Transliterator.REVERSE; 999 String expID=DATA[i+2]; 1000 Exception e = null; 1001 Transliterator t; 1002 try { 1003 t = Transliterator.getInstance(id,dir); 1004 } catch (Exception e1) { 1005 e = e1; 1006 t = null; 1007 } 1008 String newID = (t!=null)?t.getID():""; 1009 boolean ok = (newID.equals(expID)); 1010 if (t==null) { 1011 newID = e.getMessage(); 1012 } 1013 if (ok) { 1014 logln("Ok: createInstance(" + 1015 id + "," + DATA[i+1] + ") => " + newID); 1016 } else { 1017 errln("FAIL: createInstance(" + 1018 id + "," + DATA[i+1] + ") => " + newID + 1019 ", expected " + expID); 1020 } 1021 } 1022 } 1023 1024 /** 1025 * Test the normalization transliterator. 1026 */ 1027 @Test TestNormalizationTransliterator()1028 public void TestNormalizationTransliterator() { 1029 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest 1030 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1031 String[][] CANON = { 1032 // Input Decomposed Composed 1033 {"cat", "cat", "cat" }, 1034 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1035 1036 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1037 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1038 1039 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1040 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1041 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1042 1043 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1044 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1045 1046 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1047 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1048 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1049 1050 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1051 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1052 
1053 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1054 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1055 1056 {"Henry IV", "Henry IV", "Henry IV" }, 1057 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1058 1059 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1060 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1061 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1062 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1063 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1064 1065 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1066 }; 1067 1068 String[][] COMPAT = { 1069 // Input Decomposed Composed 1070 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1071 1072 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1073 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1074 1075 {"Henry IV", "Henry IV", "Henry IV" }, 1076 {"Henry \u2163", "Henry IV", "Henry IV" }, 1077 1078 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1079 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1080 1081 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1082 }; 1083 1084 Transliterator NFD = Transliterator.getInstance("NFD"); 1085 Transliterator NFC = Transliterator.getInstance("NFC"); 1086 for (int i=0; i<CANON.length; ++i) { 1087 String in = CANON[i][0]; 1088 String expd = CANON[i][1]; 1089 String expc = CANON[i][2]; 1090 expect(NFD, in, expd); 1091 expect(NFC, in, expc); 1092 } 1093 1094 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1095 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1096 for (int i=0; i<COMPAT.length; ++i) { 1097 String in = COMPAT[i][0]; 1098 String expkd = COMPAT[i][1]; 1099 String expkc = COMPAT[i][2]; 1100 expect(NFKD, in, expkd); 1101 expect(NFKC, in, expkc); 1102 } 1103 1104 Transliterator t = 
Transliterator.getInstance("NFD; [x]Remove"); 1105 expect(t, "\u010dx", "c\u030C"); 1106 } 1107 1108 /** 1109 * Test compound RBT rules. 1110 */ 1111 @Test TestCompoundRBT()1112 public void TestCompoundRBT() { 1113 // Careful with spacing and ';' here: Phrase this exactly 1114 // as toRules() is going to return it. If toRules() changes 1115 // with regard to spacing or ';', then adjust this string. 1116 String rule = "::Hex-Any;\n" + 1117 "::Any-Lower;\n" + 1118 "a > '.A.';\n" + 1119 "b > '.B.';\n" + 1120 "::[^t]Any-Upper;"; 1121 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1122 if (t == null) { 1123 errln("FAIL: createFromRules failed"); 1124 return; 1125 } 1126 expect(t, "\u0043at in the hat, bat on the mat", 1127 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1128 String r = t.toRules(true); 1129 if (r.equals(rule)) { 1130 logln("OK: toRules() => " + r); 1131 } else { 1132 errln("FAIL: toRules() => " + r + 1133 ", expected " + rule); 1134 } 1135 1136 // Now test toRules 1137 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1138 if (t == null) { 1139 errln("FAIL: createInstance failed"); 1140 return; 1141 } 1142 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1143 r = t.toRules(true); 1144 if (!r.equals(exp)) { 1145 errln("FAIL: toRules() => " + r + 1146 ", expected " + exp); 1147 } else { 1148 logln("OK: toRules() => " + r); 1149 } 1150 1151 // Round trip the result of toRules 1152 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1153 if (t == null) { 1154 errln("FAIL: createFromRules #2 failed"); 1155 return; 1156 } else { 1157 logln("OK: createFromRules(" + r + ") succeeded"); 1158 } 1159 1160 // Test toRules again 1161 r = t.toRules(true); 1162 if (!r.equals(exp)) { 1163 errln("FAIL: toRules() => " + r + 1164 ", expected " + exp); 1165 } else { 1166 logln("OK: toRules() => " + r); 1167 } 1168 1169 // Test Foo(Bar) IDs. 
Careful with spacing in id; make it conform 1170 // to what the regenerated ID will look like. 1171 String id = "Upper(Lower);(NFKC)"; 1172 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1173 if (t == null) { 1174 errln("FAIL: createInstance #2 failed"); 1175 return; 1176 } 1177 if (t.getID().equals(id)) { 1178 logln("OK: created " + id); 1179 } else { 1180 errln("FAIL: createInstance(" + id + 1181 ").getID() => " + t.getID()); 1182 } 1183 1184 Transliterator u = t.getInverse(); 1185 if (u == null) { 1186 errln("FAIL: createInverse failed"); 1187 return; 1188 } 1189 exp = "NFKC();Lower(Upper)"; 1190 if (u.getID().equals(exp)) { 1191 logln("OK: createInverse(" + id + ") => " + 1192 u.getID()); 1193 } else { 1194 errln("FAIL: createInverse(" + id + ") => " + 1195 u.getID()); 1196 } 1197 } 1198 1199 /** 1200 * Compound filter semantics were originally not implemented 1201 * correctly. Originally, each component filter f(i) is replaced by 1202 * f'(i) = f(i) && g, where g is the filter for the compound 1203 * transliterator. 1204 * 1205 * From Mark: 1206 * 1207 * Suppose and I have a transliterator X. Internally X is 1208 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1209 * 1210 * The compound should convert all greek characters (through latin) to 1211 * cyrillic, then lowercase the result. The filter should say "don't 1212 * touch 'A' in the original". But because an intermediate result 1213 * happens to go through "A", the Greek Alpha gets hung up. 1214 */ 1215 @Test TestCompoundFilter()1216 public void TestCompoundFilter() { 1217 Transliterator t = Transliterator.getInstance 1218 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1219 t.setFilter(new UnicodeSet("[^A]")); 1220 1221 // Only the 'A' at index 1 should remain unchanged 1222 expect(t, 1223 CharsToUnicodeString("BA\\u039A\\u0391"), 1224 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1225 } 1226 1227 /** 1228 * Test the "Remove" transliterator. 
1229 */ 1230 @Test TestRemove()1231 public void TestRemove() { 1232 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1233 expect(t, "The quick brown fox.", 1234 "Th qck brwn fx."); 1235 } 1236 1237 @Test TestToRules()1238 public void TestToRules() { 1239 String RBT = "rbt"; 1240 String SET = "set"; 1241 String[] DATA = { 1242 RBT, 1243 "$a=\\u4E61; [$a] > A;", 1244 "[\\u4E61] > A;", 1245 1246 RBT, 1247 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1248 "[[:Zs:][:Zl:]]{a} > A;", 1249 1250 SET, 1251 "[[:Zs:][:Zl:]]", 1252 "[[:Zs:][:Zl:]]", 1253 1254 SET, 1255 "[:Ps:]", 1256 "[:Ps:]", 1257 1258 SET, 1259 "[:L:]", 1260 "[:L:]", 1261 1262 SET, 1263 "[[:L:]-[A]]", 1264 "[[:L:]-[A]]", 1265 1266 SET, 1267 "[~[:Lu:][:Ll:]]", 1268 "[~[:Lu:][:Ll:]]", 1269 1270 SET, 1271 "[~[a-z]]", 1272 "[~[a-z]]", 1273 1274 RBT, 1275 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1276 "[^[:Zs:]]{a} > A;", 1277 1278 RBT, 1279 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1280 "[[a-z]-[:Zs:]]{a} > A;", 1281 1282 RBT, 1283 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1284 "[[:Zs:]&[a-z]]{a} > A;", 1285 1286 RBT, 1287 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1288 "[x[:Zs:]]{a} > A;", 1289 1290 RBT, 1291 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1292 "$macron = \\u0304 ;"+ 1293 "$evowel = [aeiouyAEIOUY] ;"+ 1294 "$iotasub = \\u0345 ;"+ 1295 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1296 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1297 1298 RBT, 1299 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1300 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1301 }; 1302 1303 for (int d=0; d < DATA.length; d+=3) { 1304 if (DATA[d] == RBT) { 1305 // Transliterator test 1306 Transliterator t = Transliterator.createFromRules("ID", 1307 DATA[d+1], Transliterator.FORWARD); 1308 if (t == null) { 1309 errln("FAIL: createFromRules failed"); 1310 return; 1311 } 1312 String rules, escapedRules; 1313 rules = 
t.toRules(false); 1314 escapedRules = t.toRules(true); 1315 String expRules = Utility.unescape(DATA[d+2]); 1316 String expEscapedRules = DATA[d+2]; 1317 if (rules.equals(expRules)) { 1318 logln("Ok: " + DATA[d+1] + 1319 " => " + Utility.escape(rules)); 1320 } else { 1321 errln("FAIL: " + DATA[d+1] + 1322 " => " + Utility.escape(rules + ", exp " + expRules)); 1323 } 1324 if (escapedRules.equals(expEscapedRules)) { 1325 logln("Ok: " + DATA[d+1] + 1326 " => " + escapedRules); 1327 } else { 1328 errln("FAIL: " + DATA[d+1] + 1329 " => " + escapedRules + ", exp " + expEscapedRules); 1330 } 1331 1332 } else { 1333 // UnicodeSet test 1334 String pat = DATA[d+1]; 1335 String expToPat = DATA[d+2]; 1336 UnicodeSet set = new UnicodeSet(pat); 1337 1338 // Adjust spacing etc. as necessary. 1339 String toPat; 1340 toPat = set.toPattern(true); 1341 if (expToPat.equals(toPat)) { 1342 logln("Ok: " + pat + 1343 " => " + toPat); 1344 } else { 1345 errln("FAIL: " + pat + 1346 " => " + Utility.escape(toPat) + 1347 ", exp " + Utility.escape(pat)); 1348 } 1349 } 1350 } 1351 } 1352 1353 @Test TestContext()1354 public void TestContext() { 1355 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1356 1357 expect("de > x; {d}e > y;", 1358 "de", 1359 "ye", 1360 pos); 1361 1362 expect("ab{c} > z;", 1363 "xadabdabcy", 1364 "xadabdabzy"); 1365 } 1366 CharsToUnicodeString(String s)1367 static final String CharsToUnicodeString(String s) { 1368 return Utility.unescape(s); 1369 } 1370 1371 @Test TestSupplemental()1372 public void TestSupplemental() { 1373 1374 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1375 "a > $a; $s > i;"), 1376 CharsToUnicodeString("ab\\U0001030Fx"), 1377 CharsToUnicodeString("\\U00010300bix")); 1378 1379 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1380 "$b=[A-Z\\U00010400-\\U0001044D];" + 1381 "($a)($b) > $2 $1;"), 1382 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 
1383 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1384 1385 // k|ax\\U00010300xm 1386 1387 // k|a\\U00010400\\U00010300xm 1388 // ky|\\U00010400\\U00010300xm 1389 // ky\\U00010400|\\U00010300xm 1390 1391 // ky\\U00010400|\\U00010300\\U00010400m 1392 // ky\\U00010400y|\\U00010400m 1393 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1394 "$a {x} > | @ \\U00010400;" + 1395 "{$a} [^\\u0000-\\uFFFF] > y;"), 1396 CharsToUnicodeString("kax\\U00010300xm"), 1397 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1398 1399 expect(Transliterator.getInstance("Any-Name"), 1400 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1401 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1402 1403 expect(Transliterator.getInstance("Name-Any"), 1404 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1405 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1406 1407 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1408 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1409 "U+10330U+10FF00U+E0061U+00A0"); 1410 1411 expect(Transliterator.getInstance("Any-Hex/C"), 1412 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1413 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1414 1415 expect(Transliterator.getInstance("Any-Hex/Perl"), 1416 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1417 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1418 1419 expect(Transliterator.getInstance("Any-Hex/Java"), 1420 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1421 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1422 1423 expect(Transliterator.getInstance("Any-Hex/XML"), 1424 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1425 "𐌰􏼀󠁡 "); 1426 1427 expect(Transliterator.getInstance("Any-Hex/XML10"), 1428 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1429 "𐌰􏼀󠁡 "); 1430 1431 
expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1432 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1433 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1434 } 1435 1436 @Test TestQuantifier()1437 public void TestQuantifier() { 1438 1439 // Make sure @ in a quantified anteContext works 1440 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1441 "AAAAAb", 1442 "aaa(aac)"); 1443 1444 // Make sure @ in a quantified postContext works 1445 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1446 "baaaaa", 1447 "caa(aaa)"); 1448 1449 // Make sure @ in a quantified postContext with seg ref works 1450 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1451 "baaaaa", 1452 "baa(aaa)"); 1453 1454 // Make sure @ past ante context doesn't enter ante context 1455 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1456 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1457 "xxxab", 1458 "xxx(ac)", 1459 pos); 1460 1461 // Make sure @ past post context doesn't pass limit 1462 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1463 expect("{b} a+ > c @@ |; x > y; a > A;", 1464 "baxx", 1465 "caxx", 1466 pos2); 1467 1468 // Make sure @ past post context doesn't enter post context 1469 expect("{b} a+ > c @@ |; x > y; a > A;", 1470 "baxx", 1471 "cayy"); 1472 1473 expect("(ab)? c > d;", 1474 "c abc ababc", 1475 "d d abd"); 1476 1477 // NOTE: The (ab)+ when referenced just yields a single "ab", 1478 // not the full sequence of them. This accords with perl behavior. 
1479 expect("(ab)+ {x} > '(' $1 ')';", 1480 "x abx ababxy", 1481 "x ab(ab) abab(ab)y"); 1482 1483 expect("b+ > x;", 1484 "ac abc abbc abbbc", 1485 "ac axc axc axc"); 1486 1487 expect("[abc]+ > x;", 1488 "qac abrc abbcs abtbbc", 1489 "qx xrx xs xtx"); 1490 1491 expect("q{(ab)+} > x;", 1492 "qa qab qaba qababc qaba", 1493 "qa qx qxa qxc qxa"); 1494 1495 expect("q(ab)* > x;", 1496 "qa qab qaba qababc", 1497 "xa x xa xc"); 1498 1499 // NOTE: The (ab)+ when referenced just yields a single "ab", 1500 // not the full sequence of them. This accords with perl behavior. 1501 expect("q(ab)* > '(' $1 ')';", 1502 "qa qab qaba qababc", 1503 "()a (ab) (ab)a (ab)c"); 1504 1505 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1506 // quoted string 1507 expect("'ab'+ > x;", 1508 "bb ab ababb", 1509 "bb x xb"); 1510 1511 // $foo+ and $foo* -- the quantifier should apply to the entire 1512 // variable reference 1513 expect("$var = ab; $var+ > x;", 1514 "bb ab ababb", 1515 "bb x xb"); 1516 } 1517 1518 static class TestFact implements Transliterator.Factory { 1519 static class NameableNullTrans extends Transliterator { NameableNullTrans(String id)1520 public NameableNullTrans(String id) { 1521 super(id, null); 1522 } 1523 @Override handleTransliterate(Replaceable text, Position offsets, boolean incremental)1524 protected void handleTransliterate(Replaceable text, 1525 Position offsets, boolean incremental) { 1526 offsets.start = offsets.limit; 1527 } 1528 } 1529 String id; TestFact(String theID)1530 public TestFact(String theID) { 1531 id = theID; 1532 } 1533 @Override getInstance(String ignoredID)1534 public Transliterator getInstance(String ignoredID) { 1535 return new NameableNullTrans(id); 1536 } 1537 } 1538 1539 @Test TestSTV()1540 public void TestSTV() { 1541 Enumeration es = Transliterator.getAvailableSources(); 1542 for (int i=0; es.hasMoreElements(); ++i) { 1543 String source = (String) es.nextElement(); 1544 logln("" + i + ": " + source); 1545 if 
(source.length() == 0) { 1546 errln("FAIL: empty source"); 1547 continue; 1548 } 1549 Enumeration et = Transliterator.getAvailableTargets(source); 1550 for (int j=0; et.hasMoreElements(); ++j) { 1551 String target = (String) et.nextElement(); 1552 logln(" " + j + ": " + target); 1553 if (target.length() == 0) { 1554 errln("FAIL: empty target"); 1555 continue; 1556 } 1557 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1558 for (int k=0; ev.hasMoreElements(); ++k) { 1559 String variant = (String) ev.nextElement(); 1560 if (variant.length() == 0) { 1561 logln(" " + k + ": <empty>"); 1562 } else { 1563 logln(" " + k + ": " + variant); 1564 } 1565 } 1566 } 1567 } 1568 1569 // Test registration 1570 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1571 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1572 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1573 for (int i=0; i<3; ++i) { 1574 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1575 try { 1576 Transliterator t = Transliterator.getInstance(IDS[i]); 1577 if (t.getID().equals(IDS[i])) { 1578 logln("Ok: Registration/creation succeeded for ID " + 1579 IDS[i]); 1580 } else { 1581 errln("FAIL: Registration of ID " + 1582 IDS[i] + " creates ID " + t.getID()); 1583 } 1584 Transliterator.unregister(IDS[i]); 1585 try { 1586 t = Transliterator.getInstance(IDS[i]); 1587 errln("FAIL: Unregistration failed for ID " + 1588 IDS[i] + "; still receiving ID " + t.getID()); 1589 } catch (IllegalArgumentException e2) { 1590 // Good; this is what we expect 1591 logln("Ok; Unregistered " + IDS[i]); 1592 } 1593 } catch (IllegalArgumentException e) { 1594 errln("FAIL: Registration/creation failed for ID " + 1595 IDS[i]); 1596 } finally { 1597 Transliterator.unregister(IDS[i]); 1598 } 1599 } 1600 1601 // Make sure getAvailable API reflects removal 1602 for (Enumeration e = Transliterator.getAvailableIDs(); 1603 e.hasMoreElements(); ) { 1604 String id 
= (String) e.nextElement(); 1605 for (int i=0; i<3; ++i) { 1606 if (id.equals(FULL_IDS[i])) { 1607 errln("FAIL: unregister(" + id + ") failed"); 1608 } 1609 } 1610 } 1611 for (Enumeration e = Transliterator.getAvailableTargets("Any"); 1612 e.hasMoreElements(); ) { 1613 String t = (String) e.nextElement(); 1614 if (t.equals(IDS[0])) { 1615 errln("FAIL: unregister(Any-" + t + ") failed"); 1616 } 1617 } 1618 for (Enumeration e = Transliterator.getAvailableSources(); 1619 e.hasMoreElements(); ) { 1620 String s = (String) e.nextElement(); 1621 for (int i=0; i<3; ++i) { 1622 if (SOURCES[i] == null) continue; 1623 if (s.equals(SOURCES[i])) { 1624 errln("FAIL: unregister(" + s + "-*) failed"); 1625 } 1626 } 1627 } 1628 } 1629 1630 /** 1631 * Test inverse of Greek-Latin; Title() 1632 */ 1633 @Test TestCompoundInverse()1634 public void TestCompoundInverse() { 1635 Transliterator t = Transliterator.getInstance 1636 ("Greek-Latin; Title()", Transliterator.REVERSE); 1637 if (t == null) { 1638 errln("FAIL: createInstance"); 1639 return; 1640 } 1641 String exp = "(Title);Latin-Greek"; 1642 if (t.getID().equals(exp)) { 1643 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1644 t.getID()); 1645 } else { 1646 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1647 t.getID() + "\", expected \"" + exp + "\""); 1648 } 1649 } 1650 1651 /** 1652 * Test NFD chaining with RBT 1653 */ 1654 @Test TestNFDChainRBT()1655 public void TestNFDChainRBT() { 1656 Transliterator t = Transliterator.createFromRules( 1657 "TEST", "::NFD; aa > Q; a > q;", 1658 Transliterator.FORWARD); 1659 logln(t.toRules(true)); 1660 expect(t, "aa", "Q"); 1661 } 1662 1663 /** 1664 * Inverse of "Null" should be "Null". 
(J21) 1665 */ 1666 @Test TestNullInverse()1667 public void TestNullInverse() { 1668 Transliterator t = Transliterator.getInstance("Null"); 1669 Transliterator u = t.getInverse(); 1670 if (!u.getID().equals("Null")) { 1671 errln("FAIL: Inverse of Null should be Null"); 1672 } 1673 } 1674 1675 /** 1676 * Check ID of inverse of alias. (J22) 1677 */ 1678 @Test TestAliasInverseID()1679 public void TestAliasInverseID() { 1680 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1681 Transliterator t = Transliterator.getInstance(ID); 1682 Transliterator u = t.getInverse(); 1683 String exp = "Hangul-Latin"; 1684 String got = u.getID(); 1685 if (!got.equals(exp)) { 1686 errln("FAIL: Inverse of " + ID + " is " + got + 1687 ", expected " + exp); 1688 } 1689 } 1690 1691 /** 1692 * Test IDs of inverses of compound transliterators. (J20) 1693 */ 1694 @Test TestCompoundInverseID()1695 public void TestCompoundInverseID() { 1696 String ID = "Latin-Jamo;NFC(NFD)"; 1697 Transliterator t = Transliterator.getInstance(ID); 1698 Transliterator u = t.getInverse(); 1699 String exp = "NFD(NFC);Jamo-Latin"; 1700 String got = u.getID(); 1701 if (!got.equals(exp)) { 1702 errln("FAIL: Inverse of " + ID + " is " + got + 1703 ", expected " + exp); 1704 } 1705 } 1706 1707 /** 1708 * Test undefined variable. 1709 */ 1710 @Test TestUndefinedVariable()1711 public void TestUndefinedVariable() { 1712 String rule = "$initial } a <> \u1161;"; 1713 try { 1714 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1715 } catch (IllegalArgumentException e) { 1716 logln("OK: Got exception for " + rule + ", as expected: " + 1717 e.getMessage()); 1718 return; 1719 } 1720 errln("Fail: bogus rule " + rule + " compiled without error"); 1721 } 1722 1723 /** 1724 * Test empty context. 
1725 */ 1726 @Test TestEmptyContext()1727 public void TestEmptyContext() { 1728 expect(" { a } > b;", "xay a ", "xby b "); 1729 } 1730 1731 /** 1732 * Test compound filter ID syntax 1733 */ 1734 @Test TestCompoundFilterID()1735 public void TestCompoundFilterID() { 1736 String[] DATA = { 1737 // Col. 1 = ID or rule set (latter must start with #) 1738 1739 // = columns > 1 are null if expect col. 1 to be illegal = 1740 1741 // Col. 2 = direction, "F..." or "R..." 1742 // Col. 3 = source string 1743 // Col. 4 = exp result 1744 1745 "[abc]; [abc]", null, null, null, // multiple filters 1746 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1747 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1748 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1749 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1750 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1751 }; 1752 1753 for (int i=0; i<DATA.length; i+=4) { 1754 String id = DATA[i]; 1755 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1756 Transliterator.REVERSE : Transliterator.FORWARD; 1757 String source = DATA[i+2]; 1758 String exp = DATA[i+3]; 1759 boolean expOk = (DATA[i+1] != null); 1760 Transliterator t = null; 1761 IllegalArgumentException e = null; 1762 try { 1763 if (id.charAt(0) == '#') { 1764 t = Transliterator.createFromRules("ID", id, direction); 1765 } else { 1766 t = Transliterator.getInstance(id, direction); 1767 } 1768 } catch (IllegalArgumentException ee) { 1769 e = ee; 1770 } 1771 boolean ok = (t != null && e == null); 1772 if (ok == expOk) { 1773 logln("Ok: " + id + " => " + t + 1774 (e != null ? (", " + e.getMessage()) : "")); 1775 if (source != null) { 1776 expect(t, source, exp); 1777 } 1778 } else { 1779 errln("FAIL: " + id + " => " + t + 1780 (e != null ? 
(", " + e.getMessage()) : "")); 1781 } 1782 } 1783 } 1784 1785 /** 1786 * Test new property set syntax 1787 */ 1788 @Test TestPropertySet()1789 public void TestPropertySet() { 1790 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1791 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1792 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1793 } 1794 1795 /** 1796 * Test various failure points of the new 2.0 engine. 1797 */ 1798 @Test TestNewEngine()1799 public void TestNewEngine() { 1800 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1801 // Katakana should be untouched 1802 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1803 1804 if (true) { 1805 // This test will only work if Transliterator.ROLLBACK is 1806 // true. Otherwise, this test will fail, revealing a 1807 // limitation of global filters in incremental mode. 1808 1809 Transliterator a = 1810 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1811 Transliterator A = 1812 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1813 1814 //Transliterator array[] = new Transliterator[] { 1815 // a, 1816 // Transliterator.getInstance("NFD"), 1817 // A }; 1818 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1819 1820 try { 1821 Transliterator.registerInstance(a); 1822 Transliterator.registerInstance(A); 1823 1824 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1825 expect(t, "aAaA", "bAbA"); 1826 1827 Transliterator[] u = t.getElements(); 1828 assertTrue("getElements().length", u.length == 3); 1829 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1830 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1831 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1832 1833 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1834 t.setFilter(new UnicodeSet("[:Ll:]")); 1835 expect(t, "aAaA", "bAbA"); 1836 } finally { 1837 Transliterator.unregister("a_to_A"); 1838 
Transliterator.unregister("A_to_b"); 1839 } 1840 } 1841 1842 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1843 "a", 1844 "ax"); 1845 1846 String gr = 1847 "$ddot = \u0308 ;" + 1848 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1849 "$rough = \u0314 ;" + 1850 "($lcgvowel+ $ddot?) $rough > h | $1 ;" + 1851 "\u03b1 <> a ;" + 1852 "$rough <> h ;"; 1853 1854 expect(gr, "\u03B1\u0314", "ha"); 1855 } 1856 1857 /** 1858 * Test quantified segment behavior. We want: 1859 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1860 */ 1861 @Test TestQuantifiedSegment()1862 public void TestQuantifiedSegment() { 1863 // The normal case 1864 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1865 1866 // The tricky case; the quantifier is around the segment 1867 expect("([abc])+ > x $1 x;", "cba", "xax"); 1868 1869 // Tricky case in reverse direction 1870 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1871 1872 // Check post-context segment 1873 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1874 1875 // Test toRule/toPattern for non-quantified segment. 1876 // Careful with spacing here. 1877 String r = "([a-c]){q} > x $1 x;"; 1878 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1879 String rr = t.toRules(true); 1880 if (!r.equals(rr)) { 1881 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1882 } else { 1883 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1884 } 1885 1886 // Test toRule/toPattern for quantified segment. 1887 // Careful with spacing here. 
1888 r = "([a-c])+{q} > x $1 x;"; 1889 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1890 rr = t.toRules(true); 1891 if (!r.equals(rr)) { 1892 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1893 } else { 1894 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1895 } 1896 } 1897 1898 //====================================================================== 1899 // Ram's tests 1900 //====================================================================== 1901 /* this test performs test of rules in ISO 15915 */ 1902 @Test TestDevanagariLatinRT()1903 public void TestDevanagariLatinRT(){ 1904 String[] source = { 1905 "bh\u0101rata", 1906 "kra", 1907 "k\u1E63a", 1908 "khra", 1909 "gra", 1910 "\u1E45ra", 1911 "cra", 1912 "chra", 1913 "j\u00F1a", 1914 "jhra", 1915 "\u00F1ra", 1916 "\u1E6Dya", 1917 "\u1E6Dhra", 1918 "\u1E0Dya", 1919 //"r\u0323ya", // \u095c is not valid in Devanagari 1920 "\u1E0Dhya", 1921 "\u1E5Bhra", 1922 "\u1E47ra", 1923 "tta", 1924 "thra", 1925 "dda", 1926 "dhra", 1927 "nna", 1928 "pra", 1929 "phra", 1930 "bra", 1931 "bhra", 1932 "mra", 1933 "\u1E49ra", 1934 //"l\u0331ra", 1935 "yra", 1936 "\u1E8Fra", 1937 //"l-", 1938 "vra", 1939 "\u015Bra", 1940 "\u1E63ra", 1941 "sra", 1942 "hma", 1943 "\u1E6D\u1E6Da", 1944 "\u1E6D\u1E6Dha", 1945 "\u1E6Dh\u1E6Dha", 1946 "\u1E0D\u1E0Da", 1947 "\u1E0D\u1E0Dha", 1948 "\u1E6Dya", 1949 "\u1E6Dhya", 1950 "\u1E0Dya", 1951 "\u1E0Dhya", 1952 // Not roundtrippable -- 1953 // \u0939\u094d\u094d\u092E - hma 1954 // \u0939\u094d\u092E - hma 1955 // CharsToUnicodeString("hma"), 1956 "hya", 1957 "\u015Br\u0325", 1958 "\u015Bca", 1959 "\u0115", 1960 "san\u0304j\u012Bb s\u0113nagupta", 1961 "\u0101nand vaddir\u0101ju", 1962 }; 1963 String[] expected = { 1964 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 1965 "\u0915\u094D\u0930", /* kra */ 1966 "\u0915\u094D\u0937", /* ks\u0323a */ 1967 "\u0916\u094D\u0930", /* khra */ 1968 "\u0917\u094D\u0930", /* gra */ 1969 "\u0919\u094D\u0930", /* 
n\u0307ra */ 1970 "\u091A\u094D\u0930", /* cra */ 1971 "\u091B\u094D\u0930", /* chra */ 1972 "\u091C\u094D\u091E", /* jn\u0303a */ 1973 "\u091D\u094D\u0930", /* jhra */ 1974 "\u091E\u094D\u0930", /* n\u0303ra */ 1975 "\u091F\u094D\u092F", /* t\u0323ya */ 1976 "\u0920\u094D\u0930", /* t\u0323hra */ 1977 "\u0921\u094D\u092F", /* d\u0323ya */ 1978 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 1979 "\u0922\u094D\u092F", /* d\u0323hya */ 1980 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 1981 "\u0923\u094D\u0930", /* n\u0323ra */ 1982 "\u0924\u094D\u0924", /* tta */ 1983 "\u0925\u094D\u0930", /* thra */ 1984 "\u0926\u094D\u0926", /* dda */ 1985 "\u0927\u094D\u0930", /* dhra */ 1986 "\u0928\u094D\u0928", /* nna */ 1987 "\u092A\u094D\u0930", /* pra */ 1988 "\u092B\u094D\u0930", /* phra */ 1989 "\u092C\u094D\u0930", /* bra */ 1990 "\u092D\u094D\u0930", /* bhra */ 1991 "\u092E\u094D\u0930", /* mra */ 1992 "\u0929\u094D\u0930", /* n\u0331ra */ 1993 //"\u0934\u094D\u0930", /* l\u0331ra */ 1994 "\u092F\u094D\u0930", /* yra */ 1995 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 1996 //"l-", 1997 "\u0935\u094D\u0930", /* vra */ 1998 "\u0936\u094D\u0930", /* s\u0301ra */ 1999 "\u0937\u094D\u0930", /* s\u0323ra */ 2000 "\u0938\u094D\u0930", /* sra */ 2001 "\u0939\u094d\u092E", /* hma */ 2002 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 2003 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 2004 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 2005 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 2006 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 2007 "\u091F\u094D\u092F", /* t\u0323ya */ 2008 "\u0920\u094D\u092F", /* t\u0323hya */ 2009 "\u0921\u094D\u092F", /* d\u0323ya */ 2010 "\u0922\u094D\u092F", /* d\u0323hya */ 2011 // "hma", /* hma */ 2012 "\u0939\u094D\u092F", /* hya */ 2013 "\u0936\u0943", /* s\u0301r\u0325a */ 2014 "\u0936\u094D\u091A", /* s\u0301ca */ 2015 "\u090d", /* e\u0306 */ 2016 "\u0938\u0902\u091C\u0940\u092C\u094D 
\u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2017 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2018 }; 2019 2020 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2021 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2022 2023 for(int i= 0; i<source.length; i++){ 2024 expect(latinToDev,(source[i]),(expected[i])); 2025 expect(devToLatin,(expected[i]),(source[i])); 2026 } 2027 2028 } 2029 @Test TestTeluguLatinRT()2030 public void TestTeluguLatinRT(){ 2031 String[] source = { 2032 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2033 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2034 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2035 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2036 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2037 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2038 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2039 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2040 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2041 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2042 }; 2043 2044 String[] expected = { 2045 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2046 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2047 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2048 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2049 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2050 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2051 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d 
\u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2052 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2053 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2054 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2055 }; 2056 2057 2058 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2059 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2060 2061 for(int i= 0; i<source.length; i++){ 2062 expect(latinToDev,(source[i]),(expected[i])); 2063 expect(devToLatin,(expected[i]),(source[i])); 2064 } 2065 } 2066 2067 @Test TestSanskritLatinRT()2068 public void TestSanskritLatinRT(){ 2069 int MAX_LEN =15; 2070 String[] source = { 2071 "rmk\u1E63\u0113t", 2072 "\u015Br\u012Bmad", 2073 "bhagavadg\u012Bt\u0101", 2074 "adhy\u0101ya", 2075 "arjuna", 2076 "vi\u1E63\u0101da", 2077 "y\u014Dga", 2078 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2079 "uv\u0101cr\u0325", 2080 "dharmak\u1E63\u0113tr\u0113", 2081 "kuruk\u1E63\u0113tr\u0113", 2082 "samav\u0113t\u0101", 2083 "yuyutsava\u1E25", 2084 "m\u0101mak\u0101\u1E25", 2085 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2086 "kimakurvata", 2087 "san\u0304java", 2088 }; 2089 String[] expected = { 2090 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2091 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2092 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2093 "\u0905\u0927\u094d\u092f\u093e\u092f", 2094 "\u0905\u0930\u094d\u091c\u0941\u0928", 2095 "\u0935\u093f\u0937\u093e\u0926", 2096 "\u092f\u094b\u0917", 2097 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2098 "\u0909\u0935\u093E\u091A\u0943", 2099 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2100 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2101 
"\u0938\u092e\u0935\u0947\u0924\u093e", 2102 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2103 "\u092e\u093e\u092e\u0915\u093e\u0903", 2104 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2105 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2106 "\u0938\u0902\u091c\u0935", 2107 }; 2108 2109 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2110 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2111 for(int i= 0; i<MAX_LEN; i++){ 2112 expect(latinToDev,(source[i]),(expected[i])); 2113 expect(devToLatin,(expected[i]),(source[i])); 2114 } 2115 } 2116 2117 @Test TestCompoundLatinRT()2118 public void TestCompoundLatinRT(){ 2119 int MAX_LEN =15; 2120 String[] source = { 2121 "rmk\u1E63\u0113t", 2122 "\u015Br\u012Bmad", 2123 "bhagavadg\u012Bt\u0101", 2124 "adhy\u0101ya", 2125 "arjuna", 2126 "vi\u1E63\u0101da", 2127 "y\u014Dga", 2128 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2129 "uv\u0101cr\u0325", 2130 "dharmak\u1E63\u0113tr\u0113", 2131 "kuruk\u1E63\u0113tr\u0113", 2132 "samav\u0113t\u0101", 2133 "yuyutsava\u1E25", 2134 "m\u0101mak\u0101\u1E25", 2135 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2136 "kimakurvata", 2137 "san\u0304java" 2138 }; 2139 String[] expected = { 2140 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2141 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2142 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2143 "\u0905\u0927\u094d\u092f\u093e\u092f", 2144 "\u0905\u0930\u094d\u091c\u0941\u0928", 2145 "\u0935\u093f\u0937\u093e\u0926", 2146 "\u092f\u094b\u0917", 2147 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2148 "\u0909\u0935\u093E\u091A\u0943", 2149 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2150 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2151 "\u0938\u092e\u0935\u0947\u0924\u093e", 2152 
"\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2153 "\u092e\u093e\u092e\u0915\u093e\u0903", 2154 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2155 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2156 "\u0938\u0902\u091c\u0935" 2157 }; 2158 2159 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2160 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2161 for(int i= 0; i<MAX_LEN; i++){ 2162 expect(latinToDevToLatin,(source[i]),(source[i])); 2163 expect(devToLatinToDev,(expected[i]),(expected[i])); 2164 } 2165 } 2166 /** 2167 * Test Gurmukhi-Devanagari Tippi and Bindi 2168 */ 2169 @Test TestGurmukhiDevanagari()2170 public void TestGurmukhiDevanagari(){ 2171 // the rule says: 2172 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2173 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2174 2175 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2176 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2177 2178 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2179 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2180 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2181 StringBuffer src = new StringBuffer(" \u0902"); 2182 StringBuffer expect = new StringBuffer(" \u0A02"); 2183 while(vIter.next()){ 2184 src.setCharAt(0,(char) vIter.codepoint); 2185 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2186 expect(trans,src.toString(),expect.toString()); 2187 } 2188 2189 expect.setCharAt(1,'\u0A70'); 2190 while(nvIter.next()){ 2191 //src.setCharAt(0,(char) nvIter.codepoint); 2192 src.setCharAt(0,(char)nvIter.codepoint); 2193 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2194 expect(trans,src.toString(),expect.toString()); 2195 } 2196 
} 2197 /** 2198 * Test instantiation from a locale. 2199 */ 2200 @Test TestLocaleInstantiation()2201 public void TestLocaleInstantiation() { 2202 Transliterator t; 2203 try{ 2204 t = Transliterator.getInstance("te_IN-Latin"); 2205 //expect(t, "\u0430", "a"); 2206 }catch(IllegalArgumentException ex){ 2207 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2208 } 2209 try{ 2210 t = Transliterator.getInstance("ru_RU-Latin"); 2211 expect(t, "\u0430", "a"); 2212 }catch(IllegalArgumentException ex){ 2213 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2214 } 2215 try{ 2216 t = Transliterator.getInstance("en-el"); 2217 expect(t, "a", "\u03B1"); 2218 }catch(IllegalArgumentException ex){ 2219 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2220 } 2221 } 2222 2223 /** 2224 * Test title case handling of accent (should ignore accents) 2225 */ 2226 @Test TestTitleAccents()2227 public void TestTitleAccents() { 2228 Transliterator t = Transliterator.getInstance("Title"); 2229 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2230 } 2231 2232 /** 2233 * Basic test of a locale resource based rule. 2234 */ 2235 @Test TestLocaleResource()2236 public void TestLocaleResource() { 2237 String DATA[] = { 2238 // id from to 2239 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2240 "Latin-el", "b", "\u03bc\u03c0", 2241 "Latin-Greek", "b", "\u03B2", 2242 "Greek-Latin/UNGEGN", "\u03B2", "v", 2243 "el-Latin", "\u03B2", "v", 2244 "Greek-Latin", "\u03B2", "b", 2245 }; 2246 for (int i=0; i<DATA.length; i+=3) { 2247 Transliterator t = Transliterator.getInstance(DATA[i]); 2248 expect(t, DATA[i+1], DATA[i+2]); 2249 } 2250 } 2251 2252 /** 2253 * Make sure parse errors reference the right line. 
2254 */ 2255 @Test TestParseError()2256 public void TestParseError() { 2257 String rule = 2258 "a > b;\n" + 2259 "# more stuff\n" + 2260 "d << b;"; 2261 try { 2262 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2263 if(t!=null){ 2264 errln("FAIL: Did not get expected exception"); 2265 } 2266 } catch (IllegalArgumentException e) { 2267 String err = e.getMessage(); 2268 if (err.indexOf("d << b") >= 0) { 2269 logln("Ok: " + err); 2270 } else { 2271 errln("FAIL: " + err); 2272 } 2273 return; 2274 } 2275 errln("FAIL: no syntax error"); 2276 } 2277 2278 /** 2279 * Make sure sets on output are disallowed. 2280 */ 2281 @Test TestOutputSet()2282 public void TestOutputSet() { 2283 String rule = "$set = [a-cm-n]; b > $set;"; 2284 Transliterator t = null; 2285 try { 2286 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2287 if(t!=null){ 2288 errln("FAIL: Did not get the expected exception"); 2289 } 2290 } catch (IllegalArgumentException e) { 2291 logln("Ok: " + e.getMessage()); 2292 return; 2293 } 2294 errln("FAIL: No syntax error"); 2295 } 2296 2297 /** 2298 * Test the use variable range pragma, making sure that use of 2299 * variable range characters is detected and flagged as an error. 
2300 */ 2301 @Test TestVariableRange()2302 public void TestVariableRange() { 2303 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2304 try { 2305 Transliterator t = 2306 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2307 if(t!=null){ 2308 errln("FAIL: Did not get the expected exception"); 2309 } 2310 } catch (IllegalArgumentException e) { 2311 logln("Ok: " + e.getMessage()); 2312 return; 2313 } 2314 errln("FAIL: No syntax error"); 2315 } 2316 2317 /** 2318 * Test invalid post context error handling 2319 */ 2320 @Test TestInvalidPostContext()2321 public void TestInvalidPostContext() { 2322 try { 2323 Transliterator t = 2324 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2325 if(t!=null){ 2326 errln("FAIL: Did not get the expected exception"); 2327 } 2328 } catch (IllegalArgumentException e) { 2329 String msg = e.getMessage(); 2330 if (msg.indexOf("a}b{c") >= 0) { 2331 logln("Ok: " + msg); 2332 } else { 2333 errln("FAIL: " + msg); 2334 } 2335 return; 2336 } 2337 errln("FAIL: No syntax error"); 2338 } 2339 2340 /** 2341 * Test ID form variants 2342 */ 2343 @Test TestIDForms()2344 public void TestIDForms() { 2345 String DATA[] = { 2346 "NFC", null, "NFD", 2347 "nfd", null, "NFC", // make sure case is ignored 2348 "Any-NFKD", null, "Any-NFKC", 2349 "Null", null, "Null", 2350 "-nfkc", "nfkc", "NFKD", 2351 "-nfkc/", "nfkc", "NFKD", 2352 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2353 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2354 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2355 "Source-", null, null, 2356 "Source/Variant-", null, null, 2357 "Source-/Variant", null, null, 2358 "/Variant", null, null, 2359 "/Variant-", null, null, 2360 "-/Variant", null, null, 2361 "-/", null, null, 2362 "-", null, null, 2363 "/", null, null, 2364 }; 2365 2366 for (int i=0; i<DATA.length; i+=3) { 2367 String ID = DATA[i]; 2368 String expID = DATA[i+1]; 2369 String 
expInvID = DATA[i+2]; 2370 boolean expValid = (expInvID != null); 2371 if (expID == null) { 2372 expID = ID; 2373 } 2374 try { 2375 Transliterator t = 2376 Transliterator.getInstance(ID); 2377 Transliterator u = t.getInverse(); 2378 if (t.getID().equals(expID) && 2379 u.getID().equals(expInvID)) { 2380 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2381 } else { 2382 errln("FAIL: getInstance(" + ID + ") => " + 2383 t.getID() + " x getInverse() => " + u.getID() + 2384 ", expected " + expInvID); 2385 } 2386 } catch (IllegalArgumentException e) { 2387 if (!expValid) { 2388 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2389 } else { 2390 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2391 } 2392 } 2393 } 2394 } 2395 checkRules(String label, Transliterator t2, String testRulesForward)2396 void checkRules(String label, Transliterator t2, String testRulesForward) { 2397 String rules2 = t2.toRules(true); 2398 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2399 rules2 = TestUtility.replace(rules2, " ", ""); 2400 rules2 = TestUtility.replace(rules2, "\n", ""); 2401 rules2 = TestUtility.replace(rules2, "\r", ""); 2402 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2403 2404 if (!rules2.equals(testRulesForward)) { 2405 errln(label); 2406 logln("GENERATED RULES: " + rules2); 2407 logln("SHOULD BE: " + testRulesForward); 2408 } 2409 } 2410 2411 /** 2412 * Mark's toRules test. 
2413 */ 2414 @Test TestToRulesMark()2415 public void TestToRulesMark() { 2416 2417 String testRules = 2418 "::[[:Latin:][:Mark:]];" 2419 + "::NFKD (NFC);" 2420 + "::Lower (Lower);" 2421 + "a <> \\u03B1;" // alpha 2422 + "::NFKC (NFD);" 2423 + "::Upper (Lower);" 2424 + "::Lower ();" 2425 + "::([[:Greek:][:Mark:]]);" 2426 ; 2427 String testRulesForward = 2428 "::[[:Latin:][:Mark:]];" 2429 + "::NFKD(NFC);" 2430 + "::Lower(Lower);" 2431 + "a > \\u03B1;" 2432 + "::NFKC(NFD);" 2433 + "::Upper (Lower);" 2434 + "::Lower ();" 2435 ; 2436 String testRulesBackward = 2437 "::[[:Greek:][:Mark:]];" 2438 + "::Lower (Upper);" 2439 + "::NFD(NFKC);" 2440 + "\\u03B1 > a;" 2441 + "::Lower(Lower);" 2442 + "::NFC(NFKD);" 2443 ; 2444 String source = "\u00E1"; // a-acute 2445 String target = "\u03AC"; // alpha-acute 2446 2447 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2448 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2449 2450 expect(t2, source, target); 2451 expect(t3, target, source); 2452 2453 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2454 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2455 } 2456 2457 /** 2458 * Test Escape and Unescape transliterators. 2459 */ 2460 @Test TestEscape()2461 public void TestEscape() { 2462 expect(Transliterator.getInstance("Hex-Any"), 2463 "\\x{40}\\U000000312Q", 2464 "@12Q"); 2465 expect(Transliterator.getInstance("Any-Hex/C"), 2466 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2467 "\\u0041\\U0010BEEF\\uFEED"); 2468 expect(Transliterator.getInstance("Any-Hex/Java"), 2469 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2470 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2471 expect(Transliterator.getInstance("Any-Hex/Perl"), 2472 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2473 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2474 } 2475 2476 /** 2477 * Make sure display names of variants look reasonable. 
*/
    @Test
    public void TestDisplayName() {
        String DATA[] = {
            // ID, forward name, reverse name
            // Update the text as necessary -- the important thing is
            // not the text itself, but how various cases are handled.

            // Basic test
            "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",

            // Variants
            "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",

            // Target-only IDs
            "NFC", "Any to NFC", "Any to NFD",
        };

        Locale US = Locale.US;

        for (int i = 0; i < DATA.length; i += 3) {
            // Forward ID, explicit locale.
            String name = Transliterator.getDisplayName(DATA[i], US);
            if (!name.equals(DATA[i+1])) {
                errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
                      name + ", expected " + DATA[i+1]);
            } else {
                logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
            }

            // ID of the REVERSE instance, explicit locale.
            Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
            name = Transliterator.getDisplayName(t.getID(), US);
            if (!name.equals(DATA[i+2])) {
                errln("FAIL: " + t.getID() + ".getDisplayName() => " +
                      name + ", expected " + DATA[i+2]);
            } else {
                logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
            }

            // Cover getDisplayName(String), which uses the default locale.
            // The default is process-wide state, so restore it in a finally
            // block: a failure reported below must not leave later tests
            // running with a changed default.
            ULocale save = ULocale.getDefault();
            ULocale.setDefault(ULocale.US);
            try {
                String name2 = Transliterator.getDisplayName(t.getID());
                if (!name.equals(name2)) {
                    errln("FAIL: getDisplayName with default locale failed");
                }
            } finally {
                ULocale.setDefault(save);
            }
        }
    }

    /**
     * Test anchor masking: a rule set containing both an anchored rule
     * ("^a > Q") and the unanchored rule it partly overlaps ("a > q") must
     * be accepted by the parser. Any IllegalArgumentException is a failure.
     */
    @Test
    public void TestAnchorMasking() {
        String rule = "^a > Q; a > q;";
        try {
            Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
            if (t == null) {
                // No exception is expected here; the old text ("Did not get
                // the expected exception") described the opposite situation.
                errln("FAIL: createFromRules returned null for " + rule);
            }
        } catch (IllegalArgumentException e) {
            errln("FAIL: " + rule + " => " + e);
        }
    }

    /**
     * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
     * during ICU4J modularization to remove dependency of tests on Transliterator.
     *
     * Walks every code point, looks up its script, and for each script long
     * name and short name seen for the first time checks that a filtered
     * transliterator "[:name:];NFD" can be instantiated.
     */
    @Test
    public void TestScriptAllCodepoints() {
        HashSet<String> scriptIdsChecked = new HashSet<String>();
        HashSet<String> scriptAbbrsChecked = new HashSet<String>();
        for (int i = 0; i <= 0x10ffff; i++) {
            int code = UScript.getScript(i);
            if (code == UScript.INVALID_CODE) {
                errln("UScript.getScript for codepoint 0x" + hex(i) + " failed");
            }
            String id = UScript.getName(code);
            String abbr = UScript.getShortName(code);
            // add() returns true only the first time a name is seen, so each
            // name is checked once, at the first code point that carries it.
            if (scriptIdsChecked.add(id)) {
                checkScriptFilterInstance(id, i);
            }
            if (scriptAbbrsChecked.add(abbr)) {
                checkScriptFilterInstance(abbr, i);
            }
        }
    }

    /**
     * Tries to instantiate the transliterator "[:scriptName:];NFD" and
     * reports an error if that yields null or throws.
     *
     * @param scriptName script long or short name used as the filter property
     * @param codePoint  code point at which the name was first seen; only
     *                   used in the error text
     */
    private void checkScriptFilterInstance(String scriptName, int codePoint) {
        String filteredId = "[:" + scriptName + ":];NFD";
        try {
            Transliterator t = Transliterator.getInstance(filteredId);
            if (t == null) {
                errln("Failed to create transliterator for " + hex(codePoint) +
                        " script code: " + scriptName);
            }
        } catch (Exception e) {
            errln("Failed to create transliterator for " + hex(codePoint)
                    + " script code: " + scriptName
                    + " Exception: " + e.getMessage());
        }
    }

    // { ID, rules } pairs that TestSpecialCases registers through
    // DummyFactory before running testCases and unregisters afterwards.
    static final String[][] registerRules = {
        {"Any-Dev1", "x > X; y > Y;"},
        {"Any-Dev2", "XY > Z"},
        {"Greek-Latin/FAKE",
            "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
            "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
            "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
            "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
        },
    };
2601 static final String DESERET_DEE = UTF16.valueOf(0x10414); 2602 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2603 2604 static final String[][] testCases = { 2605 2606 // NORMALIZATION 2607 // should add more test cases 2608 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2609 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2610 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2611 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2612 2613 // mp -> b BUG 2614 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2615 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2616 2617 // check for devanagari bug 2618 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2619 2620 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2621 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2622 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2623 //TODO: enable this test once Titlecase works right 2624 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2625 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2626 2627 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2628 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2629 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2630 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2631 2632 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2633 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2634 2635 // FORMS OF S 2636 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2637 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2638 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2639 
{"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2640 2641 // Tatiana bug 2642 // Upper: TAT\u02B9\u00C2NA 2643 // Lower: tat\u02B9\u00E2na 2644 // Title: Tat\u02B9\u00E2na 2645 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2646 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2647 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2648 }; 2649 2650 @Test TestSpecialCases()2651 public void TestSpecialCases() { 2652 2653 for (int i = 0; i < registerRules.length; ++i) { 2654 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2655 registerRules[i][1], Transliterator.FORWARD); 2656 DummyFactory.add(registerRules[i][0], t); 2657 } 2658 for (int i = 0; i < testCases.length; ++i) { 2659 String name = testCases[i][0]; 2660 Transliterator t = Transliterator.getInstance(name); 2661 String id = t.getID(); 2662 String source = testCases[i][1]; 2663 String target = null; 2664 2665 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2666 2667 if (testCases[i].length > 2) target = testCases[i][2]; 2668 else if (id.equalsIgnoreCase("NFD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD); 2669 else if (id.equalsIgnoreCase("NFC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC); 2670 else if (id.equalsIgnoreCase("NFKD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD); 2671 else if (id.equalsIgnoreCase("NFKC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC); 2672 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2673 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2674 2675 expect(t, source, target); 2676 } 2677 for (int i = 0; i < registerRules.length; ++i) { 2678 Transliterator.unregister(registerRules[i][0]); 2679 } 2680 } 2681 2682 // 
seems like there should be an easier way to just register an instance of a transliterator 2683 2684 static class DummyFactory implements Transliterator.Factory { 2685 static DummyFactory singleton = new DummyFactory(); 2686 static HashMap m = new HashMap(); 2687 2688 // Since Transliterators are immutable, we don't have to clone on set & get add(String ID, Transliterator t)2689 static void add(String ID, Transliterator t) { 2690 m.put(ID, t); 2691 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2692 Transliterator.registerFactory(ID, singleton); 2693 } 2694 @Override getInstance(String ID)2695 public Transliterator getInstance(String ID) { 2696 return (Transliterator) m.get(ID); 2697 } 2698 } 2699 2700 @Test TestCasing()2701 public void TestCasing() { 2702 Transliterator toLower = Transliterator.getInstance("lower"); 2703 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2704 Transliterator toUpper = Transliterator.getInstance("upper"); 2705 Transliterator toTitle = Transliterator.getInstance("title"); 2706 for (int i = 0; i < 0x600; ++i) { 2707 String s = UTF16.valueOf(i); 2708 2709 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2710 assertEquals("Lowercase", lower, toLower.transform(s)); 2711 2712 String casefold = UCharacter.foldCase(s, true); 2713 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2714 2715 if (i != 0x0345) { 2716 // ICU 60 changes the default titlecasing index adjustment. 2717 // For word breaks it is mostly the same as before, 2718 // but it is different for the iota subscript (the only cased combining mark). 2719 // This should be ok because the iota subscript is not supposed to appear 2720 // at the start of a word. 2721 // The title Transliterator is far below feature parity with the 2722 // UCharacter and CaseMap titlecasing functions. 
2723 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2724 assertEquals("Title", title, toTitle.transform(s)); 2725 } 2726 2727 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2728 assertEquals("Upper", upper, toUpper.transform(s)); 2729 } 2730 } 2731 2732 @Test TestSurrogateCasing()2733 public void TestSurrogateCasing () { 2734 // check that casing handles surrogates 2735 // titlecase is currently defective 2736 int dee = UTF16.charAt(DESERET_dee,0); 2737 int DEE = UCharacter.toTitleCase(dee); 2738 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2739 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2740 } 2741 2742 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) { 2743 errln("Fails uppercase of surrogates"); 2744 } 2745 2746 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2747 errln("Fails lowercase of surrogates"); 2748 } 2749 } 2750 2751 2752 @Test TestFunction()2753 public void TestFunction() { 2754 // Careful with spacing and ';' here: Phrase this exactly 2755 // as toRules() is going to return it. If toRules() changes 2756 // with regard to spacing or ';', then adjust this string. 
2757 String rule = 2758 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2759 2760 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2761 if (t == null) { 2762 errln("FAIL: createFromRules failed"); 2763 return; 2764 } 2765 2766 String r = t.toRules(true); 2767 if (r.equals(rule)) { 2768 logln("OK: toRules() => " + r); 2769 } else { 2770 errln("FAIL: toRules() => " + r + 2771 ", expected " + rule); 2772 } 2773 2774 expect(t, "The Quick Brown Fox", 2775 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2776 rule = 2777 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2778 2779 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2780 if (t == null) { 2781 errln("FAIL: createFromRules failed"); 2782 return; 2783 } 2784 2785 r = t.toRules(true); 2786 if (r.equals(rule)) { 2787 logln("OK: toRules() => " + r); 2788 } else { 2789 errln("FAIL: toRules() => " + r + 2790 ", expected " + rule); 2791 } 2792 2793 expect(t, "\u0301", 2794 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2795 } 2796 2797 @Test TestInvalidBackRef()2798 public void TestInvalidBackRef() { 2799 String rule = ". > $1;"; 2800 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2801 try { 2802 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2803 if (t != null) { 2804 errln("FAIL: createFromRules should have returned NULL"); 2805 } 2806 errln("FAIL: Ok: . > $1; => no error"); 2807 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2808 if (t2 != null) { 2809 errln("FAIL: createFromRules should have returned NULL"); 2810 } 2811 errln("FAIL: Ok: . > $1; => no error"); 2812 } catch (IllegalArgumentException e) { 2813 logln("Ok: . 
> $1; => " + e.getMessage()); 2814 } 2815 } 2816 2817 @Test TestMulticharStringSet()2818 public void TestMulticharStringSet() { 2819 // Basic testing 2820 String rule = 2821 " [{aa}] > x;" + 2822 " a > y;" + 2823 " [b{bc}] > z;" + 2824 "[{gd}] { e > q;" + 2825 " e } [{fg}] > r;" ; 2826 2827 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2828 if (t == null) { 2829 errln("FAIL: createFromRules failed"); 2830 return; 2831 } 2832 2833 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2834 "y x yz z d gd de gdq gdqfg ddrfg"); 2835 2836 // Overlapped string test. Make sure that when multiple 2837 // strings can match that the longest one is matched. 2838 rule = 2839 " [a {ab} {abc}] > x;" + 2840 " b > y;" + 2841 " c > z;" + 2842 " q [t {st} {rst}] { e > p;" ; 2843 2844 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2845 if (t == null) { 2846 errln("FAIL: createFromRules failed"); 2847 return; 2848 } 2849 2850 expect(t, "a ab abc qte qste qrste", 2851 "x x x qtp qstp qrstp"); 2852 } 2853 2854 /** 2855 * Test that user-registered transliterators can be used under function 2856 * syntax. 2857 */ 2858 @Test TestUserFunction()2859 public void TestUserFunction() { 2860 Transliterator t; 2861 2862 // There's no need to register inverses if we don't use them 2863 TestUserFunctionFactory.add("Any-gif", 2864 Transliterator.createFromRules("gif", 2865 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2866 Transliterator.FORWARD)); 2867 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2868 2869 TestUserFunctionFactory.add("Any-RemoveCurly", 2870 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2871 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2872 2873 logln("Trying &hex"); 2874 t = Transliterator.createFromRules("hex2", "(.) 
> &hex($1);", Transliterator.FORWARD); 2875 logln("Registering"); 2876 TestUserFunctionFactory.add("Any-hex2", t); 2877 t = Transliterator.getInstance("Any-hex2"); 2878 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2879 2880 logln("Trying &gif"); 2881 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2882 logln("Registering"); 2883 TestUserFunctionFactory.add("Any-gif2", t); 2884 t = Transliterator.getInstance("Any-gif2"); 2885 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2886 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2887 2888 // Test that filters are allowed after & 2889 t = Transliterator.createFromRules("test", 2890 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2891 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2892 2893 // Unregister our test stuff 2894 TestUserFunctionFactory.unregister(); 2895 } 2896 2897 static class TestUserFunctionFactory implements Transliterator.Factory { 2898 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 2899 static HashMap m = new HashMap(); 2900 add(String ID, Transliterator t)2901 static void add(String ID, Transliterator t) { 2902 m.put(new CaseInsensitiveString(ID), t); 2903 Transliterator.registerFactory(ID, singleton); 2904 } 2905 2906 @Override getInstance(String ID)2907 public Transliterator getInstance(String ID) { 2908 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 2909 } 2910 unregister()2911 static void unregister() { 2912 Iterator ids = m.keySet().iterator(); 2913 while (ids.hasNext()) { 2914 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 2915 Transliterator.unregister(id.getString()); 2916 ids.remove(); // removes pair from m 2917 } 2918 } 2919 } 2920 2921 /** 2922 * Test the Any-X transliterators. 
2923 */ 2924 @Test TestAnyX()2925 public void TestAnyX() { 2926 Transliterator anyLatin = 2927 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 2928 2929 expect(anyLatin, 2930 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 2931 "greek:abkABK hiragana:abuku cyrillic:abc"); 2932 } 2933 2934 /** 2935 * Test Any-X transliterators with sample letters from all scripts. 2936 */ 2937 @Test TestAny()2938 public void TestAny() { 2939 UnicodeSet alphabetic = new UnicodeSet("[:alphabetic:]").freeze(); 2940 StringBuffer testString = new StringBuffer(); 2941 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 2942 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 2943 int count = 5; 2944 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 2945 testString.append(it.getString()); 2946 if (--count < 0) break; 2947 } 2948 } 2949 logln("Sample set for Any-Latin: " + testString); 2950 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 2951 String result = anyLatin.transliterate(testString.toString()); 2952 logln("Sample result for Any-Latin: " + result); 2953 } 2954 2955 2956 /** 2957 * Test the source and target set API. These are only implemented 2958 * for RBT and CompoundTransliterator at this time. 
2959 */ 2960 @Test TestSourceTargetSet()2961 public void TestSourceTargetSet() { 2962 // Rules 2963 String r = 2964 "a > b; " + 2965 "r [x{lu}] > q;"; 2966 2967 // Expected source 2968 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]"); 2969 2970 // Expected target 2971 UnicodeSet expTrg = new UnicodeSet("[bq]"); 2972 2973 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 2974 UnicodeSet src = t.getSourceSet(); 2975 UnicodeSet trg = t.getTargetSet(); 2976 2977 if (src.equals(expSrc) && trg.equals(expTrg)) { 2978 logln("Ok: " + r + " => source = " + src.toPattern(true) + 2979 ", target = " + trg.toPattern(true)); 2980 } else { 2981 errln("FAIL: " + r + " => source = " + src.toPattern(true) + 2982 ", expected " + expSrc.toPattern(true) + 2983 "; target = " + trg.toPattern(true) + 2984 ", expected " + expTrg.toPattern(true)); 2985 } 2986 } 2987 2988 @Test TestSourceTargetSetFilter()2989 public void TestSourceTargetSetFilter() { 2990 String[][] tests = { 2991 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE 2992 {"[] Latin-Greek", null, "[\']"}, 2993 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"}, 2994 {"[] Any-Latin"}, 2995 {"[] casefold"}, 2996 {"[] NFKD;"}, 2997 {"[] NFKC;"}, 2998 {"[] hex"}, 2999 {"[] lower"}, 3000 {"[] null"}, 3001 {"[] remove"}, 3002 {"[] title"}, 3003 {"[] upper"}, 3004 }; 3005 UnicodeSet expectedSource = UnicodeSet.EMPTY; 3006 for (String[] testPair : tests) { 3007 String test = testPair[0]; 3008 Transliterator t0; 3009 try { 3010 t0 = Transliterator.getInstance(test); 3011 } catch (Exception e) { 3012 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD); 3013 } 3014 Transliterator t1; 3015 try { 3016 t1 = t0.getInverse(); 3017 } catch (Exception e) { 3018 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE); 3019 } 3020 int targetIndex = 0; 3021 for (Transliterator t : new Transliterator[]{t0, t1}) { 3022 boolean ok; 3023 UnicodeSet source = t.getSourceSet(); 3024 String 
direction = t == t0 ? "FORWARD\t" : "REVERSE\t"; 3025 targetIndex++; 3026 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource 3027 : testPair[targetIndex] == null ? expectedSource 3028 : testPair[targetIndex].length() == 0 ? expectedSource 3029 : new UnicodeSet(testPair[targetIndex]); 3030 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source); 3031 if (!ok) { // for debugging 3032 source = t.getSourceSet(); 3033 } 3034 UnicodeSet target = t.getTargetSet(); 3035 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target); 3036 if (!ok) { // for debugging 3037 target = t.getTargetSet(); 3038 } 3039 } 3040 } 3041 } 3042 isAtomic(String s, String t, Transliterator trans)3043 static boolean isAtomic(String s, String t, Transliterator trans) { 3044 for (int i = 1; i < s.length(); ++i) { 3045 if (!CharSequences.onCharacterBoundary(s, i)) { 3046 continue; 3047 } 3048 String q = trans.transform(s.substring(0,i)); 3049 if (t.startsWith(q)) { 3050 String r = trans.transform(s.substring(i)); 3051 if (t.length() == q.length() + r.length() && t.endsWith(r)) { 3052 return false; 3053 } 3054 } 3055 } 3056 return true; 3057 // // make sure that every part is different 3058 // if (s.codePointCount(0, s.length()) > 1) { 3059 // int[] codePoints = It.codePoints(s); 3060 // for (int k = 0; k < codePoints.length; ++k) { 3061 // int pos = indexOf(t,codePoints[k]); 3062 // if (pos >= 0) { 3063 // int x; 3064 // } 3065 // } 3066 // if (s.contains("\u00C0")) { 3067 // logln("\u00C0"); 3068 // } 3069 // } 3070 } 3071 addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3072 static void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) { 3073 expectedSource.addAll(s); 3074 if (t.length() > 0) { 3075 expectedTarget.addAll(t); 3076 } 3077 } 3078 3079 // private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) { 
3080 // disorderedMarks.add(s); 3081 // for (int j = 1; j < s.length(); ++j) { 3082 // if (CharSequences.onCharacterBoundary(s, j)) { 3083 // String shorter = s.substring(0,j); 3084 // disorderedMarks.add(shorter); 3085 // disorderedMarks.add(nfc.normalize(shorter) + s.substring(j)); 3086 // } 3087 // } 3088 // } 3089 3090 @Test TestCharUtils()3091 public void TestCharUtils() { 3092 String[][] startTests = { 3093 {"1", "a", "ab"}, 3094 {"0", "a", "xb"}, 3095 {"0", "\uD800", "\uD800\uDC01"}, 3096 {"1", "\uD800a", "\uD800b"}, 3097 {"0", "\uD800\uDC00", "\uD800\uDC01"}, 3098 }; 3099 for (String[] row : startTests) { 3100 int actual = findSharedStartLength(row[1], row[2]); 3101 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")", 3102 Integer.parseInt(row[0]), 3103 actual); 3104 } 3105 String[][] endTests = { 3106 {"0", "\uDC00", "\uD801\uDC00"}, 3107 {"1", "a", "ba"}, 3108 {"0", "a", "bx"}, 3109 {"1", "a\uDC00", "b\uDC00"}, 3110 {"0", "\uD800\uDC00", "\uD801\uDC00"}, 3111 }; 3112 for (String[] row : endTests) { 3113 int actual = findSharedEndLength(row[1], row[2]); 3114 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")", 3115 Integer.parseInt(row[0]), 3116 actual); 3117 } 3118 } 3119 3120 /** 3121 * @param s 3122 * @param t 3123 * @return 3124 */ 3125 // TODO make generally available findSharedStartLength(CharSequence s, CharSequence t)3126 private static int findSharedStartLength(CharSequence s, CharSequence t) { 3127 int min = Math.min(s.length(), t.length()); 3128 int i; 3129 char sch, tch; 3130 for (i = 0; i < min; ++i) { 3131 sch = s.charAt(i); 3132 tch = t.charAt(i); 3133 if (sch != tch) { 3134 break; 3135 } 3136 } 3137 return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? 
i : i - 1; 3138 } 3139 3140 /** 3141 * @param s 3142 * @param t 3143 * @return 3144 */ 3145 // TODO make generally available findSharedEndLength(CharSequence s, CharSequence t)3146 private static int findSharedEndLength(CharSequence s, CharSequence t) { 3147 int slength = s.length(); 3148 int tlength = t.length(); 3149 int min = Math.min(slength, tlength); 3150 int i; 3151 char sch, tch; 3152 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho' 3153 for (i = 0; i < min; ++i) { 3154 sch = s.charAt(slength - i - 1); 3155 tch = t.charAt(tlength - i - 1); 3156 if (sch != tch) { 3157 break; 3158 } 3159 } 3160 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1; 3161 } 3162 3163 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK} 3164 assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3165 static void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) { 3166 boolean haveError = false; 3167 if (!actual.containsAll(empirical)) { 3168 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual); 3169 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing)); 3170 haveError = true; 3171 } 3172 if (!empirical.containsAll(actual)) { 3173 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical); 3174 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra)); 3175 haveError = true; 3176 } 3177 if (!haveError) { 3178 logln("OK " + message + ' ' + toPattern(empirical)); 3179 } 3180 } 3181 toPattern(UnicodeSet missing)3182 private static String toPattern(UnicodeSet missing) { 3183 String result = missing.toPattern(false); 3184 if (result.length() < 200) { 3185 return result; 3186 } 3187 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 
200 : 199) + "\u2026"; 3188 } 3189 3190 3191 /** 3192 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3193 */ 3194 @Test TestPatternWhitespace()3195 public void TestPatternWhitespace() { 3196 // Rules 3197 String r = "a > \u200E b;"; 3198 3199 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3200 3201 expect(t, "a", "b"); 3202 3203 // UnicodeSet 3204 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3205 3206 if (set.contains(0x200E)) { 3207 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3208 } 3209 } 3210 3211 @Test TestAlternateSyntax()3212 public void TestAlternateSyntax() { 3213 // U+2206 == & 3214 // U+2190 == < 3215 // U+2192 == > 3216 // U+2194 == <> 3217 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3218 "abc", 3219 "xbz"); 3220 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3221 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3222 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3223 } 3224 3225 @Test TestPositionAPI()3226 public void TestPositionAPI() { 3227 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3228 Transliterator.Position b = new Transliterator.Position(a); 3229 Transliterator.Position c = new Transliterator.Position(); 3230 c.set(a); 3231 // Call the toString() API: 3232 if (a.equals(b) && a.equals(c)) { 3233 logln("Ok: " + a + " == " + b + " == " + c); 3234 } else { 3235 errln("FAIL: " + a + " != " + b + " != " + c); 3236 } 3237 } 3238 3239 //====================================================================== 3240 // New tests for the ::BEGIN/::END syntax 3241 //====================================================================== 3242 3243 private static final String[] BEGIN_END_RULES = new String[] { 3244 // [0] 3245 "abc > xy;" 3246 + "aba > z;", 3247 3248 // [1] 3249 /* 3250 "::BEGIN;" 3251 + "abc > xy;" 3252 + "::END;" 3253 + "::BEGIN;" 3254 + "aba > z;" 3255 + "::END;", 3256 */ 3257 "", // test case commented 
out below, this is here to keep from messing up the indexes 3258 3259 // [2] 3260 /* 3261 "abc > xy;" 3262 + "::BEGIN;" 3263 + "aba > z;" 3264 + "::END;", 3265 */ 3266 "", // test case commented out below, this is here to keep from messing up the indexes 3267 3268 // [3] 3269 /* 3270 "::BEGIN;" 3271 + "abc > xy;" 3272 + "::END;" 3273 + "aba > z;", 3274 */ 3275 "", // test case commented out below, this is here to keep from messing up the indexes 3276 3277 // [4] 3278 "abc > xy;" 3279 + "::Null;" 3280 + "aba > z;", 3281 3282 // [5] 3283 "::Upper;" 3284 + "ABC > xy;" 3285 + "AB > x;" 3286 + "C > z;" 3287 + "::Upper;" 3288 + "XYZ > p;" 3289 + "XY > q;" 3290 + "Z > r;" 3291 + "::Upper;", 3292 3293 // [6] 3294 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3295 + "$delim = [\\-$ws];" 3296 + "$ws $delim* > ' ';" 3297 + "'-' $delim* > '-';", 3298 3299 // [7] 3300 "::Null;" 3301 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3302 + "$delim = [\\-$ws];" 3303 + "$ws $delim* > ' ';" 3304 + "'-' $delim* > '-';", 3305 3306 // [8] 3307 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3308 + "$delim = [\\-$ws];" 3309 + "$ws $delim* > ' ';" 3310 + "'-' $delim* > '-';" 3311 + "::Null;", 3312 3313 // [9] 3314 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3315 + "$delim = [\\-$ws];" 3316 + "::Null;" 3317 + "$ws $delim* > ' ';" 3318 + "'-' $delim* > '-';", 3319 3320 // [10] 3321 /* 3322 "::BEGIN;" 3323 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3324 + "$delim = [\\-$ws];" 3325 + "::END;" 3326 + "$ws $delim* > ' ';" 3327 + "'-' $delim* > '-';", 3328 */ 3329 "", // test case commented out below, this is here to keep from messing up the indexes 3330 3331 // [11] 3332 /* 3333 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3334 + "$delim = [\\-$ws];" 3335 + "::BEGIN;" 3336 + "$ws $delim* > ' ';" 3337 + "'-' $delim* > '-';" 3338 + "::END;", 3339 */ 3340 "", // test case commented out below, this is here to keep from messing up the indexes 3341 3342 // [12] 3343 /* 3344 "$ws = 
[[:Separator:][\\u0009-\\u000C]$];" 3345 + "$delim = [\\-$ws];" 3346 + "$ab = [ab];" 3347 + "::BEGIN;" 3348 + "$ws $delim* > ' ';" 3349 + "'-' $delim* > '-';" 3350 + "::END;" 3351 + "::BEGIN;" 3352 + "$ab { ' ' } $ab > '-';" 3353 + "c { ' ' > ;" 3354 + "::END;" 3355 + "::BEGIN;" 3356 + "'a-a' > a\\%|a;" 3357 + "::END;", 3358 */ 3359 "", // test case commented out below, this is here to keep from messing up the indexes 3360 3361 // [13] 3362 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3363 + "$delim = [\\-$ws];" 3364 + "$ab = [ab];" 3365 + "::Null;" 3366 + "$ws $delim* > ' ';" 3367 + "'-' $delim* > '-';" 3368 + "::Null;" 3369 + "$ab { ' ' } $ab > '-';" 3370 + "c { ' ' > ;" 3371 + "::Null;" 3372 + "'a-a' > a\\%|a;", 3373 3374 // [14] 3375 /* 3376 "::[abc];" 3377 + "::BEGIN;" 3378 + "abc > xy;" 3379 + "::END;" 3380 + "::BEGIN;" 3381 + "aba > yz;" 3382 + "::END;" 3383 + "::Upper;", 3384 */ 3385 "", // test case commented out below, this is here to keep from messing up the indexes 3386 3387 // [15] 3388 "::[abc];" 3389 + "abc > xy;" 3390 + "::Null;" 3391 + "aba > yz;" 3392 + "::Upper;", 3393 3394 // [16] 3395 /* 3396 "::[abc];" 3397 + "::BEGIN;" 3398 + "abc <> xy;" 3399 + "::END;" 3400 + "::BEGIN;" 3401 + "aba <> yz;" 3402 + "::END;" 3403 + "::Upper(Lower);" 3404 + "::([XYZ]);", 3405 */ 3406 "", // test case commented out below, this is here to keep from messing up the indexes 3407 3408 // [17] 3409 "::[abc];" 3410 + "abc <> xy;" 3411 + "::Null;" 3412 + "aba <> yz;" 3413 + "::Upper(Lower);" 3414 + "::([XYZ]);" 3415 }; 3416 3417 /* 3418 (This entire test is commented out below and will need some heavy revision when we re-add 3419 the ::BEGIN/::END stuff) 3420 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3421 // [7] 3422 "::BEGIN;" 3423 + "abc > xy;" 3424 + "::BEGIN;" 3425 + "aba > z;" 3426 + "::END;" 3427 + "::END;", 3428 3429 // [8] 3430 "abc > xy;" 3431 + " aba > z;" 3432 + "::END;", 3433 3434 // [9] 3435 "::BEGIN;" 3436 + "::Upper;" 3437 + 
"::END;" 3438 }; 3439 */ 3440 3441 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3442 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3443 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 3444 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3445 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3446 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3447 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3448 3449 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3450 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3451 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3452 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3453 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3454 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3455 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3456 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3457 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3458 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3459 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3460 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3461 3462 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3463 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3464 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3465 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3466 }; 3467 3468 @Test TestBeginEnd()3469 public void TestBeginEnd() { 3470 // run through the list of test cases above 3471 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3472 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3473 } 3474 3475 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3476 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3477 Transliterator.REVERSE); 3478 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba 
yz aba"); 3479 3480 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3481 // that all of them cause errors 3482 /* 3483 (commented out until we have the real ::BEGIN/::END stuff in place 3484 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3485 try { 3486 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3487 Transliterator.FORWARD); 3488 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3489 } 3490 catch (IllegalArgumentException e) { 3491 // this is supposed to happen; do nothing here 3492 } 3493 } 3494 */ 3495 } 3496 3497 @Test TestBeginEndToRules()3498 public void TestBeginEndToRules() { 3499 // run through the same list of test cases we used above, but this time, instead of just 3500 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3501 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3502 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3503 // to (i.e., does the same thing as) the original rule set 3504 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3505 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3506 Transliterator.FORWARD); 3507 String rules = t.toRules(false); 3508 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3509 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3510 } 3511 3512 // do the same thing for the reversible test case 3513 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3514 Transliterator.REVERSE); 3515 String rules = reversed.toRules(false); 3516 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3517 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3518 } 3519 3520 
@Test TestRegisterAlias()3521 public void TestRegisterAlias() { 3522 String longID = "Lower;[aeiou]Upper"; 3523 String shortID = "Any-CapVowels"; 3524 String reallyShortID = "CapVowels"; 3525 3526 Transliterator.registerAlias(shortID, longID); 3527 3528 Transliterator t1 = Transliterator.getInstance(longID); 3529 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3530 3531 if (!t1.getID().equals(longID)) 3532 errln("Transliterator instantiated with long ID doesn't have long ID"); 3533 if (!t2.getID().equals(reallyShortID)) 3534 errln("Transliterator instantiated with short ID doesn't have short ID"); 3535 3536 if (!t1.toRules(true).equals(t2.toRules(true))) 3537 errln("Alias transliterators aren't the same"); 3538 3539 Transliterator.unregister(shortID); 3540 3541 try { 3542 t1 = Transliterator.getInstance(shortID); 3543 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3544 } 3545 catch (IllegalArgumentException e) { 3546 } 3547 3548 // try the same thing again, but this time with something other than 3549 // an instance of CompoundTransliterator 3550 String realID = "Latin-Greek"; 3551 String fakeID = "Latin-dlgkjdflkjdl"; 3552 Transliterator.registerAlias(fakeID, realID); 3553 3554 t1 = Transliterator.getInstance(realID); 3555 t2 = Transliterator.getInstance(fakeID); 3556 3557 if (!t1.toRules(true).equals(t2.toRules(true))) 3558 errln("Alias transliterators aren't the same"); 3559 3560 Transliterator.unregister(fakeID); 3561 } 3562 3563 /** 3564 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 
3565 */ 3566 @Test TestHalfwidthFullwidth()3567 public void TestHalfwidthFullwidth() { 3568 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3569 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3570 3571 // Array of 3n items 3572 // Each item is 3573 // "hf"|"fh"|"both", 3574 // <Halfwidth>, 3575 // <Fullwidth> 3576 String[] DATA = { 3577 "both", 3578 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3579 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3580 }; 3581 3582 for (int i=0; i<DATA.length; i+=3) { 3583 switch (DATA[i].charAt(0)) { 3584 case 'h': // Halfwidth-Fullwidth only 3585 expect(hf, DATA[i+1], DATA[i+2]); 3586 break; 3587 case 'f': // Fullwidth-Halfwidth only 3588 expect(fh, DATA[i+2], DATA[i+1]); 3589 break; 3590 case 'b': // both directions 3591 expect(hf, DATA[i+1], DATA[i+2]); 3592 expect(fh, DATA[i+2], DATA[i+1]); 3593 break; 3594 } 3595 } 3596 3597 } 3598 3599 /** 3600 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3601 * TODO: confirm that the expected results are correct. 3602 * For now, test just confirms that C++ and Java give identical results. 3603 */ 3604 @Test TestThai()3605 public void TestThai() { 3606 // The expectations in this test heavily depends on the Thai dictionary. 3607 // Therefore, we skip this test under the LSTM configuration. 3608 org.junit.Assume.assumeTrue(!RBBITstUtils.skipDictionaryTest()); 3609 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3610 String thaiText = 3611 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3612 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3613 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3614 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. 
\u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3615 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3616 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3617 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3618 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3619 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3620 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3621 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3622 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3623 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3624 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3625 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3626 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3627 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3628 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3629 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3630 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3631 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3632 "\u0e01\u0e25\u0e38\u0e48\u0e21. 
\u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3633 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3634 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3635 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3636 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3637 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3638 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3639 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3640 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3641 3642 String latinText = 3643 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3644 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3645 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3646 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3647 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3648 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3649 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3650 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. 
m\u1ecb\u0300m" + 3651 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3652 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3653 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3654 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3655 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3656 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3657 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3658 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3659 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3660 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3661 3662 expect(tr, thaiText, latinText); 3663 } 3664 3665 3666 //====================================================================== 3667 // These tests are not mirrored (yet) in icu4c at 3668 // source/test/intltest/transtst.cpp 3669 //====================================================================== 3670 3671 /** 3672 * Improve code coverage. 3673 */ 3674 @Test TestCoverage()3675 public void TestCoverage() { 3676 // NullTransliterator 3677 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3678 expect(t, "a", "a"); 3679 3680 // Source, target set 3681 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3682 t.setFilter(new UnicodeSet("[A-Z]")); 3683 logln("source = " + t.getSourceSet()); 3684 logln("target = " + t.getTargetSet()); 3685 3686 t = Transliterator.createFromRules("x", "(.) 
> &Any-Hex($1);", Transliterator.FORWARD); 3687 logln("source = " + t.getSourceSet()); 3688 logln("target = " + t.getTargetSet()); 3689 } 3690 /* 3691 * Test case for threading problem in NormalizationTransliterator 3692 * reported by ticket#5160 3693 */ 3694 @Test TestT5160()3695 public void TestT5160() { 3696 final String[] testData = { 3697 "a", 3698 "b", 3699 "\u09BE", 3700 "A\u0301", 3701 }; 3702 final String[] expected = { 3703 "a", 3704 "b", 3705 "\u09BE", 3706 "\u00C1", 3707 }; 3708 Transliterator translit = Transliterator.getInstance("NFC"); 3709 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 3710 for (int i = 0; i < tasks.length; i++) { 3711 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 3712 } 3713 TestUtil.runUntilDone(tasks); 3714 3715 for (int i = 0; i < tasks.length; i++) { 3716 if (tasks[i].getErrorMessage() != null) { 3717 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 3718 break; 3719 } 3720 } 3721 } 3722 3723 static class NormTranslitTask implements Runnable { 3724 Transliterator translit; 3725 String testData; 3726 String expectedData; 3727 String errorMsg; 3728 NormTranslitTask(Transliterator translit, String testData, String expectedData)3729 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 3730 this.translit = translit; 3731 this.testData = testData; 3732 this.expectedData = expectedData; 3733 } 3734 3735 @Override run()3736 public void run() { 3737 errorMsg = null; 3738 StringBuffer inBuf = new StringBuffer(testData); 3739 StringBuffer expectedBuf = new StringBuffer(expectedData); 3740 3741 for(int i = 0; i < 1000; i++) { 3742 String in = inBuf.toString(); 3743 String out = translit.transliterate(in); 3744 String expected = expectedBuf.toString(); 3745 if (!out.equals(expected)) { 3746 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 3747 break; 3748 } 3749 inBuf.append(testData); 3750 
expectedBuf.append(expectedData); 3751 } 3752 } 3753 getErrorMessage()3754 public String getErrorMessage() { 3755 return errorMsg; 3756 } 3757 } 3758 3759 //====================================================================== 3760 // Support methods 3761 //====================================================================== expect(String rules, String source, String expectedResult, Transliterator.Position pos)3762 static void expect(String rules, 3763 String source, 3764 String expectedResult, 3765 Transliterator.Position pos) { 3766 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 3767 expect(t, source, expectedResult, pos); 3768 } 3769 expect(String rules, String source, String expectedResult)3770 static void expect(String rules, String source, String expectedResult) { 3771 expect(rules, source, expectedResult, null); 3772 } 3773 expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3774 static void expect(Transliterator t, String source, String expectedResult, 3775 Transliterator reverseTransliterator) { 3776 expect(t, source, expectedResult); 3777 if (reverseTransliterator != null) { 3778 expect(reverseTransliterator, expectedResult, source); 3779 } 3780 } 3781 expect(Transliterator t, String source, String expectedResult)3782 static void expect(Transliterator t, String source, String expectedResult) { 3783 expect(t, source, expectedResult, (Transliterator.Position) null); 3784 } 3785 expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)3786 static void expect(Transliterator t, String source, String expectedResult, 3787 Transliterator.Position pos) { 3788 if (pos == null) { 3789 String result = t.transliterate(source); 3790 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 3791 } 3792 3793 Transliterator.Position index = null; 3794 if (pos == null) { 3795 index = new Transliterator.Position(0, 
source.length(), 0, source.length()); 3796 } else { 3797 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 3798 pos.start, pos.limit); 3799 } 3800 3801 ReplaceableString rsource = new ReplaceableString(source); 3802 3803 t.finishTransliteration(rsource, index); 3804 // Do it all at once -- below we do it incrementally 3805 3806 if (index.start != index.limit) { 3807 expectAux(t.getID() + ":UNFINISHED", source, 3808 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 3809 return; 3810 } 3811 String result = rsource.toString(); 3812 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 3813 3814 3815 if (pos == null) { 3816 index = new Transliterator.Position(); 3817 } else { 3818 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 3819 pos.start, pos.limit); 3820 } 3821 3822 // Test incremental transliteration -- this result 3823 // must be the same after we finalize (see below). 3824 List<String> v = new ArrayList<String>(); 3825 v.add(source); 3826 rsource.replace(0, rsource.length(), ""); 3827 if (pos != null) { 3828 rsource.replace(0, 0, source); 3829 v.add(UtilityExtensions.formatInput(rsource, index)); 3830 t.transliterate(rsource, index); 3831 v.add(UtilityExtensions.formatInput(rsource, index)); 3832 } else { 3833 for (int i=0; i<source.length(); ++i) { 3834 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 3835 //log.append(source.charAt(i)).append(" -> ")); 3836 t.transliterate(rsource, index, source.charAt(i)); 3837 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 3838 v.add(UtilityExtensions.formatInput(rsource, index) + 3839 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 3840 } 3841 } 3842 3843 // As a final step in keyboard transliteration, we must call 3844 // transliterate to finish off any pending partial matches that 3845 // were waiting for more input. 
3846 t.finishTransliteration(rsource, index); 3847 result = rsource.toString(); 3848 //log.append(" => ").append(rsource.toString()); 3849 v.add(result); 3850 3851 String[] results = new String[v.size()]; 3852 v.toArray(results); 3853 expectAux(t.getID() + ":Incremental", results, 3854 result.equals(expectedResult), 3855 expectedResult); 3856 } 3857 3858 static boolean expectAux(String tag, String source, 3859 String result, String expectedResult) { 3860 return expectAux(tag, new String[] {source, result}, 3861 result.equals(expectedResult), 3862 expectedResult); 3863 } 3864 3865 static boolean expectAux(String tag, String source, 3866 String result, boolean pass, 3867 String expectedResult) { 3868 return expectAux(tag, new String[] {source, result}, 3869 pass, 3870 expectedResult); 3871 } 3872 3873 static boolean expectAux(String tag, String source, 3874 boolean pass, 3875 String expectedResult) { 3876 return expectAux(tag, new String[] {source}, 3877 pass, 3878 expectedResult); 3879 } 3880 3881 static boolean expectAux(String tag, String[] results, boolean pass, 3882 String expectedResult) { 3883 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 3884 3885 for (int i = 0; i < results.length; ++i) { 3886 String label; 3887 if (i == 0) { 3888 label = "source: "; 3889 } else if (i == results.length - 1) { 3890 label = "result: "; 3891 } else { 3892 if (!isVerbose() && pass) continue; 3893 label = "interm" + i + ": "; 3894 } 3895 msg(" " + label + results[i], pass ? 
LOG : ERR, false, true); 3896 } 3897 3898 if (!pass) { 3899 msg( " expected: " + expectedResult, ERR, false, true); 3900 } 3901 3902 return pass; 3903 } 3904 3905 static private void assertTransform(String message, String expected, StringTransform t, String source) { 3906 assertEquals(message + " " + source, expected, t.transform(source)); 3907 } 3908 3909 3910 static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 3911 assertEquals(message + " " +source, expected, t.transform(source)); 3912 assertEquals(message + " " +source2, expected, t.transform(source2)); 3913 assertEquals(message + " " + expected, source, back.transform(expected)); 3914 } 3915 3916 /* 3917 * Tests the method public Enumeration<String> getAvailableTargets(String source) 3918 */ 3919 @Test 3920 public void TestGetAvailableTargets() { 3921 try { 3922 // Tests when if (targets == null) is true 3923 Transliterator.getAvailableTargets(""); 3924 } catch (Exception e) { 3925 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 3926 } 3927 } 3928 3929 /* 3930 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 3931 */ 3932 @Test 3933 public void TestGetAvailableVariants() { 3934 try { 3935 // Tests when if (targets == null) is true 3936 Transliterator.getAvailableVariants("", ""); 3937 } catch (Exception e) { 3938 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 3939 } 3940 } 3941 3942 /* 3943 * Tests the method String nextLine() in RuleBody 3944 */ 3945 @Test 3946 public void TestNextLine() { 3947 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 3948 try{ 3949 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 3950 } catch(Exception e){ 3951 errln("TransliteratorParser.nextLine() was not suppose to 
return an " + 3952 "exception for a rule of '\\'"); 3953 } 3954 } 3955 3956 /** 3957 * Tests equals and hashCode implementation of Transliterator.Position 3958 */ 3959 @Test 3960 public void TestPositionEquals() { 3961 Transliterator.Position position1 = new Transliterator.Position(1, 0, 0, 0); 3962 Transliterator.Position position2 = new Transliterator.Position(0, 0, 0, 0); 3963 assertNotEquals("2 different positions are not equal", position1, position2); 3964 assertNotEquals("2 different positions have different hash codes", position1.hashCode(), position2.hashCode()); 3965 Transliterator.Position position3 = new Transliterator.Position(1, 0, 0, 0); 3966 assertEquals("2 positions are equal", position1, position3); 3967 assertEquals("2 positions have the same hash codes", position1.hashCode(), position3.hashCode()); 3968 } 3969 } 3970