1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2012, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.translit; 10 11 import java.util.ArrayList; 12 import java.util.Enumeration; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Iterator; 16 import java.util.List; 17 import java.util.Locale; 18 19 import org.junit.Test; 20 import org.junit.runner.RunWith; 21 import org.junit.runners.JUnit4; 22 23 import com.ibm.icu.dev.test.TestFmwk; 24 import com.ibm.icu.dev.test.TestUtil; 25 import com.ibm.icu.impl.Utility; 26 import com.ibm.icu.impl.UtilityExtensions; 27 import com.ibm.icu.lang.CharSequences; 28 import com.ibm.icu.lang.UCharacter; 29 import com.ibm.icu.lang.UScript; 30 import com.ibm.icu.text.Replaceable; 31 import com.ibm.icu.text.ReplaceableString; 32 import com.ibm.icu.text.StringTransform; 33 import com.ibm.icu.text.Transliterator; 34 import com.ibm.icu.text.UTF16; 35 import com.ibm.icu.text.UnicodeFilter; 36 import com.ibm.icu.text.UnicodeSet; 37 import com.ibm.icu.text.UnicodeSetIterator; 38 import com.ibm.icu.util.CaseInsensitiveString; 39 import com.ibm.icu.util.ULocale; 40 41 /*********************************************************************** 42 43 HOW TO USE THIS TEST FILE 44 -or- 45 How I developed on two platforms 46 without losing (too much of) my mind 47 48 49 1. Add new tests by copying/pasting/changing existing tests. On Java, 50 any public void method named Test...() taking no parameters becomes 51 a test. On C++, you need to modify the header and add a line to 52 the runIndexedTest() dispatch method. 53 54 2. Make liberal use of the expect() method; it is your friend. 55 56 3. 
The tests in this file exactly match those in a sister file on the 57 other side. The two files are: 58 59 icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java 60 icu4c: source/test/intltest/transtst.cpp 61 62 ==> THIS IS THE IMPORTANT PART <== 63 64 When you add a test in this file, add it in transtst.cpp too. 65 Give it the same name and put it in the same relative place. This 66 makes maintenance a lot simpler for any poor soul who ends up 67 trying to synchronize the tests between icu4j and icu4c. 68 69 4. If you MUST enter a test that is NOT paralleled in the sister file, 70 then add it in the special non-mirrored section. These are 71 labeled 72 73 "icu4j ONLY" 74 75 or 76 77 "icu4c ONLY" 78 79 Make sure you document the reason the test is here and not there. 80 81 82 Thank you. 83 The Management 84 ***********************************************************************/ 85 86 /** 87 * @test 88 * @summary General test of Transliterator 89 */ 90 @RunWith(JUnit4.class) 91 public class TransliteratorTest extends TestFmwk { 92 @Test TestHangul()93 public void TestHangul() { 94 95 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 96 Transliterator hl = lh.getInverse(); 97 98 assertTransform("Transform", "\uCE20", lh, "ch"); 99 100 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 101 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 102 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 103 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 104 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 105 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 106 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 107 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja"); 108 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha"); 109 assertTransform("Transform", "\uC54C\uAC00", 
lh, hl, "alga", "al-ga"); 110 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 111 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 112 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 113 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 114 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 115 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 116 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 117 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 118 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 119 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 120 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 121 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 122 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 123 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 124 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 125 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 126 127 } 128 129 @Test TestChinese()130 public void TestChinese() { 131 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 132 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 133 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 134 } 135 136 @Test TestRegistry()137 public void TestRegistry() { 138 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 139 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 140 checkRegistry("foo1", "[:letter:] a > b;"); 141 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 142 String id = (String) 
e.nextElement(); 143 checkRegistry(id); 144 } 145 // Need to remove these test-specific transliterators in order not to interfere with other tests. 146 Transliterator.unregister("foo3"); 147 Transliterator.unregister("foo2"); 148 Transliterator.unregister("foo1"); 149 } 150 checkRegistry(String id, String rules)151 private void checkRegistry (String id, String rules) { 152 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 153 Transliterator.registerInstance(foo); 154 checkRegistry(id); 155 } 156 checkRegistry(String id)157 private void checkRegistry(String id) { 158 Transliterator fie = Transliterator.getInstance(id); 159 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 160 fie.setFilter(fae); 161 Transliterator foe = Transliterator.getInstance(id); 162 UnicodeFilter fee = foe.getFilter(); 163 if (fae.equals(fee)) { 164 errln("Changed what is in registry for " + id); 165 } 166 } 167 168 @Test TestInstantiationError()169 public void TestInstantiationError() { 170 try { 171 String ID = "<Not a valid Transliterator ID>"; 172 Transliterator t = Transliterator.getInstance(ID); 173 errln("FAIL: " + ID + " returned " + t); 174 } catch (IllegalArgumentException ex) { 175 logln("OK: Bogus ID handled properly"); 176 } 177 } 178 179 @Test TestSimpleRules()180 public void TestSimpleRules() { 181 /* Example: rules 1. ab>x|y 182 * 2. yc>z 183 * 184 * []|eabcd start - no match, copy e to tranlated buffer 185 * [e]|abcd match rule 1 - copy output & adjust cursor 186 * [ex|y]cd match rule 2 - copy output & adjust cursor 187 * [exz]|d no match, copy d to transliterated buffer 188 * [exzd]| done 189 */ 190 expect("ab>x|y;" + 191 "yc>z", 192 "eabcd", "exzd"); 193 194 /* Another set of rules: 195 * 1. ab>x|yzacw 196 * 2. za>q 197 * 3. qc>r 198 * 4. 
cw>n 199 * 200 * []|ab Rule 1 201 * [x|yzacw] No match 202 * [xy|zacw] Rule 2 203 * [xyq|cw] Rule 4 204 * [xyqn]| Done 205 */ 206 expect("ab>x|yzacw;" + 207 "za>q;" + 208 "qc>r;" + 209 "cw>n", 210 "ab", "xyqn"); 211 212 /* Test categories 213 */ 214 Transliterator t = Transliterator.createFromRules("<ID>", 215 "$dummy=\uE100;" + 216 "$vowel=[aeiouAEIOU];" + 217 "$lu=[:Lu:];" + 218 "$vowel } $lu > '!';" + 219 "$vowel > '&';" + 220 "'!' { $lu > '^';" + 221 "$lu > '*';" + 222 "a>ERROR", 223 Transliterator.FORWARD); 224 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 225 } 226 227 /** 228 * Test inline set syntax and set variable syntax. 229 */ 230 @Test TestInlineSet()231 public void TestInlineSet() { 232 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 233 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 234 235 expect("$digit = [0-9];" + 236 "$alpha = [a-zA-Z];" + 237 "$alphanumeric = [$digit $alpha];" + // *** 238 "$special = [^$alphanumeric];" + // *** 239 "$alphanumeric > '-';" + 240 "$special > '*';", 241 242 "thx-1138", "---*----"); 243 } 244 245 /** 246 * Create some inverses and confirm that they work. We have to be 247 * careful how we do this, since the inverses will not be true 248 * inverses -- we can't throw any random string at the composition 249 * of the transliterators and expect the identity function. F x 250 * F' != I. However, if we are careful about the input, we will 251 * get the expected results. 252 */ 253 @Test TestRuleBasedInverse()254 public void TestRuleBasedInverse() { 255 String RULES = 256 "abc>zyx;" + 257 "ab>yz;" + 258 "bc>zx;" + 259 "ca>xy;" + 260 "a>x;" + 261 "b>y;" + 262 "c>z;" + 263 264 "abc<zyx;" + 265 "ab<yz;" + 266 "bc<zx;" + 267 "ca<xy;" + 268 "a<x;" + 269 "b<y;" + 270 "c<z;" + 271 272 ""; 273 274 String[] DATA = { 275 // Careful here -- random strings will not work. If we keep 276 // the left side to the domain and the right side to the range 277 // we will be okay though (left, abc; right xyz). 
278 "a", "x", 279 "abcacab", "zyxxxyy", 280 "caccb", "xyzzy", 281 }; 282 283 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 284 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 285 for (int i=0; i<DATA.length; i+=2) { 286 expect(fwd, DATA[i], DATA[i+1]); 287 expect(rev, DATA[i+1], DATA[i]); 288 } 289 } 290 291 /** 292 * Basic test of keyboard. 293 */ 294 @Test TestKeyboard()295 public void TestKeyboard() { 296 Transliterator t = Transliterator.createFromRules("<ID>", 297 "psch>Y;" 298 +"ps>y;" 299 +"ch>x;" 300 +"a>A;", Transliterator.FORWARD); 301 String DATA[] = { 302 // insertion, buffer 303 "a", "A", 304 "p", "Ap", 305 "s", "Aps", 306 "c", "Apsc", 307 "a", "AycA", 308 "psch", "AycAY", 309 null, "AycAY", // null means finishKeyboardTransliteration 310 }; 311 312 keyboardAux(t, DATA); 313 } 314 315 /** 316 * Basic test of keyboard with cursor. 317 */ 318 @Test TestKeyboard2()319 public void TestKeyboard2() { 320 Transliterator t = Transliterator.createFromRules("<ID>", 321 "ych>Y;" 322 +"ps>|y;" 323 +"ch>x;" 324 +"a>A;", Transliterator.FORWARD); 325 String DATA[] = { 326 // insertion, buffer 327 "a", "A", 328 "p", "Ap", 329 "s", "Aps", // modified for rollback - "Ay", 330 "c", "Apsc", // modified for rollback - "Ayc", 331 "a", "AycA", 332 "p", "AycAp", 333 "s", "AycAps", // modified for rollback - "AycAy", 334 "c", "AycApsc", // modified for rollback - "AycAyc", 335 "h", "AycAY", 336 null, "AycAY", // null means finishKeyboardTransliteration 337 }; 338 339 keyboardAux(t, DATA); 340 } 341 342 /** 343 * Test keyboard transliteration with back-replacement. 344 */ 345 @Test TestKeyboard3()346 public void TestKeyboard3() { 347 // We want th>z but t>y. Furthermore, during keyboard 348 // transliteration we want t>y then yh>z if t, then h are 349 // typed. 
350 String RULES = 351 "t>|y;" + 352 "yh>z;" + 353 ""; 354 355 String[] DATA = { 356 // Column 1: characters to add to buffer (as if typed) 357 // Column 2: expected appearance of buffer after 358 // keyboard xliteration. 359 "a", "a", 360 "b", "ab", 361 "t", "abt", // modified for rollback - "aby", 362 "c", "abyc", 363 "t", "abyct", // modified for rollback - "abycy", 364 "h", "abycz", 365 null, "abycz", // null means finishKeyboardTransliteration 366 }; 367 368 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 369 keyboardAux(t, DATA); 370 } 371 keyboardAux(Transliterator t, String[] DATA)372 private void keyboardAux(Transliterator t, String[] DATA) { 373 Transliterator.Position index = new Transliterator.Position(); 374 ReplaceableString s = new ReplaceableString(); 375 for (int i=0; i<DATA.length; i+=2) { 376 StringBuffer log; 377 if (DATA[i] != null) { 378 log = new StringBuffer(s.toString() + " + " 379 + DATA[i] 380 + " -> "); 381 t.transliterate(s, index, DATA[i]); 382 } else { 383 log = new StringBuffer(s.toString() + " => "); 384 t.finishTransliteration(s, index); 385 } 386 UtilityExtensions.formatInput(log, s, index); 387 if (s.toString().equals(DATA[i+1])) { 388 logln(log.toString()); 389 } else { 390 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 391 } 392 } 393 } 394 395 // Latin-Arabic has been temporarily removed until it can be 396 // done correctly. 
397 398 // public void TestArabic() { 399 // String DATA[] = { 400 // "Arabic", 401 // "\u062a\u062a\u0645\u062a\u0639 "+ 402 // "\u0627\u0644\u0644\u063a\u0629 "+ 403 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 404 // "\u0628\u0628\u0646\u0638\u0645 "+ 405 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 406 // "\u062c\u0645\u064a\u0644\u0629" 407 // }; 408 409 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 410 // for (int i=0; i<DATA.length; i+=2) { 411 // expect(t, DATA[i], DATA[i+1]); 412 // } 413 // } 414 415 /** 416 * Compose the Kana transliterator forward and reverse and try 417 * some strings that should come out unchanged. 418 */ 419 @Test TestCompoundKana()420 public void TestCompoundKana() { 421 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 422 expect(t, "aaaaa", "aaaaa"); 423 } 424 425 /** 426 * Compose the hex transliterators forward and reverse. 427 */ 428 @Test TestCompoundHex()429 public void TestCompoundHex() { 430 Transliterator a = Transliterator.getInstance("Any-Hex"); 431 Transliterator b = Transliterator.getInstance("Hex-Any"); 432 // Transliterator[] trans = { a, b }; 433 // Transliterator ab = Transliterator.getInstance(trans); 434 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 435 436 // Do some basic tests of b 437 expect(b, "\\u0030\\u0031", "01"); 438 439 String s = "abcde"; 440 expect(ab, s, s); 441 442 // trans = new Transliterator[] { b, a }; 443 // Transliterator ba = Transliterator.getInstance(trans); 444 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 445 ReplaceableString str = new ReplaceableString(s); 446 a.transliterate(str); 447 expect(ba, str.toString(), str.toString()); 448 } 449 450 /** 451 * Do some basic tests of filtering. 
452 */ 453 @Test TestFiltering()454 public void TestFiltering() { 455 456 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 457 tempTrans.setFilter(new UnicodeSet("[a]")); 458 String tempResult = tempTrans.transform("xa"); 459 assertEquals("context should not be filtered ", "xb", tempResult); 460 461 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 462 tempResult = tempTrans.transform("xa"); 463 assertEquals("context should not be filtered ", "xb", tempResult); 464 465 Transliterator hex = Transliterator.getInstance("Any-Hex"); 466 hex.setFilter(new UnicodeFilter() { 467 @Override 468 public boolean contains(int c) { 469 return c != 'c'; 470 } 471 @Override 472 public String toPattern(boolean escapeUnprintable) { 473 return ""; 474 } 475 @Override 476 public boolean matchesIndexValue(int v) { 477 return false; 478 } 479 @Override 480 public void addMatchSetTo(UnicodeSet toUnionTo) {} 481 }); 482 String s = "abcde"; 483 String out = hex.transliterate(s); 484 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 485 if (out.equals(exp)) { 486 logln("Ok: \"" + exp + "\""); 487 } else { 488 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 489 } 490 } 491 492 /** 493 * Test anchors 494 */ 495 @Test TestAnchors()496 public void TestAnchors() { 497 expect("^ab > 01 ;" + 498 " ab > |8 ;" + 499 " b > k ;" + 500 " 8x$ > 45 ;" + 501 " 8x > 77 ;", 502 503 "ababbabxabx", 504 "018k7745"); 505 expect("$s = [z$] ;" + 506 "$s{ab > 01 ;" + 507 " ab > |8 ;" + 508 " b > k ;" + 509 " 8x}$s > 45 ;" + 510 " 8x > 77 ;", 511 512 "abzababbabxzabxabx", 513 "01z018k45z01x45"); 514 } 515 516 /** 517 * Test pattern quoting and escape mechanisms. 
518 */ 519 @Test TestPatternQuoting()520 public void TestPatternQuoting() { 521 // Array of 3n items 522 // Each item is <rules>, <input>, <expected output> 523 String[] DATA = { 524 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 525 }; 526 527 for (int i=0; i<DATA.length; i+=3) { 528 logln("Pattern: " + Utility.escape(DATA[i])); 529 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 530 expect(t, DATA[i+1], DATA[i+2]); 531 } 532 } 533 534 @Test TestVariableNames()535 public void TestVariableNames() { 536 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 537 if (gl == null) { 538 errln("FAIL: null Transliterator returned."); 539 } 540 } 541 542 /** 543 * Regression test for bugs found in Greek transliteration. 544 */ 545 @Test TestJ277()546 public void TestJ277() { 547 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 548 549 char sigma = (char)0x3C3; 550 char upsilon = (char)0x3C5; 551 char nu = (char)0x3BD; 552 // not used char PHI = (char)0x3A6; 553 char alpha = (char)0x3B1; 554 // not used char omega = (char)0x3C9; 555 // not used char omicron = (char)0x3BF; 556 // not used char epsilon = (char)0x3B5; 557 558 // sigma upsilon nu -> syn 559 StringBuffer buf = new StringBuffer(); 560 buf.append(sigma).append(upsilon).append(nu); 561 String syn = buf.toString(); 562 expect(gl, syn, "syn"); 563 564 // sigma alpha upsilon nu -> saun 565 buf.setLength(0); 566 buf.append(sigma).append(alpha).append(upsilon).append(nu); 567 String sayn = buf.toString(); 568 expect(gl, sayn, "saun"); 569 570 // Again, using a smaller rule set 571 String rules = 572 "$alpha = \u03B1;" + 573 "$nu = \u03BD;" + 574 "$sigma = \u03C3;" + 575 "$ypsilon = \u03C5;" + 576 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 577 "s <> $sigma;" + 578 "a <> $alpha;" + 579 "u <> $vowel { $ypsilon;" + 580 "y <> $ypsilon;" + 581 "n <> $nu;"; 582 Transliterator mini = 
Transliterator.createFromRules 583 ("mini", rules, Transliterator.REVERSE); 584 expect(mini, syn, "syn"); 585 expect(mini, sayn, "saun"); 586 587 //| // Transliterate the Greek locale data 588 //| Locale el("el"); 589 //| DateFormatSymbols syms(el, status); 590 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 591 //| int32_t i, count; 592 //| const UnicodeString* data = syms.getMonths(count); 593 //| for (i=0; i<count; ++i) { 594 //| if (data[i].length() == 0) { 595 //| continue; 596 //| } 597 //| UnicodeString out(data[i]); 598 //| gl->transliterate(out); 599 //| bool_t ok = TRUE; 600 //| if (data[i].length() >= 2 && out.length() >= 2 && 601 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 602 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 603 //| ok = FALSE; 604 //| } 605 //| } 606 //| if (ok) { 607 //| logln(prettify(data[i] + " -> " + out)); 608 //| } else { 609 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 610 //| } 611 //| } 612 } 613 614 // /** 615 // * Prefix, suffix support in hex transliterators 616 // */ 617 // public void TestJ243() { 618 // // Test default Hex-Any, which should handle 619 // // \\u, \\U, u+, and U+ 620 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 621 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 622 // 623 // // Try a custom Hex-Any 624 // // \\uXXXX and &#xXXXX; 625 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 626 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 627 // "abcd5fx0123"); 628 // 629 // // Try custom Any-Hex (default is tested elsewhere) 630 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 631 // expect(hex3, "012", "012"); 632 // } 633 634 @Test TestJ329()635 public void TestJ329() { 636 637 Object[] DATA = { 638 Boolean.FALSE, "a > b; c > d", 639 Boolean.TRUE, "a > b; no operator; c > d", 
640 }; 641 642 for (int i=0; i<DATA.length; i+=2) { 643 String err = null; 644 try { 645 Transliterator.createFromRules("<ID>", 646 (String) DATA[i+1], 647 Transliterator.FORWARD); 648 } catch (IllegalArgumentException e) { 649 err = e.getMessage(); 650 } 651 boolean gotError = (err != null); 652 String desc = (String) DATA[i+1] + 653 (gotError ? (" -> error: " + err) : " -> no error"); 654 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 655 logln("Ok: " + desc); 656 } else { 657 errln("FAIL: " + desc); 658 } 659 } 660 } 661 662 /** 663 * Test segments and segment references. 664 */ 665 @Test TestSegments()666 public void TestSegments() { 667 // Array of 3n items 668 // Each item is <rules>, <input>, <expected output> 669 String[] DATA = { 670 "([a-z]) '.' ([0-9]) > $2 '-' $1", 671 "abc.123.xyz.456", 672 "ab1-c23.xy4-z56", 673 }; 674 675 for (int i=0; i<DATA.length; i+=3) { 676 logln("Pattern: " + Utility.escape(DATA[i])); 677 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 678 expect(t, DATA[i+1], DATA[i+2]); 679 } 680 } 681 682 /** 683 * Test cursor positioning outside of the key 684 */ 685 @Test TestCursorOffset()686 public void TestCursorOffset() { 687 // Array of 3n items 688 // Each item is <rules>, <input>, <expected output> 689 String[] DATA = { 690 "pre {alpha} post > | @ ALPHA ;" + 691 "eALPHA > beta ;" + 692 "pre {beta} post > BETA @@ | ;" + 693 "post > xyz", 694 695 "prealphapost prebetapost", 696 "prbetaxyz preBETApost", 697 }; 698 699 for (int i=0; i<DATA.length; i+=3) { 700 logln("Pattern: " + Utility.escape(DATA[i])); 701 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 702 expect(t, DATA[i+1], DATA[i+2]); 703 } 704 } 705 706 /** 707 * Test zero length and > 1 char length variable values. Test 708 * use of variable refs in UnicodeSets. 
709 */ 710 @Test TestArbitraryVariableValues()711 public void TestArbitraryVariableValues() { 712 // Array of 3n items 713 // Each item is <rules>, <input>, <expected output> 714 String[] DATA = { 715 "$abe = ab;" + 716 "$pat = x[yY]z;" + 717 "$ll = 'a-z';" + 718 "$llZ = [$ll];" + 719 "$llY = [$ll$pat];" + 720 "$emp = ;" + 721 722 "$abe > ABE;" + 723 "$pat > END;" + 724 "$llZ > 1;" + 725 "$llY > 2;" + 726 "7$emp 8 > 9;" + 727 "", 728 729 "ab xYzxyz stY78", 730 "ABE ENDEND 1129", 731 }; 732 733 for (int i=0; i<DATA.length; i+=3) { 734 logln("Pattern: " + Utility.escape(DATA[i])); 735 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 736 expect(t, DATA[i+1], DATA[i+2]); 737 } 738 } 739 740 /** 741 * Confirm that the contextStart, contextLimit, start, and limit 742 * behave correctly. 743 */ 744 @Test TestPositionHandling()745 public void TestPositionHandling() { 746 // Array of 3n items 747 // Each item is <rules>, <input>, <expected output> 748 String[] DATA = { 749 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 750 "xtat txtb", // pos 0,9,0,9 751 "xTTaSS TTxUUb", 752 753 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 754 "xtat txtb", // pos 2,9,3,8 755 "xtaSS TTxUUb", 756 757 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 758 "xtat txtb", // pos 3,8,3,8 759 "xtaTT TTxTTb", 760 }; 761 762 // Array of 4n positions -- these go with the DATA array 763 // They are: contextStart, contextLimit, start, limit 764 int[] POS = { 765 0, 9, 0, 9, 766 2, 9, 3, 8, 767 3, 8, 3, 8, 768 }; 769 770 int n = DATA.length/3; 771 for (int i=0; i<n; i++) { 772 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 773 Transliterator.Position pos = new Transliterator.Position( 774 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 775 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 776 t.transliterate(rsource, pos); 777 t.finishTransliteration(rsource, pos); 778 String result = rsource.toString(); 779 String exp = 
DATA[3*i+2]; 780 expectAux(Utility.escape(DATA[3*i]), 781 DATA[3*i+1], 782 result, 783 result.equals(exp), 784 exp); 785 } 786 } 787 788 /** 789 * Test the Hiragana-Katakana transliterator. 790 */ 791 @Test TestHiraganaKatakana()792 public void TestHiraganaKatakana() { 793 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 794 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 795 796 // Array of 3n items 797 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 798 String[] DATA = { 799 "both", 800 "\u3042\u3090\u3099\u3092\u3050", 801 "\u30A2\u30F8\u30F2\u30B0", 802 803 "kh", 804 "\u307C\u3051\u3060\u3042\u3093\u30FC", 805 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 806 }; 807 808 for (int i=0; i<DATA.length; i+=3) { 809 switch (DATA[i].charAt(0)) { 810 case 'h': // Hiragana-Katakana 811 expect(hk, DATA[i+1], DATA[i+2]); 812 break; 813 case 'k': // Katakana-Hiragana 814 expect(kh, DATA[i+2], DATA[i+1]); 815 break; 816 case 'b': // both 817 expect(hk, DATA[i+1], DATA[i+2]); 818 expect(kh, DATA[i+2], DATA[i+1]); 819 break; 820 } 821 } 822 823 } 824 825 @Test TestCopyJ476()826 public void TestCopyJ476() { 827 // This is a C++-only copy constructor test 828 } 829 830 /** 831 * Test inter-Indic transliterators. These are composed. 832 */ 833 @Test TestInterIndic()834 public void TestInterIndic() { 835 String ID = "Devanagari-Gujarati"; 836 Transliterator dg = Transliterator.getInstance(ID); 837 if (dg == null) { 838 errln("FAIL: getInstance(" + ID + ") returned null"); 839 return; 840 } 841 String id = dg.getID(); 842 if (!id.equals(ID)) { 843 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 844 } 845 String dev = "\u0901\u090B\u0925"; 846 String guj = "\u0A81\u0A8B\u0AA5"; 847 expect(dg, dev, guj); 848 } 849 850 /** 851 * Test filter syntax in IDs. 
(J23) 852 */ 853 @Test TestFilterIDs()854 public void TestFilterIDs() { 855 String[] DATA = { 856 "[aeiou]Any-Hex", // ID 857 "[aeiou]Hex-Any", // expected inverse ID 858 "quizzical", // src 859 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 860 861 "[aeiou]Any-Hex;[^5]Hex-Any", 862 "[^5]Any-Hex;[aeiou]Hex-Any", 863 "quizzical", 864 "q\\u0075izzical", 865 866 "[abc]Null", 867 "[abc]Null", 868 "xyz", 869 "xyz", 870 }; 871 872 for (int i=0; i<DATA.length; i+=4) { 873 String ID = DATA[i]; 874 Transliterator t = Transliterator.getInstance(ID); 875 expect(t, DATA[i+2], DATA[i+3]); 876 877 // Check the ID 878 if (!ID.equals(t.getID())) { 879 errln("FAIL: getInstance(" + ID + ").getID() => " + 880 t.getID()); 881 } 882 883 // Check the inverse 884 String uID = DATA[i+1]; 885 Transliterator u = t.getInverse(); 886 if (u == null) { 887 errln("FAIL: " + ID + ".getInverse() returned NULL"); 888 } else if (!u.getID().equals(uID)) { 889 errln("FAIL: " + ID + ".getInverse().getID() => " + 890 u.getID() + ", expected " + uID); 891 } 892 } 893 } 894 895 /** 896 * Test the case mapping transliterators. 897 */ 898 @Test TestCaseMap()899 public void TestCaseMap() { 900 Transliterator toUpper = 901 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 902 Transliterator toLower = 903 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 904 Transliterator toTitle = 905 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 906 907 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 908 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 909 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 910 "the quick brown foX jumped over the lazY dogs."); 911 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 912 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 913 } 914 915 /** 916 * Test the name mapping transliterators. 
917 */ 918 @Test TestNameMap()919 public void TestNameMap() { 920 Transliterator uni2name = 921 Transliterator.getInstance("Any-Name[^abc]"); 922 Transliterator name2uni = 923 Transliterator.getInstance("Name-Any"); 924 925 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 926 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 927 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 928 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 929 930 // round trip 931 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 932 933 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 934 expect(t, s, s); 935 } 936 937 /** 938 * Test liberalized ID syntax. 1006c 939 */ 940 @Test TestLiberalizedID()941 public void TestLiberalizedID() { 942 // Some test cases have an expected getID() value of NULL. This 943 // means I have disabled the test case for now. This stuff is 944 // still under development, and I haven't decided whether to make 945 // getID() return canonical case yet. It will all get rewritten 946 // with the move to Source-Target/Variant IDs anyway. 
[aliu] 947 String DATA[] = { 948 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 949 " Null ", "Null", "whitespace", 950 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 951 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 952 }; 953 954 for (int i=0; i<DATA.length; i+=3) { 955 try { 956 Transliterator t = Transliterator.getInstance(DATA[i]); 957 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 958 logln("Ok: " + DATA[i+2] + 959 " create ID \"" + DATA[i] + "\" => \"" + 960 t.getID() + "\""); 961 } else { 962 errln("FAIL: " + DATA[i+2] + 963 " create ID \"" + DATA[i] + "\" => \"" + 964 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 965 } 966 } catch (IllegalArgumentException e) { 967 errln("FAIL: " + DATA[i+2] + 968 " create ID \"" + DATA[i] + "\""); 969 } 970 } 971 } 972 973 @Test TestCreateInstance()974 public void TestCreateInstance() { 975 String FORWARD = "F"; 976 String REVERSE = "R"; 977 String DATA[] = { 978 // Column 1: id 979 // Column 2: direction 980 // Column 3: expected ID, or "" if expect failure 981 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 982 983 // JB#2689: bad compound causes crash 984 "InvalidSource-InvalidTarget", FORWARD, "", 985 "InvalidSource-InvalidTarget", REVERSE, "", 986 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 987 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 988 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 989 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 990 991 null 992 }; 993 994 for (int i=0; DATA[i]!=null; i+=3) { 995 String id=DATA[i]; 996 int dir = (DATA[i+1]==FORWARD)? 
997 Transliterator.FORWARD:Transliterator.REVERSE; 998 String expID=DATA[i+2]; 999 Exception e = null; 1000 Transliterator t; 1001 try { 1002 t = Transliterator.getInstance(id,dir); 1003 } catch (Exception e1) { 1004 e = e1; 1005 t = null; 1006 } 1007 String newID = (t!=null)?t.getID():""; 1008 boolean ok = (newID.equals(expID)); 1009 if (t==null) { 1010 newID = e.getMessage(); 1011 } 1012 if (ok) { 1013 logln("Ok: createInstance(" + 1014 id + "," + DATA[i+1] + ") => " + newID); 1015 } else { 1016 errln("FAIL: createInstance(" + 1017 id + "," + DATA[i+1] + ") => " + newID + 1018 ", expected " + expID); 1019 } 1020 } 1021 } 1022 1023 /** 1024 * Test the normalization transliterator. 1025 */ 1026 @Test TestNormalizationTransliterator()1027 public void TestNormalizationTransliterator() { 1028 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest 1029 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1030 String[][] CANON = { 1031 // Input Decomposed Composed 1032 {"cat", "cat", "cat" }, 1033 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1034 1035 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1036 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1037 1038 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1039 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1040 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1041 1042 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1043 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1044 1045 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1046 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1047 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1048 1049 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1050 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1051 
1052 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1053 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1054 1055 {"Henry IV", "Henry IV", "Henry IV" }, 1056 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1057 1058 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1059 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1060 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1061 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1062 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1063 1064 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1065 }; 1066 1067 String[][] COMPAT = { 1068 // Input Decomposed Composed 1069 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1070 1071 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1072 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1073 1074 {"Henry IV", "Henry IV", "Henry IV" }, 1075 {"Henry \u2163", "Henry IV", "Henry IV" }, 1076 1077 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1078 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1079 1080 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1081 }; 1082 1083 Transliterator NFD = Transliterator.getInstance("NFD"); 1084 Transliterator NFC = Transliterator.getInstance("NFC"); 1085 for (int i=0; i<CANON.length; ++i) { 1086 String in = CANON[i][0]; 1087 String expd = CANON[i][1]; 1088 String expc = CANON[i][2]; 1089 expect(NFD, in, expd); 1090 expect(NFC, in, expc); 1091 } 1092 1093 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1094 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1095 for (int i=0; i<COMPAT.length; ++i) { 1096 String in = COMPAT[i][0]; 1097 String expkd = COMPAT[i][1]; 1098 String expkc = COMPAT[i][2]; 1099 expect(NFKD, in, expkd); 1100 expect(NFKC, in, expkc); 1101 } 1102 1103 Transliterator t = 
Transliterator.getInstance("NFD; [x]Remove"); 1104 expect(t, "\u010dx", "c\u030C"); 1105 } 1106 1107 /** 1108 * Test compound RBT rules. 1109 */ 1110 @Test TestCompoundRBT()1111 public void TestCompoundRBT() { 1112 // Careful with spacing and ';' here: Phrase this exactly 1113 // as toRules() is going to return it. If toRules() changes 1114 // with regard to spacing or ';', then adjust this string. 1115 String rule = "::Hex-Any;\n" + 1116 "::Any-Lower;\n" + 1117 "a > '.A.';\n" + 1118 "b > '.B.';\n" + 1119 "::[^t]Any-Upper;"; 1120 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1121 if (t == null) { 1122 errln("FAIL: createFromRules failed"); 1123 return; 1124 } 1125 expect(t, "\u0043at in the hat, bat on the mat", 1126 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1127 String r = t.toRules(true); 1128 if (r.equals(rule)) { 1129 logln("OK: toRules() => " + r); 1130 } else { 1131 errln("FAIL: toRules() => " + r + 1132 ", expected " + rule); 1133 } 1134 1135 // Now test toRules 1136 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1137 if (t == null) { 1138 errln("FAIL: createInstance failed"); 1139 return; 1140 } 1141 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1142 r = t.toRules(true); 1143 if (!r.equals(exp)) { 1144 errln("FAIL: toRules() => " + r + 1145 ", expected " + exp); 1146 } else { 1147 logln("OK: toRules() => " + r); 1148 } 1149 1150 // Round trip the result of toRules 1151 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1152 if (t == null) { 1153 errln("FAIL: createFromRules #2 failed"); 1154 return; 1155 } else { 1156 logln("OK: createFromRules(" + r + ") succeeded"); 1157 } 1158 1159 // Test toRules again 1160 r = t.toRules(true); 1161 if (!r.equals(exp)) { 1162 errln("FAIL: toRules() => " + r + 1163 ", expected " + exp); 1164 } else { 1165 logln("OK: toRules() => " + r); 1166 } 1167 1168 // Test Foo(Bar) IDs. 
Careful with spacing in id; make it conform 1169 // to what the regenerated ID will look like. 1170 String id = "Upper(Lower);(NFKC)"; 1171 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1172 if (t == null) { 1173 errln("FAIL: createInstance #2 failed"); 1174 return; 1175 } 1176 if (t.getID().equals(id)) { 1177 logln("OK: created " + id); 1178 } else { 1179 errln("FAIL: createInstance(" + id + 1180 ").getID() => " + t.getID()); 1181 } 1182 1183 Transliterator u = t.getInverse(); 1184 if (u == null) { 1185 errln("FAIL: createInverse failed"); 1186 return; 1187 } 1188 exp = "NFKC();Lower(Upper)"; 1189 if (u.getID().equals(exp)) { 1190 logln("OK: createInverse(" + id + ") => " + 1191 u.getID()); 1192 } else { 1193 errln("FAIL: createInverse(" + id + ") => " + 1194 u.getID()); 1195 } 1196 } 1197 1198 /** 1199 * Compound filter semantics were orginially not implemented 1200 * correctly. Originally, each component filter f(i) is replaced by 1201 * f'(i) = f(i) && g, where g is the filter for the compound 1202 * transliterator. 1203 * 1204 * From Mark: 1205 * 1206 * Suppose and I have a transliterator X. Internally X is 1207 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1208 * 1209 * The compound should convert all greek characters (through latin) to 1210 * cyrillic, then lowercase the result. The filter should say "don't 1211 * touch 'A' in the original". But because an intermediate result 1212 * happens to go through "A", the Greek Alpha gets hung up. 1213 */ 1214 @Test TestCompoundFilter()1215 public void TestCompoundFilter() { 1216 Transliterator t = Transliterator.getInstance 1217 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1218 t.setFilter(new UnicodeSet("[^A]")); 1219 1220 // Only the 'A' at index 1 should remain unchanged 1221 expect(t, 1222 CharsToUnicodeString("BA\\u039A\\u0391"), 1223 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1224 } 1225 1226 /** 1227 * Test the "Remove" transliterator. 
1228 */ 1229 @Test TestRemove()1230 public void TestRemove() { 1231 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1232 expect(t, "The quick brown fox.", 1233 "Th qck brwn fx."); 1234 } 1235 1236 @Test TestToRules()1237 public void TestToRules() { 1238 String RBT = "rbt"; 1239 String SET = "set"; 1240 String[] DATA = { 1241 RBT, 1242 "$a=\\u4E61; [$a] > A;", 1243 "[\\u4E61] > A;", 1244 1245 RBT, 1246 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1247 "[[:Zs:][:Zl:]]{a} > A;", 1248 1249 SET, 1250 "[[:Zs:][:Zl:]]", 1251 "[[:Zs:][:Zl:]]", 1252 1253 SET, 1254 "[:Ps:]", 1255 "[:Ps:]", 1256 1257 SET, 1258 "[:L:]", 1259 "[:L:]", 1260 1261 SET, 1262 "[[:L:]-[A]]", 1263 "[[:L:]-[A]]", 1264 1265 SET, 1266 "[~[:Lu:][:Ll:]]", 1267 "[~[:Lu:][:Ll:]]", 1268 1269 SET, 1270 "[~[a-z]]", 1271 "[~[a-z]]", 1272 1273 RBT, 1274 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1275 "[^[:Zs:]]{a} > A;", 1276 1277 RBT, 1278 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1279 "[[a-z]-[:Zs:]]{a} > A;", 1280 1281 RBT, 1282 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1283 "[[:Zs:]&[a-z]]{a} > A;", 1284 1285 RBT, 1286 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1287 "[x[:Zs:]]{a} > A;", 1288 1289 RBT, 1290 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1291 "$macron = \\u0304 ;"+ 1292 "$evowel = [aeiouyAEIOUY] ;"+ 1293 "$iotasub = \\u0345 ;"+ 1294 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1295 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1296 1297 RBT, 1298 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1299 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1300 }; 1301 1302 for (int d=0; d < DATA.length; d+=3) { 1303 if (DATA[d] == RBT) { 1304 // Transliterator test 1305 Transliterator t = Transliterator.createFromRules("ID", 1306 DATA[d+1], Transliterator.FORWARD); 1307 if (t == null) { 1308 errln("FAIL: createFromRules failed"); 1309 return; 1310 } 1311 String rules, escapedRules; 1312 rules = 
t.toRules(false); 1313 escapedRules = t.toRules(true); 1314 String expRules = Utility.unescape(DATA[d+2]); 1315 String expEscapedRules = DATA[d+2]; 1316 if (rules.equals(expRules)) { 1317 logln("Ok: " + DATA[d+1] + 1318 " => " + Utility.escape(rules)); 1319 } else { 1320 errln("FAIL: " + DATA[d+1] + 1321 " => " + Utility.escape(rules + ", exp " + expRules)); 1322 } 1323 if (escapedRules.equals(expEscapedRules)) { 1324 logln("Ok: " + DATA[d+1] + 1325 " => " + escapedRules); 1326 } else { 1327 errln("FAIL: " + DATA[d+1] + 1328 " => " + escapedRules + ", exp " + expEscapedRules); 1329 } 1330 1331 } else { 1332 // UnicodeSet test 1333 String pat = DATA[d+1]; 1334 String expToPat = DATA[d+2]; 1335 UnicodeSet set = new UnicodeSet(pat); 1336 1337 // Adjust spacing etc. as necessary. 1338 String toPat; 1339 toPat = set.toPattern(true); 1340 if (expToPat.equals(toPat)) { 1341 logln("Ok: " + pat + 1342 " => " + toPat); 1343 } else { 1344 errln("FAIL: " + pat + 1345 " => " + Utility.escape(toPat) + 1346 ", exp " + Utility.escape(pat)); 1347 } 1348 } 1349 } 1350 } 1351 1352 @Test TestContext()1353 public void TestContext() { 1354 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1355 1356 expect("de > x; {d}e > y;", 1357 "de", 1358 "ye", 1359 pos); 1360 1361 expect("ab{c} > z;", 1362 "xadabdabcy", 1363 "xadabdabzy"); 1364 } 1365 CharsToUnicodeString(String s)1366 static final String CharsToUnicodeString(String s) { 1367 return Utility.unescape(s); 1368 } 1369 1370 @Test TestSupplemental()1371 public void TestSupplemental() { 1372 1373 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1374 "a > $a; $s > i;"), 1375 CharsToUnicodeString("ab\\U0001030Fx"), 1376 CharsToUnicodeString("\\U00010300bix")); 1377 1378 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1379 "$b=[A-Z\\U00010400-\\U0001044D];" + 1380 "($a)($b) > $2 $1;"), 1381 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 
1382 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1383 1384 // k|ax\\U00010300xm 1385 1386 // k|a\\U00010400\\U00010300xm 1387 // ky|\\U00010400\\U00010300xm 1388 // ky\\U00010400|\\U00010300xm 1389 1390 // ky\\U00010400|\\U00010300\\U00010400m 1391 // ky\\U00010400y|\\U00010400m 1392 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1393 "$a {x} > | @ \\U00010400;" + 1394 "{$a} [^\\u0000-\\uFFFF] > y;"), 1395 CharsToUnicodeString("kax\\U00010300xm"), 1396 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1397 1398 expect(Transliterator.getInstance("Any-Name"), 1399 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1400 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1401 1402 expect(Transliterator.getInstance("Name-Any"), 1403 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1404 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1405 1406 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1407 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1408 "U+10330U+10FF00U+E0061U+00A0"); 1409 1410 expect(Transliterator.getInstance("Any-Hex/C"), 1411 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1412 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1413 1414 expect(Transliterator.getInstance("Any-Hex/Perl"), 1415 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1416 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1417 1418 expect(Transliterator.getInstance("Any-Hex/Java"), 1419 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1420 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1421 1422 expect(Transliterator.getInstance("Any-Hex/XML"), 1423 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1424 "𐌰􏼀󠁡 "); 1425 1426 expect(Transliterator.getInstance("Any-Hex/XML10"), 1427 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1428 "𐌰􏼀󠁡 "); 1429 1430 
expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1431 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1432 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1433 } 1434 1435 @Test TestQuantifier()1436 public void TestQuantifier() { 1437 1438 // Make sure @ in a quantified anteContext works 1439 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1440 "AAAAAb", 1441 "aaa(aac)"); 1442 1443 // Make sure @ in a quantified postContext works 1444 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1445 "baaaaa", 1446 "caa(aaa)"); 1447 1448 // Make sure @ in a quantified postContext with seg ref works 1449 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1450 "baaaaa", 1451 "baa(aaa)"); 1452 1453 // Make sure @ past ante context doesn't enter ante context 1454 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1455 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1456 "xxxab", 1457 "xxx(ac)", 1458 pos); 1459 1460 // Make sure @ past post context doesn't pass limit 1461 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1462 expect("{b} a+ > c @@ |; x > y; a > A;", 1463 "baxx", 1464 "caxx", 1465 pos2); 1466 1467 // Make sure @ past post context doesn't enter post context 1468 expect("{b} a+ > c @@ |; x > y; a > A;", 1469 "baxx", 1470 "cayy"); 1471 1472 expect("(ab)? c > d;", 1473 "c abc ababc", 1474 "d d abd"); 1475 1476 // NOTE: The (ab)+ when referenced just yields a single "ab", 1477 // not the full sequence of them. This accords with perl behavior. 
1478 expect("(ab)+ {x} > '(' $1 ')';", 1479 "x abx ababxy", 1480 "x ab(ab) abab(ab)y"); 1481 1482 expect("b+ > x;", 1483 "ac abc abbc abbbc", 1484 "ac axc axc axc"); 1485 1486 expect("[abc]+ > x;", 1487 "qac abrc abbcs abtbbc", 1488 "qx xrx xs xtx"); 1489 1490 expect("q{(ab)+} > x;", 1491 "qa qab qaba qababc qaba", 1492 "qa qx qxa qxc qxa"); 1493 1494 expect("q(ab)* > x;", 1495 "qa qab qaba qababc", 1496 "xa x xa xc"); 1497 1498 // NOTE: The (ab)+ when referenced just yields a single "ab", 1499 // not the full sequence of them. This accords with perl behavior. 1500 expect("q(ab)* > '(' $1 ')';", 1501 "qa qab qaba qababc", 1502 "()a (ab) (ab)a (ab)c"); 1503 1504 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1505 // quoted string 1506 expect("'ab'+ > x;", 1507 "bb ab ababb", 1508 "bb x xb"); 1509 1510 // $foo+ and $foo* -- the quantifier should apply to the entire 1511 // variable reference 1512 expect("$var = ab; $var+ > x;", 1513 "bb ab ababb", 1514 "bb x xb"); 1515 } 1516 1517 static class TestFact implements Transliterator.Factory { 1518 static class NameableNullTrans extends Transliterator { NameableNullTrans(String id)1519 public NameableNullTrans(String id) { 1520 super(id, null); 1521 } 1522 @Override handleTransliterate(Replaceable text, Position offsets, boolean incremental)1523 protected void handleTransliterate(Replaceable text, 1524 Position offsets, boolean incremental) { 1525 offsets.start = offsets.limit; 1526 } 1527 } 1528 String id; TestFact(String theID)1529 public TestFact(String theID) { 1530 id = theID; 1531 } 1532 @Override getInstance(String ignoredID)1533 public Transliterator getInstance(String ignoredID) { 1534 return new NameableNullTrans(id); 1535 } 1536 } 1537 1538 @Test TestSTV()1539 public void TestSTV() { 1540 Enumeration es = Transliterator.getAvailableSources(); 1541 for (int i=0; es.hasMoreElements(); ++i) { 1542 String source = (String) es.nextElement(); 1543 logln("" + i + ": " + source); 1544 if 
(source.length() == 0) { 1545 errln("FAIL: empty source"); 1546 continue; 1547 } 1548 Enumeration et = Transliterator.getAvailableTargets(source); 1549 for (int j=0; et.hasMoreElements(); ++j) { 1550 String target = (String) et.nextElement(); 1551 logln(" " + j + ": " + target); 1552 if (target.length() == 0) { 1553 errln("FAIL: empty target"); 1554 continue; 1555 } 1556 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1557 for (int k=0; ev.hasMoreElements(); ++k) { 1558 String variant = (String) ev.nextElement(); 1559 if (variant.length() == 0) { 1560 logln(" " + k + ": <empty>"); 1561 } else { 1562 logln(" " + k + ": " + variant); 1563 } 1564 } 1565 } 1566 } 1567 1568 // Test registration 1569 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1570 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1571 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1572 for (int i=0; i<3; ++i) { 1573 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1574 try { 1575 Transliterator t = Transliterator.getInstance(IDS[i]); 1576 if (t.getID().equals(IDS[i])) { 1577 logln("Ok: Registration/creation succeeded for ID " + 1578 IDS[i]); 1579 } else { 1580 errln("FAIL: Registration of ID " + 1581 IDS[i] + " creates ID " + t.getID()); 1582 } 1583 Transliterator.unregister(IDS[i]); 1584 try { 1585 t = Transliterator.getInstance(IDS[i]); 1586 errln("FAIL: Unregistration failed for ID " + 1587 IDS[i] + "; still receiving ID " + t.getID()); 1588 } catch (IllegalArgumentException e2) { 1589 // Good; this is what we expect 1590 logln("Ok; Unregistered " + IDS[i]); 1591 } 1592 } catch (IllegalArgumentException e) { 1593 errln("FAIL: Registration/creation failed for ID " + 1594 IDS[i]); 1595 } finally { 1596 Transliterator.unregister(IDS[i]); 1597 } 1598 } 1599 1600 // Make sure getAvailable API reflects removal 1601 for (Enumeration e = Transliterator.getAvailableIDs(); 1602 e.hasMoreElements(); ) { 1603 String id 
= (String) e.nextElement(); 1604 for (int i=0; i<3; ++i) { 1605 if (id.equals(FULL_IDS[i])) { 1606 errln("FAIL: unregister(" + id + ") failed"); 1607 } 1608 } 1609 } 1610 for (Enumeration e = Transliterator.getAvailableTargets("Any"); 1611 e.hasMoreElements(); ) { 1612 String t = (String) e.nextElement(); 1613 if (t.equals(IDS[0])) { 1614 errln("FAIL: unregister(Any-" + t + ") failed"); 1615 } 1616 } 1617 for (Enumeration e = Transliterator.getAvailableSources(); 1618 e.hasMoreElements(); ) { 1619 String s = (String) e.nextElement(); 1620 for (int i=0; i<3; ++i) { 1621 if (SOURCES[i] == null) continue; 1622 if (s.equals(SOURCES[i])) { 1623 errln("FAIL: unregister(" + s + "-*) failed"); 1624 } 1625 } 1626 } 1627 } 1628 1629 /** 1630 * Test inverse of Greek-Latin; Title() 1631 */ 1632 @Test TestCompoundInverse()1633 public void TestCompoundInverse() { 1634 Transliterator t = Transliterator.getInstance 1635 ("Greek-Latin; Title()", Transliterator.REVERSE); 1636 if (t == null) { 1637 errln("FAIL: createInstance"); 1638 return; 1639 } 1640 String exp = "(Title);Latin-Greek"; 1641 if (t.getID().equals(exp)) { 1642 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1643 t.getID()); 1644 } else { 1645 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1646 t.getID() + "\", expected \"" + exp + "\""); 1647 } 1648 } 1649 1650 /** 1651 * Test NFD chaining with RBT 1652 */ 1653 @Test TestNFDChainRBT()1654 public void TestNFDChainRBT() { 1655 Transliterator t = Transliterator.createFromRules( 1656 "TEST", "::NFD; aa > Q; a > q;", 1657 Transliterator.FORWARD); 1658 logln(t.toRules(true)); 1659 expect(t, "aa", "Q"); 1660 } 1661 1662 /** 1663 * Inverse of "Null" should be "Null". 
(J21) 1664 */ 1665 @Test TestNullInverse()1666 public void TestNullInverse() { 1667 Transliterator t = Transliterator.getInstance("Null"); 1668 Transliterator u = t.getInverse(); 1669 if (!u.getID().equals("Null")) { 1670 errln("FAIL: Inverse of Null should be Null"); 1671 } 1672 } 1673 1674 /** 1675 * Check ID of inverse of alias. (J22) 1676 */ 1677 @Test TestAliasInverseID()1678 public void TestAliasInverseID() { 1679 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1680 Transliterator t = Transliterator.getInstance(ID); 1681 Transliterator u = t.getInverse(); 1682 String exp = "Hangul-Latin"; 1683 String got = u.getID(); 1684 if (!got.equals(exp)) { 1685 errln("FAIL: Inverse of " + ID + " is " + got + 1686 ", expected " + exp); 1687 } 1688 } 1689 1690 /** 1691 * Test IDs of inverses of compound transliterators. (J20) 1692 */ 1693 @Test TestCompoundInverseID()1694 public void TestCompoundInverseID() { 1695 String ID = "Latin-Jamo;NFC(NFD)"; 1696 Transliterator t = Transliterator.getInstance(ID); 1697 Transliterator u = t.getInverse(); 1698 String exp = "NFD(NFC);Jamo-Latin"; 1699 String got = u.getID(); 1700 if (!got.equals(exp)) { 1701 errln("FAIL: Inverse of " + ID + " is " + got + 1702 ", expected " + exp); 1703 } 1704 } 1705 1706 /** 1707 * Test undefined variable. 1708 */ 1709 @Test TestUndefinedVariable()1710 public void TestUndefinedVariable() { 1711 String rule = "$initial } a <> \u1161;"; 1712 try { 1713 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1714 } catch (IllegalArgumentException e) { 1715 logln("OK: Got exception for " + rule + ", as expected: " + 1716 e.getMessage()); 1717 return; 1718 } 1719 errln("Fail: bogus rule " + rule + " compiled without error"); 1720 } 1721 1722 /** 1723 * Test empty context. 
1724 */ 1725 @Test TestEmptyContext()1726 public void TestEmptyContext() { 1727 expect(" { a } > b;", "xay a ", "xby b "); 1728 } 1729 1730 /** 1731 * Test compound filter ID syntax 1732 */ 1733 @Test TestCompoundFilterID()1734 public void TestCompoundFilterID() { 1735 String[] DATA = { 1736 // Col. 1 = ID or rule set (latter must start with #) 1737 1738 // = columns > 1 are null if expect col. 1 to be illegal = 1739 1740 // Col. 2 = direction, "F..." or "R..." 1741 // Col. 3 = source string 1742 // Col. 4 = exp result 1743 1744 "[abc]; [abc]", null, null, null, // multiple filters 1745 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1746 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1747 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1748 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1749 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1750 }; 1751 1752 for (int i=0; i<DATA.length; i+=4) { 1753 String id = DATA[i]; 1754 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1755 Transliterator.REVERSE : Transliterator.FORWARD; 1756 String source = DATA[i+2]; 1757 String exp = DATA[i+3]; 1758 boolean expOk = (DATA[i+1] != null); 1759 Transliterator t = null; 1760 IllegalArgumentException e = null; 1761 try { 1762 if (id.charAt(0) == '#') { 1763 t = Transliterator.createFromRules("ID", id, direction); 1764 } else { 1765 t = Transliterator.getInstance(id, direction); 1766 } 1767 } catch (IllegalArgumentException ee) { 1768 e = ee; 1769 } 1770 boolean ok = (t != null && e == null); 1771 if (ok == expOk) { 1772 logln("Ok: " + id + " => " + t + 1773 (e != null ? (", " + e.getMessage()) : "")); 1774 if (source != null) { 1775 expect(t, source, exp); 1776 } 1777 } else { 1778 errln("FAIL: " + id + " => " + t + 1779 (e != null ? 
(", " + e.getMessage()) : "")); 1780 } 1781 } 1782 } 1783 1784 /** 1785 * Test new property set syntax 1786 */ 1787 @Test TestPropertySet()1788 public void TestPropertySet() { 1789 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1790 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1791 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1792 } 1793 1794 /** 1795 * Test various failure points of the new 2.0 engine. 1796 */ 1797 @Test TestNewEngine()1798 public void TestNewEngine() { 1799 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1800 // Katakana should be untouched 1801 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1802 1803 if (true) { 1804 // This test will only work if Transliterator.ROLLBACK is 1805 // true. Otherwise, this test will fail, revealing a 1806 // limitation of global filters in incremental mode. 1807 1808 Transliterator a = 1809 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1810 Transliterator A = 1811 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1812 1813 //Transliterator array[] = new Transliterator[] { 1814 // a, 1815 // Transliterator.getInstance("NFD"), 1816 // A }; 1817 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1818 1819 try { 1820 Transliterator.registerInstance(a); 1821 Transliterator.registerInstance(A); 1822 1823 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1824 expect(t, "aAaA", "bAbA"); 1825 1826 Transliterator[] u = t.getElements(); 1827 assertTrue("getElements().length", u.length == 3); 1828 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1829 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1830 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1831 1832 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1833 t.setFilter(new UnicodeSet("[:Ll:]")); 1834 expect(t, "aAaA", "bAbA"); 1835 } finally { 1836 Transliterator.unregister("a_to_A"); 1837 
Transliterator.unregister("A_to_b"); 1838 } 1839 } 1840 1841 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1842 "a", 1843 "ax"); 1844 1845 String gr = 1846 "$ddot = \u0308 ;" + 1847 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1848 "$rough = \u0314 ;" + 1849 "($lcgvowel+ $ddot?) $rough > h | $1 ;" + 1850 "\u03b1 <> a ;" + 1851 "$rough <> h ;"; 1852 1853 expect(gr, "\u03B1\u0314", "ha"); 1854 } 1855 1856 /** 1857 * Test quantified segment behavior. We want: 1858 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1859 */ 1860 @Test TestQuantifiedSegment()1861 public void TestQuantifiedSegment() { 1862 // The normal case 1863 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1864 1865 // The tricky case; the quantifier is around the segment 1866 expect("([abc])+ > x $1 x;", "cba", "xax"); 1867 1868 // Tricky case in reverse direction 1869 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1870 1871 // Check post-context segment 1872 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1873 1874 // Test toRule/toPattern for non-quantified segment. 1875 // Careful with spacing here. 1876 String r = "([a-c]){q} > x $1 x;"; 1877 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1878 String rr = t.toRules(true); 1879 if (!r.equals(rr)) { 1880 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1881 } else { 1882 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1883 } 1884 1885 // Test toRule/toPattern for quantified segment. 1886 // Careful with spacing here. 
1887 r = "([a-c])+{q} > x $1 x;"; 1888 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1889 rr = t.toRules(true); 1890 if (!r.equals(rr)) { 1891 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1892 } else { 1893 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1894 } 1895 } 1896 1897 //====================================================================== 1898 // Ram's tests 1899 //====================================================================== 1900 /* this test performs test of rules in ISO 15915 */ 1901 @Test TestDevanagariLatinRT()1902 public void TestDevanagariLatinRT(){ 1903 String[] source = { 1904 "bh\u0101rata", 1905 "kra", 1906 "k\u1E63a", 1907 "khra", 1908 "gra", 1909 "\u1E45ra", 1910 "cra", 1911 "chra", 1912 "j\u00F1a", 1913 "jhra", 1914 "\u00F1ra", 1915 "\u1E6Dya", 1916 "\u1E6Dhra", 1917 "\u1E0Dya", 1918 //"r\u0323ya", // \u095c is not valid in Devanagari 1919 "\u1E0Dhya", 1920 "\u1E5Bhra", 1921 "\u1E47ra", 1922 "tta", 1923 "thra", 1924 "dda", 1925 "dhra", 1926 "nna", 1927 "pra", 1928 "phra", 1929 "bra", 1930 "bhra", 1931 "mra", 1932 "\u1E49ra", 1933 //"l\u0331ra", 1934 "yra", 1935 "\u1E8Fra", 1936 //"l-", 1937 "vra", 1938 "\u015Bra", 1939 "\u1E63ra", 1940 "sra", 1941 "hma", 1942 "\u1E6D\u1E6Da", 1943 "\u1E6D\u1E6Dha", 1944 "\u1E6Dh\u1E6Dha", 1945 "\u1E0D\u1E0Da", 1946 "\u1E0D\u1E0Dha", 1947 "\u1E6Dya", 1948 "\u1E6Dhya", 1949 "\u1E0Dya", 1950 "\u1E0Dhya", 1951 // Not roundtrippable -- 1952 // \u0939\u094d\u094d\u092E - hma 1953 // \u0939\u094d\u092E - hma 1954 // CharsToUnicodeString("hma"), 1955 "hya", 1956 "\u015Br\u0325", 1957 "\u015Bca", 1958 "\u0115", 1959 "san\u0304j\u012Bb s\u0113nagupta", 1960 "\u0101nand vaddir\u0101ju", 1961 }; 1962 String[] expected = { 1963 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 1964 "\u0915\u094D\u0930", /* kra */ 1965 "\u0915\u094D\u0937", /* ks\u0323a */ 1966 "\u0916\u094D\u0930", /* khra */ 1967 "\u0917\u094D\u0930", /* gra */ 1968 "\u0919\u094D\u0930", /* 
n\u0307ra */ 1969 "\u091A\u094D\u0930", /* cra */ 1970 "\u091B\u094D\u0930", /* chra */ 1971 "\u091C\u094D\u091E", /* jn\u0303a */ 1972 "\u091D\u094D\u0930", /* jhra */ 1973 "\u091E\u094D\u0930", /* n\u0303ra */ 1974 "\u091F\u094D\u092F", /* t\u0323ya */ 1975 "\u0920\u094D\u0930", /* t\u0323hra */ 1976 "\u0921\u094D\u092F", /* d\u0323ya */ 1977 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 1978 "\u0922\u094D\u092F", /* d\u0323hya */ 1979 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 1980 "\u0923\u094D\u0930", /* n\u0323ra */ 1981 "\u0924\u094D\u0924", /* tta */ 1982 "\u0925\u094D\u0930", /* thra */ 1983 "\u0926\u094D\u0926", /* dda */ 1984 "\u0927\u094D\u0930", /* dhra */ 1985 "\u0928\u094D\u0928", /* nna */ 1986 "\u092A\u094D\u0930", /* pra */ 1987 "\u092B\u094D\u0930", /* phra */ 1988 "\u092C\u094D\u0930", /* bra */ 1989 "\u092D\u094D\u0930", /* bhra */ 1990 "\u092E\u094D\u0930", /* mra */ 1991 "\u0929\u094D\u0930", /* n\u0331ra */ 1992 //"\u0934\u094D\u0930", /* l\u0331ra */ 1993 "\u092F\u094D\u0930", /* yra */ 1994 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 1995 //"l-", 1996 "\u0935\u094D\u0930", /* vra */ 1997 "\u0936\u094D\u0930", /* s\u0301ra */ 1998 "\u0937\u094D\u0930", /* s\u0323ra */ 1999 "\u0938\u094D\u0930", /* sra */ 2000 "\u0939\u094d\u092E", /* hma */ 2001 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 2002 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 2003 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 2004 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 2005 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 2006 "\u091F\u094D\u092F", /* t\u0323ya */ 2007 "\u0920\u094D\u092F", /* t\u0323hya */ 2008 "\u0921\u094D\u092F", /* d\u0323ya */ 2009 "\u0922\u094D\u092F", /* d\u0323hya */ 2010 // "hma", /* hma */ 2011 "\u0939\u094D\u092F", /* hya */ 2012 "\u0936\u0943", /* s\u0301r\u0325a */ 2013 "\u0936\u094D\u091A", /* s\u0301ca */ 2014 "\u090d", /* e\u0306 */ 2015 "\u0938\u0902\u091C\u0940\u092C\u094D 
\u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2016 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2017 }; 2018 2019 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2020 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2021 2022 for(int i= 0; i<source.length; i++){ 2023 expect(latinToDev,(source[i]),(expected[i])); 2024 expect(devToLatin,(expected[i]),(source[i])); 2025 } 2026 2027 } 2028 @Test TestTeluguLatinRT()2029 public void TestTeluguLatinRT(){ 2030 String[] source = { 2031 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2032 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2033 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2034 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2035 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2036 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2037 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2038 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2039 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2040 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2041 }; 2042 2043 String[] expected = { 2044 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2045 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2046 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2047 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2048 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2049 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2050 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d 
\u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2051 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2052 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2053 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2054 }; 2055 2056 2057 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2058 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2059 2060 for(int i= 0; i<source.length; i++){ 2061 expect(latinToDev,(source[i]),(expected[i])); 2062 expect(devToLatin,(expected[i]),(source[i])); 2063 } 2064 } 2065 2066 @Test TestSanskritLatinRT()2067 public void TestSanskritLatinRT(){ 2068 int MAX_LEN =15; 2069 String[] source = { 2070 "rmk\u1E63\u0113t", 2071 "\u015Br\u012Bmad", 2072 "bhagavadg\u012Bt\u0101", 2073 "adhy\u0101ya", 2074 "arjuna", 2075 "vi\u1E63\u0101da", 2076 "y\u014Dga", 2077 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2078 "uv\u0101cr\u0325", 2079 "dharmak\u1E63\u0113tr\u0113", 2080 "kuruk\u1E63\u0113tr\u0113", 2081 "samav\u0113t\u0101", 2082 "yuyutsava\u1E25", 2083 "m\u0101mak\u0101\u1E25", 2084 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2085 "kimakurvata", 2086 "san\u0304java", 2087 }; 2088 String[] expected = { 2089 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2090 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2091 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2092 "\u0905\u0927\u094d\u092f\u093e\u092f", 2093 "\u0905\u0930\u094d\u091c\u0941\u0928", 2094 "\u0935\u093f\u0937\u093e\u0926", 2095 "\u092f\u094b\u0917", 2096 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2097 "\u0909\u0935\u093E\u091A\u0943", 2098 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2099 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2100 
"\u0938\u092e\u0935\u0947\u0924\u093e", 2101 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2102 "\u092e\u093e\u092e\u0915\u093e\u0903", 2103 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2104 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2105 "\u0938\u0902\u091c\u0935", 2106 }; 2107 2108 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2109 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2110 for(int i= 0; i<MAX_LEN; i++){ 2111 expect(latinToDev,(source[i]),(expected[i])); 2112 expect(devToLatin,(expected[i]),(source[i])); 2113 } 2114 } 2115 2116 @Test TestCompoundLatinRT()2117 public void TestCompoundLatinRT(){ 2118 int MAX_LEN =15; 2119 String[] source = { 2120 "rmk\u1E63\u0113t", 2121 "\u015Br\u012Bmad", 2122 "bhagavadg\u012Bt\u0101", 2123 "adhy\u0101ya", 2124 "arjuna", 2125 "vi\u1E63\u0101da", 2126 "y\u014Dga", 2127 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2128 "uv\u0101cr\u0325", 2129 "dharmak\u1E63\u0113tr\u0113", 2130 "kuruk\u1E63\u0113tr\u0113", 2131 "samav\u0113t\u0101", 2132 "yuyutsava\u1E25", 2133 "m\u0101mak\u0101\u1E25", 2134 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2135 "kimakurvata", 2136 "san\u0304java" 2137 }; 2138 String[] expected = { 2139 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2140 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2141 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2142 "\u0905\u0927\u094d\u092f\u093e\u092f", 2143 "\u0905\u0930\u094d\u091c\u0941\u0928", 2144 "\u0935\u093f\u0937\u093e\u0926", 2145 "\u092f\u094b\u0917", 2146 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2147 "\u0909\u0935\u093E\u091A\u0943", 2148 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2149 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2150 "\u0938\u092e\u0935\u0947\u0924\u093e", 2151 
"\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2152 "\u092e\u093e\u092e\u0915\u093e\u0903", 2153 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2154 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2155 "\u0938\u0902\u091c\u0935" 2156 }; 2157 2158 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2159 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2160 for(int i= 0; i<MAX_LEN; i++){ 2161 expect(latinToDevToLatin,(source[i]),(source[i])); 2162 expect(devToLatinToDev,(expected[i]),(expected[i])); 2163 } 2164 } 2165 /** 2166 * Test Gurmukhi-Devanagari Tippi and Bindi 2167 */ 2168 @Test TestGurmukhiDevanagari()2169 public void TestGurmukhiDevanagari(){ 2170 // the rule says: 2171 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2172 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2173 2174 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2175 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2176 2177 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2178 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2179 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2180 StringBuffer src = new StringBuffer(" \u0902"); 2181 StringBuffer expect = new StringBuffer(" \u0A02"); 2182 while(vIter.next()){ 2183 src.setCharAt(0,(char) vIter.codepoint); 2184 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2185 expect(trans,src.toString(),expect.toString()); 2186 } 2187 2188 expect.setCharAt(1,'\u0A70'); 2189 while(nvIter.next()){ 2190 //src.setCharAt(0,(char) nvIter.codepoint); 2191 src.setCharAt(0,(char)nvIter.codepoint); 2192 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2193 expect(trans,src.toString(),expect.toString()); 2194 } 2195 
} 2196 /** 2197 * Test instantiation from a locale. 2198 */ 2199 @Test TestLocaleInstantiation()2200 public void TestLocaleInstantiation() { 2201 Transliterator t; 2202 try{ 2203 t = Transliterator.getInstance("te_IN-Latin"); 2204 //expect(t, "\u0430", "a"); 2205 }catch(IllegalArgumentException ex){ 2206 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2207 } 2208 try{ 2209 t = Transliterator.getInstance("ru_RU-Latin"); 2210 expect(t, "\u0430", "a"); 2211 }catch(IllegalArgumentException ex){ 2212 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2213 } 2214 try{ 2215 t = Transliterator.getInstance("en-el"); 2216 expect(t, "a", "\u03B1"); 2217 }catch(IllegalArgumentException ex){ 2218 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2219 } 2220 } 2221 2222 /** 2223 * Test title case handling of accent (should ignore accents) 2224 */ 2225 @Test TestTitleAccents()2226 public void TestTitleAccents() { 2227 Transliterator t = Transliterator.getInstance("Title"); 2228 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2229 } 2230 2231 /** 2232 * Basic test of a locale resource based rule. 2233 */ 2234 @Test TestLocaleResource()2235 public void TestLocaleResource() { 2236 String DATA[] = { 2237 // id from to 2238 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2239 "Latin-el", "b", "\u03bc\u03c0", 2240 "Latin-Greek", "b", "\u03B2", 2241 "Greek-Latin/UNGEGN", "\u03B2", "v", 2242 "el-Latin", "\u03B2", "v", 2243 "Greek-Latin", "\u03B2", "b", 2244 }; 2245 for (int i=0; i<DATA.length; i+=3) { 2246 Transliterator t = Transliterator.getInstance(DATA[i]); 2247 expect(t, DATA[i+1], DATA[i+2]); 2248 } 2249 } 2250 2251 /** 2252 * Make sure parse errors reference the right line. 
2253 */ 2254 @Test TestParseError()2255 public void TestParseError() { 2256 String rule = 2257 "a > b;\n" + 2258 "# more stuff\n" + 2259 "d << b;"; 2260 try { 2261 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2262 if(t!=null){ 2263 errln("FAIL: Did not get expected exception"); 2264 } 2265 } catch (IllegalArgumentException e) { 2266 String err = e.getMessage(); 2267 if (err.indexOf("d << b") >= 0) { 2268 logln("Ok: " + err); 2269 } else { 2270 errln("FAIL: " + err); 2271 } 2272 return; 2273 } 2274 errln("FAIL: no syntax error"); 2275 } 2276 2277 /** 2278 * Make sure sets on output are disallowed. 2279 */ 2280 @Test TestOutputSet()2281 public void TestOutputSet() { 2282 String rule = "$set = [a-cm-n]; b > $set;"; 2283 Transliterator t = null; 2284 try { 2285 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2286 if(t!=null){ 2287 errln("FAIL: Did not get the expected exception"); 2288 } 2289 } catch (IllegalArgumentException e) { 2290 logln("Ok: " + e.getMessage()); 2291 return; 2292 } 2293 errln("FAIL: No syntax error"); 2294 } 2295 2296 /** 2297 * Test the use variable range pragma, making sure that use of 2298 * variable range characters is detected and flagged as an error. 
2299 */ 2300 @Test TestVariableRange()2301 public void TestVariableRange() { 2302 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2303 try { 2304 Transliterator t = 2305 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2306 if(t!=null){ 2307 errln("FAIL: Did not get the expected exception"); 2308 } 2309 } catch (IllegalArgumentException e) { 2310 logln("Ok: " + e.getMessage()); 2311 return; 2312 } 2313 errln("FAIL: No syntax error"); 2314 } 2315 2316 /** 2317 * Test invalid post context error handling 2318 */ 2319 @Test TestInvalidPostContext()2320 public void TestInvalidPostContext() { 2321 try { 2322 Transliterator t = 2323 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2324 if(t!=null){ 2325 errln("FAIL: Did not get the expected exception"); 2326 } 2327 } catch (IllegalArgumentException e) { 2328 String msg = e.getMessage(); 2329 if (msg.indexOf("a}b{c") >= 0) { 2330 logln("Ok: " + msg); 2331 } else { 2332 errln("FAIL: " + msg); 2333 } 2334 return; 2335 } 2336 errln("FAIL: No syntax error"); 2337 } 2338 2339 /** 2340 * Test ID form variants 2341 */ 2342 @Test TestIDForms()2343 public void TestIDForms() { 2344 String DATA[] = { 2345 "NFC", null, "NFD", 2346 "nfd", null, "NFC", // make sure case is ignored 2347 "Any-NFKD", null, "Any-NFKC", 2348 "Null", null, "Null", 2349 "-nfkc", "nfkc", "NFKD", 2350 "-nfkc/", "nfkc", "NFKD", 2351 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2352 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2353 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2354 "Source-", null, null, 2355 "Source/Variant-", null, null, 2356 "Source-/Variant", null, null, 2357 "/Variant", null, null, 2358 "/Variant-", null, null, 2359 "-/Variant", null, null, 2360 "-/", null, null, 2361 "-", null, null, 2362 "/", null, null, 2363 }; 2364 2365 for (int i=0; i<DATA.length; i+=3) { 2366 String ID = DATA[i]; 2367 String expID = DATA[i+1]; 2368 String 
expInvID = DATA[i+2]; 2369 boolean expValid = (expInvID != null); 2370 if (expID == null) { 2371 expID = ID; 2372 } 2373 try { 2374 Transliterator t = 2375 Transliterator.getInstance(ID); 2376 Transliterator u = t.getInverse(); 2377 if (t.getID().equals(expID) && 2378 u.getID().equals(expInvID)) { 2379 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2380 } else { 2381 errln("FAIL: getInstance(" + ID + ") => " + 2382 t.getID() + " x getInverse() => " + u.getID() + 2383 ", expected " + expInvID); 2384 } 2385 } catch (IllegalArgumentException e) { 2386 if (!expValid) { 2387 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2388 } else { 2389 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2390 } 2391 } 2392 } 2393 } 2394 checkRules(String label, Transliterator t2, String testRulesForward)2395 void checkRules(String label, Transliterator t2, String testRulesForward) { 2396 String rules2 = t2.toRules(true); 2397 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2398 rules2 = TestUtility.replace(rules2, " ", ""); 2399 rules2 = TestUtility.replace(rules2, "\n", ""); 2400 rules2 = TestUtility.replace(rules2, "\r", ""); 2401 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2402 2403 if (!rules2.equals(testRulesForward)) { 2404 errln(label); 2405 logln("GENERATED RULES: " + rules2); 2406 logln("SHOULD BE: " + testRulesForward); 2407 } 2408 } 2409 2410 /** 2411 * Mark's toRules test. 
2412 */ 2413 @Test TestToRulesMark()2414 public void TestToRulesMark() { 2415 2416 String testRules = 2417 "::[[:Latin:][:Mark:]];" 2418 + "::NFKD (NFC);" 2419 + "::Lower (Lower);" 2420 + "a <> \\u03B1;" // alpha 2421 + "::NFKC (NFD);" 2422 + "::Upper (Lower);" 2423 + "::Lower ();" 2424 + "::([[:Greek:][:Mark:]]);" 2425 ; 2426 String testRulesForward = 2427 "::[[:Latin:][:Mark:]];" 2428 + "::NFKD(NFC);" 2429 + "::Lower(Lower);" 2430 + "a > \\u03B1;" 2431 + "::NFKC(NFD);" 2432 + "::Upper (Lower);" 2433 + "::Lower ();" 2434 ; 2435 String testRulesBackward = 2436 "::[[:Greek:][:Mark:]];" 2437 + "::Lower (Upper);" 2438 + "::NFD(NFKC);" 2439 + "\\u03B1 > a;" 2440 + "::Lower(Lower);" 2441 + "::NFC(NFKD);" 2442 ; 2443 String source = "\u00E1"; // a-acute 2444 String target = "\u03AC"; // alpha-acute 2445 2446 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2447 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2448 2449 expect(t2, source, target); 2450 expect(t3, target, source); 2451 2452 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2453 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2454 } 2455 2456 /** 2457 * Test Escape and Unescape transliterators. 2458 */ 2459 @Test TestEscape()2460 public void TestEscape() { 2461 expect(Transliterator.getInstance("Hex-Any"), 2462 "\\x{40}\\U000000312Q", 2463 "@12Q"); 2464 expect(Transliterator.getInstance("Any-Hex/C"), 2465 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2466 "\\u0041\\U0010BEEF\\uFEED"); 2467 expect(Transliterator.getInstance("Any-Hex/Java"), 2468 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2469 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2470 expect(Transliterator.getInstance("Any-Hex/Perl"), 2471 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2472 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2473 } 2474 2475 /** 2476 * Make sure display names of variants look reasonable. 
*/
    @Test
    public void TestDisplayName() {
        String DATA[] = {
            // ID, forward name, reverse name
            // Update the text as necessary -- the important thing is
            // not the text itself, but how various cases are handled.

            // Basic test
            "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",

            // Variants
            "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",

            // Target-only IDs
            "NFC", "Any to NFC", "Any to NFD",
        };

        Locale US = Locale.US;

        for (int i=0; i<DATA.length; i+=3) {
            String name = Transliterator.getDisplayName(DATA[i], US);
            if (!name.equals(DATA[i+1])) {
                errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
                        name + ", expected " + DATA[i+1]);
            } else {
                logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
            }
            Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
            name = Transliterator.getDisplayName(t.getID(), US);
            if (!name.equals(DATA[i+2])) {
                errln("FAIL: " + t.getID() + ".getDisplayName() => " +
                        name + ", expected " + DATA[i+2]);
            } else {
                logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
            }

            // Cover getDisplayName(String), which reads the default locale.
            // The default is global state shared with every other test, so it
            // is restored in a finally block even if the check below fails.
            ULocale save = ULocale.getDefault();
            ULocale.setDefault(ULocale.US);
            try {
                String name2 = Transliterator.getDisplayName(t.getID());
                if (!name.equals(name2)) {
                    errln("FAIL: getDisplayName with default locale failed");
                }
            } finally {
                ULocale.setDefault(save);
            }
        }
    }

    /**
     * Test anchor masking: a rule set containing both an anchored rule
     * ("^a") and the same rule without the anchor must be accepted.
     */
    @Test
    public void TestAnchorMasking() {
        String rule = "^a > Q; a > q;";
        try {
            Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
            if (t == null) {
                // Success is expected here, so a null result is a creation failure.
                errln("FAIL: " + rule + " => createFromRules returned null");
            }
        } catch (IllegalArgumentException e) {
            errln("FAIL: " + rule + " => " + e);
        }
    }

    /**
     * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
     * during ICU4J modularization to remove dependency of tests on Transliterator.
     *
     * Walks every code point, and for each script long name and short name
     * seen for the first time, checks that "[:name:];NFD" can be instantiated.
     */
    @Test
    public void TestScriptAllCodepoints(){
        HashSet<String> scriptIdsChecked = new HashSet<String>();
        HashSet<String> scriptAbbrsChecked = new HashSet<String>();
        for (int i = 0; i <= 0x10ffff; i++) {
            int code = UScript.getScript(i);
            if (code == UScript.INVALID_CODE) {
                errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
            }
            String id = UScript.getName(code);
            String abbr = UScript.getShortName(code);
            // Set.add() returns true only the first time a name is seen.
            if (scriptIdsChecked.add(id)) {
                checkScriptFilterTransliterator(i, id);
            }
            if (scriptAbbrsChecked.add(abbr)) {
                checkScriptFilterTransliterator(i, abbr);
            }
        }
    }

    /** Reports an error unless the ID "[:script:];NFD" yields a transliterator. */
    private void checkScriptFilterTransliterator(int codePoint, String script) {
        String filteredId = "[:" + script + ":];NFD";
        try {
            Transliterator t = Transliterator.getInstance(filteredId);
            if (t == null) {
                errln("Failed to create transliterator for "+hex(codePoint)+
                        " script code: " +script);
            }
        } catch (Exception e) {
            errln("Failed to create transliterator for "+hex(codePoint)
                    +" script code: " +script
                    + " Exception: "+e.getMessage());
        }
    }


    // Rule-based transliterators that TestSpecialCases registers temporarily:
    // each row is { ID, rules }.
    static final String[][] registerRules = {
        {"Any-Dev1", "x > X; y > Y;"},
        {"Any-Dev2", "XY > Z"},
        {"Greek-Latin/FAKE",
            "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
            "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
            "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
            "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
        },
    };
2600 2601 static final String DESERET_DEE = UTF16.valueOf(0x10414); 2602 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2603 2604 static final String[][] testCases = { 2605 2606 // NORMALIZATION 2607 // should add more test cases 2608 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2609 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2610 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2611 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2612 2613 // mp -> b BUG 2614 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2615 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2616 2617 // check for devanagari bug 2618 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2619 2620 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2621 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2622 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2623 //TODO: enable this test once Titlecase works right 2624 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2625 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2626 2627 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2628 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2629 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2630 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2631 2632 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2633 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2634 2635 // FORMS OF S 2636 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2637 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2638 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2639 
{"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2640 2641 // Tatiana bug 2642 // Upper: TAT\u02B9\u00C2NA 2643 // Lower: tat\u02B9\u00E2na 2644 // Title: Tat\u02B9\u00E2na 2645 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2646 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2647 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2648 }; 2649 2650 @Test TestSpecialCases()2651 public void TestSpecialCases() { 2652 2653 for (int i = 0; i < registerRules.length; ++i) { 2654 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2655 registerRules[i][1], Transliterator.FORWARD); 2656 DummyFactory.add(registerRules[i][0], t); 2657 } 2658 for (int i = 0; i < testCases.length; ++i) { 2659 String name = testCases[i][0]; 2660 Transliterator t = Transliterator.getInstance(name); 2661 String id = t.getID(); 2662 String source = testCases[i][1]; 2663 String target = null; 2664 2665 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2666 2667 if (testCases[i].length > 2) target = testCases[i][2]; 2668 else if (id.equalsIgnoreCase("NFD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD); 2669 else if (id.equalsIgnoreCase("NFC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC); 2670 else if (id.equalsIgnoreCase("NFKD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD); 2671 else if (id.equalsIgnoreCase("NFKC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC); 2672 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2673 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2674 2675 expect(t, source, target); 2676 } 2677 for (int i = 0; i < registerRules.length; ++i) { 2678 Transliterator.unregister(registerRules[i][0]); 2679 } 2680 } 2681 2682 // 
seems like there should be an easier way to just register an instance of a transliterator 2683 2684 static class DummyFactory implements Transliterator.Factory { 2685 static DummyFactory singleton = new DummyFactory(); 2686 static HashMap m = new HashMap(); 2687 2688 // Since Transliterators are immutable, we don't have to clone on set & get add(String ID, Transliterator t)2689 static void add(String ID, Transliterator t) { 2690 m.put(ID, t); 2691 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2692 Transliterator.registerFactory(ID, singleton); 2693 } 2694 @Override getInstance(String ID)2695 public Transliterator getInstance(String ID) { 2696 return (Transliterator) m.get(ID); 2697 } 2698 } 2699 2700 @Test TestCasing()2701 public void TestCasing() { 2702 Transliterator toLower = Transliterator.getInstance("lower"); 2703 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2704 Transliterator toUpper = Transliterator.getInstance("upper"); 2705 Transliterator toTitle = Transliterator.getInstance("title"); 2706 for (int i = 0; i < 0x600; ++i) { 2707 String s = UTF16.valueOf(i); 2708 2709 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2710 assertEquals("Lowercase", lower, toLower.transform(s)); 2711 2712 String casefold = UCharacter.foldCase(s, true); 2713 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2714 2715 if (i != 0x0345) { 2716 // ICU 60 changes the default titlecasing index adjustment. 2717 // For word breaks it is mostly the same as before, 2718 // but it is different for the iota subscript (the only cased combining mark). 2719 // This should be ok because the iota subscript is not supposed to appear 2720 // at the start of a word. 2721 // The title Transliterator is far below feature parity with the 2722 // UCharacter and CaseMap titlecasing functions. 
2723 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2724 assertEquals("Title", title, toTitle.transform(s)); 2725 } 2726 2727 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2728 assertEquals("Upper", upper, toUpper.transform(s)); 2729 } 2730 } 2731 2732 @Test TestSurrogateCasing()2733 public void TestSurrogateCasing () { 2734 // check that casing handles surrogates 2735 // titlecase is currently defective 2736 int dee = UTF16.charAt(DESERET_dee,0); 2737 int DEE = UCharacter.toTitleCase(dee); 2738 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2739 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2740 } 2741 2742 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) { 2743 errln("Fails uppercase of surrogates"); 2744 } 2745 2746 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2747 errln("Fails lowercase of surrogates"); 2748 } 2749 } 2750 2751 2752 @Test TestFunction()2753 public void TestFunction() { 2754 // Careful with spacing and ';' here: Phrase this exactly 2755 // as toRules() is going to return it. If toRules() changes 2756 // with regard to spacing or ';', then adjust this string. 
2757 String rule = 2758 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2759 2760 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2761 if (t == null) { 2762 errln("FAIL: createFromRules failed"); 2763 return; 2764 } 2765 2766 String r = t.toRules(true); 2767 if (r.equals(rule)) { 2768 logln("OK: toRules() => " + r); 2769 } else { 2770 errln("FAIL: toRules() => " + r + 2771 ", expected " + rule); 2772 } 2773 2774 expect(t, "The Quick Brown Fox", 2775 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2776 rule = 2777 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2778 2779 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2780 if (t == null) { 2781 errln("FAIL: createFromRules failed"); 2782 return; 2783 } 2784 2785 r = t.toRules(true); 2786 if (r.equals(rule)) { 2787 logln("OK: toRules() => " + r); 2788 } else { 2789 errln("FAIL: toRules() => " + r + 2790 ", expected " + rule); 2791 } 2792 2793 expect(t, "\u0301", 2794 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2795 } 2796 2797 @Test TestInvalidBackRef()2798 public void TestInvalidBackRef() { 2799 String rule = ". > $1;"; 2800 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2801 try { 2802 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2803 if (t != null) { 2804 errln("FAIL: createFromRules should have returned NULL"); 2805 } 2806 errln("FAIL: Ok: . > $1; => no error"); 2807 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2808 if (t2 != null) { 2809 errln("FAIL: createFromRules should have returned NULL"); 2810 } 2811 errln("FAIL: Ok: . > $1; => no error"); 2812 } catch (IllegalArgumentException e) { 2813 logln("Ok: . 
> $1; => " + e.getMessage()); 2814 } 2815 } 2816 2817 @Test TestMulticharStringSet()2818 public void TestMulticharStringSet() { 2819 // Basic testing 2820 String rule = 2821 " [{aa}] > x;" + 2822 " a > y;" + 2823 " [b{bc}] > z;" + 2824 "[{gd}] { e > q;" + 2825 " e } [{fg}] > r;" ; 2826 2827 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2828 if (t == null) { 2829 errln("FAIL: createFromRules failed"); 2830 return; 2831 } 2832 2833 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2834 "y x yz z d gd de gdq gdqfg ddrfg"); 2835 2836 // Overlapped string test. Make sure that when multiple 2837 // strings can match that the longest one is matched. 2838 rule = 2839 " [a {ab} {abc}] > x;" + 2840 " b > y;" + 2841 " c > z;" + 2842 " q [t {st} {rst}] { e > p;" ; 2843 2844 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2845 if (t == null) { 2846 errln("FAIL: createFromRules failed"); 2847 return; 2848 } 2849 2850 expect(t, "a ab abc qte qste qrste", 2851 "x x x qtp qstp qrstp"); 2852 } 2853 2854 /** 2855 * Test that user-registered transliterators can be used under function 2856 * syntax. 2857 */ 2858 @Test TestUserFunction()2859 public void TestUserFunction() { 2860 Transliterator t; 2861 2862 // There's no need to register inverses if we don't use them 2863 TestUserFunctionFactory.add("Any-gif", 2864 Transliterator.createFromRules("gif", 2865 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2866 Transliterator.FORWARD)); 2867 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2868 2869 TestUserFunctionFactory.add("Any-RemoveCurly", 2870 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2871 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2872 2873 logln("Trying &hex"); 2874 t = Transliterator.createFromRules("hex2", "(.) 
> &hex($1);", Transliterator.FORWARD); 2875 logln("Registering"); 2876 TestUserFunctionFactory.add("Any-hex2", t); 2877 t = Transliterator.getInstance("Any-hex2"); 2878 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2879 2880 logln("Trying &gif"); 2881 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2882 logln("Registering"); 2883 TestUserFunctionFactory.add("Any-gif2", t); 2884 t = Transliterator.getInstance("Any-gif2"); 2885 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2886 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2887 2888 // Test that filters are allowed after & 2889 t = Transliterator.createFromRules("test", 2890 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2891 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2892 2893 // Unregister our test stuff 2894 TestUserFunctionFactory.unregister(); 2895 } 2896 2897 static class TestUserFunctionFactory implements Transliterator.Factory { 2898 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 2899 static HashMap m = new HashMap(); 2900 add(String ID, Transliterator t)2901 static void add(String ID, Transliterator t) { 2902 m.put(new CaseInsensitiveString(ID), t); 2903 Transliterator.registerFactory(ID, singleton); 2904 } 2905 2906 @Override getInstance(String ID)2907 public Transliterator getInstance(String ID) { 2908 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 2909 } 2910 unregister()2911 static void unregister() { 2912 Iterator ids = m.keySet().iterator(); 2913 while (ids.hasNext()) { 2914 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 2915 Transliterator.unregister(id.getString()); 2916 ids.remove(); // removes pair from m 2917 } 2918 } 2919 } 2920 2921 /** 2922 * Test the Any-X transliterators. 
2923 */ 2924 @Test TestAnyX()2925 public void TestAnyX() { 2926 Transliterator anyLatin = 2927 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 2928 2929 expect(anyLatin, 2930 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 2931 "greek:abkABK hiragana:abuku cyrillic:abc"); 2932 } 2933 2934 /** 2935 * Test Any-X transliterators with sample letters from all scripts. 2936 */ 2937 @Test TestAny()2938 public void TestAny() { 2939 UnicodeSet alphabetic = new UnicodeSet("[:alphabetic:]").freeze(); 2940 StringBuffer testString = new StringBuffer(); 2941 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 2942 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 2943 int count = 5; 2944 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 2945 testString.append(it.getString()); 2946 if (--count < 0) break; 2947 } 2948 } 2949 logln("Sample set for Any-Latin: " + testString); 2950 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 2951 String result = anyLatin.transliterate(testString.toString()); 2952 logln("Sample result for Any-Latin: " + result); 2953 } 2954 2955 2956 /** 2957 * Test the source and target set API. These are only implemented 2958 * for RBT and CompoundTransliterator at this time. 
2959 */ 2960 @Test TestSourceTargetSet()2961 public void TestSourceTargetSet() { 2962 // Rules 2963 String r = 2964 "a > b; " + 2965 "r [x{lu}] > q;"; 2966 2967 // Expected source 2968 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]"); 2969 2970 // Expected target 2971 UnicodeSet expTrg = new UnicodeSet("[bq]"); 2972 2973 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 2974 UnicodeSet src = t.getSourceSet(); 2975 UnicodeSet trg = t.getTargetSet(); 2976 2977 if (src.equals(expSrc) && trg.equals(expTrg)) { 2978 logln("Ok: " + r + " => source = " + src.toPattern(true) + 2979 ", target = " + trg.toPattern(true)); 2980 } else { 2981 errln("FAIL: " + r + " => source = " + src.toPattern(true) + 2982 ", expected " + expSrc.toPattern(true) + 2983 "; target = " + trg.toPattern(true) + 2984 ", expected " + expTrg.toPattern(true)); 2985 } 2986 } 2987 2988 @Test TestSourceTargetSetFilter()2989 public void TestSourceTargetSetFilter() { 2990 String[][] tests = { 2991 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE 2992 {"[] Latin-Greek", null, "[\']"}, 2993 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"}, 2994 {"[] Any-Latin"}, 2995 {"[] casefold"}, 2996 {"[] NFKD;"}, 2997 {"[] NFKC;"}, 2998 {"[] hex"}, 2999 {"[] lower"}, 3000 {"[] null"}, 3001 {"[] remove"}, 3002 {"[] title"}, 3003 {"[] upper"}, 3004 }; 3005 UnicodeSet expectedSource = UnicodeSet.EMPTY; 3006 for (String[] testPair : tests) { 3007 String test = testPair[0]; 3008 Transliterator t0; 3009 try { 3010 t0 = Transliterator.getInstance(test); 3011 } catch (Exception e) { 3012 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD); 3013 } 3014 Transliterator t1; 3015 try { 3016 t1 = t0.getInverse(); 3017 } catch (Exception e) { 3018 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE); 3019 } 3020 int targetIndex = 0; 3021 for (Transliterator t : new Transliterator[]{t0, t1}) { 3022 boolean ok; 3023 UnicodeSet source = t.getSourceSet(); 3024 String 
direction = t == t0 ? "FORWARD\t" : "REVERSE\t"; 3025 targetIndex++; 3026 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource 3027 : testPair[targetIndex] == null ? expectedSource 3028 : testPair[targetIndex].length() == 0 ? expectedSource 3029 : new UnicodeSet(testPair[targetIndex]); 3030 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source); 3031 if (!ok) { // for debugging 3032 source = t.getSourceSet(); 3033 } 3034 UnicodeSet target = t.getTargetSet(); 3035 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target); 3036 if (!ok) { // for debugging 3037 target = t.getTargetSet(); 3038 } 3039 } 3040 } 3041 } 3042 isAtomic(String s, String t, Transliterator trans)3043 static boolean isAtomic(String s, String t, Transliterator trans) { 3044 for (int i = 1; i < s.length(); ++i) { 3045 if (!CharSequences.onCharacterBoundary(s, i)) { 3046 continue; 3047 } 3048 String q = trans.transform(s.substring(0,i)); 3049 if (t.startsWith(q)) { 3050 String r = trans.transform(s.substring(i)); 3051 if (t.length() == q.length() + r.length() && t.endsWith(r)) { 3052 return false; 3053 } 3054 } 3055 } 3056 return true; 3057 // // make sure that every part is different 3058 // if (s.codePointCount(0, s.length()) > 1) { 3059 // int[] codePoints = It.codePoints(s); 3060 // for (int k = 0; k < codePoints.length; ++k) { 3061 // int pos = indexOf(t,codePoints[k]); 3062 // if (pos >= 0) { 3063 // int x; 3064 // } 3065 // } 3066 // if (s.contains("\u00C0")) { 3067 // logln("\u00C0"); 3068 // } 3069 // } 3070 } 3071 addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3072 static void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) { 3073 expectedSource.addAll(s); 3074 if (t.length() > 0) { 3075 expectedTarget.addAll(t); 3076 } 3077 } 3078 3079 // private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) { 
3080 // disorderedMarks.add(s); 3081 // for (int j = 1; j < s.length(); ++j) { 3082 // if (CharSequences.onCharacterBoundary(s, j)) { 3083 // String shorter = s.substring(0,j); 3084 // disorderedMarks.add(shorter); 3085 // disorderedMarks.add(nfc.normalize(shorter) + s.substring(j)); 3086 // } 3087 // } 3088 // } 3089 3090 @Test TestCharUtils()3091 public void TestCharUtils() { 3092 String[][] startTests = { 3093 {"1", "a", "ab"}, 3094 {"0", "a", "xb"}, 3095 {"0", "\uD800", "\uD800\uDC01"}, 3096 {"1", "\uD800a", "\uD800b"}, 3097 {"0", "\uD800\uDC00", "\uD800\uDC01"}, 3098 }; 3099 for (String[] row : startTests) { 3100 int actual = findSharedStartLength(row[1], row[2]); 3101 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")", 3102 Integer.parseInt(row[0]), 3103 actual); 3104 } 3105 String[][] endTests = { 3106 {"0", "\uDC00", "\uD801\uDC00"}, 3107 {"1", "a", "ba"}, 3108 {"0", "a", "bx"}, 3109 {"1", "a\uDC00", "b\uDC00"}, 3110 {"0", "\uD800\uDC00", "\uD801\uDC00"}, 3111 }; 3112 for (String[] row : endTests) { 3113 int actual = findSharedEndLength(row[1], row[2]); 3114 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")", 3115 Integer.parseInt(row[0]), 3116 actual); 3117 } 3118 } 3119 3120 /** 3121 * @param s 3122 * @param t 3123 * @return 3124 */ 3125 // TODO make generally available findSharedStartLength(CharSequence s, CharSequence t)3126 private static int findSharedStartLength(CharSequence s, CharSequence t) { 3127 int min = Math.min(s.length(), t.length()); 3128 int i; 3129 char sch, tch; 3130 for (i = 0; i < min; ++i) { 3131 sch = s.charAt(i); 3132 tch = t.charAt(i); 3133 if (sch != tch) { 3134 break; 3135 } 3136 } 3137 return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? 
i : i - 1; 3138 } 3139 3140 /** 3141 * @param s 3142 * @param t 3143 * @return 3144 */ 3145 // TODO make generally available findSharedEndLength(CharSequence s, CharSequence t)3146 private static int findSharedEndLength(CharSequence s, CharSequence t) { 3147 int slength = s.length(); 3148 int tlength = t.length(); 3149 int min = Math.min(slength, tlength); 3150 int i; 3151 char sch, tch; 3152 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho' 3153 for (i = 0; i < min; ++i) { 3154 sch = s.charAt(slength - i - 1); 3155 tch = t.charAt(tlength - i - 1); 3156 if (sch != tch) { 3157 break; 3158 } 3159 } 3160 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1; 3161 } 3162 3163 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK} 3164 assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3165 static void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) { 3166 boolean haveError = false; 3167 if (!actual.containsAll(empirical)) { 3168 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual); 3169 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing)); 3170 haveError = true; 3171 } 3172 if (!empirical.containsAll(actual)) { 3173 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical); 3174 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra)); 3175 haveError = true; 3176 } 3177 if (!haveError) { 3178 logln("OK " + message + ' ' + toPattern(empirical)); 3179 } 3180 } 3181 toPattern(UnicodeSet missing)3182 private static String toPattern(UnicodeSet missing) { 3183 String result = missing.toPattern(false); 3184 if (result.length() < 200) { 3185 return result; 3186 } 3187 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 
200 : 199) + "\u2026"; 3188 } 3189 3190 3191 /** 3192 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3193 */ 3194 @Test TestPatternWhitespace()3195 public void TestPatternWhitespace() { 3196 // Rules 3197 String r = "a > \u200E b;"; 3198 3199 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3200 3201 expect(t, "a", "b"); 3202 3203 // UnicodeSet 3204 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3205 3206 if (set.contains(0x200E)) { 3207 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3208 } 3209 } 3210 3211 @Test TestAlternateSyntax()3212 public void TestAlternateSyntax() { 3213 // U+2206 == & 3214 // U+2190 == < 3215 // U+2192 == > 3216 // U+2194 == <> 3217 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3218 "abc", 3219 "xbz"); 3220 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3221 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3222 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3223 } 3224 3225 @Test TestPositionAPI()3226 public void TestPositionAPI() { 3227 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3228 Transliterator.Position b = new Transliterator.Position(a); 3229 Transliterator.Position c = new Transliterator.Position(); 3230 c.set(a); 3231 // Call the toString() API: 3232 if (a.equals(b) && a.equals(c)) { 3233 logln("Ok: " + a + " == " + b + " == " + c); 3234 } else { 3235 errln("FAIL: " + a + " != " + b + " != " + c); 3236 } 3237 } 3238 3239 //====================================================================== 3240 // New tests for the ::BEGIN/::END syntax 3241 //====================================================================== 3242 3243 private static final String[] BEGIN_END_RULES = new String[] { 3244 // [0] 3245 "abc > xy;" 3246 + "aba > z;", 3247 3248 // [1] 3249 /* 3250 "::BEGIN;" 3251 + "abc > xy;" 3252 + "::END;" 3253 + "::BEGIN;" 3254 + "aba > z;" 3255 + "::END;", 3256 */ 3257 "", // test case commented 
out below, this is here to keep from messing up the indexes 3258 3259 // [2] 3260 /* 3261 "abc > xy;" 3262 + "::BEGIN;" 3263 + "aba > z;" 3264 + "::END;", 3265 */ 3266 "", // test case commented out below, this is here to keep from messing up the indexes 3267 3268 // [3] 3269 /* 3270 "::BEGIN;" 3271 + "abc > xy;" 3272 + "::END;" 3273 + "aba > z;", 3274 */ 3275 "", // test case commented out below, this is here to keep from messing up the indexes 3276 3277 // [4] 3278 "abc > xy;" 3279 + "::Null;" 3280 + "aba > z;", 3281 3282 // [5] 3283 "::Upper;" 3284 + "ABC > xy;" 3285 + "AB > x;" 3286 + "C > z;" 3287 + "::Upper;" 3288 + "XYZ > p;" 3289 + "XY > q;" 3290 + "Z > r;" 3291 + "::Upper;", 3292 3293 // [6] 3294 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3295 + "$delim = [\\-$ws];" 3296 + "$ws $delim* > ' ';" 3297 + "'-' $delim* > '-';", 3298 3299 // [7] 3300 "::Null;" 3301 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3302 + "$delim = [\\-$ws];" 3303 + "$ws $delim* > ' ';" 3304 + "'-' $delim* > '-';", 3305 3306 // [8] 3307 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3308 + "$delim = [\\-$ws];" 3309 + "$ws $delim* > ' ';" 3310 + "'-' $delim* > '-';" 3311 + "::Null;", 3312 3313 // [9] 3314 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3315 + "$delim = [\\-$ws];" 3316 + "::Null;" 3317 + "$ws $delim* > ' ';" 3318 + "'-' $delim* > '-';", 3319 3320 // [10] 3321 /* 3322 "::BEGIN;" 3323 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3324 + "$delim = [\\-$ws];" 3325 + "::END;" 3326 + "$ws $delim* > ' ';" 3327 + "'-' $delim* > '-';", 3328 */ 3329 "", // test case commented out below, this is here to keep from messing up the indexes 3330 3331 // [11] 3332 /* 3333 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3334 + "$delim = [\\-$ws];" 3335 + "::BEGIN;" 3336 + "$ws $delim* > ' ';" 3337 + "'-' $delim* > '-';" 3338 + "::END;", 3339 */ 3340 "", // test case commented out below, this is here to keep from messing up the indexes 3341 3342 // [12] 3343 /* 3344 "$ws = 
[[:Separator:][\\u0009-\\u000C]$];" 3345 + "$delim = [\\-$ws];" 3346 + "$ab = [ab];" 3347 + "::BEGIN;" 3348 + "$ws $delim* > ' ';" 3349 + "'-' $delim* > '-';" 3350 + "::END;" 3351 + "::BEGIN;" 3352 + "$ab { ' ' } $ab > '-';" 3353 + "c { ' ' > ;" 3354 + "::END;" 3355 + "::BEGIN;" 3356 + "'a-a' > a\\%|a;" 3357 + "::END;", 3358 */ 3359 "", // test case commented out below, this is here to keep from messing up the indexes 3360 3361 // [13] 3362 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3363 + "$delim = [\\-$ws];" 3364 + "$ab = [ab];" 3365 + "::Null;" 3366 + "$ws $delim* > ' ';" 3367 + "'-' $delim* > '-';" 3368 + "::Null;" 3369 + "$ab { ' ' } $ab > '-';" 3370 + "c { ' ' > ;" 3371 + "::Null;" 3372 + "'a-a' > a\\%|a;", 3373 3374 // [14] 3375 /* 3376 "::[abc];" 3377 + "::BEGIN;" 3378 + "abc > xy;" 3379 + "::END;" 3380 + "::BEGIN;" 3381 + "aba > yz;" 3382 + "::END;" 3383 + "::Upper;", 3384 */ 3385 "", // test case commented out below, this is here to keep from messing up the indexes 3386 3387 // [15] 3388 "::[abc];" 3389 + "abc > xy;" 3390 + "::Null;" 3391 + "aba > yz;" 3392 + "::Upper;", 3393 3394 // [16] 3395 /* 3396 "::[abc];" 3397 + "::BEGIN;" 3398 + "abc <> xy;" 3399 + "::END;" 3400 + "::BEGIN;" 3401 + "aba <> yz;" 3402 + "::END;" 3403 + "::Upper(Lower);" 3404 + "::([XYZ]);", 3405 */ 3406 "", // test case commented out below, this is here to keep from messing up the indexes 3407 3408 // [17] 3409 "::[abc];" 3410 + "abc <> xy;" 3411 + "::Null;" 3412 + "aba <> yz;" 3413 + "::Upper(Lower);" 3414 + "::([XYZ]);" 3415 }; 3416 3417 /* 3418 (This entire test is commented out below and will need some heavy revision when we re-add 3419 the ::BEGIN/::END stuff) 3420 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3421 // [7] 3422 "::BEGIN;" 3423 + "abc > xy;" 3424 + "::BEGIN;" 3425 + "aba > z;" 3426 + "::END;" 3427 + "::END;", 3428 3429 // [8] 3430 "abc > xy;" 3431 + " aba > z;" 3432 + "::END;", 3433 3434 // [9] 3435 "::BEGIN;" 3436 + "::Upper;" 3437 + 
"::END;" 3438 }; 3439 */ 3440 3441 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3442 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3443 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 3444 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3445 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3446 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3447 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3448 3449 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3450 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3451 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3452 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3453 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3454 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3455 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3456 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3457 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3458 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3459 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3460 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3461 3462 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3463 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3464 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3465 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3466 }; 3467 3468 @Test TestBeginEnd()3469 public void TestBeginEnd() { 3470 // run through the list of test cases above 3471 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3472 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3473 } 3474 3475 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3476 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3477 Transliterator.REVERSE); 3478 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba 
yz aba"); 3479 3480 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3481 // that all of them cause errors 3482 /* 3483 (commented out until we have the real ::BEGIN/::END stuff in place 3484 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3485 try { 3486 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3487 Transliterator.FORWARD); 3488 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3489 } 3490 catch (IllegalArgumentException e) { 3491 // this is supposed to happen; do nothing here 3492 } 3493 } 3494 */ 3495 } 3496 3497 @Test TestBeginEndToRules()3498 public void TestBeginEndToRules() { 3499 // run through the same list of test cases we used above, but this time, instead of just 3500 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3501 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3502 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3503 // to (i.e., does the same thing as) the original rule set 3504 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3505 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3506 Transliterator.FORWARD); 3507 String rules = t.toRules(false); 3508 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3509 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3510 } 3511 3512 // do the same thing for the reversible test case 3513 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3514 Transliterator.REVERSE); 3515 String rules = reversed.toRules(false); 3516 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3517 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3518 } 3519 3520 
@Test TestRegisterAlias()3521 public void TestRegisterAlias() { 3522 String longID = "Lower;[aeiou]Upper"; 3523 String shortID = "Any-CapVowels"; 3524 String reallyShortID = "CapVowels"; 3525 3526 Transliterator.registerAlias(shortID, longID); 3527 3528 Transliterator t1 = Transliterator.getInstance(longID); 3529 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3530 3531 if (!t1.getID().equals(longID)) 3532 errln("Transliterator instantiated with long ID doesn't have long ID"); 3533 if (!t2.getID().equals(reallyShortID)) 3534 errln("Transliterator instantiated with short ID doesn't have short ID"); 3535 3536 if (!t1.toRules(true).equals(t2.toRules(true))) 3537 errln("Alias transliterators aren't the same"); 3538 3539 Transliterator.unregister(shortID); 3540 3541 try { 3542 t1 = Transliterator.getInstance(shortID); 3543 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3544 } 3545 catch (IllegalArgumentException e) { 3546 } 3547 3548 // try the same thing again, but this time with something other than 3549 // an instance of CompoundTransliterator 3550 String realID = "Latin-Greek"; 3551 String fakeID = "Latin-dlgkjdflkjdl"; 3552 Transliterator.registerAlias(fakeID, realID); 3553 3554 t1 = Transliterator.getInstance(realID); 3555 t2 = Transliterator.getInstance(fakeID); 3556 3557 if (!t1.toRules(true).equals(t2.toRules(true))) 3558 errln("Alias transliterators aren't the same"); 3559 3560 Transliterator.unregister(fakeID); 3561 } 3562 3563 /** 3564 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 
3565 */ 3566 @Test TestHalfwidthFullwidth()3567 public void TestHalfwidthFullwidth() { 3568 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3569 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3570 3571 // Array of 3n items 3572 // Each item is 3573 // "hf"|"fh"|"both", 3574 // <Halfwidth>, 3575 // <Fullwidth> 3576 String[] DATA = { 3577 "both", 3578 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3579 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3580 }; 3581 3582 for (int i=0; i<DATA.length; i+=3) { 3583 switch (DATA[i].charAt(0)) { 3584 case 'h': // Halfwidth-Fullwidth only 3585 expect(hf, DATA[i+1], DATA[i+2]); 3586 break; 3587 case 'f': // Fullwidth-Halfwidth only 3588 expect(fh, DATA[i+2], DATA[i+1]); 3589 break; 3590 case 'b': // both directions 3591 expect(hf, DATA[i+1], DATA[i+2]); 3592 expect(fh, DATA[i+2], DATA[i+1]); 3593 break; 3594 } 3595 } 3596 3597 } 3598 3599 /** 3600 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3601 * TODO: confirm that the expected results are correct. 3602 * For now, test just confirms that C++ and Java give identical results. 3603 */ 3604 @Test TestThai()3605 public void TestThai() { 3606 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3607 String thaiText = 3608 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3609 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3610 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3611 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. 
\u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3612 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3613 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3614 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3615 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3616 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3617 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3618 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3619 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3620 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3621 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3622 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3623 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3624 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3625 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3626 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3627 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3628 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3629 "\u0e01\u0e25\u0e38\u0e48\u0e21. 
\u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3630 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3631 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3632 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3633 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3634 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3635 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3636 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3637 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3638 3639 String latinText = 3640 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3641 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3642 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3643 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3644 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3645 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3646 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3647 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. 
m\u1ecb\u0300m" + 3648 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3649 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3650 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3651 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3652 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3653 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3654 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3655 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3656 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3657 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3658 3659 expect(tr, thaiText, latinText); 3660 } 3661 3662 3663 //====================================================================== 3664 // These tests are not mirrored (yet) in icu4c at 3665 // source/test/intltest/transtst.cpp 3666 //====================================================================== 3667 3668 /** 3669 * Improve code coverage. 3670 */ 3671 @Test TestCoverage()3672 public void TestCoverage() { 3673 // NullTransliterator 3674 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3675 expect(t, "a", "a"); 3676 3677 // Source, target set 3678 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3679 t.setFilter(new UnicodeSet("[A-Z]")); 3680 logln("source = " + t.getSourceSet()); 3681 logln("target = " + t.getTargetSet()); 3682 3683 t = Transliterator.createFromRules("x", "(.) 
> &Any-Hex($1);", Transliterator.FORWARD); 3684 logln("source = " + t.getSourceSet()); 3685 logln("target = " + t.getTargetSet()); 3686 } 3687 /* 3688 * Test case for threading problem in NormalizationTransliterator 3689 * reported by ticket#5160 3690 */ 3691 @Test TestT5160()3692 public void TestT5160() { 3693 final String[] testData = { 3694 "a", 3695 "b", 3696 "\u09BE", 3697 "A\u0301", 3698 }; 3699 final String[] expected = { 3700 "a", 3701 "b", 3702 "\u09BE", 3703 "\u00C1", 3704 }; 3705 Transliterator translit = Transliterator.getInstance("NFC"); 3706 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 3707 for (int i = 0; i < tasks.length; i++) { 3708 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 3709 } 3710 TestUtil.runUntilDone(tasks); 3711 3712 for (int i = 0; i < tasks.length; i++) { 3713 if (tasks[i].getErrorMessage() != null) { 3714 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 3715 break; 3716 } 3717 } 3718 } 3719 3720 static class NormTranslitTask implements Runnable { 3721 Transliterator translit; 3722 String testData; 3723 String expectedData; 3724 String errorMsg; 3725 NormTranslitTask(Transliterator translit, String testData, String expectedData)3726 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 3727 this.translit = translit; 3728 this.testData = testData; 3729 this.expectedData = expectedData; 3730 } 3731 3732 @Override run()3733 public void run() { 3734 errorMsg = null; 3735 StringBuffer inBuf = new StringBuffer(testData); 3736 StringBuffer expectedBuf = new StringBuffer(expectedData); 3737 3738 for(int i = 0; i < 1000; i++) { 3739 String in = inBuf.toString(); 3740 String out = translit.transliterate(in); 3741 String expected = expectedBuf.toString(); 3742 if (!out.equals(expected)) { 3743 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 3744 break; 3745 } 3746 inBuf.append(testData); 3747 
expectedBuf.append(expectedData); 3748 } 3749 } 3750 getErrorMessage()3751 public String getErrorMessage() { 3752 return errorMsg; 3753 } 3754 } 3755 3756 //====================================================================== 3757 // Support methods 3758 //====================================================================== expect(String rules, String source, String expectedResult, Transliterator.Position pos)3759 static void expect(String rules, 3760 String source, 3761 String expectedResult, 3762 Transliterator.Position pos) { 3763 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 3764 expect(t, source, expectedResult, pos); 3765 } 3766 expect(String rules, String source, String expectedResult)3767 static void expect(String rules, String source, String expectedResult) { 3768 expect(rules, source, expectedResult, null); 3769 } 3770 expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3771 static void expect(Transliterator t, String source, String expectedResult, 3772 Transliterator reverseTransliterator) { 3773 expect(t, source, expectedResult); 3774 if (reverseTransliterator != null) { 3775 expect(reverseTransliterator, expectedResult, source); 3776 } 3777 } 3778 expect(Transliterator t, String source, String expectedResult)3779 static void expect(Transliterator t, String source, String expectedResult) { 3780 expect(t, source, expectedResult, (Transliterator.Position) null); 3781 } 3782 expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)3783 static void expect(Transliterator t, String source, String expectedResult, 3784 Transliterator.Position pos) { 3785 if (pos == null) { 3786 String result = t.transliterate(source); 3787 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 3788 } 3789 3790 Transliterator.Position index = null; 3791 if (pos == null) { 3792 index = new Transliterator.Position(0, 
source.length(), 0, source.length()); 3793 } else { 3794 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 3795 pos.start, pos.limit); 3796 } 3797 3798 ReplaceableString rsource = new ReplaceableString(source); 3799 3800 t.finishTransliteration(rsource, index); 3801 // Do it all at once -- below we do it incrementally 3802 3803 if (index.start != index.limit) { 3804 expectAux(t.getID() + ":UNFINISHED", source, 3805 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 3806 return; 3807 } 3808 String result = rsource.toString(); 3809 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 3810 3811 3812 if (pos == null) { 3813 index = new Transliterator.Position(); 3814 } else { 3815 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 3816 pos.start, pos.limit); 3817 } 3818 3819 // Test incremental transliteration -- this result 3820 // must be the same after we finalize (see below). 3821 List<String> v = new ArrayList<String>(); 3822 v.add(source); 3823 rsource.replace(0, rsource.length(), ""); 3824 if (pos != null) { 3825 rsource.replace(0, 0, source); 3826 v.add(UtilityExtensions.formatInput(rsource, index)); 3827 t.transliterate(rsource, index); 3828 v.add(UtilityExtensions.formatInput(rsource, index)); 3829 } else { 3830 for (int i=0; i<source.length(); ++i) { 3831 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 3832 //log.append(source.charAt(i)).append(" -> ")); 3833 t.transliterate(rsource, index, source.charAt(i)); 3834 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 3835 v.add(UtilityExtensions.formatInput(rsource, index) + 3836 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 3837 } 3838 } 3839 3840 // As a final step in keyboard transliteration, we must call 3841 // transliterate to finish off any pending partial matches that 3842 // were waiting for more input. 
3843 t.finishTransliteration(rsource, index); 3844 result = rsource.toString(); 3845 //log.append(" => ").append(rsource.toString()); 3846 v.add(result); 3847 3848 String[] results = new String[v.size()]; 3849 v.toArray(results); 3850 expectAux(t.getID() + ":Incremental", results, 3851 result.equals(expectedResult), 3852 expectedResult); 3853 } 3854 3855 static boolean expectAux(String tag, String source, 3856 String result, String expectedResult) { 3857 return expectAux(tag, new String[] {source, result}, 3858 result.equals(expectedResult), 3859 expectedResult); 3860 } 3861 3862 static boolean expectAux(String tag, String source, 3863 String result, boolean pass, 3864 String expectedResult) { 3865 return expectAux(tag, new String[] {source, result}, 3866 pass, 3867 expectedResult); 3868 } 3869 3870 static boolean expectAux(String tag, String source, 3871 boolean pass, 3872 String expectedResult) { 3873 return expectAux(tag, new String[] {source}, 3874 pass, 3875 expectedResult); 3876 } 3877 3878 static boolean expectAux(String tag, String[] results, boolean pass, 3879 String expectedResult) { 3880 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 3881 3882 for (int i = 0; i < results.length; ++i) { 3883 String label; 3884 if (i == 0) { 3885 label = "source: "; 3886 } else if (i == results.length - 1) { 3887 label = "result: "; 3888 } else { 3889 if (!isVerbose() && pass) continue; 3890 label = "interm" + i + ": "; 3891 } 3892 msg(" " + label + results[i], pass ? 
LOG : ERR, false, true); 3893 } 3894 3895 if (!pass) { 3896 msg( " expected: " + expectedResult, ERR, false, true); 3897 } 3898 3899 return pass; 3900 } 3901 3902 static private void assertTransform(String message, String expected, StringTransform t, String source) { 3903 assertEquals(message + " " + source, expected, t.transform(source)); 3904 } 3905 3906 3907 static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 3908 assertEquals(message + " " +source, expected, t.transform(source)); 3909 assertEquals(message + " " +source2, expected, t.transform(source2)); 3910 assertEquals(message + " " + expected, source, back.transform(expected)); 3911 } 3912 3913 /* 3914 * Tests the method public Enumeration<String> getAvailableTargets(String source) 3915 */ 3916 @Test 3917 public void TestGetAvailableTargets() { 3918 try { 3919 // Tests when if (targets == null) is true 3920 Transliterator.getAvailableTargets(""); 3921 } catch (Exception e) { 3922 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 3923 } 3924 } 3925 3926 /* 3927 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 3928 */ 3929 @Test 3930 public void TestGetAvailableVariants() { 3931 try { 3932 // Tests when if (targets == null) is true 3933 Transliterator.getAvailableVariants("", ""); 3934 } catch (Exception e) { 3935 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 3936 } 3937 } 3938 3939 /* 3940 * Tests the mehtod String nextLine() in RuleBody 3941 */ 3942 @Test 3943 public void TestNextLine() { 3944 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 3945 try{ 3946 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 3947 } catch(Exception e){ 3948 errln("TransliteratorParser.nextLine() was not suppose to 
return an " + 3949 "exception for a rule of '\\'"); 3950 } 3951 } 3952 3953 /** 3954 * Tests equals and hashCode implementation of Transliterator.Position 3955 */ 3956 @Test 3957 public void TestPositionEquals() { 3958 Transliterator.Position position1 = new Transliterator.Position(1, 0, 0, 0); 3959 Transliterator.Position position2 = new Transliterator.Position(0, 0, 0, 0); 3960 assertNotEquals("2 different positions are not equal", position1, position2); 3961 assertNotEquals("2 different positions have different hash codes", position1.hashCode(), position2.hashCode()); 3962 Transliterator.Position position3 = new Transliterator.Position(1, 0, 0, 0); 3963 assertEquals("2 positions are equal", position1, position3); 3964 assertEquals("2 positions have the same hash codes", position1.hashCode(), position3.hashCode()); 3965 } 3966 } 3967