1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2012, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.translit; 10 11 import java.util.ArrayList; 12 import java.util.Enumeration; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Iterator; 16 import java.util.List; 17 import java.util.Locale; 18 import java.util.Map.Entry; 19 20 import org.junit.Ignore; 21 import org.junit.Test; 22 23 import com.ibm.icu.dev.test.TestFmwk; 24 import com.ibm.icu.dev.test.TestUtil; 25 import com.ibm.icu.dev.util.UnicodeMap; 26 import com.ibm.icu.impl.Utility; 27 import com.ibm.icu.impl.UtilityExtensions; 28 import com.ibm.icu.lang.CharSequences; 29 import com.ibm.icu.lang.UCharacter; 30 import com.ibm.icu.lang.UScript; 31 import com.ibm.icu.text.CanonicalIterator; 32 import com.ibm.icu.text.Normalizer2; 33 import com.ibm.icu.text.Replaceable; 34 import com.ibm.icu.text.ReplaceableString; 35 import com.ibm.icu.text.StringTransform; 36 import com.ibm.icu.text.Transliterator; 37 import com.ibm.icu.text.UTF16; 38 import com.ibm.icu.text.UnicodeFilter; 39 import com.ibm.icu.text.UnicodeSet; 40 import com.ibm.icu.text.UnicodeSetIterator; 41 import com.ibm.icu.util.CaseInsensitiveString; 42 import com.ibm.icu.util.ULocale; 43 44 /*********************************************************************** 45 46 HOW TO USE THIS TEST FILE 47 -or- 48 How I developed on two platforms 49 without losing (too much of) my mind 50 51 52 1. Add new tests by copying/pasting/changing existing tests. On Java, 53 any public void method named Test...() taking no parameters becomes 54 a test. On C++, you need to modify the header and add a line to 55 the runIndexedTest() dispatch method. 56 57 2. Make liberal use of the expect() method; it is your friend. 58 59 3. The tests in this file exactly match those in a sister file on the 60 other side. The two files are: 61 62 icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java 63 icu4c: source/test/intltest/transtst.cpp 64 65 ==> THIS IS THE IMPORTANT PART <== 66 67 When you add a test in this file, add it in transtst.cpp too. 68 Give it the same name and put it in the same relative place. This 69 makes maintenance a lot simpler for any poor soul who ends up 70 trying to synchronize the tests between icu4j and icu4c. 71 72 4. If you MUST enter a test that is NOT paralleled in the sister file, 73 then add it in the special non-mirrored section. These are 74 labeled 75 76 "icu4j ONLY" 77 78 or 79 80 "icu4c ONLY" 81 82 Make sure you document the reason the test is here and not there. 83 84 85 Thank you. 86 The Management 87 ***********************************************************************/ 88 89 /** 90 * @test 91 * @summary General test of Transliterator 92 */ 93 public class TransliteratorTest extends TestFmwk { 94 @Test TestHangul()95 public void TestHangul() { 96 97 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 98 Transliterator hl = lh.getInverse(); 99 100 assertTransform("Transform", "\uCE20", lh, "ch"); 101 102 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 103 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 104 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 105 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 106 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 107 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 108 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 109 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja"); 110 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha"); 111 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga"); 112 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 113 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 114 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 115 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 116 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 117 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 118 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 119 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 120 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 121 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 122 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 123 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 124 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 125 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 126 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 127 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 128 129 } 130 131 @Test TestChinese()132 public void TestChinese() { 133 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 134 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 135 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 136 } 137 138 @Test TestRegistry()139 public void TestRegistry() { 140 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 141 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 142 checkRegistry("foo1", "[:letter:] a > b;"); 143 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 144 String id = (String) e.nextElement(); 145 checkRegistry(id); 146 } 147 } 148 checkRegistry(String id, String rules)149 private void checkRegistry (String id, String rules) { 150 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 151 Transliterator.registerInstance(foo); 152 checkRegistry(id); 153 } 154 checkRegistry(String id)155 private void checkRegistry(String id) { 156 Transliterator fie = Transliterator.getInstance(id); 157 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 158 fie.setFilter(fae); 159 Transliterator foe = Transliterator.getInstance(id); 160 UnicodeFilter fee = foe.getFilter(); 161 if (fae.equals(fee)) { 162 errln("Changed what is in registry for " + id); 163 } 164 } 165 166 // Android-changed: increase timeout. 167 @Test(timeout = 3000000L) TestInstantiation()168 public void TestInstantiation() { 169 long ms = System.currentTimeMillis(); 170 String ID; 171 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 172 ID = (String) e.nextElement(); 173 if (ID.equals("Latin-Han/definition")) { 174 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later"); 175 continue; 176 } 177 Transliterator t = null; 178 try { 179 t = Transliterator.getInstance(ID); 180 // This is only true for some subclasses 181 // // We should get a new instance if we try again 182 // Transliterator t2 = Transliterator.getInstance(ID); 183 // if (t != t2) { 184 // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t); 185 // } else { 186 // errln("FAIL: " + ID + " returned identical instances"); 187 // t = null; 188 // } 189 } catch (IllegalArgumentException ex) { 190 errln("FAIL: " + ID); 191 throw ex; 192 } 193 194 // if (t.getFilter() != null) { 195 // errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter()); 196 // } 197 198 if (t != null) { 199 // Now test toRules 200 String rules = null; 201 try { 202 rules = t.toRules(true); 203 204 Transliterator.createFromRules("x", rules, Transliterator.FORWARD); 205 } catch (IllegalArgumentException ex2) { 206 errln("FAIL: " + ID + ".toRules() => bad rules: " + 207 rules); 208 throw ex2; 209 } 210 } 211 } 212 213 // Now test the failure path 214 try { 215 ID = "<Not a valid Transliterator ID>"; 216 Transliterator t = Transliterator.getInstance(ID); 217 errln("FAIL: " + ID + " returned " + t); 218 } catch (IllegalArgumentException ex) { 219 logln("OK: Bogus ID handled properly"); 220 } 221 222 ms = System.currentTimeMillis() - ms; 223 logln("Elapsed time: " + ms + " ms"); 224 } 225 226 @Test TestSimpleRules()227 public void TestSimpleRules() { 228 /* Example: rules 1. ab>x|y 229 * 2. yc>z 230 * 231 * []|eabcd start - no match, copy e to tranlated buffer 232 * [e]|abcd match rule 1 - copy output & adjust cursor 233 * [ex|y]cd match rule 2 - copy output & adjust cursor 234 * [exz]|d no match, copy d to transliterated buffer 235 * [exzd]| done 236 */ 237 expect("ab>x|y;" + 238 "yc>z", 239 "eabcd", "exzd"); 240 241 /* Another set of rules: 242 * 1. ab>x|yzacw 243 * 2. za>q 244 * 3. qc>r 245 * 4. cw>n 246 * 247 * []|ab Rule 1 248 * [x|yzacw] No match 249 * [xy|zacw] Rule 2 250 * [xyq|cw] Rule 4 251 * [xyqn]| Done 252 */ 253 expect("ab>x|yzacw;" + 254 "za>q;" + 255 "qc>r;" + 256 "cw>n", 257 "ab", "xyqn"); 258 259 /* Test categories 260 */ 261 Transliterator t = Transliterator.createFromRules("<ID>", 262 "$dummy=\uE100;" + 263 "$vowel=[aeiouAEIOU];" + 264 "$lu=[:Lu:];" + 265 "$vowel } $lu > '!';" + 266 "$vowel > '&';" + 267 "'!' { $lu > '^';" + 268 "$lu > '*';" + 269 "a>ERROR", 270 Transliterator.FORWARD); 271 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 272 } 273 274 /** 275 * Test inline set syntax and set variable syntax. 276 */ 277 @Test TestInlineSet()278 public void TestInlineSet() { 279 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 280 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 281 282 expect("$digit = [0-9];" + 283 "$alpha = [a-zA-Z];" + 284 "$alphanumeric = [$digit $alpha];" + // *** 285 "$special = [^$alphanumeric];" + // *** 286 "$alphanumeric > '-';" + 287 "$special > '*';", 288 289 "thx-1138", "---*----"); 290 } 291 292 /** 293 * Create some inverses and confirm that they work. We have to be 294 * careful how we do this, since the inverses will not be true 295 * inverses -- we can't throw any random string at the composition 296 * of the transliterators and expect the identity function. F x 297 * F' != I. However, if we are careful about the input, we will 298 * get the expected results. 299 */ 300 @Test TestRuleBasedInverse()301 public void TestRuleBasedInverse() { 302 String RULES = 303 "abc>zyx;" + 304 "ab>yz;" + 305 "bc>zx;" + 306 "ca>xy;" + 307 "a>x;" + 308 "b>y;" + 309 "c>z;" + 310 311 "abc<zyx;" + 312 "ab<yz;" + 313 "bc<zx;" + 314 "ca<xy;" + 315 "a<x;" + 316 "b<y;" + 317 "c<z;" + 318 319 ""; 320 321 String[] DATA = { 322 // Careful here -- random strings will not work. If we keep 323 // the left side to the domain and the right side to the range 324 // we will be okay though (left, abc; right xyz). 325 "a", "x", 326 "abcacab", "zyxxxyy", 327 "caccb", "xyzzy", 328 }; 329 330 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 331 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 332 for (int i=0; i<DATA.length; i+=2) { 333 expect(fwd, DATA[i], DATA[i+1]); 334 expect(rev, DATA[i+1], DATA[i]); 335 } 336 } 337 338 /** 339 * Basic test of keyboard. 340 */ 341 @Test TestKeyboard()342 public void TestKeyboard() { 343 Transliterator t = Transliterator.createFromRules("<ID>", 344 "psch>Y;" 345 +"ps>y;" 346 +"ch>x;" 347 +"a>A;", Transliterator.FORWARD); 348 String DATA[] = { 349 // insertion, buffer 350 "a", "A", 351 "p", "Ap", 352 "s", "Aps", 353 "c", "Apsc", 354 "a", "AycA", 355 "psch", "AycAY", 356 null, "AycAY", // null means finishKeyboardTransliteration 357 }; 358 359 keyboardAux(t, DATA); 360 } 361 362 /** 363 * Basic test of keyboard with cursor. 364 */ 365 @Test TestKeyboard2()366 public void TestKeyboard2() { 367 Transliterator t = Transliterator.createFromRules("<ID>", 368 "ych>Y;" 369 +"ps>|y;" 370 +"ch>x;" 371 +"a>A;", Transliterator.FORWARD); 372 String DATA[] = { 373 // insertion, buffer 374 "a", "A", 375 "p", "Ap", 376 "s", "Aps", // modified for rollback - "Ay", 377 "c", "Apsc", // modified for rollback - "Ayc", 378 "a", "AycA", 379 "p", "AycAp", 380 "s", "AycAps", // modified for rollback - "AycAy", 381 "c", "AycApsc", // modified for rollback - "AycAyc", 382 "h", "AycAY", 383 null, "AycAY", // null means finishKeyboardTransliteration 384 }; 385 386 keyboardAux(t, DATA); 387 } 388 389 /** 390 * Test keyboard transliteration with back-replacement. 391 */ 392 @Test TestKeyboard3()393 public void TestKeyboard3() { 394 // We want th>z but t>y. Furthermore, during keyboard 395 // transliteration we want t>y then yh>z if t, then h are 396 // typed. 397 String RULES = 398 "t>|y;" + 399 "yh>z;" + 400 ""; 401 402 String[] DATA = { 403 // Column 1: characters to add to buffer (as if typed) 404 // Column 2: expected appearance of buffer after 405 // keyboard xliteration. 406 "a", "a", 407 "b", "ab", 408 "t", "abt", // modified for rollback - "aby", 409 "c", "abyc", 410 "t", "abyct", // modified for rollback - "abycy", 411 "h", "abycz", 412 null, "abycz", // null means finishKeyboardTransliteration 413 }; 414 415 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 416 keyboardAux(t, DATA); 417 } 418 keyboardAux(Transliterator t, String[] DATA)419 private void keyboardAux(Transliterator t, String[] DATA) { 420 Transliterator.Position index = new Transliterator.Position(); 421 ReplaceableString s = new ReplaceableString(); 422 for (int i=0; i<DATA.length; i+=2) { 423 StringBuffer log; 424 if (DATA[i] != null) { 425 log = new StringBuffer(s.toString() + " + " 426 + DATA[i] 427 + " -> "); 428 t.transliterate(s, index, DATA[i]); 429 } else { 430 log = new StringBuffer(s.toString() + " => "); 431 t.finishTransliteration(s, index); 432 } 433 UtilityExtensions.formatInput(log, s, index); 434 if (s.toString().equals(DATA[i+1])) { 435 logln(log.toString()); 436 } else { 437 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 438 } 439 } 440 } 441 442 // Latin-Arabic has been temporarily removed until it can be 443 // done correctly. 444 445 // public void TestArabic() { 446 // String DATA[] = { 447 // "Arabic", 448 // "\u062a\u062a\u0645\u062a\u0639 "+ 449 // "\u0627\u0644\u0644\u063a\u0629 "+ 450 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 451 // "\u0628\u0628\u0646\u0638\u0645 "+ 452 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 453 // "\u062c\u0645\u064a\u0644\u0629" 454 // }; 455 456 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 457 // for (int i=0; i<DATA.length; i+=2) { 458 // expect(t, DATA[i], DATA[i+1]); 459 // } 460 // } 461 462 /** 463 * Compose the Kana transliterator forward and reverse and try 464 * some strings that should come out unchanged. 465 */ 466 @Test TestCompoundKana()467 public void TestCompoundKana() { 468 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 469 expect(t, "aaaaa", "aaaaa"); 470 } 471 472 /** 473 * Compose the hex transliterators forward and reverse. 474 */ 475 @Test TestCompoundHex()476 public void TestCompoundHex() { 477 Transliterator a = Transliterator.getInstance("Any-Hex"); 478 Transliterator b = Transliterator.getInstance("Hex-Any"); 479 // Transliterator[] trans = { a, b }; 480 // Transliterator ab = Transliterator.getInstance(trans); 481 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 482 483 // Do some basic tests of b 484 expect(b, "\\u0030\\u0031", "01"); 485 486 String s = "abcde"; 487 expect(ab, s, s); 488 489 // trans = new Transliterator[] { b, a }; 490 // Transliterator ba = Transliterator.getInstance(trans); 491 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 492 ReplaceableString str = new ReplaceableString(s); 493 a.transliterate(str); 494 expect(ba, str.toString(), str.toString()); 495 } 496 497 /** 498 * Do some basic tests of filtering. 499 */ 500 @Test TestFiltering()501 public void TestFiltering() { 502 503 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 504 tempTrans.setFilter(new UnicodeSet("[a]")); 505 String tempResult = tempTrans.transform("xa"); 506 assertEquals("context should not be filtered ", "xb", tempResult); 507 508 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 509 tempResult = tempTrans.transform("xa"); 510 assertEquals("context should not be filtered ", "xb", tempResult); 511 512 Transliterator hex = Transliterator.getInstance("Any-Hex"); 513 hex.setFilter(new UnicodeFilter() { 514 public boolean contains(int c) { 515 return c != 'c'; 516 } 517 public String toPattern(boolean escapeUnprintable) { 518 return ""; 519 } 520 public boolean matchesIndexValue(int v) { 521 return false; 522 } 523 public void addMatchSetTo(UnicodeSet toUnionTo) {} 524 }); 525 String s = "abcde"; 526 String out = hex.transliterate(s); 527 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 528 if (out.equals(exp)) { 529 logln("Ok: \"" + exp + "\""); 530 } else { 531 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 532 } 533 } 534 535 /** 536 * Test anchors 537 */ 538 @Test TestAnchors()539 public void TestAnchors() { 540 expect("^ab > 01 ;" + 541 " ab > |8 ;" + 542 " b > k ;" + 543 " 8x$ > 45 ;" + 544 " 8x > 77 ;", 545 546 "ababbabxabx", 547 "018k7745"); 548 expect("$s = [z$] ;" + 549 "$s{ab > 01 ;" + 550 " ab > |8 ;" + 551 " b > k ;" + 552 " 8x}$s > 45 ;" + 553 " 8x > 77 ;", 554 555 "abzababbabxzabxabx", 556 "01z018k45z01x45"); 557 } 558 559 /** 560 * Test pattern quoting and escape mechanisms. 561 */ 562 @Test TestPatternQuoting()563 public void TestPatternQuoting() { 564 // Array of 3n items 565 // Each item is <rules>, <input>, <expected output> 566 String[] DATA = { 567 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 568 }; 569 570 for (int i=0; i<DATA.length; i+=3) { 571 logln("Pattern: " + Utility.escape(DATA[i])); 572 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 573 expect(t, DATA[i+1], DATA[i+2]); 574 } 575 } 576 577 @Test TestVariableNames()578 public void TestVariableNames() { 579 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 580 if (gl == null) { 581 errln("FAIL: null Transliterator returned."); 582 } 583 } 584 585 /** 586 * Regression test for bugs found in Greek transliteration. 587 */ 588 @Test TestJ277()589 public void TestJ277() { 590 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 591 592 char sigma = (char)0x3C3; 593 char upsilon = (char)0x3C5; 594 char nu = (char)0x3BD; 595 // not used char PHI = (char)0x3A6; 596 char alpha = (char)0x3B1; 597 // not used char omega = (char)0x3C9; 598 // not used char omicron = (char)0x3BF; 599 // not used char epsilon = (char)0x3B5; 600 601 // sigma upsilon nu -> syn 602 StringBuffer buf = new StringBuffer(); 603 buf.append(sigma).append(upsilon).append(nu); 604 String syn = buf.toString(); 605 expect(gl, syn, "syn"); 606 607 // sigma alpha upsilon nu -> saun 608 buf.setLength(0); 609 buf.append(sigma).append(alpha).append(upsilon).append(nu); 610 String sayn = buf.toString(); 611 expect(gl, sayn, "saun"); 612 613 // Again, using a smaller rule set 614 String rules = 615 "$alpha = \u03B1;" + 616 "$nu = \u03BD;" + 617 "$sigma = \u03C3;" + 618 "$ypsilon = \u03C5;" + 619 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 620 "s <> $sigma;" + 621 "a <> $alpha;" + 622 "u <> $vowel { $ypsilon;" + 623 "y <> $ypsilon;" + 624 "n <> $nu;"; 625 Transliterator mini = Transliterator.createFromRules 626 ("mini", rules, Transliterator.REVERSE); 627 expect(mini, syn, "syn"); 628 expect(mini, sayn, "saun"); 629 630 //| // Transliterate the Greek locale data 631 //| Locale el("el"); 632 //| DateFormatSymbols syms(el, status); 633 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 634 //| int32_t i, count; 635 //| const UnicodeString* data = syms.getMonths(count); 636 //| for (i=0; i<count; ++i) { 637 //| if (data[i].length() == 0) { 638 //| continue; 639 //| } 640 //| UnicodeString out(data[i]); 641 //| gl->transliterate(out); 642 //| bool_t ok = TRUE; 643 //| if (data[i].length() >= 2 && out.length() >= 2 && 644 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 645 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 646 //| ok = FALSE; 647 //| } 648 //| } 649 //| if (ok) { 650 //| logln(prettify(data[i] + " -> " + out)); 651 //| } else { 652 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 653 //| } 654 //| } 655 } 656 657 // /** 658 // * Prefix, suffix support in hex transliterators 659 // */ 660 // public void TestJ243() { 661 // // Test default Hex-Any, which should handle 662 // // \\u, \\U, u+, and U+ 663 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 664 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 665 // 666 // // Try a custom Hex-Any 667 // // \\uXXXX and &#xXXXX; 668 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 669 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 670 // "abcd5fx0123"); 671 // 672 // // Try custom Any-Hex (default is tested elsewhere) 673 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 674 // expect(hex3, "012", "012"); 675 // } 676 677 @Test TestJ329()678 public void TestJ329() { 679 680 Object[] DATA = { 681 Boolean.FALSE, "a > b; c > d", 682 Boolean.TRUE, "a > b; no operator; c > d", 683 }; 684 685 for (int i=0; i<DATA.length; i+=2) { 686 String err = null; 687 try { 688 Transliterator.createFromRules("<ID>", 689 (String) DATA[i+1], 690 Transliterator.FORWARD); 691 } catch (IllegalArgumentException e) { 692 err = e.getMessage(); 693 } 694 boolean gotError = (err != null); 695 String desc = (String) DATA[i+1] + 696 (gotError ? (" -> error: " + err) : " -> no error"); 697 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 698 logln("Ok: " + desc); 699 } else { 700 errln("FAIL: " + desc); 701 } 702 } 703 } 704 705 /** 706 * Test segments and segment references. 707 */ 708 @Test TestSegments()709 public void TestSegments() { 710 // Array of 3n items 711 // Each item is <rules>, <input>, <expected output> 712 String[] DATA = { 713 "([a-z]) '.' ([0-9]) > $2 '-' $1", 714 "abc.123.xyz.456", 715 "ab1-c23.xy4-z56", 716 }; 717 718 for (int i=0; i<DATA.length; i+=3) { 719 logln("Pattern: " + Utility.escape(DATA[i])); 720 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 721 expect(t, DATA[i+1], DATA[i+2]); 722 } 723 } 724 725 /** 726 * Test cursor positioning outside of the key 727 */ 728 @Test TestCursorOffset()729 public void TestCursorOffset() { 730 // Array of 3n items 731 // Each item is <rules>, <input>, <expected output> 732 String[] DATA = { 733 "pre {alpha} post > | @ ALPHA ;" + 734 "eALPHA > beta ;" + 735 "pre {beta} post > BETA @@ | ;" + 736 "post > xyz", 737 738 "prealphapost prebetapost", 739 "prbetaxyz preBETApost", 740 }; 741 742 for (int i=0; i<DATA.length; i+=3) { 743 logln("Pattern: " + Utility.escape(DATA[i])); 744 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 745 expect(t, DATA[i+1], DATA[i+2]); 746 } 747 } 748 749 /** 750 * Test zero length and > 1 char length variable values. Test 751 * use of variable refs in UnicodeSets. 752 */ 753 @Test TestArbitraryVariableValues()754 public void TestArbitraryVariableValues() { 755 // Array of 3n items 756 // Each item is <rules>, <input>, <expected output> 757 String[] DATA = { 758 "$abe = ab;" + 759 "$pat = x[yY]z;" + 760 "$ll = 'a-z';" + 761 "$llZ = [$ll];" + 762 "$llY = [$ll$pat];" + 763 "$emp = ;" + 764 765 "$abe > ABE;" + 766 "$pat > END;" + 767 "$llZ > 1;" + 768 "$llY > 2;" + 769 "7$emp 8 > 9;" + 770 "", 771 772 "ab xYzxyz stY78", 773 "ABE ENDEND 1129", 774 }; 775 776 for (int i=0; i<DATA.length; i+=3) { 777 logln("Pattern: " + Utility.escape(DATA[i])); 778 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 779 expect(t, DATA[i+1], DATA[i+2]); 780 } 781 } 782 783 /** 784 * Confirm that the contextStart, contextLimit, start, and limit 785 * behave correctly. 786 */ 787 @Test TestPositionHandling()788 public void TestPositionHandling() { 789 // Array of 3n items 790 // Each item is <rules>, <input>, <expected output> 791 String[] DATA = { 792 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 793 "xtat txtb", // pos 0,9,0,9 794 "xTTaSS TTxUUb", 795 796 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 797 "xtat txtb", // pos 2,9,3,8 798 "xtaSS TTxUUb", 799 800 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 801 "xtat txtb", // pos 3,8,3,8 802 "xtaTT TTxTTb", 803 }; 804 805 // Array of 4n positions -- these go with the DATA array 806 // They are: contextStart, contextLimit, start, limit 807 int[] POS = { 808 0, 9, 0, 9, 809 2, 9, 3, 8, 810 3, 8, 3, 8, 811 }; 812 813 int n = DATA.length/3; 814 for (int i=0; i<n; i++) { 815 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 816 Transliterator.Position pos = new Transliterator.Position( 817 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 818 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 819 t.transliterate(rsource, pos); 820 t.finishTransliteration(rsource, pos); 821 String result = rsource.toString(); 822 String exp = DATA[3*i+2]; 823 expectAux(Utility.escape(DATA[3*i]), 824 DATA[3*i+1], 825 result, 826 result.equals(exp), 827 exp); 828 } 829 } 830 831 /** 832 * Test the Hiragana-Katakana transliterator. 833 */ 834 @Test TestHiraganaKatakana()835 public void TestHiraganaKatakana() { 836 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 837 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 838 839 // Array of 3n items 840 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 841 String[] DATA = { 842 "both", 843 "\u3042\u3090\u3099\u3092\u3050", 844 "\u30A2\u30F8\u30F2\u30B0", 845 846 "kh", 847 "\u307C\u3051\u3060\u3042\u3093\u30FC", 848 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 849 }; 850 851 for (int i=0; i<DATA.length; i+=3) { 852 switch (DATA[i].charAt(0)) { 853 case 'h': // Hiragana-Katakana 854 expect(hk, DATA[i+1], DATA[i+2]); 855 break; 856 case 'k': // Katakana-Hiragana 857 expect(kh, DATA[i+2], DATA[i+1]); 858 break; 859 case 'b': // both 860 expect(hk, DATA[i+1], DATA[i+2]); 861 expect(kh, DATA[i+2], DATA[i+1]); 862 break; 863 } 864 } 865 866 } 867 868 @Test TestCopyJ476()869 public void TestCopyJ476() { 870 // This is a C++-only copy constructor test 871 } 872 873 /** 874 * Test inter-Indic transliterators. These are composed. 875 */ 876 @Test TestInterIndic()877 public void TestInterIndic() { 878 String ID = "Devanagari-Gujarati"; 879 Transliterator dg = Transliterator.getInstance(ID); 880 if (dg == null) { 881 errln("FAIL: getInstance(" + ID + ") returned null"); 882 return; 883 } 884 String id = dg.getID(); 885 if (!id.equals(ID)) { 886 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 887 } 888 String dev = "\u0901\u090B\u0925"; 889 String guj = "\u0A81\u0A8B\u0AA5"; 890 expect(dg, dev, guj); 891 } 892 893 /** 894 * Test filter syntax in IDs. (J23) 895 */ 896 @Test TestFilterIDs()897 public void TestFilterIDs() { 898 String[] DATA = { 899 "[aeiou]Any-Hex", // ID 900 "[aeiou]Hex-Any", // expected inverse ID 901 "quizzical", // src 902 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 903 904 "[aeiou]Any-Hex;[^5]Hex-Any", 905 "[^5]Any-Hex;[aeiou]Hex-Any", 906 "quizzical", 907 "q\\u0075izzical", 908 909 "[abc]Null", 910 "[abc]Null", 911 "xyz", 912 "xyz", 913 }; 914 915 for (int i=0; i<DATA.length; i+=4) { 916 String ID = DATA[i]; 917 Transliterator t = Transliterator.getInstance(ID); 918 expect(t, DATA[i+2], DATA[i+3]); 919 920 // Check the ID 921 if (!ID.equals(t.getID())) { 922 errln("FAIL: getInstance(" + ID + ").getID() => " + 923 t.getID()); 924 } 925 926 // Check the inverse 927 String uID = DATA[i+1]; 928 Transliterator u = t.getInverse(); 929 if (u == null) { 930 errln("FAIL: " + ID + ".getInverse() returned NULL"); 931 } else if (!u.getID().equals(uID)) { 932 errln("FAIL: " + ID + ".getInverse().getID() => " + 933 u.getID() + ", expected " + uID); 934 } 935 } 936 } 937 938 /** 939 * Test the case mapping transliterators. 940 */ 941 @Test TestCaseMap()942 public void TestCaseMap() { 943 Transliterator toUpper = 944 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 945 Transliterator toLower = 946 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 947 Transliterator toTitle = 948 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 949 950 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 951 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 952 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 953 "the quick brown foX jumped over the lazY dogs."); 954 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 955 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 956 } 957 958 /** 959 * Test the name mapping transliterators. 960 */ 961 @Test TestNameMap()962 public void TestNameMap() { 963 Transliterator uni2name = 964 Transliterator.getInstance("Any-Name[^abc]"); 965 Transliterator name2uni = 966 Transliterator.getInstance("Name-Any"); 967 968 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 969 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 970 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 971 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 972 973 // round trip 974 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 975 976 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 977 expect(t, s, s); 978 } 979 980 /** 981 * Test liberalized ID syntax. 1006c 982 */ 983 @Test TestLiberalizedID()984 public void TestLiberalizedID() { 985 // Some test cases have an expected getID() value of NULL. This 986 // means I have disabled the test case for now. This stuff is 987 // still under development, and I haven't decided whether to make 988 // getID() return canonical case yet. It will all get rewritten 989 // with the move to Source-Target/Variant IDs anyway. [aliu] 990 String DATA[] = { 991 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 992 " Null ", "Null", "whitespace", 993 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 994 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 995 }; 996 997 for (int i=0; i<DATA.length; i+=3) { 998 try { 999 Transliterator t = Transliterator.getInstance(DATA[i]); 1000 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 1001 logln("Ok: " + DATA[i+2] + 1002 " create ID \"" + DATA[i] + "\" => \"" + 1003 t.getID() + "\""); 1004 } else { 1005 errln("FAIL: " + DATA[i+2] + 1006 " create ID \"" + DATA[i] + "\" => \"" + 1007 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 1008 } 1009 } catch (IllegalArgumentException e) { 1010 errln("FAIL: " + DATA[i+2] + 1011 " create ID \"" + DATA[i] + "\""); 1012 } 1013 } 1014 } 1015 1016 @Test TestCreateInstance()1017 public void TestCreateInstance() { 1018 String FORWARD = "F"; 1019 String REVERSE = "R"; 1020 String DATA[] = { 1021 // Column 1: id 1022 // Column 2: direction 1023 // Column 3: expected ID, or "" if expect failure 1024 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 1025 1026 // JB#2689: bad compound causes crash 1027 "InvalidSource-InvalidTarget", FORWARD, "", 1028 "InvalidSource-InvalidTarget", REVERSE, "", 1029 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 1030 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 1031 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 1032 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 1033 1034 null 1035 }; 1036 1037 for (int i=0; DATA[i]!=null; i+=3) { 1038 String id=DATA[i]; 1039 int dir = (DATA[i+1]==FORWARD)? 1040 Transliterator.FORWARD:Transliterator.REVERSE; 1041 String expID=DATA[i+2]; 1042 Exception e = null; 1043 Transliterator t; 1044 try { 1045 t = Transliterator.getInstance(id,dir); 1046 } catch (Exception e1) { 1047 e = e1; 1048 t = null; 1049 } 1050 String newID = (t!=null)?t.getID():""; 1051 boolean ok = (newID.equals(expID)); 1052 if (t==null) { 1053 newID = e.getMessage(); 1054 } 1055 if (ok) { 1056 logln("Ok: createInstance(" + 1057 id + "," + DATA[i+1] + ") => " + newID); 1058 } else { 1059 errln("FAIL: createInstance(" + 1060 id + "," + DATA[i+1] + ") => " + newID + 1061 ", expected " + expID); 1062 } 1063 } 1064 } 1065 1066 /** 1067 * Test the normalization transliterator. 1068 */ 1069 @Test TestNormalizationTransliterator()1070 public void TestNormalizationTransliterator() { 1071 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest 1072 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1073 String[][] CANON = { 1074 // Input Decomposed Composed 1075 {"cat", "cat", "cat" }, 1076 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1077 1078 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1079 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1080 1081 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1082 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1083 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1084 1085 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1086 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1087 1088 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1089 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1090 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1091 1092 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1093 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1094 1095 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1096 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1097 1098 {"Henry IV", "Henry IV", "Henry IV" }, 1099 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1100 1101 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1102 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1103 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1104 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1105 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1106 1107 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1108 }; 1109 1110 String[][] COMPAT = { 1111 // Input Decomposed Composed 1112 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1113 1114 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1115 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1116 1117 {"Henry IV", "Henry IV", "Henry IV" }, 1118 {"Henry \u2163", "Henry IV", "Henry IV" }, 1119 1120 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1121 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1122 1123 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1124 }; 1125 1126 Transliterator NFD = Transliterator.getInstance("NFD"); 1127 Transliterator NFC = Transliterator.getInstance("NFC"); 1128 for (int i=0; i<CANON.length; ++i) { 1129 String in = CANON[i][0]; 1130 String expd = CANON[i][1]; 1131 String expc = CANON[i][2]; 1132 expect(NFD, in, expd); 1133 expect(NFC, in, expc); 1134 } 1135 1136 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1137 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1138 for (int i=0; i<COMPAT.length; ++i) { 1139 String in = COMPAT[i][0]; 1140 String expkd = COMPAT[i][1]; 1141 String expkc = COMPAT[i][2]; 1142 expect(NFKD, in, expkd); 1143 expect(NFKC, in, expkc); 1144 } 1145 1146 Transliterator t = Transliterator.getInstance("NFD; [x]Remove"); 1147 expect(t, "\u010dx", "c\u030C"); 1148 } 1149 1150 /** 1151 * Test compound RBT rules. 1152 */ 1153 @Test TestCompoundRBT()1154 public void TestCompoundRBT() { 1155 // Careful with spacing and ';' here: Phrase this exactly 1156 // as toRules() is going to return it. If toRules() changes 1157 // with regard to spacing or ';', then adjust this string. 1158 String rule = "::Hex-Any;\n" + 1159 "::Any-Lower;\n" + 1160 "a > '.A.';\n" + 1161 "b > '.B.';\n" + 1162 "::[^t]Any-Upper;"; 1163 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1164 if (t == null) { 1165 errln("FAIL: createFromRules failed"); 1166 return; 1167 } 1168 expect(t, "\u0043at in the hat, bat on the mat", 1169 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1170 String r = t.toRules(true); 1171 if (r.equals(rule)) { 1172 logln("OK: toRules() => " + r); 1173 } else { 1174 errln("FAIL: toRules() => " + r + 1175 ", expected " + rule); 1176 } 1177 1178 // Now test toRules 1179 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1180 if (t == null) { 1181 errln("FAIL: createInstance failed"); 1182 return; 1183 } 1184 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1185 r = t.toRules(true); 1186 if (!r.equals(exp)) { 1187 errln("FAIL: toRules() => " + r + 1188 ", expected " + exp); 1189 } else { 1190 logln("OK: toRules() => " + r); 1191 } 1192 1193 // Round trip the result of toRules 1194 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1195 if (t == null) { 1196 errln("FAIL: createFromRules #2 failed"); 1197 return; 1198 } else { 1199 logln("OK: createFromRules(" + r + ") succeeded"); 1200 } 1201 1202 // Test toRules again 1203 r = t.toRules(true); 1204 if (!r.equals(exp)) { 1205 errln("FAIL: toRules() => " + r + 1206 ", expected " + exp); 1207 } else { 1208 logln("OK: toRules() => " + r); 1209 } 1210 1211 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform 1212 // to what the regenerated ID will look like. 1213 String id = "Upper(Lower);(NFKC)"; 1214 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1215 if (t == null) { 1216 errln("FAIL: createInstance #2 failed"); 1217 return; 1218 } 1219 if (t.getID().equals(id)) { 1220 logln("OK: created " + id); 1221 } else { 1222 errln("FAIL: createInstance(" + id + 1223 ").getID() => " + t.getID()); 1224 } 1225 1226 Transliterator u = t.getInverse(); 1227 if (u == null) { 1228 errln("FAIL: createInverse failed"); 1229 return; 1230 } 1231 exp = "NFKC();Lower(Upper)"; 1232 if (u.getID().equals(exp)) { 1233 logln("OK: createInverse(" + id + ") => " + 1234 u.getID()); 1235 } else { 1236 errln("FAIL: createInverse(" + id + ") => " + 1237 u.getID()); 1238 } 1239 } 1240 1241 /** 1242 * Compound filter semantics were orginially not implemented 1243 * correctly. Originally, each component filter f(i) is replaced by 1244 * f'(i) = f(i) && g, where g is the filter for the compound 1245 * transliterator. 1246 * 1247 * From Mark: 1248 * 1249 * Suppose and I have a transliterator X. Internally X is 1250 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1251 * 1252 * The compound should convert all greek characters (through latin) to 1253 * cyrillic, then lowercase the result. The filter should say "don't 1254 * touch 'A' in the original". But because an intermediate result 1255 * happens to go through "A", the Greek Alpha gets hung up. 1256 */ 1257 @Test TestCompoundFilter()1258 public void TestCompoundFilter() { 1259 Transliterator t = Transliterator.getInstance 1260 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1261 t.setFilter(new UnicodeSet("[^A]")); 1262 1263 // Only the 'A' at index 1 should remain unchanged 1264 expect(t, 1265 CharsToUnicodeString("BA\\u039A\\u0391"), 1266 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1267 } 1268 1269 /** 1270 * Test the "Remove" transliterator. 1271 */ 1272 @Test TestRemove()1273 public void TestRemove() { 1274 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1275 expect(t, "The quick brown fox.", 1276 "Th qck brwn fx."); 1277 } 1278 1279 @Test TestToRules()1280 public void TestToRules() { 1281 String RBT = "rbt"; 1282 String SET = "set"; 1283 String[] DATA = { 1284 RBT, 1285 "$a=\\u4E61; [$a] > A;", 1286 "[\\u4E61] > A;", 1287 1288 RBT, 1289 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1290 "[[:Zs:][:Zl:]]{a} > A;", 1291 1292 SET, 1293 "[[:Zs:][:Zl:]]", 1294 "[[:Zs:][:Zl:]]", 1295 1296 SET, 1297 "[:Ps:]", 1298 "[:Ps:]", 1299 1300 SET, 1301 "[:L:]", 1302 "[:L:]", 1303 1304 SET, 1305 "[[:L:]-[A]]", 1306 "[[:L:]-[A]]", 1307 1308 SET, 1309 "[~[:Lu:][:Ll:]]", 1310 "[~[:Lu:][:Ll:]]", 1311 1312 SET, 1313 "[~[a-z]]", 1314 "[~[a-z]]", 1315 1316 RBT, 1317 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1318 "[^[:Zs:]]{a} > A;", 1319 1320 RBT, 1321 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1322 "[[a-z]-[:Zs:]]{a} > A;", 1323 1324 RBT, 1325 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1326 "[[:Zs:]&[a-z]]{a} > A;", 1327 1328 RBT, 1329 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1330 "[x[:Zs:]]{a} > A;", 1331 1332 RBT, 1333 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1334 "$macron = \\u0304 ;"+ 1335 "$evowel = [aeiouyAEIOUY] ;"+ 1336 "$iotasub = \\u0345 ;"+ 1337 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1338 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1339 1340 RBT, 1341 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1342 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1343 }; 1344 1345 for (int d=0; d < DATA.length; d+=3) { 1346 if (DATA[d] == RBT) { 1347 // Transliterator test 1348 Transliterator t = Transliterator.createFromRules("ID", 1349 DATA[d+1], Transliterator.FORWARD); 1350 if (t == null) { 1351 errln("FAIL: createFromRules failed"); 1352 return; 1353 } 1354 String rules, escapedRules; 1355 rules = t.toRules(false); 1356 escapedRules = t.toRules(true); 1357 String expRules = Utility.unescape(DATA[d+2]); 1358 String expEscapedRules = DATA[d+2]; 1359 if (rules.equals(expRules)) { 1360 logln("Ok: " + DATA[d+1] + 1361 " => " + Utility.escape(rules)); 1362 } else { 1363 errln("FAIL: " + DATA[d+1] + 1364 " => " + Utility.escape(rules + ", exp " + expRules)); 1365 } 1366 if (escapedRules.equals(expEscapedRules)) { 1367 logln("Ok: " + DATA[d+1] + 1368 " => " + escapedRules); 1369 } else { 1370 errln("FAIL: " + DATA[d+1] + 1371 " => " + escapedRules + ", exp " + expEscapedRules); 1372 } 1373 1374 } else { 1375 // UnicodeSet test 1376 String pat = DATA[d+1]; 1377 String expToPat = DATA[d+2]; 1378 UnicodeSet set = new UnicodeSet(pat); 1379 1380 // Adjust spacing etc. as necessary. 1381 String toPat; 1382 toPat = set.toPattern(true); 1383 if (expToPat.equals(toPat)) { 1384 logln("Ok: " + pat + 1385 " => " + toPat); 1386 } else { 1387 errln("FAIL: " + pat + 1388 " => " + Utility.escape(toPat) + 1389 ", exp " + Utility.escape(pat)); 1390 } 1391 } 1392 } 1393 } 1394 1395 @Test TestContext()1396 public void TestContext() { 1397 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1398 1399 expect("de > x; {d}e > y;", 1400 "de", 1401 "ye", 1402 pos); 1403 1404 expect("ab{c} > z;", 1405 "xadabdabcy", 1406 "xadabdabzy"); 1407 } 1408 CharsToUnicodeString(String s)1409 static final String CharsToUnicodeString(String s) { 1410 return Utility.unescape(s); 1411 } 1412 1413 @Test TestSupplemental()1414 public void TestSupplemental() { 1415 1416 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1417 "a > $a; $s > i;"), 1418 CharsToUnicodeString("ab\\U0001030Fx"), 1419 CharsToUnicodeString("\\U00010300bix")); 1420 1421 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1422 "$b=[A-Z\\U00010400-\\U0001044D];" + 1423 "($a)($b) > $2 $1;"), 1424 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1425 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1426 1427 // k|ax\\U00010300xm 1428 1429 // k|a\\U00010400\\U00010300xm 1430 // ky|\\U00010400\\U00010300xm 1431 // ky\\U00010400|\\U00010300xm 1432 1433 // ky\\U00010400|\\U00010300\\U00010400m 1434 // ky\\U00010400y|\\U00010400m 1435 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1436 "$a {x} > | @ \\U00010400;" + 1437 "{$a} [^\\u0000-\\uFFFF] > y;"), 1438 CharsToUnicodeString("kax\\U00010300xm"), 1439 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1440 1441 expect(Transliterator.getInstance("Any-Name"), 1442 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1443 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1444 1445 expect(Transliterator.getInstance("Name-Any"), 1446 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1447 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1448 1449 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1450 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1451 "U+10330U+10FF00U+E0061U+00A0"); 1452 1453 expect(Transliterator.getInstance("Any-Hex/C"), 1454 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1455 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1456 1457 expect(Transliterator.getInstance("Any-Hex/Perl"), 1458 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1459 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1460 1461 expect(Transliterator.getInstance("Any-Hex/Java"), 1462 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1463 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1464 1465 expect(Transliterator.getInstance("Any-Hex/XML"), 1466 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1467 "𐌰􏼀󠁡 "); 1468 1469 expect(Transliterator.getInstance("Any-Hex/XML10"), 1470 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1471 "𐌰􏼀󠁡 "); 1472 1473 expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1474 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1475 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1476 } 1477 1478 @Test TestQuantifier()1479 public void TestQuantifier() { 1480 1481 // Make sure @ in a quantified anteContext works 1482 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1483 "AAAAAb", 1484 "aaa(aac)"); 1485 1486 // Make sure @ in a quantified postContext works 1487 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1488 "baaaaa", 1489 "caa(aaa)"); 1490 1491 // Make sure @ in a quantified postContext with seg ref works 1492 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1493 "baaaaa", 1494 "baa(aaa)"); 1495 1496 // Make sure @ past ante context doesn't enter ante context 1497 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1498 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1499 "xxxab", 1500 "xxx(ac)", 1501 pos); 1502 1503 // Make sure @ past post context doesn't pass limit 1504 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1505 expect("{b} a+ > c @@ |; x > y; a > A;", 1506 "baxx", 1507 "caxx", 1508 pos2); 1509 1510 // Make sure @ past post context doesn't enter post context 1511 expect("{b} a+ > c @@ |; x > y; a > A;", 1512 "baxx", 1513 "cayy"); 1514 1515 expect("(ab)? c > d;", 1516 "c abc ababc", 1517 "d d abd"); 1518 1519 // NOTE: The (ab)+ when referenced just yields a single "ab", 1520 // not the full sequence of them. This accords with perl behavior. 1521 expect("(ab)+ {x} > '(' $1 ')';", 1522 "x abx ababxy", 1523 "x ab(ab) abab(ab)y"); 1524 1525 expect("b+ > x;", 1526 "ac abc abbc abbbc", 1527 "ac axc axc axc"); 1528 1529 expect("[abc]+ > x;", 1530 "qac abrc abbcs abtbbc", 1531 "qx xrx xs xtx"); 1532 1533 expect("q{(ab)+} > x;", 1534 "qa qab qaba qababc qaba", 1535 "qa qx qxa qxc qxa"); 1536 1537 expect("q(ab)* > x;", 1538 "qa qab qaba qababc", 1539 "xa x xa xc"); 1540 1541 // NOTE: The (ab)+ when referenced just yields a single "ab", 1542 // not the full sequence of them. This accords with perl behavior. 1543 expect("q(ab)* > '(' $1 ')';", 1544 "qa qab qaba qababc", 1545 "()a (ab) (ab)a (ab)c"); 1546 1547 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1548 // quoted string 1549 expect("'ab'+ > x;", 1550 "bb ab ababb", 1551 "bb x xb"); 1552 1553 // $foo+ and $foo* -- the quantifier should apply to the entire 1554 // variable reference 1555 expect("$var = ab; $var+ > x;", 1556 "bb ab ababb", 1557 "bb x xb"); 1558 } 1559 1560 static class TestFact implements Transliterator.Factory { 1561 static class NameableNullTrans extends Transliterator { NameableNullTrans(String id)1562 public NameableNullTrans(String id) { 1563 super(id, null); 1564 } handleTransliterate(Replaceable text, Position offsets, boolean incremental)1565 protected void handleTransliterate(Replaceable text, 1566 Position offsets, boolean incremental) { 1567 offsets.start = offsets.limit; 1568 } 1569 } 1570 String id; TestFact(String theID)1571 public TestFact(String theID) { 1572 id = theID; 1573 } getInstance(String ignoredID)1574 public Transliterator getInstance(String ignoredID) { 1575 return new NameableNullTrans(id); 1576 } 1577 } 1578 1579 @Test TestSTV()1580 public void TestSTV() { 1581 Enumeration es = Transliterator.getAvailableSources(); 1582 for (int i=0; es.hasMoreElements(); ++i) { 1583 String source = (String) es.nextElement(); 1584 logln("" + i + ": " + source); 1585 if (source.length() == 0) { 1586 errln("FAIL: empty source"); 1587 continue; 1588 } 1589 Enumeration et = Transliterator.getAvailableTargets(source); 1590 for (int j=0; et.hasMoreElements(); ++j) { 1591 String target = (String) et.nextElement(); 1592 logln(" " + j + ": " + target); 1593 if (target.length() == 0) { 1594 errln("FAIL: empty target"); 1595 continue; 1596 } 1597 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1598 for (int k=0; ev.hasMoreElements(); ++k) { 1599 String variant = (String) ev.nextElement(); 1600 if (variant.length() == 0) { 1601 logln(" " + k + ": <empty>"); 1602 } else { 1603 logln(" " + k + ": " + variant); 1604 } 1605 } 1606 } 1607 } 1608 1609 // Test registration 1610 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1611 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1612 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1613 for (int i=0; i<3; ++i) { 1614 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1615 try { 1616 Transliterator t = Transliterator.getInstance(IDS[i]); 1617 if (t.getID().equals(IDS[i])) { 1618 logln("Ok: Registration/creation succeeded for ID " + 1619 IDS[i]); 1620 } else { 1621 errln("FAIL: Registration of ID " + 1622 IDS[i] + " creates ID " + t.getID()); 1623 } 1624 Transliterator.unregister(IDS[i]); 1625 try { 1626 t = Transliterator.getInstance(IDS[i]); 1627 errln("FAIL: Unregistration failed for ID " + 1628 IDS[i] + "; still receiving ID " + t.getID()); 1629 } catch (IllegalArgumentException e2) { 1630 // Good; this is what we expect 1631 logln("Ok; Unregistered " + IDS[i]); 1632 } 1633 } catch (IllegalArgumentException e) { 1634 errln("FAIL: Registration/creation failed for ID " + 1635 IDS[i]); 1636 } finally { 1637 Transliterator.unregister(IDS[i]); 1638 } 1639 } 1640 1641 // Make sure getAvailable API reflects removal 1642 for (Enumeration e = Transliterator.getAvailableIDs(); 1643 e.hasMoreElements(); ) { 1644 String id = (String) e.nextElement(); 1645 for (int i=0; i<3; ++i) { 1646 if (id.equals(FULL_IDS[i])) { 1647 errln("FAIL: unregister(" + id + ") failed"); 1648 } 1649 } 1650 } 1651 for (Enumeration e = Transliterator.getAvailableTargets("Any"); 1652 e.hasMoreElements(); ) { 1653 String t = (String) e.nextElement(); 1654 if (t.equals(IDS[0])) { 1655 errln("FAIL: unregister(Any-" + t + ") failed"); 1656 } 1657 } 1658 for (Enumeration e = Transliterator.getAvailableSources(); 1659 e.hasMoreElements(); ) { 1660 String s = (String) e.nextElement(); 1661 for (int i=0; i<3; ++i) { 1662 if (SOURCES[i] == null) continue; 1663 if (s.equals(SOURCES[i])) { 1664 errln("FAIL: unregister(" + s + "-*) failed"); 1665 } 1666 } 1667 } 1668 } 1669 1670 /** 1671 * Test inverse of Greek-Latin; Title() 1672 */ 1673 @Test TestCompoundInverse()1674 public void TestCompoundInverse() { 1675 Transliterator t = Transliterator.getInstance 1676 ("Greek-Latin; Title()", Transliterator.REVERSE); 1677 if (t == null) { 1678 errln("FAIL: createInstance"); 1679 return; 1680 } 1681 String exp = "(Title);Latin-Greek"; 1682 if (t.getID().equals(exp)) { 1683 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1684 t.getID()); 1685 } else { 1686 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1687 t.getID() + "\", expected \"" + exp + "\""); 1688 } 1689 } 1690 1691 /** 1692 * Test NFD chaining with RBT 1693 */ 1694 @Test TestNFDChainRBT()1695 public void TestNFDChainRBT() { 1696 Transliterator t = Transliterator.createFromRules( 1697 "TEST", "::NFD; aa > Q; a > q;", 1698 Transliterator.FORWARD); 1699 logln(t.toRules(true)); 1700 expect(t, "aa", "Q"); 1701 } 1702 1703 /** 1704 * Inverse of "Null" should be "Null". (J21) 1705 */ 1706 @Test TestNullInverse()1707 public void TestNullInverse() { 1708 Transliterator t = Transliterator.getInstance("Null"); 1709 Transliterator u = t.getInverse(); 1710 if (!u.getID().equals("Null")) { 1711 errln("FAIL: Inverse of Null should be Null"); 1712 } 1713 } 1714 1715 /** 1716 * Check ID of inverse of alias. (J22) 1717 */ 1718 @Test TestAliasInverseID()1719 public void TestAliasInverseID() { 1720 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1721 Transliterator t = Transliterator.getInstance(ID); 1722 Transliterator u = t.getInverse(); 1723 String exp = "Hangul-Latin"; 1724 String got = u.getID(); 1725 if (!got.equals(exp)) { 1726 errln("FAIL: Inverse of " + ID + " is " + got + 1727 ", expected " + exp); 1728 } 1729 } 1730 1731 /** 1732 * Test IDs of inverses of compound transliterators. (J20) 1733 */ 1734 @Test TestCompoundInverseID()1735 public void TestCompoundInverseID() { 1736 String ID = "Latin-Jamo;NFC(NFD)"; 1737 Transliterator t = Transliterator.getInstance(ID); 1738 Transliterator u = t.getInverse(); 1739 String exp = "NFD(NFC);Jamo-Latin"; 1740 String got = u.getID(); 1741 if (!got.equals(exp)) { 1742 errln("FAIL: Inverse of " + ID + " is " + got + 1743 ", expected " + exp); 1744 } 1745 } 1746 1747 /** 1748 * Test undefined variable. 1749 */ 1750 @Test TestUndefinedVariable()1751 public void TestUndefinedVariable() { 1752 String rule = "$initial } a <> \u1161;"; 1753 try { 1754 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1755 } catch (IllegalArgumentException e) { 1756 logln("OK: Got exception for " + rule + ", as expected: " + 1757 e.getMessage()); 1758 return; 1759 } 1760 errln("Fail: bogus rule " + rule + " compiled without error"); 1761 } 1762 1763 /** 1764 * Test empty context. 1765 */ 1766 @Test TestEmptyContext()1767 public void TestEmptyContext() { 1768 expect(" { a } > b;", "xay a ", "xby b "); 1769 } 1770 1771 /** 1772 * Test compound filter ID syntax 1773 */ 1774 @Test TestCompoundFilterID()1775 public void TestCompoundFilterID() { 1776 String[] DATA = { 1777 // Col. 1 = ID or rule set (latter must start with #) 1778 1779 // = columns > 1 are null if expect col. 1 to be illegal = 1780 1781 // Col. 2 = direction, "F..." or "R..." 1782 // Col. 3 = source string 1783 // Col. 4 = exp result 1784 1785 "[abc]; [abc]", null, null, null, // multiple filters 1786 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1787 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1788 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1789 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1790 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1791 }; 1792 1793 for (int i=0; i<DATA.length; i+=4) { 1794 String id = DATA[i]; 1795 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1796 Transliterator.REVERSE : Transliterator.FORWARD; 1797 String source = DATA[i+2]; 1798 String exp = DATA[i+3]; 1799 boolean expOk = (DATA[i+1] != null); 1800 Transliterator t = null; 1801 IllegalArgumentException e = null; 1802 try { 1803 if (id.charAt(0) == '#') { 1804 t = Transliterator.createFromRules("ID", id, direction); 1805 } else { 1806 t = Transliterator.getInstance(id, direction); 1807 } 1808 } catch (IllegalArgumentException ee) { 1809 e = ee; 1810 } 1811 boolean ok = (t != null && e == null); 1812 if (ok == expOk) { 1813 logln("Ok: " + id + " => " + t + 1814 (e != null ? (", " + e.getMessage()) : "")); 1815 if (source != null) { 1816 expect(t, source, exp); 1817 } 1818 } else { 1819 errln("FAIL: " + id + " => " + t + 1820 (e != null ? (", " + e.getMessage()) : "")); 1821 } 1822 } 1823 } 1824 1825 /** 1826 * Test new property set syntax 1827 */ 1828 @Test TestPropertySet()1829 public void TestPropertySet() { 1830 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1831 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1832 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1833 } 1834 1835 /** 1836 * Test various failure points of the new 2.0 engine. 1837 */ 1838 @Test TestNewEngine()1839 public void TestNewEngine() { 1840 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1841 // Katakana should be untouched 1842 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1843 1844 if (true) { 1845 // This test will only work if Transliterator.ROLLBACK is 1846 // true. Otherwise, this test will fail, revealing a 1847 // limitation of global filters in incremental mode. 1848 1849 Transliterator a = 1850 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1851 Transliterator A = 1852 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1853 1854 //Transliterator array[] = new Transliterator[] { 1855 // a, 1856 // Transliterator.getInstance("NFD"), 1857 // A }; 1858 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1859 1860 try { 1861 Transliterator.registerInstance(a); 1862 Transliterator.registerInstance(A); 1863 1864 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1865 expect(t, "aAaA", "bAbA"); 1866 1867 Transliterator[] u = t.getElements(); 1868 assertTrue("getElements().length", u.length == 3); 1869 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1870 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1871 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1872 1873 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1874 t.setFilter(new UnicodeSet("[:Ll:]")); 1875 expect(t, "aAaA", "bAbA"); 1876 } finally { 1877 Transliterator.unregister("a_to_A"); 1878 Transliterator.unregister("A_to_b"); 1879 } 1880 } 1881 1882 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1883 "a", 1884 "ax"); 1885 1886 String gr = 1887 "$ddot = \u0308 ;" + 1888 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1889 "$rough = \u0314 ;" + 1890 "($lcgvowel+ $ddot?) $rough > h | $1 ;" + 1891 "\u03b1 <> a ;" + 1892 "$rough <> h ;"; 1893 1894 expect(gr, "\u03B1\u0314", "ha"); 1895 } 1896 1897 /** 1898 * Test quantified segment behavior. We want: 1899 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1900 */ 1901 @Test TestQuantifiedSegment()1902 public void TestQuantifiedSegment() { 1903 // The normal case 1904 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1905 1906 // The tricky case; the quantifier is around the segment 1907 expect("([abc])+ > x $1 x;", "cba", "xax"); 1908 1909 // Tricky case in reverse direction 1910 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1911 1912 // Check post-context segment 1913 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1914 1915 // Test toRule/toPattern for non-quantified segment. 1916 // Careful with spacing here. 1917 String r = "([a-c]){q} > x $1 x;"; 1918 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1919 String rr = t.toRules(true); 1920 if (!r.equals(rr)) { 1921 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1922 } else { 1923 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1924 } 1925 1926 // Test toRule/toPattern for quantified segment. 1927 // Careful with spacing here. 1928 r = "([a-c])+{q} > x $1 x;"; 1929 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1930 rr = t.toRules(true); 1931 if (!r.equals(rr)) { 1932 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1933 } else { 1934 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1935 } 1936 } 1937 1938 //====================================================================== 1939 // Ram's tests 1940 //====================================================================== 1941 /* this test performs test of rules in ISO 15915 */ 1942 @Test TestDevanagariLatinRT()1943 public void TestDevanagariLatinRT(){ 1944 String[] source = { 1945 "bh\u0101rata", 1946 "kra", 1947 "k\u1E63a", 1948 "khra", 1949 "gra", 1950 "\u1E45ra", 1951 "cra", 1952 "chra", 1953 "j\u00F1a", 1954 "jhra", 1955 "\u00F1ra", 1956 "\u1E6Dya", 1957 "\u1E6Dhra", 1958 "\u1E0Dya", 1959 //"r\u0323ya", // \u095c is not valid in Devanagari 1960 "\u1E0Dhya", 1961 "\u1E5Bhra", 1962 "\u1E47ra", 1963 "tta", 1964 "thra", 1965 "dda", 1966 "dhra", 1967 "nna", 1968 "pra", 1969 "phra", 1970 "bra", 1971 "bhra", 1972 "mra", 1973 "\u1E49ra", 1974 //"l\u0331ra", 1975 "yra", 1976 "\u1E8Fra", 1977 //"l-", 1978 "vra", 1979 "\u015Bra", 1980 "\u1E63ra", 1981 "sra", 1982 "hma", 1983 "\u1E6D\u1E6Da", 1984 "\u1E6D\u1E6Dha", 1985 "\u1E6Dh\u1E6Dha", 1986 "\u1E0D\u1E0Da", 1987 "\u1E0D\u1E0Dha", 1988 "\u1E6Dya", 1989 "\u1E6Dhya", 1990 "\u1E0Dya", 1991 "\u1E0Dhya", 1992 // Not roundtrippable -- 1993 // \u0939\u094d\u094d\u092E - hma 1994 // \u0939\u094d\u092E - hma 1995 // CharsToUnicodeString("hma"), 1996 "hya", 1997 "\u015Br\u0325", 1998 "\u015Bca", 1999 "\u0115", 2000 "san\u0304j\u012Bb s\u0113nagupta", 2001 "\u0101nand vaddir\u0101ju", 2002 }; 2003 String[] expected = { 2004 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 2005 "\u0915\u094D\u0930", /* kra */ 2006 "\u0915\u094D\u0937", /* ks\u0323a */ 2007 "\u0916\u094D\u0930", /* khra */ 2008 "\u0917\u094D\u0930", /* gra */ 2009 "\u0919\u094D\u0930", /* n\u0307ra */ 2010 "\u091A\u094D\u0930", /* cra */ 2011 "\u091B\u094D\u0930", /* chra */ 2012 "\u091C\u094D\u091E", /* jn\u0303a */ 2013 "\u091D\u094D\u0930", /* jhra */ 2014 "\u091E\u094D\u0930", /* n\u0303ra */ 2015 "\u091F\u094D\u092F", /* t\u0323ya */ 2016 "\u0920\u094D\u0930", /* t\u0323hra */ 2017 "\u0921\u094D\u092F", /* d\u0323ya */ 2018 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 2019 "\u0922\u094D\u092F", /* d\u0323hya */ 2020 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 2021 "\u0923\u094D\u0930", /* n\u0323ra */ 2022 "\u0924\u094D\u0924", /* tta */ 2023 "\u0925\u094D\u0930", /* thra */ 2024 "\u0926\u094D\u0926", /* dda */ 2025 "\u0927\u094D\u0930", /* dhra */ 2026 "\u0928\u094D\u0928", /* nna */ 2027 "\u092A\u094D\u0930", /* pra */ 2028 "\u092B\u094D\u0930", /* phra */ 2029 "\u092C\u094D\u0930", /* bra */ 2030 "\u092D\u094D\u0930", /* bhra */ 2031 "\u092E\u094D\u0930", /* mra */ 2032 "\u0929\u094D\u0930", /* n\u0331ra */ 2033 //"\u0934\u094D\u0930", /* l\u0331ra */ 2034 "\u092F\u094D\u0930", /* yra */ 2035 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 2036 //"l-", 2037 "\u0935\u094D\u0930", /* vra */ 2038 "\u0936\u094D\u0930", /* s\u0301ra */ 2039 "\u0937\u094D\u0930", /* s\u0323ra */ 2040 "\u0938\u094D\u0930", /* sra */ 2041 "\u0939\u094d\u092E", /* hma */ 2042 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 2043 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 2044 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 2045 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 2046 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 2047 "\u091F\u094D\u092F", /* t\u0323ya */ 2048 "\u0920\u094D\u092F", /* t\u0323hya */ 2049 "\u0921\u094D\u092F", /* d\u0323ya */ 2050 "\u0922\u094D\u092F", /* d\u0323hya */ 2051 // "hma", /* hma */ 2052 "\u0939\u094D\u092F", /* hya */ 2053 "\u0936\u0943", /* s\u0301r\u0325a */ 2054 "\u0936\u094D\u091A", /* s\u0301ca */ 2055 "\u090d", /* e\u0306 */ 2056 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2057 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2058 }; 2059 2060 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2061 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2062 2063 for(int i= 0; i<source.length; i++){ 2064 expect(latinToDev,(source[i]),(expected[i])); 2065 expect(devToLatin,(expected[i]),(source[i])); 2066 } 2067 2068 } 2069 @Test TestTeluguLatinRT()2070 public void TestTeluguLatinRT(){ 2071 String[] source = { 2072 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2073 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2074 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2075 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2076 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2077 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2078 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2079 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2080 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2081 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2082 }; 2083 2084 String[] expected = { 2085 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2086 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2087 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2088 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2089 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2090 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2091 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2092 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2093 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2094 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2095 }; 2096 2097 2098 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2099 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2100 2101 for(int i= 0; i<source.length; i++){ 2102 expect(latinToDev,(source[i]),(expected[i])); 2103 expect(devToLatin,(expected[i]),(source[i])); 2104 } 2105 } 2106 2107 @Test TestSanskritLatinRT()2108 public void TestSanskritLatinRT(){ 2109 int MAX_LEN =15; 2110 String[] source = { 2111 "rmk\u1E63\u0113t", 2112 "\u015Br\u012Bmad", 2113 "bhagavadg\u012Bt\u0101", 2114 "adhy\u0101ya", 2115 "arjuna", 2116 "vi\u1E63\u0101da", 2117 "y\u014Dga", 2118 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2119 "uv\u0101cr\u0325", 2120 "dharmak\u1E63\u0113tr\u0113", 2121 "kuruk\u1E63\u0113tr\u0113", 2122 "samav\u0113t\u0101", 2123 "yuyutsava\u1E25", 2124 "m\u0101mak\u0101\u1E25", 2125 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2126 "kimakurvata", 2127 "san\u0304java", 2128 }; 2129 String[] expected = { 2130 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2131 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2132 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2133 "\u0905\u0927\u094d\u092f\u093e\u092f", 2134 "\u0905\u0930\u094d\u091c\u0941\u0928", 2135 "\u0935\u093f\u0937\u093e\u0926", 2136 "\u092f\u094b\u0917", 2137 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2138 "\u0909\u0935\u093E\u091A\u0943", 2139 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2140 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2141 "\u0938\u092e\u0935\u0947\u0924\u093e", 2142 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2143 "\u092e\u093e\u092e\u0915\u093e\u0903", 2144 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2145 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2146 "\u0938\u0902\u091c\u0935", 2147 }; 2148 2149 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2150 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2151 for(int i= 0; i<MAX_LEN; i++){ 2152 expect(latinToDev,(source[i]),(expected[i])); 2153 expect(devToLatin,(expected[i]),(source[i])); 2154 } 2155 } 2156 2157 @Test TestCompoundLatinRT()2158 public void TestCompoundLatinRT(){ 2159 int MAX_LEN =15; 2160 String[] source = { 2161 "rmk\u1E63\u0113t", 2162 "\u015Br\u012Bmad", 2163 "bhagavadg\u012Bt\u0101", 2164 "adhy\u0101ya", 2165 "arjuna", 2166 "vi\u1E63\u0101da", 2167 "y\u014Dga", 2168 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2169 "uv\u0101cr\u0325", 2170 "dharmak\u1E63\u0113tr\u0113", 2171 "kuruk\u1E63\u0113tr\u0113", 2172 "samav\u0113t\u0101", 2173 "yuyutsava\u1E25", 2174 "m\u0101mak\u0101\u1E25", 2175 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2176 "kimakurvata", 2177 "san\u0304java" 2178 }; 2179 String[] expected = { 2180 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2181 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2182 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2183 "\u0905\u0927\u094d\u092f\u093e\u092f", 2184 "\u0905\u0930\u094d\u091c\u0941\u0928", 2185 "\u0935\u093f\u0937\u093e\u0926", 2186 "\u092f\u094b\u0917", 2187 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2188 "\u0909\u0935\u093E\u091A\u0943", 2189 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2190 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2191 "\u0938\u092e\u0935\u0947\u0924\u093e", 2192 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2193 "\u092e\u093e\u092e\u0915\u093e\u0903", 2194 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2195 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2196 "\u0938\u0902\u091c\u0935" 2197 }; 2198 2199 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2200 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2201 for(int i= 0; i<MAX_LEN; i++){ 2202 expect(latinToDevToLatin,(source[i]),(source[i])); 2203 expect(devToLatinToDev,(expected[i]),(expected[i])); 2204 } 2205 } 2206 /** 2207 * Test Gurmukhi-Devanagari Tippi and Bindi 2208 */ 2209 @Test TestGurmukhiDevanagari()2210 public void TestGurmukhiDevanagari(){ 2211 // the rule says: 2212 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2213 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2214 2215 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2216 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2217 2218 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2219 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2220 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2221 StringBuffer src = new StringBuffer(" \u0902"); 2222 StringBuffer expect = new StringBuffer(" \u0A02"); 2223 while(vIter.next()){ 2224 src.setCharAt(0,(char) vIter.codepoint); 2225 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2226 expect(trans,src.toString(),expect.toString()); 2227 } 2228 2229 expect.setCharAt(1,'\u0A70'); 2230 while(nvIter.next()){ 2231 //src.setCharAt(0,(char) nvIter.codepoint); 2232 src.setCharAt(0,(char)nvIter.codepoint); 2233 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2234 expect(trans,src.toString(),expect.toString()); 2235 } 2236 } 2237 /** 2238 * Test instantiation from a locale. 2239 */ 2240 @Test TestLocaleInstantiation()2241 public void TestLocaleInstantiation() { 2242 Transliterator t; 2243 try{ 2244 t = Transliterator.getInstance("te_IN-Latin"); 2245 //expect(t, "\u0430", "a"); 2246 }catch(IllegalArgumentException ex){ 2247 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2248 } 2249 try{ 2250 t = Transliterator.getInstance("ru_RU-Latin"); 2251 expect(t, "\u0430", "a"); 2252 }catch(IllegalArgumentException ex){ 2253 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2254 } 2255 try{ 2256 t = Transliterator.getInstance("en-el"); 2257 expect(t, "a", "\u03B1"); 2258 }catch(IllegalArgumentException ex){ 2259 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2260 } 2261 } 2262 2263 /** 2264 * Test title case handling of accent (should ignore accents) 2265 */ 2266 @Test TestTitleAccents()2267 public void TestTitleAccents() { 2268 Transliterator t = Transliterator.getInstance("Title"); 2269 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2270 } 2271 2272 /** 2273 * Basic test of a locale resource based rule. 2274 */ 2275 @Test TestLocaleResource()2276 public void TestLocaleResource() { 2277 String DATA[] = { 2278 // id from to 2279 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2280 "Latin-el", "b", "\u03bc\u03c0", 2281 "Latin-Greek", "b", "\u03B2", 2282 "Greek-Latin/UNGEGN", "\u03B2", "v", 2283 "el-Latin", "\u03B2", "v", 2284 "Greek-Latin", "\u03B2", "b", 2285 }; 2286 for (int i=0; i<DATA.length; i+=3) { 2287 Transliterator t = Transliterator.getInstance(DATA[i]); 2288 expect(t, DATA[i+1], DATA[i+2]); 2289 } 2290 } 2291 2292 /** 2293 * Make sure parse errors reference the right line. 2294 */ 2295 @Test TestParseError()2296 public void TestParseError() { 2297 String rule = 2298 "a > b;\n" + 2299 "# more stuff\n" + 2300 "d << b;"; 2301 try { 2302 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2303 if(t!=null){ 2304 errln("FAIL: Did not get expected exception"); 2305 } 2306 } catch (IllegalArgumentException e) { 2307 String err = e.getMessage(); 2308 if (err.indexOf("d << b") >= 0) { 2309 logln("Ok: " + err); 2310 } else { 2311 errln("FAIL: " + err); 2312 } 2313 return; 2314 } 2315 errln("FAIL: no syntax error"); 2316 } 2317 2318 /** 2319 * Make sure sets on output are disallowed. 2320 */ 2321 @Test TestOutputSet()2322 public void TestOutputSet() { 2323 String rule = "$set = [a-cm-n]; b > $set;"; 2324 Transliterator t = null; 2325 try { 2326 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2327 if(t!=null){ 2328 errln("FAIL: Did not get the expected exception"); 2329 } 2330 } catch (IllegalArgumentException e) { 2331 logln("Ok: " + e.getMessage()); 2332 return; 2333 } 2334 errln("FAIL: No syntax error"); 2335 } 2336 2337 /** 2338 * Test the use variable range pragma, making sure that use of 2339 * variable range characters is detected and flagged as an error. 2340 */ 2341 @Test TestVariableRange()2342 public void TestVariableRange() { 2343 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2344 try { 2345 Transliterator t = 2346 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2347 if(t!=null){ 2348 errln("FAIL: Did not get the expected exception"); 2349 } 2350 } catch (IllegalArgumentException e) { 2351 logln("Ok: " + e.getMessage()); 2352 return; 2353 } 2354 errln("FAIL: No syntax error"); 2355 } 2356 2357 /** 2358 * Test invalid post context error handling 2359 */ 2360 @Test TestInvalidPostContext()2361 public void TestInvalidPostContext() { 2362 try { 2363 Transliterator t = 2364 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2365 if(t!=null){ 2366 errln("FAIL: Did not get the expected exception"); 2367 } 2368 } catch (IllegalArgumentException e) { 2369 String msg = e.getMessage(); 2370 if (msg.indexOf("a}b{c") >= 0) { 2371 logln("Ok: " + msg); 2372 } else { 2373 errln("FAIL: " + msg); 2374 } 2375 return; 2376 } 2377 errln("FAIL: No syntax error"); 2378 } 2379 2380 /** 2381 * Test ID form variants 2382 */ 2383 @Test TestIDForms()2384 public void TestIDForms() { 2385 String DATA[] = { 2386 "NFC", null, "NFD", 2387 "nfd", null, "NFC", // make sure case is ignored 2388 "Any-NFKD", null, "Any-NFKC", 2389 "Null", null, "Null", 2390 "-nfkc", "nfkc", "NFKD", 2391 "-nfkc/", "nfkc", "NFKD", 2392 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2393 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2394 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2395 "Source-", null, null, 2396 "Source/Variant-", null, null, 2397 "Source-/Variant", null, null, 2398 "/Variant", null, null, 2399 "/Variant-", null, null, 2400 "-/Variant", null, null, 2401 "-/", null, null, 2402 "-", null, null, 2403 "/", null, null, 2404 }; 2405 2406 for (int i=0; i<DATA.length; i+=3) { 2407 String ID = DATA[i]; 2408 String expID = DATA[i+1]; 2409 String expInvID = DATA[i+2]; 2410 boolean expValid = (expInvID != null); 2411 if (expID == null) { 2412 expID = ID; 2413 } 2414 try { 2415 Transliterator t = 2416 Transliterator.getInstance(ID); 2417 Transliterator u = t.getInverse(); 2418 if (t.getID().equals(expID) && 2419 u.getID().equals(expInvID)) { 2420 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2421 } else { 2422 errln("FAIL: getInstance(" + ID + ") => " + 2423 t.getID() + " x getInverse() => " + u.getID() + 2424 ", expected " + expInvID); 2425 } 2426 } catch (IllegalArgumentException e) { 2427 if (!expValid) { 2428 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2429 } else { 2430 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2431 } 2432 } 2433 } 2434 } 2435 checkRules(String label, Transliterator t2, String testRulesForward)2436 void checkRules(String label, Transliterator t2, String testRulesForward) { 2437 String rules2 = t2.toRules(true); 2438 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2439 rules2 = TestUtility.replace(rules2, " ", ""); 2440 rules2 = TestUtility.replace(rules2, "\n", ""); 2441 rules2 = TestUtility.replace(rules2, "\r", ""); 2442 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2443 2444 if (!rules2.equals(testRulesForward)) { 2445 errln(label); 2446 logln("GENERATED RULES: " + rules2); 2447 logln("SHOULD BE: " + testRulesForward); 2448 } 2449 } 2450 2451 /** 2452 * Mark's toRules test. 2453 */ 2454 @Test TestToRulesMark()2455 public void TestToRulesMark() { 2456 2457 String testRules = 2458 "::[[:Latin:][:Mark:]];" 2459 + "::NFKD (NFC);" 2460 + "::Lower (Lower);" 2461 + "a <> \\u03B1;" // alpha 2462 + "::NFKC (NFD);" 2463 + "::Upper (Lower);" 2464 + "::Lower ();" 2465 + "::([[:Greek:][:Mark:]]);" 2466 ; 2467 String testRulesForward = 2468 "::[[:Latin:][:Mark:]];" 2469 + "::NFKD(NFC);" 2470 + "::Lower(Lower);" 2471 + "a > \\u03B1;" 2472 + "::NFKC(NFD);" 2473 + "::Upper (Lower);" 2474 + "::Lower ();" 2475 ; 2476 String testRulesBackward = 2477 "::[[:Greek:][:Mark:]];" 2478 + "::Lower (Upper);" 2479 + "::NFD(NFKC);" 2480 + "\\u03B1 > a;" 2481 + "::Lower(Lower);" 2482 + "::NFC(NFKD);" 2483 ; 2484 String source = "\u00E1"; // a-acute 2485 String target = "\u03AC"; // alpha-acute 2486 2487 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2488 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2489 2490 expect(t2, source, target); 2491 expect(t3, target, source); 2492 2493 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2494 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2495 } 2496 2497 /** 2498 * Test Escape and Unescape transliterators. 2499 */ 2500 @Test TestEscape()2501 public void TestEscape() { 2502 expect(Transliterator.getInstance("Hex-Any"), 2503 "\\x{40}\\U000000312Q", 2504 "@12Q"); 2505 expect(Transliterator.getInstance("Any-Hex/C"), 2506 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2507 "\\u0041\\U0010BEEF\\uFEED"); 2508 expect(Transliterator.getInstance("Any-Hex/Java"), 2509 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2510 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2511 expect(Transliterator.getInstance("Any-Hex/Perl"), 2512 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2513 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2514 } 2515 2516 /** 2517 * Make sure display names of variants look reasonable. 2518 */ 2519 @Test TestDisplayName()2520 public void TestDisplayName() { 2521 String DATA[] = { 2522 // ID, forward name, reverse name 2523 // Update the text as necessary -- the important thing is 2524 // not the text itself, but how various cases are handled. 2525 2526 // Basic test 2527 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 2528 2529 // Variants 2530 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 2531 2532 // Target-only IDs 2533 "NFC", "Any to NFC", "Any to NFD", 2534 }; 2535 2536 Locale US = Locale.US; 2537 2538 for (int i=0; i<DATA.length; i+=3) { 2539 String name = Transliterator.getDisplayName(DATA[i], US); 2540 if (!name.equals(DATA[i+1])) { 2541 errln("FAIL: " + DATA[i] + ".getDisplayName() => " + 2542 name + ", expected " + DATA[i+1]); 2543 } else { 2544 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name); 2545 } 2546 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE); 2547 name = Transliterator.getDisplayName(t.getID(), US); 2548 if (!name.equals(DATA[i+2])) { 2549 errln("FAIL: " + t.getID() + ".getDisplayName() => " + 2550 name + ", expected " + DATA[i+2]); 2551 } else { 2552 logln("Ok: " + t.getID() + ".getDisplayName() => " + name); 2553 } 2554 2555 // Cover getDisplayName(String) 2556 ULocale save = ULocale.getDefault(); 2557 ULocale.setDefault(ULocale.US); 2558 String name2 = Transliterator.getDisplayName(t.getID()); 2559 if (!name.equals(name2)) 2560 errln("FAIL: getDisplayName with default locale failed"); 2561 ULocale.setDefault(save); 2562 } 2563 } 2564 2565 /** 2566 * Test anchor masking 2567 */ 2568 @Test TestAnchorMasking()2569 public void TestAnchorMasking() { 2570 String rule = "^a > Q; a > q;"; 2571 try { 2572 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2573 if(t==null){ 2574 errln("FAIL: Did not get the expected exception"); 2575 } 2576 } catch (IllegalArgumentException e) { 2577 errln("FAIL: " + rule + " => " + e); 2578 } 2579 } 2580 2581 /** 2582 * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java 2583 * during ICU4J modularization to remove dependency of tests on Transliterator. 2584 */ 2585 @Test TestScriptAllCodepoints()2586 public void TestScriptAllCodepoints(){ 2587 int code; 2588 HashSet scriptIdsChecked = new HashSet(); 2589 HashSet scriptAbbrsChecked = new HashSet(); 2590 for( int i =0; i <= 0x10ffff; i++){ 2591 code = UScript.getScript(i); 2592 if(code==UScript.INVALID_CODE){ 2593 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 2594 } 2595 String id =UScript.getName(code); 2596 String abbr = UScript.getShortName(code); 2597 if (!scriptIdsChecked.contains(id)) { 2598 scriptIdsChecked.add(id); 2599 String newId ="[:"+id+":];NFD"; 2600 try{ 2601 Transliterator t = Transliterator.getInstance(newId); 2602 if(t==null){ 2603 errln("Failed to create transliterator for "+hex(i)+ 2604 " script code: " +id); 2605 } 2606 }catch(Exception e){ 2607 errln("Failed to create transliterator for "+hex(i) 2608 +" script code: " +id 2609 + " Exception: "+e.getMessage()); 2610 } 2611 } 2612 if (!scriptAbbrsChecked.contains(abbr)) { 2613 scriptAbbrsChecked.add(abbr); 2614 String newAbbrId ="[:"+abbr+":];NFD"; 2615 try{ 2616 Transliterator t = Transliterator.getInstance(newAbbrId); 2617 if(t==null){ 2618 errln("Failed to create transliterator for "+hex(i)+ 2619 " script code: " +abbr); 2620 } 2621 }catch(Exception e){ 2622 errln("Failed to create transliterator for "+hex(i) 2623 +" script code: " +abbr 2624 + " Exception: "+e.getMessage()); 2625 } 2626 } 2627 } 2628 } 2629 2630 2631 static final String[][] registerRules = { 2632 {"Any-Dev1", "x > X; y > Y;"}, 2633 {"Any-Dev2", "XY > Z"}, 2634 {"Greek-Latin/FAKE", 2635 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+ 2636 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+ 2637 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+ 2638 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;" 2639 }, 2640 }; 2641 2642 static final String DESERET_DEE = UTF16.valueOf(0x10414); 2643 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2644 2645 static final String[][] testCases = { 2646 2647 // NORMALIZATION 2648 // should add more test cases 2649 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2650 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2651 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2652 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2653 2654 // mp -> b BUG 2655 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2656 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2657 2658 // check for devanagari bug 2659 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2660 2661 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2662 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2663 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2664 //TODO: enable this test once Titlecase works right 2665 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2666 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2667 2668 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2669 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2670 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2671 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2672 2673 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2674 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2675 2676 // FORMS OF S 2677 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2678 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2679 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2680 {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2681 2682 // Tatiana bug 2683 // Upper: TAT\u02B9\u00C2NA 2684 // Lower: tat\u02B9\u00E2na 2685 // Title: Tat\u02B9\u00E2na 2686 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2687 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2688 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2689 }; 2690 2691 @Test TestSpecialCases()2692 public void TestSpecialCases() { 2693 2694 for (int i = 0; i < registerRules.length; ++i) { 2695 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2696 registerRules[i][1], Transliterator.FORWARD); 2697 DummyFactory.add(registerRules[i][0], t); 2698 } 2699 for (int i = 0; i < testCases.length; ++i) { 2700 String name = testCases[i][0]; 2701 Transliterator t = Transliterator.getInstance(name); 2702 String id = t.getID(); 2703 String source = testCases[i][1]; 2704 String target = null; 2705 2706 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2707 2708 if (testCases[i].length > 2) target = testCases[i][2]; 2709 else if (id.equalsIgnoreCase("NFD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD); 2710 else if (id.equalsIgnoreCase("NFC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC); 2711 else if (id.equalsIgnoreCase("NFKD")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD); 2712 else if (id.equalsIgnoreCase("NFKC")) target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC); 2713 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2714 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2715 2716 expect(t, source, target); 2717 } 2718 for (int i = 0; i < registerRules.length; ++i) { 2719 Transliterator.unregister(registerRules[i][0]); 2720 } 2721 } 2722 2723 // seems like there should be an easier way to just register an instance of a transliterator 2724 2725 static class DummyFactory implements Transliterator.Factory { 2726 static DummyFactory singleton = new DummyFactory(); 2727 static HashMap m = new HashMap(); 2728 2729 // Since Transliterators are immutable, we don't have to clone on set & get add(String ID, Transliterator t)2730 static void add(String ID, Transliterator t) { 2731 m.put(ID, t); 2732 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2733 Transliterator.registerFactory(ID, singleton); 2734 } getInstance(String ID)2735 public Transliterator getInstance(String ID) { 2736 return (Transliterator) m.get(ID); 2737 } 2738 } 2739 2740 @Test TestCasing()2741 public void TestCasing() { 2742 Transliterator toLower = Transliterator.getInstance("lower"); 2743 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2744 Transliterator toUpper = Transliterator.getInstance("upper"); 2745 Transliterator toTitle = Transliterator.getInstance("title"); 2746 for (int i = 0; i < 0x600; ++i) { 2747 String s = UTF16.valueOf(i); 2748 2749 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2750 assertEquals("Lowercase", lower, toLower.transform(s)); 2751 2752 String casefold = UCharacter.foldCase(s, true); 2753 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2754 2755 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2756 assertEquals("Title", title, toTitle.transform(s)); 2757 2758 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2759 assertEquals("Upper", upper, toUpper.transform(s)); 2760 } 2761 } 2762 2763 @Test TestSurrogateCasing()2764 public void TestSurrogateCasing () { 2765 // check that casing handles surrogates 2766 // titlecase is currently defective 2767 int dee = UTF16.charAt(DESERET_dee,0); 2768 int DEE = UCharacter.toTitleCase(dee); 2769 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2770 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2771 } 2772 2773 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) { 2774 errln("Fails uppercase of surrogates"); 2775 } 2776 2777 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2778 errln("Fails lowercase of surrogates"); 2779 } 2780 } 2781 2782 // Check to see that incremental gets at least part way through a reasonable string. 2783 // TODO(junit): should be working - also should be converted to parameterized test 2784 @Ignore 2785 @Test TestIncrementalProgress()2786 public void TestIncrementalProgress() { 2787 String latinTest = "The Quick Brown Fox."; 2788 String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest); 2789 String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest); 2790 String[][] tests = { 2791 {"Any", latinTest}, 2792 {"Latin", latinTest}, 2793 {"Halfwidth", latinTest}, 2794 {"Devanagari", devaTest}, 2795 {"Katakana", kataTest}, 2796 }; 2797 2798 Enumeration sources = Transliterator.getAvailableSources(); 2799 while(sources.hasMoreElements()) { 2800 String source = (String) sources.nextElement(); 2801 String test = findMatch(source, tests); 2802 if (test == null) { 2803 logln("Skipping " + source + "-X"); 2804 continue; 2805 } 2806 Enumeration targets = Transliterator.getAvailableTargets(source); 2807 while(targets.hasMoreElements()) { 2808 String target = (String) targets.nextElement(); 2809 Enumeration variants = Transliterator.getAvailableVariants(source, target); 2810 while(variants.hasMoreElements()) { 2811 String variant = (String) variants.nextElement(); 2812 String id = source + "-" + target + "/" + variant; 2813 logln("id: " + id); 2814 2815 Transliterator t = Transliterator.getInstance(id); 2816 CheckIncrementalAux(t, test); 2817 2818 String rev = t.transliterate(test); 2819 Transliterator inv = t.getInverse(); 2820 CheckIncrementalAux(inv, rev); 2821 } 2822 } 2823 } 2824 } 2825 findMatch(String source, String[][] pairs)2826 public String findMatch (String source, String[][] pairs) { 2827 for (int i = 0; i < pairs.length; ++i) { 2828 if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1]; 2829 } 2830 return null; 2831 } 2832 CheckIncrementalAux(Transliterator t, String input)2833 public void CheckIncrementalAux(Transliterator t, String input) { 2834 2835 Replaceable test = new ReplaceableString(input); 2836 Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length()); 2837 t.transliterate(test, pos); 2838 boolean gotError = false; 2839 2840 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 2841 2842 if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) { 2843 errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2844 gotError = true; 2845 } else { 2846 logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2847 } 2848 t.finishTransliteration(test, pos); 2849 if (pos.start != pos.limit) { 2850 errln("Incomplete, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2851 gotError = true; 2852 } 2853 if(!gotError){ 2854 //errln("FAIL: Did not get expected error"); 2855 } 2856 } 2857 2858 @Test TestFunction()2859 public void TestFunction() { 2860 // Careful with spacing and ';' here: Phrase this exactly 2861 // as toRules() is going to return it. If toRules() changes 2862 // with regard to spacing or ';', then adjust this string. 2863 String rule = 2864 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2865 2866 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2867 if (t == null) { 2868 errln("FAIL: createFromRules failed"); 2869 return; 2870 } 2871 2872 String r = t.toRules(true); 2873 if (r.equals(rule)) { 2874 logln("OK: toRules() => " + r); 2875 } else { 2876 errln("FAIL: toRules() => " + r + 2877 ", expected " + rule); 2878 } 2879 2880 expect(t, "The Quick Brown Fox", 2881 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2882 rule = 2883 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2884 2885 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2886 if (t == null) { 2887 errln("FAIL: createFromRules failed"); 2888 return; 2889 } 2890 2891 r = t.toRules(true); 2892 if (r.equals(rule)) { 2893 logln("OK: toRules() => " + r); 2894 } else { 2895 errln("FAIL: toRules() => " + r + 2896 ", expected " + rule); 2897 } 2898 2899 expect(t, "\u0301", 2900 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2901 } 2902 2903 @Test TestInvalidBackRef()2904 public void TestInvalidBackRef() { 2905 String rule = ". > $1;"; 2906 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2907 try { 2908 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2909 if (t != null) { 2910 errln("FAIL: createFromRules should have returned NULL"); 2911 } 2912 errln("FAIL: Ok: . > $1; => no error"); 2913 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2914 if (t2 != null) { 2915 errln("FAIL: createFromRules should have returned NULL"); 2916 } 2917 errln("FAIL: Ok: . > $1; => no error"); 2918 } catch (IllegalArgumentException e) { 2919 logln("Ok: . > $1; => " + e.getMessage()); 2920 } 2921 } 2922 2923 @Test TestMulticharStringSet()2924 public void TestMulticharStringSet() { 2925 // Basic testing 2926 String rule = 2927 " [{aa}] > x;" + 2928 " a > y;" + 2929 " [b{bc}] > z;" + 2930 "[{gd}] { e > q;" + 2931 " e } [{fg}] > r;" ; 2932 2933 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2934 if (t == null) { 2935 errln("FAIL: createFromRules failed"); 2936 return; 2937 } 2938 2939 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2940 "y x yz z d gd de gdq gdqfg ddrfg"); 2941 2942 // Overlapped string test. Make sure that when multiple 2943 // strings can match that the longest one is matched. 2944 rule = 2945 " [a {ab} {abc}] > x;" + 2946 " b > y;" + 2947 " c > z;" + 2948 " q [t {st} {rst}] { e > p;" ; 2949 2950 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2951 if (t == null) { 2952 errln("FAIL: createFromRules failed"); 2953 return; 2954 } 2955 2956 expect(t, "a ab abc qte qste qrste", 2957 "x x x qtp qstp qrstp"); 2958 } 2959 2960 /** 2961 * Test that user-registered transliterators can be used under function 2962 * syntax. 2963 */ 2964 @Test TestUserFunction()2965 public void TestUserFunction() { 2966 Transliterator t; 2967 2968 // There's no need to register inverses if we don't use them 2969 TestUserFunctionFactory.add("Any-gif", 2970 Transliterator.createFromRules("gif", 2971 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2972 Transliterator.FORWARD)); 2973 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2974 2975 TestUserFunctionFactory.add("Any-RemoveCurly", 2976 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2977 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2978 2979 logln("Trying &hex"); 2980 t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD); 2981 logln("Registering"); 2982 TestUserFunctionFactory.add("Any-hex2", t); 2983 t = Transliterator.getInstance("Any-hex2"); 2984 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2985 2986 logln("Trying &gif"); 2987 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2988 logln("Registering"); 2989 TestUserFunctionFactory.add("Any-gif2", t); 2990 t = Transliterator.getInstance("Any-gif2"); 2991 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2992 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2993 2994 // Test that filters are allowed after & 2995 t = Transliterator.createFromRules("test", 2996 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2997 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2998 2999 // Unregister our test stuff 3000 TestUserFunctionFactory.unregister(); 3001 } 3002 3003 static class TestUserFunctionFactory implements Transliterator.Factory { 3004 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 3005 static HashMap m = new HashMap(); 3006 add(String ID, Transliterator t)3007 static void add(String ID, Transliterator t) { 3008 m.put(new CaseInsensitiveString(ID), t); 3009 Transliterator.registerFactory(ID, singleton); 3010 } 3011 getInstance(String ID)3012 public Transliterator getInstance(String ID) { 3013 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 3014 } 3015 unregister()3016 static void unregister() { 3017 Iterator ids = m.keySet().iterator(); 3018 while (ids.hasNext()) { 3019 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 3020 Transliterator.unregister(id.getString()); 3021 ids.remove(); // removes pair from m 3022 } 3023 } 3024 } 3025 3026 /** 3027 * Test the Any-X transliterators. 3028 */ 3029 @Test TestAnyX()3030 public void TestAnyX() { 3031 Transliterator anyLatin = 3032 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3033 3034 expect(anyLatin, 3035 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 3036 "greek:abkABK hiragana:abuku cyrillic:abc"); 3037 } 3038 3039 /** 3040 * Test Any-X transliterators with sample letters from all scripts. 3041 */ 3042 @Test TestAny()3043 public void TestAny() { 3044 UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze(); 3045 StringBuffer testString = new StringBuffer(); 3046 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 3047 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 3048 int count = 5; 3049 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 3050 testString.append(it.getString()); 3051 if (--count < 0) break; 3052 } 3053 } 3054 logln("Sample set for Any-Latin: " + testString); 3055 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 3056 String result = anyLatin.transliterate(testString.toString()); 3057 logln("Sample result for Any-Latin: " + result); 3058 } 3059 3060 3061 /** 3062 * Test the source and target set API. These are only implemented 3063 * for RBT and CompoundTransliterator at this time. 3064 */ 3065 @Test TestSourceTargetSet()3066 public void TestSourceTargetSet() { 3067 // Rules 3068 String r = 3069 "a > b; " + 3070 "r [x{lu}] > q;"; 3071 3072 // Expected source 3073 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]"); 3074 3075 // Expected target 3076 UnicodeSet expTrg = new UnicodeSet("[bq]"); 3077 3078 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3079 UnicodeSet src = t.getSourceSet(); 3080 UnicodeSet trg = t.getTargetSet(); 3081 3082 if (src.equals(expSrc) && trg.equals(expTrg)) { 3083 logln("Ok: " + r + " => source = " + src.toPattern(true) + 3084 ", target = " + trg.toPattern(true)); 3085 } else { 3086 errln("FAIL: " + r + " => source = " + src.toPattern(true) + 3087 ", expected " + expSrc.toPattern(true) + 3088 "; target = " + trg.toPattern(true) + 3089 ", expected " + expTrg.toPattern(true)); 3090 } 3091 } 3092 3093 @Test TestSourceTargetSet2()3094 public void TestSourceTargetSet2() { 3095 3096 3097 Normalizer2 nfc = Normalizer2.getNFCInstance(); 3098 Normalizer2 nfd = Normalizer2.getNFDInstance(); 3099 3100 // Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE); 3101 // UnicodeSet nfkdSource = new UnicodeSet(); 3102 // UnicodeSet nfkdTarget = new UnicodeSet(); 3103 // for (int i = 0; i <= 0x10FFFF; ++i) { 3104 // if (nfkd.isInert(i)) { 3105 // continue; 3106 // } 3107 // nfkdSource.add(i); 3108 // String t = nfkd.getDecomposition(i); 3109 // if (t != null) { 3110 // nfkdTarget.addAll(t); 3111 // } else { 3112 // nfkdTarget.add(i); 3113 // } 3114 // } 3115 // nfkdSource.freeze(); 3116 // nfkdTarget.freeze(); 3117 // logln("NFKD Source: " + nfkdSource.toPattern(false)); 3118 // logln("NFKD Target: " + nfkdTarget.toPattern(false)); 3119 3120 UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap(); 3121 UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap(); 3122 UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze(); 3123 CanonicalIterator can = new CanonicalIterator(""); 3124 3125 UnicodeSet disorderedMarks = new UnicodeSet(); 3126 3127 for (int i = 0; i <= 0x10FFFF; ++i) { 3128 String s = nfd.getDecomposition(i); 3129 if (s == null) { 3130 continue; 3131 } 3132 3133 can.setSource(s); 3134 for (String t = can.next(); t != null; t = can.next()) { 3135 disorderedMarks.add(t); 3136 } 3137 3138 // if s has two code points, (or more), add the lead/trail information 3139 int first = s.codePointAt(0); 3140 int firstCount = Character.charCount(first); 3141 if (s.length() == firstCount) continue; 3142 String trailString = s.substring(firstCount); 3143 3144 // add all the trail characters 3145 if (!nonStarters.containsSome(trailString)) { 3146 continue; 3147 } 3148 UnicodeSet trailSet = leadToTrail.get(first); 3149 if (trailSet == null) { 3150 leadToTrail.put(first, trailSet = new UnicodeSet()); 3151 } 3152 trailSet.addAll(trailString); // add remaining trails 3153 3154 // add the sources 3155 UnicodeSet sourcesSet = leadToSources.get(first); 3156 if (sourcesSet == null) { 3157 leadToSources.put(first, sourcesSet = new UnicodeSet()); 3158 } 3159 sourcesSet.add(i); 3160 } 3161 3162 3163 for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) { 3164 String lead = x.getKey(); 3165 UnicodeSet sources = x.getValue(); 3166 UnicodeSet trailSet = leadToTrail.get(lead); 3167 for (String source : sources) { 3168 for (String trail : trailSet) { 3169 can.setSource(source + trail); 3170 for (String t = can.next(); t != null; t = can.next()) { 3171 if (t.endsWith(trail)) continue; 3172 disorderedMarks.add(t); 3173 } 3174 } 3175 } 3176 } 3177 3178 3179 for (String s : nonStarters) { 3180 disorderedMarks.add("\u0345" + s); 3181 disorderedMarks.add(s+"\u0323"); 3182 String xx = nfc.normalize("\u01EC" + s); 3183 if (!xx.startsWith("\u01EC")) { 3184 logln("??"); 3185 } 3186 } 3187 3188 // for (int i = 0; i <= 0x10FFFF; ++i) { 3189 // String s = nfkd.getDecomposition(i); 3190 // if (s != null) { 3191 // disorderedMarks.add(s); 3192 // disorderedMarks.add(nfc.normalize(s)); 3193 // addDerivedStrings(nfc, disorderedMarks, s); 3194 // } 3195 // s = nfd.getDecomposition(i); 3196 // if (s != null) { 3197 // disorderedMarks.add(s); 3198 // } 3199 // if (!nfc.isInert(i)) { 3200 // if (i == 0x00C0) { 3201 // logln("\u00C0"); 3202 // } 3203 // can.setSource(s+"\u0334"); 3204 // for (String t = can.next(); t != null; t = can.next()) { 3205 // addDerivedStrings(nfc, disorderedMarks, t); 3206 // } 3207 // can.setSource(s+"\u0345"); 3208 // for (String t = can.next(); t != null; t = can.next()) { 3209 // addDerivedStrings(nfc, disorderedMarks, t); 3210 // } 3211 // can.setSource(s+"\u0323"); 3212 // for (String t = can.next(); t != null; t = can.next()) { 3213 // addDerivedStrings(nfc, disorderedMarks, t); 3214 // } 3215 // } 3216 // } 3217 logln("Test cases: " + disorderedMarks.size()); 3218 disorderedMarks.addAll(0,0x10FFFF).freeze(); 3219 logln("isInert \u0104 " + nfc.isInert('\u0104')); 3220 3221 Object[][] rules = { 3222 {":: [:sc=COMMON:] any-name;", null}, 3223 3224 {":: [:Greek:] hex-any/C;", null}, 3225 {":: [:Greek:] any-hex/C;", null}, 3226 3227 {":: [[:Mn:][:Me:]] remove;", null}, 3228 {":: [[:Mn:][:Me:]] null;", null}, 3229 3230 3231 {":: lower;", null}, 3232 {":: upper;", null}, 3233 {":: title;", null}, 3234 {":: CaseFold;", null}, 3235 3236 {":: NFD;", null}, 3237 {":: NFC;", null}, 3238 {":: NFKD;", null}, 3239 {":: NFKC;", null}, 3240 3241 {":: [[:Mn:][:Me:]] NFKD;", null}, 3242 {":: Latin-Greek;", null}, 3243 {":: [:Latin:] NFKD;", null}, 3244 {":: NFKD;", null}, 3245 {":: NFKD;\n" + 3246 ":: [[:Mn:][:Me:]] remove;\n" + 3247 ":: NFC;", null}, 3248 }; 3249 for (Object[] rulex : rules) { 3250 String rule = (String) rulex[0]; 3251 Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD); 3252 UnicodeSet actualSource = trans.getSourceSet(); 3253 UnicodeSet actualTarget = trans.getTargetSet(); 3254 UnicodeSet empiricalSource = new UnicodeSet(); 3255 UnicodeSet empiricalTarget = new UnicodeSet(); 3256 String ruleDisplay = rule.replace("\n", "\t\t"); 3257 UnicodeSet toTest = disorderedMarks; 3258 // if (rulex[1] != null) { 3259 // toTest = new UnicodeSet(disorderedMarks); 3260 // toTest.addAll((UnicodeSet) rulex[1]); 3261 // } 3262 3263 String test = nfd.normalize("\u0104"); 3264 boolean DEBUG = true; 3265 @SuppressWarnings("unused") 3266 int count = 0; // for debugging 3267 for (String s : toTest) { 3268 if (s.equals(test)) { 3269 logln(test); 3270 } 3271 String t = trans.transform(s); 3272 if (!s.equals(t)) { 3273 if (!isAtomic(s, t, trans)) { 3274 isAtomic(s, t, trans); 3275 continue; 3276 } 3277 3278 // only keep the part that changed; so skip the front and end. 3279 // int start = findSharedStartLength(s,t); 3280 // int end = findSharedEndLength(s,t); 3281 // if (start != 0 || end != 0) { 3282 // s = s.substring(start, s.length() - end); 3283 // t = t.substring(start, t.length() - end); 3284 // } 3285 if (DEBUG) { 3286 if (!actualSource.containsAll(s)) { 3287 count++; 3288 } 3289 if (!actualTarget.containsAll(t)) { 3290 count++; 3291 } 3292 } 3293 addSourceTarget(s, empiricalSource, t, empiricalTarget); 3294 } 3295 } 3296 assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK); 3297 assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK); 3298 } 3299 } 3300 3301 @Test TestSourceTargetSetFilter()3302 public void TestSourceTargetSetFilter() { 3303 String[][] tests = { 3304 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE 3305 {"[] Latin-Greek", null, "[\']"}, 3306 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"}, 3307 {"[] Any-Latin"}, 3308 {"[] casefold"}, 3309 {"[] NFKD;"}, 3310 {"[] NFKC;"}, 3311 {"[] hex"}, 3312 {"[] lower"}, 3313 {"[] null"}, 3314 {"[] remove"}, 3315 {"[] title"}, 3316 {"[] upper"}, 3317 }; 3318 UnicodeSet expectedSource = UnicodeSet.EMPTY; 3319 for (String[] testPair : tests) { 3320 String test = testPair[0]; 3321 Transliterator t0; 3322 try { 3323 t0 = Transliterator.getInstance(test); 3324 } catch (Exception e) { 3325 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD); 3326 } 3327 Transliterator t1; 3328 try { 3329 t1 = t0.getInverse(); 3330 } catch (Exception e) { 3331 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE); 3332 } 3333 int targetIndex = 0; 3334 for (Transliterator t : new Transliterator[]{t0, t1}) { 3335 boolean ok; 3336 UnicodeSet source = t.getSourceSet(); 3337 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t"; 3338 targetIndex++; 3339 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource 3340 : testPair[targetIndex] == null ? expectedSource 3341 : testPair[targetIndex].length() == 0 ? expectedSource 3342 : new UnicodeSet(testPair[targetIndex]); 3343 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source); 3344 if (!ok) { // for debugging 3345 source = t.getSourceSet(); 3346 } 3347 UnicodeSet target = t.getTargetSet(); 3348 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target); 3349 if (!ok) { // for debugging 3350 target = t.getTargetSet(); 3351 } 3352 } 3353 } 3354 } 3355 isAtomic(String s, String t, Transliterator trans)3356 private boolean isAtomic(String s, String t, Transliterator trans) { 3357 for (int i = 1; i < s.length(); ++i) { 3358 if (!CharSequences.onCharacterBoundary(s, i)) { 3359 continue; 3360 } 3361 String q = trans.transform(s.substring(0,i)); 3362 if (t.startsWith(q)) { 3363 String r = trans.transform(s.substring(i)); 3364 if (t.length() == q.length() + r.length() && t.endsWith(r)) { 3365 return false; 3366 } 3367 } 3368 } 3369 return true; 3370 // // make sure that every part is different 3371 // if (s.codePointCount(0, s.length()) > 1) { 3372 // int[] codePoints = It.codePoints(s); 3373 // for (int k = 0; k < codePoints.length; ++k) { 3374 // int pos = indexOf(t,codePoints[k]); 3375 // if (pos >= 0) { 3376 // int x; 3377 // } 3378 // } 3379 // if (s.contains("\u00C0")) { 3380 // logln("\u00C0"); 3381 // } 3382 // } 3383 } 3384 addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3385 private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) { 3386 expectedSource.addAll(s); 3387 if (t.length() > 0) { 3388 expectedTarget.addAll(t); 3389 } 3390 } 3391 3392 // private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) { 3393 // disorderedMarks.add(s); 3394 // for (int j = 1; j < s.length(); ++j) { 3395 // if (CharSequences.onCharacterBoundary(s, j)) { 3396 // String shorter = s.substring(0,j); 3397 // disorderedMarks.add(shorter); 3398 // disorderedMarks.add(nfc.normalize(shorter) + s.substring(j)); 3399 // } 3400 // } 3401 // } 3402 3403 @Test TestCharUtils()3404 public void TestCharUtils() { 3405 String[][] startTests = { 3406 {"1", "a", "ab"}, 3407 {"0", "a", "xb"}, 3408 {"0", "\uD800", "\uD800\uDC01"}, 3409 {"1", "\uD800a", "\uD800b"}, 3410 {"0", "\uD800\uDC00", "\uD800\uDC01"}, 3411 }; 3412 for (String[] row : startTests) { 3413 int actual = findSharedStartLength(row[1], row[2]); 3414 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")", 3415 Integer.parseInt(row[0]), 3416 actual); 3417 } 3418 String[][] endTests = { 3419 {"0", "\uDC00", "\uD801\uDC00"}, 3420 {"1", "a", "ba"}, 3421 {"0", "a", "bx"}, 3422 {"1", "a\uDC00", "b\uDC00"}, 3423 {"0", "\uD800\uDC00", "\uD801\uDC00"}, 3424 }; 3425 for (String[] row : endTests) { 3426 int actual = findSharedEndLength(row[1], row[2]); 3427 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")", 3428 Integer.parseInt(row[0]), 3429 actual); 3430 } 3431 } 3432 3433 /** 3434 * @param s 3435 * @param t 3436 * @return 3437 */ 3438 // TODO make generally available findSharedStartLength(CharSequence s, CharSequence t)3439 private static int findSharedStartLength(CharSequence s, CharSequence t) { 3440 int min = Math.min(s.length(), t.length()); 3441 int i; 3442 char sch, tch; 3443 for (i = 0; i < min; ++i) { 3444 sch = s.charAt(i); 3445 tch = t.charAt(i); 3446 if (sch != tch) { 3447 break; 3448 } 3449 } 3450 return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1; 3451 } 3452 3453 /** 3454 * @param s 3455 * @param t 3456 * @return 3457 */ 3458 // TODO make generally available findSharedEndLength(CharSequence s, CharSequence t)3459 private static int findSharedEndLength(CharSequence s, CharSequence t) { 3460 int slength = s.length(); 3461 int tlength = t.length(); 3462 int min = Math.min(slength, tlength); 3463 int i; 3464 char sch, tch; 3465 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho' 3466 for (i = 0; i < min; ++i) { 3467 sch = s.charAt(slength - i - 1); 3468 tch = t.charAt(tlength - i - 1); 3469 if (sch != tch) { 3470 break; 3471 } 3472 } 3473 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1; 3474 } 3475 3476 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK} 3477 assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3478 void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) { 3479 boolean haveError = false; 3480 if (!actual.containsAll(empirical)) { 3481 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual); 3482 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing)); 3483 haveError = true; 3484 } 3485 if (!empirical.containsAll(actual)) { 3486 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical); 3487 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra)); 3488 haveError = true; 3489 } 3490 if (!haveError) { 3491 logln("OK " + message + ' ' + toPattern(empirical)); 3492 } 3493 } 3494 toPattern(UnicodeSet missing)3495 private String toPattern(UnicodeSet missing) { 3496 String result = missing.toPattern(false); 3497 if (result.length() < 200) { 3498 return result; 3499 } 3500 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026"; 3501 } 3502 3503 3504 /** 3505 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3506 */ 3507 @Test TestPatternWhitespace()3508 public void TestPatternWhitespace() { 3509 // Rules 3510 String r = "a > \u200E b;"; 3511 3512 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3513 3514 expect(t, "a", "b"); 3515 3516 // UnicodeSet 3517 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3518 3519 if (set.contains(0x200E)) { 3520 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3521 } 3522 } 3523 3524 @Test TestAlternateSyntax()3525 public void TestAlternateSyntax() { 3526 // U+2206 == & 3527 // U+2190 == < 3528 // U+2192 == > 3529 // U+2194 == <> 3530 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3531 "abc", 3532 "xbz"); 3533 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3534 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3535 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3536 } 3537 3538 @Test TestPositionAPI()3539 public void TestPositionAPI() { 3540 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3541 Transliterator.Position b = new Transliterator.Position(a); 3542 Transliterator.Position c = new Transliterator.Position(); 3543 c.set(a); 3544 // Call the toString() API: 3545 if (a.equals(b) && a.equals(c)) { 3546 logln("Ok: " + a + " == " + b + " == " + c); 3547 } else { 3548 errln("FAIL: " + a + " != " + b + " != " + c); 3549 } 3550 } 3551 3552 //====================================================================== 3553 // New tests for the ::BEGIN/::END syntax 3554 //====================================================================== 3555 3556 private static final String[] BEGIN_END_RULES = new String[] { 3557 // [0] 3558 "abc > xy;" 3559 + "aba > z;", 3560 3561 // [1] 3562 /* 3563 "::BEGIN;" 3564 + "abc > xy;" 3565 + "::END;" 3566 + "::BEGIN;" 3567 + "aba > z;" 3568 + "::END;", 3569 */ 3570 "", // test case commented out below, this is here to keep from messing up the indexes 3571 3572 // [2] 3573 /* 3574 "abc > xy;" 3575 + "::BEGIN;" 3576 + "aba > z;" 3577 + "::END;", 3578 */ 3579 "", // test case commented out below, this is here to keep from messing up the indexes 3580 3581 // [3] 3582 /* 3583 "::BEGIN;" 3584 + "abc > xy;" 3585 + "::END;" 3586 + "aba > z;", 3587 */ 3588 "", // test case commented out below, this is here to keep from messing up the indexes 3589 3590 // [4] 3591 "abc > xy;" 3592 + "::Null;" 3593 + "aba > z;", 3594 3595 // [5] 3596 "::Upper;" 3597 + "ABC > xy;" 3598 + "AB > x;" 3599 + "C > z;" 3600 + "::Upper;" 3601 + "XYZ > p;" 3602 + "XY > q;" 3603 + "Z > r;" 3604 + "::Upper;", 3605 3606 // [6] 3607 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3608 + "$delim = [\\-$ws];" 3609 + "$ws $delim* > ' ';" 3610 + "'-' $delim* > '-';", 3611 3612 // [7] 3613 "::Null;" 3614 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3615 + "$delim = [\\-$ws];" 3616 + "$ws $delim* > ' ';" 3617 + "'-' $delim* > '-';", 3618 3619 // [8] 3620 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3621 + "$delim = [\\-$ws];" 3622 + "$ws $delim* > ' ';" 3623 + "'-' $delim* > '-';" 3624 + "::Null;", 3625 3626 // [9] 3627 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3628 + "$delim = [\\-$ws];" 3629 + "::Null;" 3630 + "$ws $delim* > ' ';" 3631 + "'-' $delim* > '-';", 3632 3633 // [10] 3634 /* 3635 "::BEGIN;" 3636 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3637 + "$delim = [\\-$ws];" 3638 + "::END;" 3639 + "$ws $delim* > ' ';" 3640 + "'-' $delim* > '-';", 3641 */ 3642 "", // test case commented out below, this is here to keep from messing up the indexes 3643 3644 // [11] 3645 /* 3646 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3647 + "$delim = [\\-$ws];" 3648 + "::BEGIN;" 3649 + "$ws $delim* > ' ';" 3650 + "'-' $delim* > '-';" 3651 + "::END;", 3652 */ 3653 "", // test case commented out below, this is here to keep from messing up the indexes 3654 3655 // [12] 3656 /* 3657 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3658 + "$delim = [\\-$ws];" 3659 + "$ab = [ab];" 3660 + "::BEGIN;" 3661 + "$ws $delim* > ' ';" 3662 + "'-' $delim* > '-';" 3663 + "::END;" 3664 + "::BEGIN;" 3665 + "$ab { ' ' } $ab > '-';" 3666 + "c { ' ' > ;" 3667 + "::END;" 3668 + "::BEGIN;" 3669 + "'a-a' > a\\%|a;" 3670 + "::END;", 3671 */ 3672 "", // test case commented out below, this is here to keep from messing up the indexes 3673 3674 // [13] 3675 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3676 + "$delim = [\\-$ws];" 3677 + "$ab = [ab];" 3678 + "::Null;" 3679 + "$ws $delim* > ' ';" 3680 + "'-' $delim* > '-';" 3681 + "::Null;" 3682 + "$ab { ' ' } $ab > '-';" 3683 + "c { ' ' > ;" 3684 + "::Null;" 3685 + "'a-a' > a\\%|a;", 3686 3687 // [14] 3688 /* 3689 "::[abc];" 3690 + "::BEGIN;" 3691 + "abc > xy;" 3692 + "::END;" 3693 + "::BEGIN;" 3694 + "aba > yz;" 3695 + "::END;" 3696 + "::Upper;", 3697 */ 3698 "", // test case commented out below, this is here to keep from messing up the indexes 3699 3700 // [15] 3701 "::[abc];" 3702 + "abc > xy;" 3703 + "::Null;" 3704 + "aba > yz;" 3705 + "::Upper;", 3706 3707 // [16] 3708 /* 3709 "::[abc];" 3710 + "::BEGIN;" 3711 + "abc <> xy;" 3712 + "::END;" 3713 + "::BEGIN;" 3714 + "aba <> yz;" 3715 + "::END;" 3716 + "::Upper(Lower);" 3717 + "::([XYZ]);", 3718 */ 3719 "", // test case commented out below, this is here to keep from messing up the indexes 3720 3721 // [17] 3722 "::[abc];" 3723 + "abc <> xy;" 3724 + "::Null;" 3725 + "aba <> yz;" 3726 + "::Upper(Lower);" 3727 + "::([XYZ]);" 3728 }; 3729 3730 /* 3731 (This entire test is commented out below and will need some heavy revision when we re-add 3732 the ::BEGIN/::END stuff) 3733 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3734 // [7] 3735 "::BEGIN;" 3736 + "abc > xy;" 3737 + "::BEGIN;" 3738 + "aba > z;" 3739 + "::END;" 3740 + "::END;", 3741 3742 // [8] 3743 "abc > xy;" 3744 + " aba > z;" 3745 + "::END;", 3746 3747 // [9] 3748 "::BEGIN;" 3749 + "::Upper;" 3750 + "::END;" 3751 }; 3752 */ 3753 3754 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3755 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3756 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 3757 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3758 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3759 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3760 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3761 3762 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3763 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3764 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3765 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3766 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3767 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3768 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3769 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3770 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3771 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3772 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3773 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3774 3775 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3776 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3777 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3778 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3779 }; 3780 3781 @Test TestBeginEnd()3782 public void TestBeginEnd() { 3783 // run through the list of test cases above 3784 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3785 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3786 } 3787 3788 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3789 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3790 Transliterator.REVERSE); 3791 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3792 3793 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3794 // that all of them cause errors 3795 /* 3796 (commented out until we have the real ::BEGIN/::END stuff in place 3797 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3798 try { 3799 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3800 Transliterator.FORWARD); 3801 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3802 } 3803 catch (IllegalArgumentException e) { 3804 // this is supposed to happen; do nothing here 3805 } 3806 } 3807 */ 3808 } 3809 3810 @Test TestBeginEndToRules()3811 public void TestBeginEndToRules() { 3812 // run through the same list of test cases we used above, but this time, instead of just 3813 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3814 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3815 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3816 // to (i.e., does the same thing as) the original rule set 3817 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3818 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3819 Transliterator.FORWARD); 3820 String rules = t.toRules(false); 3821 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3822 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3823 } 3824 3825 // do the same thing for the reversible test case 3826 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3827 Transliterator.REVERSE); 3828 String rules = reversed.toRules(false); 3829 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3830 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3831 } 3832 3833 @Test TestRegisterAlias()3834 public void TestRegisterAlias() { 3835 String longID = "Lower;[aeiou]Upper"; 3836 String shortID = "Any-CapVowels"; 3837 String reallyShortID = "CapVowels"; 3838 3839 Transliterator.registerAlias(shortID, longID); 3840 3841 Transliterator t1 = Transliterator.getInstance(longID); 3842 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3843 3844 if (!t1.getID().equals(longID)) 3845 errln("Transliterator instantiated with long ID doesn't have long ID"); 3846 if (!t2.getID().equals(reallyShortID)) 3847 errln("Transliterator instantiated with short ID doesn't have short ID"); 3848 3849 if (!t1.toRules(true).equals(t2.toRules(true))) 3850 errln("Alias transliterators aren't the same"); 3851 3852 Transliterator.unregister(shortID); 3853 3854 try { 3855 t1 = Transliterator.getInstance(shortID); 3856 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3857 } 3858 catch (IllegalArgumentException e) { 3859 } 3860 3861 // try the same thing again, but this time with something other than 3862 // an instance of CompoundTransliterator 3863 String realID = "Latin-Greek"; 3864 String fakeID = "Latin-dlgkjdflkjdl"; 3865 Transliterator.registerAlias(fakeID, realID); 3866 3867 t1 = Transliterator.getInstance(realID); 3868 t2 = Transliterator.getInstance(fakeID); 3869 3870 if (!t1.toRules(true).equals(t2.toRules(true))) 3871 errln("Alias transliterators aren't the same"); 3872 3873 Transliterator.unregister(fakeID); 3874 } 3875 3876 /** 3877 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 3878 */ 3879 @Test TestHalfwidthFullwidth()3880 public void TestHalfwidthFullwidth() { 3881 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3882 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3883 3884 // Array of 3n items 3885 // Each item is 3886 // "hf"|"fh"|"both", 3887 // <Halfwidth>, 3888 // <Fullwidth> 3889 String[] DATA = { 3890 "both", 3891 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3892 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3893 }; 3894 3895 for (int i=0; i<DATA.length; i+=3) { 3896 switch (DATA[i].charAt(0)) { 3897 case 'h': // Halfwidth-Fullwidth only 3898 expect(hf, DATA[i+1], DATA[i+2]); 3899 break; 3900 case 'f': // Fullwidth-Halfwidth only 3901 expect(fh, DATA[i+2], DATA[i+1]); 3902 break; 3903 case 'b': // both directions 3904 expect(hf, DATA[i+1], DATA[i+2]); 3905 expect(fh, DATA[i+2], DATA[i+1]); 3906 break; 3907 } 3908 } 3909 3910 } 3911 3912 /** 3913 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3914 * TODO: confirm that the expected results are correct. 3915 * For now, test just confirms that C++ and Java give identical results. 3916 */ 3917 @Test TestThai()3918 public void TestThai() { 3919 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3920 String thaiText = 3921 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3922 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3923 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3924 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3925 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3926 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3927 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3928 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3929 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3930 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3931 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3932 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3933 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3934 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3935 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3936 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3937 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3938 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3939 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3940 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3941 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3942 "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3943 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3944 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3945 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3946 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3947 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3948 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3949 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3950 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3951 3952 String latinText = 3953 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3954 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3955 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3956 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3957 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3958 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3959 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3960 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" + 3961 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3962 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3963 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3964 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3965 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3966 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3967 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3968 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3969 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3970 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3971 3972 expect(tr, thaiText, latinText); 3973 } 3974 3975 3976 //====================================================================== 3977 // These tests are not mirrored (yet) in icu4c at 3978 // source/test/intltest/transtst.cpp 3979 //====================================================================== 3980 3981 /** 3982 * Improve code coverage. 3983 */ 3984 @Test TestCoverage()3985 public void TestCoverage() { 3986 // NullTransliterator 3987 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3988 expect(t, "a", "a"); 3989 3990 // Source, target set 3991 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3992 t.setFilter(new UnicodeSet("[A-Z]")); 3993 logln("source = " + t.getSourceSet()); 3994 logln("target = " + t.getTargetSet()); 3995 3996 t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD); 3997 logln("source = " + t.getSourceSet()); 3998 logln("target = " + t.getTargetSet()); 3999 } 4000 /* 4001 * Test case for threading problem in NormalizationTransliterator 4002 * reported by ticket#5160 4003 */ 4004 @Test TestT5160()4005 public void TestT5160() { 4006 final String[] testData = { 4007 "a", 4008 "b", 4009 "\u09BE", 4010 "A\u0301", 4011 }; 4012 final String[] expected = { 4013 "a", 4014 "b", 4015 "\u09BE", 4016 "\u00C1", 4017 }; 4018 Transliterator translit = Transliterator.getInstance("NFC"); 4019 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 4020 for (int i = 0; i < tasks.length; i++) { 4021 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 4022 } 4023 TestUtil.runUntilDone(tasks); 4024 4025 for (int i = 0; i < tasks.length; i++) { 4026 if (tasks[i].getErrorMessage() != null) { 4027 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 4028 break; 4029 } 4030 } 4031 } 4032 4033 static class NormTranslitTask implements Runnable { 4034 Transliterator translit; 4035 String testData; 4036 String expectedData; 4037 String errorMsg; 4038 NormTranslitTask(Transliterator translit, String testData, String expectedData)4039 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 4040 this.translit = translit; 4041 this.testData = testData; 4042 this.expectedData = expectedData; 4043 } 4044 run()4045 public void run() { 4046 errorMsg = null; 4047 StringBuffer inBuf = new StringBuffer(testData); 4048 StringBuffer expectedBuf = new StringBuffer(expectedData); 4049 4050 for(int i = 0; i < 1000; i++) { 4051 String in = inBuf.toString(); 4052 String out = translit.transliterate(in); 4053 String expected = expectedBuf.toString(); 4054 if (!out.equals(expected)) { 4055 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 4056 break; 4057 } 4058 inBuf.append(testData); 4059 expectedBuf.append(expectedData); 4060 } 4061 } 4062 getErrorMessage()4063 public String getErrorMessage() { 4064 return errorMsg; 4065 } 4066 } 4067 4068 //====================================================================== 4069 // Support methods 4070 //====================================================================== expect(String rules, String source, String expectedResult, Transliterator.Position pos)4071 static void expect(String rules, 4072 String source, 4073 String expectedResult, 4074 Transliterator.Position pos) { 4075 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 4076 expect(t, source, expectedResult, pos); 4077 } 4078 expect(String rules, String source, String expectedResult)4079 static void expect(String rules, String source, String expectedResult) { 4080 expect(rules, source, expectedResult, null); 4081 } 4082 expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)4083 static void expect(Transliterator t, String source, String expectedResult, 4084 Transliterator reverseTransliterator) { 4085 expect(t, source, expectedResult); 4086 if (reverseTransliterator != null) { 4087 expect(reverseTransliterator, expectedResult, source); 4088 } 4089 } 4090 expect(Transliterator t, String source, String expectedResult)4091 static void expect(Transliterator t, String source, String expectedResult) { 4092 expect(t, source, expectedResult, (Transliterator.Position) null); 4093 } 4094 expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)4095 static void expect(Transliterator t, String source, String expectedResult, 4096 Transliterator.Position pos) { 4097 if (pos == null) { 4098 String result = t.transliterate(source); 4099 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 4100 } 4101 4102 Transliterator.Position index = null; 4103 if (pos == null) { 4104 index = new Transliterator.Position(0, source.length(), 0, source.length()); 4105 } else { 4106 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4107 pos.start, pos.limit); 4108 } 4109 4110 ReplaceableString rsource = new ReplaceableString(source); 4111 4112 t.finishTransliteration(rsource, index); 4113 // Do it all at once -- below we do it incrementally 4114 4115 if (index.start != index.limit) { 4116 expectAux(t.getID() + ":UNFINISHED", source, 4117 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 4118 return; 4119 } 4120 String result = rsource.toString(); 4121 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 4122 4123 4124 if (pos == null) { 4125 index = new Transliterator.Position(); 4126 } else { 4127 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4128 pos.start, pos.limit); 4129 } 4130 4131 // Test incremental transliteration -- this result 4132 // must be the same after we finalize (see below). 4133 List<String> v = new ArrayList<String>(); 4134 v.add(source); 4135 rsource.replace(0, rsource.length(), ""); 4136 if (pos != null) { 4137 rsource.replace(0, 0, source); 4138 v.add(UtilityExtensions.formatInput(rsource, index)); 4139 t.transliterate(rsource, index); 4140 v.add(UtilityExtensions.formatInput(rsource, index)); 4141 } else { 4142 for (int i=0; i<source.length(); ++i) { 4143 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 4144 //log.append(source.charAt(i)).append(" -> ")); 4145 t.transliterate(rsource, index, source.charAt(i)); 4146 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 4147 v.add(UtilityExtensions.formatInput(rsource, index) + 4148 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 4149 } 4150 } 4151 4152 // As a final step in keyboard transliteration, we must call 4153 // transliterate to finish off any pending partial matches that 4154 // were waiting for more input. 4155 t.finishTransliteration(rsource, index); 4156 result = rsource.toString(); 4157 //log.append(" => ").append(rsource.toString()); 4158 v.add(result); 4159 4160 String[] results = new String[v.size()]; 4161 v.toArray(results); 4162 expectAux(t.getID() + ":Incremental", results, 4163 result.equals(expectedResult), 4164 expectedResult); 4165 } 4166 4167 static boolean expectAux(String tag, String source, 4168 String result, String expectedResult) { 4169 return expectAux(tag, new String[] {source, result}, 4170 result.equals(expectedResult), 4171 expectedResult); 4172 } 4173 4174 static boolean expectAux(String tag, String source, 4175 String result, boolean pass, 4176 String expectedResult) { 4177 return expectAux(tag, new String[] {source, result}, 4178 pass, 4179 expectedResult); 4180 } 4181 4182 static boolean expectAux(String tag, String source, 4183 boolean pass, 4184 String expectedResult) { 4185 return expectAux(tag, new String[] {source}, 4186 pass, 4187 expectedResult); 4188 } 4189 4190 static boolean expectAux(String tag, String[] results, boolean pass, 4191 String expectedResult) { 4192 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 4193 4194 for (int i = 0; i < results.length; ++i) { 4195 String label; 4196 if (i == 0) { 4197 label = "source: "; 4198 } else if (i == results.length - 1) { 4199 label = "result: "; 4200 } else { 4201 if (!isVerbose() && pass) continue; 4202 label = "interm" + i + ": "; 4203 } 4204 msg(" " + label + results[i], pass ? LOG : ERR, false, true); 4205 } 4206 4207 if (!pass) { 4208 msg( " expected: " + expectedResult, ERR, false, true); 4209 } 4210 4211 return pass; 4212 } 4213 4214 static private void assertTransform(String message, String expected, StringTransform t, String source) { 4215 assertEquals(message + " " + source, expected, t.transform(source)); 4216 } 4217 4218 4219 static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 4220 assertEquals(message + " " +source, expected, t.transform(source)); 4221 assertEquals(message + " " +source2, expected, t.transform(source2)); 4222 assertEquals(message + " " + expected, source, back.transform(expected)); 4223 } 4224 4225 /* 4226 * Tests the method public Enumeration<String> getAvailableTargets(String source) 4227 */ 4228 @Test 4229 public void TestGetAvailableTargets() { 4230 try { 4231 // Tests when if (targets == null) is true 4232 Transliterator.getAvailableTargets(""); 4233 } catch (Exception e) { 4234 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 4235 } 4236 } 4237 4238 /* 4239 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 4240 */ 4241 @Test 4242 public void TestGetAvailableVariants() { 4243 try { 4244 // Tests when if (targets == null) is true 4245 Transliterator.getAvailableVariants("", ""); 4246 } catch (Exception e) { 4247 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 4248 } 4249 } 4250 4251 /* 4252 * Tests the mehtod String nextLine() in RuleBody 4253 */ 4254 @Test 4255 public void TestNextLine() { 4256 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 4257 try{ 4258 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 4259 } catch(Exception e){ 4260 errln("TransliteratorParser.nextLine() was not suppose to return an " + 4261 "exception for a rule of '\\'"); 4262 } 4263 } 4264 } 4265