• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package ohos.global.icu.dev.test.translit;
11 
12 import java.util.ArrayList;
13 import java.util.Enumeration;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.List;
18 import java.util.Locale;
19 
20 import org.junit.Test;
21 import org.junit.runner.RunWith;
22 import org.junit.runners.JUnit4;
23 
24 import ohos.global.icu.dev.test.TestFmwk;
25 import ohos.global.icu.dev.test.TestUtil;
26 import ohos.global.icu.impl.Utility;
27 import ohos.global.icu.impl.UtilityExtensions;
28 import ohos.global.icu.lang.CharSequences;
29 import ohos.global.icu.lang.UCharacter;
30 import ohos.global.icu.lang.UScript;
31 import ohos.global.icu.text.Replaceable;
32 import ohos.global.icu.text.ReplaceableString;
33 import ohos.global.icu.text.StringTransform;
34 import ohos.global.icu.text.Transliterator;
35 import ohos.global.icu.text.UTF16;
36 import ohos.global.icu.text.UnicodeFilter;
37 import ohos.global.icu.text.UnicodeSet;
38 import ohos.global.icu.text.UnicodeSetIterator;
39 import ohos.global.icu.util.CaseInsensitiveString;
40 import ohos.global.icu.util.ULocale;
41 
42 
43 /***********************************************************************
44 
45                      HOW TO USE THIS TEST FILE
46                                -or-
47                   How I developed on two platforms
48                 without losing (too much of) my mind
49 
50 
51 1. Add new tests by copying/pasting/changing existing tests.  On Java,
52    any public void method named Test...() taking no parameters becomes
53    a test.  On C++, you need to modify the header and add a line to
54    the runIndexedTest() dispatch method.
55 
56 2. Make liberal use of the expect() method; it is your friend.
57 
58 3. The tests in this file exactly match those in a sister file on the
59    other side.  The two files are:
60 
61    icu4j:  src/ohos.global.icu.dev.test/translit/TransliteratorTest.java
62    icu4c:  source/test/intltest/transtst.cpp
63 
64                   ==> THIS IS THE IMPORTANT PART <==
65 
66    When you add a test in this file, add it in transtst.cpp too.
67    Give it the same name and put it in the same relative place.  This
68    makes maintenance a lot simpler for any poor soul who ends up
69    trying to synchronize the tests between icu4j and icu4c.
70 
71 4. If you MUST enter a test that is NOT paralleled in the sister file,
72    then add it in the special non-mirrored section.  These are
73    labeled
74 
75      "icu4j ONLY"
76 
77    or
78 
79      "icu4c ONLY"
80 
81    Make sure you document the reason the test is here and not there.
82 
83 
84 Thank you.
85 The Management
86  ***********************************************************************/
87 
88 /**
89  * @test
90  * @summary General test of Transliterator
91  */
92 
93 @RunWith(JUnit4.class)
94 public class TransliteratorTest extends TestFmwk {
95     @Test
TestHangul()96     public void TestHangul() {
97 
98         Transliterator lh = Transliterator.getInstance("Latin-Hangul");
99         Transliterator hl = lh.getInverse();
100 
101         assertTransform("Transform", "\uCE20", lh, "ch");
102 
103         assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
104         assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
105         assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
106         assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
107         assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
108         assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
109         assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
110         assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
111         assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
112         assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
113         assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
114         assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
115         assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
116         assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
117         assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
118         assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
119         assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
120         assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
121         assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
122         assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
123         assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
124         assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
125         assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
126         assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
127         assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
128         assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
129 
130     }
131 
132     @Test
TestChinese()133     public void TestChinese() {
134         Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
135         assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
136         assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
137     }
138 
139     @Test
TestRegistry()140     public void TestRegistry() {
141         checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
142         checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
143         checkRegistry("foo1", "[:letter:] a > b;");
144         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
145             String id = (String) e.nextElement();
146             checkRegistry(id);
147         }
148         // Need to remove these test-specific transliterators in order not to interfere with other tests.
149         Transliterator.unregister("foo3");
150         Transliterator.unregister("foo2");
151         Transliterator.unregister("foo1");
152     }
153 
checkRegistry(String id, String rules)154     private void checkRegistry (String id, String rules) {
155         Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
156         Transliterator.registerInstance(foo);
157         checkRegistry(id);
158     }
159 
checkRegistry(String id)160     private void checkRegistry(String id) {
161         Transliterator fie = Transliterator.getInstance(id);
162         final UnicodeSet fae = new UnicodeSet("[a-z5]");
163         fie.setFilter(fae);
164         Transliterator foe = Transliterator.getInstance(id);
165         UnicodeFilter fee = foe.getFilter();
166         if (fae.equals(fee)) {
167             errln("Changed what is in registry for " + id);
168         }
169     }
170 
171     @Test
TestInstantiationError()172     public void TestInstantiationError() {
173         try {
174             String ID = "<Not a valid Transliterator ID>";
175             Transliterator t = Transliterator.getInstance(ID);
176             errln("FAIL: " + ID + " returned " + t);
177         } catch (IllegalArgumentException ex) {
178             logln("OK: Bogus ID handled properly");
179         }
180     }
181 
182     @Test
TestSimpleRules()183     public void TestSimpleRules() {
184         /* Example: rules 1. ab>x|y
185          *                2. yc>z
186          *
187          * []|eabcd  start - no match, copy e to tranlated buffer
188          * [e]|abcd  match rule 1 - copy output & adjust cursor
189          * [ex|y]cd  match rule 2 - copy output & adjust cursor
190          * [exz]|d   no match, copy d to transliterated buffer
191          * [exzd]|   done
192          */
193         expect("ab>x|y;" +
194                 "yc>z",
195                 "eabcd", "exzd");
196 
197         /* Another set of rules:
198          *    1. ab>x|yzacw
199          *    2. za>q
200          *    3. qc>r
201          *    4. cw>n
202          *
203          * []|ab       Rule 1
204          * [x|yzacw]   No match
205          * [xy|zacw]   Rule 2
206          * [xyq|cw]    Rule 4
207          * [xyqn]|     Done
208          */
209         expect("ab>x|yzacw;" +
210                 "za>q;" +
211                 "qc>r;" +
212                 "cw>n",
213                 "ab", "xyqn");
214 
215         /* Test categories
216          */
217         Transliterator t = Transliterator.createFromRules("<ID>",
218                 "$dummy=\uE100;" +
219                 "$vowel=[aeiouAEIOU];" +
220                 "$lu=[:Lu:];" +
221                 "$vowel } $lu > '!';" +
222                 "$vowel > '&';" +
223                 "'!' { $lu > '^';" +
224                 "$lu > '*';" +
225                 "a>ERROR",
226                 Transliterator.FORWARD);
227         expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
228     }
229 
230     /**
231      * Test inline set syntax and set variable syntax.
232      */
233     @Test
TestInlineSet()234     public void TestInlineSet() {
235         expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
236         expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
237 
238         expect("$digit = [0-9];" +
239                 "$alpha = [a-zA-Z];" +
240                 "$alphanumeric = [$digit $alpha];" + // ***
241                 "$special = [^$alphanumeric];" +     // ***
242                 "$alphanumeric > '-';" +
243                 "$special > '*';",
244 
245                 "thx-1138", "---*----");
246     }
247 
248     /**
249      * Create some inverses and confirm that they work.  We have to be
250      * careful how we do this, since the inverses will not be true
251      * inverses -- we can't throw any random string at the composition
252      * of the transliterators and expect the identity function.  F x
253      * F' != I.  However, if we are careful about the input, we will
254      * get the expected results.
255      */
256     @Test
TestRuleBasedInverse()257     public void TestRuleBasedInverse() {
258         String RULES =
259             "abc>zyx;" +
260             "ab>yz;" +
261             "bc>zx;" +
262             "ca>xy;" +
263             "a>x;" +
264             "b>y;" +
265             "c>z;" +
266 
267             "abc<zyx;" +
268             "ab<yz;" +
269             "bc<zx;" +
270             "ca<xy;" +
271             "a<x;" +
272             "b<y;" +
273             "c<z;" +
274 
275             "";
276 
277         String[] DATA = {
278                 // Careful here -- random strings will not work.  If we keep
279                 // the left side to the domain and the right side to the range
280                 // we will be okay though (left, abc; right xyz).
281                 "a", "x",
282                 "abcacab", "zyxxxyy",
283                 "caccb", "xyzzy",
284         };
285 
286         Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
287         Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
288         for (int i=0; i<DATA.length; i+=2) {
289             expect(fwd, DATA[i], DATA[i+1]);
290             expect(rev, DATA[i+1], DATA[i]);
291         }
292     }
293 
294     /**
295      * Basic test of keyboard.
296      */
297     @Test
TestKeyboard()298     public void TestKeyboard() {
299         Transliterator t = Transliterator.createFromRules("<ID>",
300                 "psch>Y;"
301                 +"ps>y;"
302                 +"ch>x;"
303                 +"a>A;", Transliterator.FORWARD);
304         String DATA[] = {
305                 // insertion, buffer
306                 "a", "A",
307                 "p", "Ap",
308                 "s", "Aps",
309                 "c", "Apsc",
310                 "a", "AycA",
311                 "psch", "AycAY",
312                 null, "AycAY", // null means finishKeyboardTransliteration
313         };
314 
315         keyboardAux(t, DATA);
316     }
317 
318     /**
319      * Basic test of keyboard with cursor.
320      */
321     @Test
TestKeyboard2()322     public void TestKeyboard2() {
323         Transliterator t = Transliterator.createFromRules("<ID>",
324                 "ych>Y;"
325                 +"ps>|y;"
326                 +"ch>x;"
327                 +"a>A;", Transliterator.FORWARD);
328         String DATA[] = {
329                 // insertion, buffer
330                 "a", "A",
331                 "p", "Ap",
332                 "s", "Aps", // modified for rollback - "Ay",
333                 "c", "Apsc", // modified for rollback - "Ayc",
334                 "a", "AycA",
335                 "p", "AycAp",
336                 "s", "AycAps", // modified for rollback - "AycAy",
337                 "c", "AycApsc", // modified for rollback - "AycAyc",
338                 "h", "AycAY",
339                 null, "AycAY", // null means finishKeyboardTransliteration
340         };
341 
342         keyboardAux(t, DATA);
343     }
344 
345     /**
346      * Test keyboard transliteration with back-replacement.
347      */
348     @Test
TestKeyboard3()349     public void TestKeyboard3() {
350         // We want th>z but t>y.  Furthermore, during keyboard
351         // transliteration we want t>y then yh>z if t, then h are
352         // typed.
353         String RULES =
354             "t>|y;" +
355             "yh>z;" +
356             "";
357 
358         String[] DATA = {
359                 // Column 1: characters to add to buffer (as if typed)
360                 // Column 2: expected appearance of buffer after
361                 //           keyboard xliteration.
362                 "a", "a",
363                 "b", "ab",
364                 "t", "abt", // modified for rollback - "aby",
365                 "c", "abyc",
366                 "t", "abyct", // modified for rollback - "abycy",
367                 "h", "abycz",
368                 null, "abycz", // null means finishKeyboardTransliteration
369         };
370 
371         Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
372         keyboardAux(t, DATA);
373     }
374 
keyboardAux(Transliterator t, String[] DATA)375     private void keyboardAux(Transliterator t, String[] DATA) {
376         Transliterator.Position index = new Transliterator.Position();
377         ReplaceableString s = new ReplaceableString();
378         for (int i=0; i<DATA.length; i+=2) {
379             StringBuffer log;
380             if (DATA[i] != null) {
381                 log = new StringBuffer(s.toString() + " + "
382                         + DATA[i]
383                                + " -> ");
384                 t.transliterate(s, index, DATA[i]);
385             } else {
386                 log = new StringBuffer(s.toString() + " => ");
387                 t.finishTransliteration(s, index);
388             }
389             UtilityExtensions.formatInput(log, s, index);
390             if (s.toString().equals(DATA[i+1])) {
391                 logln(log.toString());
392             } else {
393                 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
394             }
395         }
396     }
397 
398     // Latin-Arabic has been temporarily removed until it can be
399     // done correctly.
400 
401     //  public void TestArabic() {
402     //      String DATA[] = {
403     //          "Arabic",
404     //              "\u062a\u062a\u0645\u062a\u0639 "+
405     //              "\u0627\u0644\u0644\u063a\u0629 "+
406     //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
407     //              "\u0628\u0628\u0646\u0638\u0645 "+
408     //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
409     //              "\u062c\u0645\u064a\u0644\u0629"
410     //      };
411 
412     //      Transliterator t = Transliterator.getInstance("Latin-Arabic");
413     //      for (int i=0; i<DATA.length; i+=2) {
414     //          expect(t, DATA[i], DATA[i+1]);
415     //      }
416     //  }
417 
418     /**
419      * Compose the Kana transliterator forward and reverse and try
420      * some strings that should come out unchanged.
421      */
422     @Test
TestCompoundKana()423     public void TestCompoundKana() {
424         Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
425         expect(t, "aaaaa", "aaaaa");
426     }
427 
428     /**
429      * Compose the hex transliterators forward and reverse.
430      */
431     @Test
TestCompoundHex()432     public void TestCompoundHex() {
433         Transliterator a = Transliterator.getInstance("Any-Hex");
434         Transliterator b = Transliterator.getInstance("Hex-Any");
435         // Transliterator[] trans = { a, b };
436         // Transliterator ab = Transliterator.getInstance(trans);
437         Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
438 
439         // Do some basic tests of b
440         expect(b, "\\u0030\\u0031", "01");
441 
442         String s = "abcde";
443         expect(ab, s, s);
444 
445         // trans = new Transliterator[] { b, a };
446         // Transliterator ba = Transliterator.getInstance(trans);
447         Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
448         ReplaceableString str = new ReplaceableString(s);
449         a.transliterate(str);
450         expect(ba, str.toString(), str.toString());
451     }
452 
453     /**
454      * Do some basic tests of filtering.
455      */
456     @Test
TestFiltering()457     public void TestFiltering() {
458 
459         Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
460         tempTrans.setFilter(new UnicodeSet("[a]"));
461         String tempResult = tempTrans.transform("xa");
462         assertEquals("context should not be filtered ", "xb", tempResult);
463 
464         tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
465         tempResult = tempTrans.transform("xa");
466         assertEquals("context should not be filtered ", "xb", tempResult);
467 
468         Transliterator hex = Transliterator.getInstance("Any-Hex");
469         hex.setFilter(new UnicodeFilter() {
470             @Override
471             public boolean contains(int c) {
472                 return c != 'c';
473             }
474             @Override
475             public String toPattern(boolean escapeUnprintable) {
476                 return "";
477             }
478             @Override
479             public boolean matchesIndexValue(int v) {
480                 return false;
481             }
482             @Override
483             public void addMatchSetTo(UnicodeSet toUnionTo) {}
484         });
485         String s = "abcde";
486         String out = hex.transliterate(s);
487         String exp = "\\u0061\\u0062c\\u0064\\u0065";
488         if (out.equals(exp)) {
489             logln("Ok:   \"" + exp + "\"");
490         } else {
491             logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
492         }
493     }
494 
495     /**
496      * Test anchors
497      */
498     @Test
TestAnchors()499     public void TestAnchors() {
500         expect("^ab  > 01 ;" +
501                 " ab  > |8 ;" +
502                 "  b  > k ;" +
503                 " 8x$ > 45 ;" +
504                 " 8x  > 77 ;",
505 
506                 "ababbabxabx",
507         "018k7745");
508         expect("$s = [z$] ;" +
509                 "$s{ab    > 01 ;" +
510                 "   ab    > |8 ;" +
511                 "    b    > k ;" +
512                 "   8x}$s > 45 ;" +
513                 "   8x    > 77 ;",
514 
515                 "abzababbabxzabxabx",
516         "01z018k45z01x45");
517     }
518 
519     /**
520      * Test pattern quoting and escape mechanisms.
521      */
522     @Test
TestPatternQuoting()523     public void TestPatternQuoting() {
524         // Array of 3n items
525         // Each item is <rules>, <input>, <expected output>
526         String[] DATA = {
527                 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
528         };
529 
530         for (int i=0; i<DATA.length; i+=3) {
531             logln("Pattern: " + Utility.escape(DATA[i]));
532             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
533             expect(t, DATA[i+1], DATA[i+2]);
534         }
535     }
536 
537     @Test
TestVariableNames()538     public void TestVariableNames() {
539         Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
540         if (gl == null) {
541             errln("FAIL: null Transliterator returned.");
542         }
543     }
544 
545     /**
546      * Regression test for bugs found in Greek transliteration.
547      */
548     @Test
TestJ277()549     public void TestJ277() {
550         Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
551 
552         char sigma = (char)0x3C3;
553         char upsilon = (char)0x3C5;
554         char nu = (char)0x3BD;
555         // not used char PHI = (char)0x3A6;
556         char alpha = (char)0x3B1;
557         // not used char omega = (char)0x3C9;
558         // not used char omicron = (char)0x3BF;
559         // not used char epsilon = (char)0x3B5;
560 
561         // sigma upsilon nu -> syn
562         StringBuffer buf = new StringBuffer();
563         buf.append(sigma).append(upsilon).append(nu);
564         String syn = buf.toString();
565         expect(gl, syn, "syn");
566 
567         // sigma alpha upsilon nu -> saun
568         buf.setLength(0);
569         buf.append(sigma).append(alpha).append(upsilon).append(nu);
570         String sayn = buf.toString();
571         expect(gl, sayn, "saun");
572 
573         // Again, using a smaller rule set
574         String rules =
575             "$alpha   = \u03B1;" +
576             "$nu      = \u03BD;" +
577             "$sigma   = \u03C3;" +
578             "$ypsilon = \u03C5;" +
579             "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +
580             "s <>           $sigma;" +
581             "a <>           $alpha;" +
582             "u <>  $vowel { $ypsilon;" +
583             "y <>           $ypsilon;" +
584             "n <>           $nu;";
585         Transliterator mini = Transliterator.createFromRules
586         ("mini", rules, Transliterator.REVERSE);
587         expect(mini, syn, "syn");
588         expect(mini, sayn, "saun");
589 
590         //|    // Transliterate the Greek locale data
591         //|    Locale el("el");
592         //|    DateFormatSymbols syms(el, status);
593         //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
594         //|    int32_t i, count;
595         //|    const UnicodeString* data = syms.getMonths(count);
596         //|    for (i=0; i<count; ++i) {
597         //|        if (data[i].length() == 0) {
598         //|            continue;
599         //|        }
600         //|        UnicodeString out(data[i]);
601         //|        gl->transliterate(out);
602         //|        bool_t ok = TRUE;
603         //|        if (data[i].length() >= 2 && out.length() >= 2 &&
604         //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
605         //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
606         //|                ok = FALSE;
607         //|            }
608         //|        }
609         //|        if (ok) {
610         //|            logln(prettify(data[i] + " -> " + out));
611         //|        } else {
612         //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
613         //|        }
614         //|    }
615     }
616 
617     //    /**
618     //     * Prefix, suffix support in hex transliterators
619     //     */
620     //    public void TestJ243() {
621     //        // Test default Hex-Any, which should handle
622     //        // \\u, \\U, u+, and U+
623     //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
624     //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
625     //
626     //        // Try a custom Hex-Any
627     //        // \\uXXXX and &#xXXXX;
628     //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
629     //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
630     //               "abcd5fx012&#x00033;");
631     //
632     //        // Try custom Any-Hex (default is tested elsewhere)
633     //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
634     //        expect(hex3, "012", "&#x30;&#x31;&#x32;");
635     //    }
636 
637     @Test
TestJ329()638     public void TestJ329() {
639 
640         Object[] DATA = {
641                 Boolean.FALSE, "a > b; c > d",
642                 Boolean.TRUE,  "a > b; no operator; c > d",
643         };
644 
645         for (int i=0; i<DATA.length; i+=2) {
646             String err = null;
647             try {
648                 Transliterator.createFromRules("<ID>",
649                         (String) DATA[i+1],
650                         Transliterator.FORWARD);
651             } catch (IllegalArgumentException e) {
652                 err = e.getMessage();
653             }
654             boolean gotError = (err != null);
655             String desc = (String) DATA[i+1] +
656             (gotError ? (" -> error: " + err) : " -> no error");
657             if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
658                 logln("Ok:   " + desc);
659             } else {
660                 errln("FAIL: " + desc);
661             }
662         }
663     }
664 
665     /**
666      * Test segments and segment references.
667      */
668     @Test
TestSegments()669     public void TestSegments() {
670         // Array of 3n items
671         // Each item is <rules>, <input>, <expected output>
672         String[] DATA = {
673                 "([a-z]) '.' ([0-9]) > $2 '-' $1",
674                 "abc.123.xyz.456",
675                 "ab1-c23.xy4-z56",
676         };
677 
678         for (int i=0; i<DATA.length; i+=3) {
679             logln("Pattern: " + Utility.escape(DATA[i]));
680             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
681             expect(t, DATA[i+1], DATA[i+2]);
682         }
683     }
684 
685     /**
686      * Test cursor positioning outside of the key
687      */
688     @Test
TestCursorOffset()689     public void TestCursorOffset() {
690         // Array of 3n items
691         // Each item is <rules>, <input>, <expected output>
692         String[] DATA = {
693                 "pre {alpha} post > | @ ALPHA ;" +
694                 "eALPHA > beta ;" +
695                 "pre {beta} post > BETA @@ | ;" +
696                 "post > xyz",
697 
698                 "prealphapost prebetapost",
699                 "prbetaxyz preBETApost",
700         };
701 
702         for (int i=0; i<DATA.length; i+=3) {
703             logln("Pattern: " + Utility.escape(DATA[i]));
704             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
705             expect(t, DATA[i+1], DATA[i+2]);
706         }
707     }
708 
709     /**
710      * Test zero length and > 1 char length variable values.  Test
711      * use of variable refs in UnicodeSets.
712      */
713     @Test
TestArbitraryVariableValues()714     public void TestArbitraryVariableValues() {
715         // Array of 3n items
716         // Each item is <rules>, <input>, <expected output>
717         String[] DATA = {
718                 "$abe = ab;" +
719                 "$pat = x[yY]z;" +
720                 "$ll  = 'a-z';" +
721                 "$llZ = [$ll];" +
722                 "$llY = [$ll$pat];" +
723                 "$emp = ;" +
724 
725                 "$abe > ABE;" +
726                 "$pat > END;" +
727                 "$llZ > 1;" +
728                 "$llY > 2;" +
729                 "7$emp 8 > 9;" +
730                 "",
731 
732                 "ab xYzxyz stY78",
733                 "ABE ENDEND 1129",
734         };
735 
736         for (int i=0; i<DATA.length; i+=3) {
737             logln("Pattern: " + Utility.escape(DATA[i]));
738             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
739             expect(t, DATA[i+1], DATA[i+2]);
740         }
741     }
742 
743     /**
744      * Confirm that the contextStart, contextLimit, start, and limit
745      * behave correctly.
746      */
747     @Test
TestPositionHandling()748     public void TestPositionHandling() {
749         // Array of 3n items
750         // Each item is <rules>, <input>, <expected output>
751         String[] DATA = {
752                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
753                 "xtat txtb", // pos 0,9,0,9
754                 "xTTaSS TTxUUb",
755 
756                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
757                 "xtat txtb", // pos 2,9,3,8
758                 "xtaSS TTxUUb",
759 
760                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
761                 "xtat txtb", // pos 3,8,3,8
762                 "xtaTT TTxTTb",
763         };
764 
765         // Array of 4n positions -- these go with the DATA array
766         // They are: contextStart, contextLimit, start, limit
767         int[] POS = {
768                 0, 9, 0, 9,
769                 2, 9, 3, 8,
770                 3, 8, 3, 8,
771         };
772 
773         int n = DATA.length/3;
774         for (int i=0; i<n; i++) {
775             Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
776             Transliterator.Position pos = new Transliterator.Position(
777                     POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
778             ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
779             t.transliterate(rsource, pos);
780             t.finishTransliteration(rsource, pos);
781             String result = rsource.toString();
782             String exp = DATA[3*i+2];
783             expectAux(Utility.escape(DATA[3*i]),
784                     DATA[3*i+1],
785                     result,
786                     result.equals(exp),
787                     exp);
788         }
789     }
790 
791     /**
792      * Test the Hiragana-Katakana transliterator.
793      */
794     @Test
TestHiraganaKatakana()795     public void TestHiraganaKatakana() {
796         Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
797         Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
798 
799         // Array of 3n items
800         // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
801         String[] DATA = {
802                 "both",
803                 "\u3042\u3090\u3099\u3092\u3050",
804                 "\u30A2\u30F8\u30F2\u30B0",
805 
806                 "kh",
807                 "\u307C\u3051\u3060\u3042\u3093\u30FC",
808                 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
809         };
810 
811         for (int i=0; i<DATA.length; i+=3) {
812             switch (DATA[i].charAt(0)) {
813             case 'h': // Hiragana-Katakana
814                 expect(hk, DATA[i+1], DATA[i+2]);
815                 break;
816             case 'k': // Katakana-Hiragana
817                 expect(kh, DATA[i+2], DATA[i+1]);
818                 break;
819             case 'b': // both
820                 expect(hk, DATA[i+1], DATA[i+2]);
821                 expect(kh, DATA[i+2], DATA[i+1]);
822                 break;
823             }
824         }
825 
826     }
827 
828     @Test
TestCopyJ476()829     public void TestCopyJ476() {
830         // This is a C++-only copy constructor test
831     }
832 
833     /**
834      * Test inter-Indic transliterators.  These are composed.
835      */
836     @Test
TestInterIndic()837     public void TestInterIndic() {
838         String ID = "Devanagari-Gujarati";
839         Transliterator dg = Transliterator.getInstance(ID);
840         if (dg == null) {
841             errln("FAIL: getInstance(" + ID + ") returned null");
842             return;
843         }
844         String id = dg.getID();
845         if (!id.equals(ID)) {
846             errln("FAIL: getInstance(" + ID + ").getID() => " + id);
847         }
848         String dev = "\u0901\u090B\u0925";
849         String guj = "\u0A81\u0A8B\u0AA5";
850         expect(dg, dev, guj);
851     }
852 
853     /**
854      * Test filter syntax in IDs. (J23)
855      */
856     @Test
TestFilterIDs()857     public void TestFilterIDs() {
858         String[] DATA = {
859                 "[aeiou]Any-Hex", // ID
860                 "[aeiou]Hex-Any", // expected inverse ID
861                 "quizzical",      // src
862                 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
863 
864                 "[aeiou]Any-Hex;[^5]Hex-Any",
865                 "[^5]Any-Hex;[aeiou]Hex-Any",
866                 "quizzical",
867                 "q\\u0075izzical",
868 
869                 "[abc]Null",
870                 "[abc]Null",
871                 "xyz",
872                 "xyz",
873         };
874 
875         for (int i=0; i<DATA.length; i+=4) {
876             String ID = DATA[i];
877             Transliterator t = Transliterator.getInstance(ID);
878             expect(t, DATA[i+2], DATA[i+3]);
879 
880             // Check the ID
881             if (!ID.equals(t.getID())) {
882                 errln("FAIL: getInstance(" + ID + ").getID() => " +
883                         t.getID());
884             }
885 
886             // Check the inverse
887             String uID = DATA[i+1];
888             Transliterator u = t.getInverse();
889             if (u == null) {
890                 errln("FAIL: " + ID + ".getInverse() returned NULL");
891             } else if (!u.getID().equals(uID)) {
892                 errln("FAIL: " + ID + ".getInverse().getID() => " +
893                         u.getID() + ", expected " + uID);
894             }
895         }
896     }
897 
898     /**
899      * Test the case mapping transliterators.
900      */
901     @Test
TestCaseMap()902     public void TestCaseMap() {
903         Transliterator toUpper =
904             Transliterator.getInstance("Any-Upper[^xyzXYZ]");
905         Transliterator toLower =
906             Transliterator.getInstance("Any-Lower[^xyzXYZ]");
907         Transliterator toTitle =
908             Transliterator.getInstance("Any-Title[^xyzXYZ]");
909 
910         expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
911         "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
912         expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
913         "the quick brown foX jumped over the lazY dogs.");
914         expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
915         "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
916     }
917 
918     /**
919      * Test the name mapping transliterators.
920      */
921     @Test
TestNameMap()922     public void TestNameMap() {
923         Transliterator uni2name =
924             Transliterator.getInstance("Any-Name[^abc]");
925         Transliterator name2uni =
926             Transliterator.getInstance("Name-Any");
927 
928         expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
929         "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
930         expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
931         "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
932 
933         // round trip
934         Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
935 
936         String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
937         expect(t, s, s);
938     }
939 
940     /**
941      * Test liberalized ID syntax.  1006c
942      */
943     @Test
TestLiberalizedID()944     public void TestLiberalizedID() {
945         // Some test cases have an expected getID() value of NULL.  This
946         // means I have disabled the test case for now.  This stuff is
947         // still under development, and I haven't decided whether to make
948         // getID() return canonical case yet.  It will all get rewritten
949         // with the move to Source-Target/Variant IDs anyway. [aliu]
950         String DATA[] = {
951                 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
952                 "  Null  ", "Null", "whitespace",
953                 " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
954                 "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",
955         };
956 
957         for (int i=0; i<DATA.length; i+=3) {
958             try {
959                 Transliterator t = Transliterator.getInstance(DATA[i]);
960                 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
961                     logln("Ok: " + DATA[i+2] +
962                             " create ID \"" + DATA[i] + "\" => \"" +
963                             t.getID() + "\"");
964                 } else {
965                     errln("FAIL: " + DATA[i+2] +
966                             " create ID \"" + DATA[i] + "\" => \"" +
967                             t.getID() + "\", exp \"" + DATA[i+1] + "\"");
968                 }
969             } catch (IllegalArgumentException e) {
970                 errln("FAIL: " + DATA[i+2] +
971                         " create ID \"" + DATA[i] + "\"");
972             }
973         }
974     }
975 
976     @Test
TestCreateInstance()977     public void TestCreateInstance() {
978         String FORWARD = "F";
979         String REVERSE = "R";
980         String DATA[] = {
981                 // Column 1: id
982                 // Column 2: direction
983                 // Column 3: expected ID, or "" if expect failure
984                 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
985 
986                 // JB#2689: bad compound causes crash
987                 "InvalidSource-InvalidTarget", FORWARD, "",
988                 "InvalidSource-InvalidTarget", REVERSE, "",
989                 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
990                 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
991                 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
992                 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
993 
994                 null
995         };
996 
997         for (int i=0; DATA[i]!=null; i+=3) {
998             String id=DATA[i];
999             int dir = (DATA[i+1]==FORWARD)?
1000                     Transliterator.FORWARD:Transliterator.REVERSE;
1001             String expID=DATA[i+2];
1002             Exception e = null;
1003             Transliterator t;
1004             try {
1005                 t = Transliterator.getInstance(id,dir);
1006             } catch (Exception e1) {
1007                 e = e1;
1008                 t = null;
1009             }
1010             String newID = (t!=null)?t.getID():"";
1011             boolean ok = (newID.equals(expID));
1012             if (t==null) {
1013                 newID = e.getMessage();
1014             }
1015             if (ok) {
1016                 logln("Ok: createInstance(" +
1017                         id + "," + DATA[i+1] + ") => " + newID);
1018             } else {
1019                 errln("FAIL: createInstance(" +
1020                         id + "," + DATA[i+1] + ") => " + newID +
1021                         ", expected " + expID);
1022             }
1023         }
1024     }
1025 
1026     /**
1027      * Test the normalization transliterator.
1028      */
1029     @Test
TestNormalizationTransliterator()1030     public void TestNormalizationTransliterator() {
1031         // THE FOLLOWING TWO TABLES ARE COPIED FROM ohos.global.icu.dev.test.normalizer.BasicTest
1032         // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1033         String[][] CANON = {
1034                 // Input               Decomposed            Composed
1035                 {"cat",                "cat",                "cat"               },
1036                 {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },
1037 
1038                 {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above
1039                 {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above
1040 
1041                 {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above
1042                 {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below
1043                 {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above
1044 
1045                 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1046                 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1047 
1048                 {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave
1049                 {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave
1050                 {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron
1051 
1052                 {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign
1053                 {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring
1054 
1055                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0
1056                 {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0
1057 
1058                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1059                 {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },
1060 
1061                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1062                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1063                 {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten
1064                 {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten
1065                 {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten
1066 
1067                 {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },
1068         };
1069 
1070         String[][] COMPAT = {
1071                 // Input               Decomposed            Composed
1072                 {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed
1073 
1074                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0
1075                 {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i
1076 
1077                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1078                 {"Henry \u2163",       "Henry IV",           "Henry IV"          },
1079 
1080                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1081                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1082 
1083                 {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten
1084         };
1085 
1086         Transliterator NFD = Transliterator.getInstance("NFD");
1087         Transliterator NFC = Transliterator.getInstance("NFC");
1088         for (int i=0; i<CANON.length; ++i) {
1089             String in = CANON[i][0];
1090             String expd = CANON[i][1];
1091             String expc = CANON[i][2];
1092             expect(NFD, in, expd);
1093             expect(NFC, in, expc);
1094         }
1095 
1096         Transliterator NFKD = Transliterator.getInstance("NFKD");
1097         Transliterator NFKC = Transliterator.getInstance("NFKC");
1098         for (int i=0; i<COMPAT.length; ++i) {
1099             String in = COMPAT[i][0];
1100             String expkd = COMPAT[i][1];
1101             String expkc = COMPAT[i][2];
1102             expect(NFKD, in, expkd);
1103             expect(NFKC, in, expkc);
1104         }
1105 
1106         Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1107         expect(t, "\u010dx", "c\u030C");
1108     }
1109 
1110     /**
1111      * Test compound RBT rules.
1112      */
1113     @Test
TestCompoundRBT()1114     public void TestCompoundRBT() {
1115         // Careful with spacing and ';' here:  Phrase this exactly
1116         // as toRules() is going to return it.  If toRules() changes
1117         // with regard to spacing or ';', then adjust this string.
1118         String rule = "::Hex-Any;\n" +
1119         "::Any-Lower;\n" +
1120         "a > '.A.';\n" +
1121         "b > '.B.';\n" +
1122         "::[^t]Any-Upper;";
1123         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1124         if (t == null) {
1125             errln("FAIL: createFromRules failed");
1126             return;
1127         }
1128         expect(t, "\u0043at in the hat, bat on the mat",
1129         "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1130         String r = t.toRules(true);
1131         if (r.equals(rule)) {
1132             logln("OK: toRules() => " + r);
1133         } else {
1134             errln("FAIL: toRules() => " + r +
1135                     ", expected " + rule);
1136         }
1137 
1138         // Now test toRules
1139         t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1140         if (t == null) {
1141             errln("FAIL: createInstance failed");
1142             return;
1143         }
1144         String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1145         r = t.toRules(true);
1146         if (!r.equals(exp)) {
1147             errln("FAIL: toRules() => " + r +
1148                     ", expected " + exp);
1149         } else {
1150             logln("OK: toRules() => " + r);
1151         }
1152 
1153         // Round trip the result of toRules
1154         t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1155         if (t == null) {
1156             errln("FAIL: createFromRules #2 failed");
1157             return;
1158         } else {
1159             logln("OK: createFromRules(" + r + ") succeeded");
1160         }
1161 
1162         // Test toRules again
1163         r = t.toRules(true);
1164         if (!r.equals(exp)) {
1165             errln("FAIL: toRules() => " + r +
1166                     ", expected " + exp);
1167         } else {
1168             logln("OK: toRules() => " + r);
1169         }
1170 
1171         // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1172         // to what the regenerated ID will look like.
1173         String id = "Upper(Lower);(NFKC)";
1174         t = Transliterator.getInstance(id, Transliterator.FORWARD);
1175         if (t == null) {
1176             errln("FAIL: createInstance #2 failed");
1177             return;
1178         }
1179         if (t.getID().equals(id)) {
1180             logln("OK: created " + id);
1181         } else {
1182             errln("FAIL: createInstance(" + id +
1183                     ").getID() => " + t.getID());
1184         }
1185 
1186         Transliterator u = t.getInverse();
1187         if (u == null) {
1188             errln("FAIL: createInverse failed");
1189             return;
1190         }
1191         exp = "NFKC();Lower(Upper)";
1192         if (u.getID().equals(exp)) {
1193             logln("OK: createInverse(" + id + ") => " +
1194                     u.getID());
1195         } else {
1196             errln("FAIL: createInverse(" + id + ") => " +
1197                     u.getID());
1198         }
1199     }
1200 
1201     /**
1202      * Compound filter semantics were orginially not implemented
1203      * correctly.  Originally, each component filter f(i) is replaced by
1204      * f'(i) = f(i) && g, where g is the filter for the compound
1205      * transliterator.
1206      *
1207      * From Mark:
1208      *
1209      * Suppose and I have a transliterator X. Internally X is
1210      * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1211      *
1212      * The compound should convert all greek characters (through latin) to
1213      * cyrillic, then lowercase the result. The filter should say "don't
1214      * touch 'A' in the original". But because an intermediate result
1215      * happens to go through "A", the Greek Alpha gets hung up.
1216      */
1217     @Test
TestCompoundFilter()1218     public void TestCompoundFilter() {
1219         Transliterator t = Transliterator.getInstance
1220         ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
1221         t.setFilter(new UnicodeSet("[^A]"));
1222 
1223         // Only the 'A' at index 1 should remain unchanged
1224         expect(t,
1225                 CharsToUnicodeString("BA\\u039A\\u0391"),
1226                 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1227     }
1228 
1229     /**
1230      * Test the "Remove" transliterator.
1231      */
1232     @Test
TestRemove()1233     public void TestRemove() {
1234         Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1235         expect(t, "The quick brown fox.",
1236         "Th qck brwn fx.");
1237     }
1238 
1239     @Test
TestToRules()1240     public void TestToRules() {
1241         String RBT = "rbt";
1242         String SET = "set";
1243         String[] DATA = {
1244                 RBT,
1245                 "$a=\\u4E61; [$a] > A;",
1246                 "[\\u4E61] > A;",
1247 
1248                 RBT,
1249                 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1250                 "[[:Zs:][:Zl:]]{a} > A;",
1251 
1252                 SET,
1253                 "[[:Zs:][:Zl:]]",
1254                 "[[:Zs:][:Zl:]]",
1255 
1256                 SET,
1257                 "[:Ps:]",
1258                 "[:Ps:]",
1259 
1260                 SET,
1261                 "[:L:]",
1262                 "[:L:]",
1263 
1264                 SET,
1265                 "[[:L:]-[A]]",
1266                 "[[:L:]-[A]]",
1267 
1268                 SET,
1269                 "[~[:Lu:][:Ll:]]",
1270                 "[~[:Lu:][:Ll:]]",
1271 
1272                 SET,
1273                 "[~[a-z]]",
1274                 "[~[a-z]]",
1275 
1276                 RBT,
1277                 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1278                 "[^[:Zs:]]{a} > A;",
1279 
1280                 RBT,
1281                 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1282                 "[[a-z]-[:Zs:]]{a} > A;",
1283 
1284                 RBT,
1285                 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1286                 "[[:Zs:]&[a-z]]{a} > A;",
1287 
1288                 RBT,
1289                 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1290                 "[x[:Zs:]]{a} > A;",
1291 
1292                 RBT,
1293                 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1294                 "$macron = \\u0304 ;"+
1295                 "$evowel = [aeiouyAEIOUY] ;"+
1296                 "$iotasub = \\u0345 ;"+
1297                 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1298                 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1299 
1300                 RBT,
1301                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1302                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1303         };
1304 
1305         for (int d=0; d < DATA.length; d+=3) {
1306             if (DATA[d] == RBT) {
1307                 // Transliterator test
1308                 Transliterator t = Transliterator.createFromRules("ID",
1309                         DATA[d+1], Transliterator.FORWARD);
1310                 if (t == null) {
1311                     errln("FAIL: createFromRules failed");
1312                     return;
1313                 }
1314                 String rules, escapedRules;
1315                 rules = t.toRules(false);
1316                 escapedRules = t.toRules(true);
1317                 String expRules = Utility.unescape(DATA[d+2]);
1318                 String expEscapedRules = DATA[d+2];
1319                 if (rules.equals(expRules)) {
1320                     logln("Ok: " + DATA[d+1] +
1321                             " => " + Utility.escape(rules));
1322                 } else {
1323                     errln("FAIL: " + DATA[d+1] +
1324                             " => " + Utility.escape(rules + ", exp " + expRules));
1325                 }
1326                 if (escapedRules.equals(expEscapedRules)) {
1327                     logln("Ok: " + DATA[d+1] +
1328                             " => " + escapedRules);
1329                 } else {
1330                     errln("FAIL: " + DATA[d+1] +
1331                             " => " + escapedRules + ", exp " + expEscapedRules);
1332                 }
1333 
1334             } else {
1335                 // UnicodeSet test
1336                 String pat = DATA[d+1];
1337                 String expToPat = DATA[d+2];
1338                 UnicodeSet set = new UnicodeSet(pat);
1339 
1340                 // Adjust spacing etc. as necessary.
1341                 String toPat;
1342                 toPat = set.toPattern(true);
1343                 if (expToPat.equals(toPat)) {
1344                     logln("Ok: " + pat +
1345                             " => " + toPat);
1346                 } else {
1347                     errln("FAIL: " + pat +
1348                             " => " + Utility.escape(toPat) +
1349                             ", exp " + Utility.escape(pat));
1350                 }
1351             }
1352         }
1353     }
1354 
1355     @Test
TestContext()1356     public void TestContext() {
1357         Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
1358 
1359         expect("de > x; {d}e > y;",
1360                 "de",
1361                 "ye",
1362                 pos);
1363 
1364         expect("ab{c} > z;",
1365                 "xadabdabcy",
1366         "xadabdabzy");
1367     }
1368 
CharsToUnicodeString(String s)1369     static final String CharsToUnicodeString(String s) {
1370         return Utility.unescape(s);
1371     }
1372 
1373     @Test
TestSupplemental()1374     public void TestSupplemental() {
1375 
1376         expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1377         "a > $a; $s > i;"),
1378         CharsToUnicodeString("ab\\U0001030Fx"),
1379         CharsToUnicodeString("\\U00010300bix"));
1380 
1381         expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1382                 "$b=[A-Z\\U00010400-\\U0001044D];" +
1383         "($a)($b) > $2 $1;"),
1384         CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1385         CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1386 
1387         // k|ax\\U00010300xm
1388 
1389         // k|a\\U00010400\\U00010300xm
1390         // ky|\\U00010400\\U00010300xm
1391         // ky\\U00010400|\\U00010300xm
1392 
1393         // ky\\U00010400|\\U00010300\\U00010400m
1394         // ky\\U00010400y|\\U00010400m
1395         expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1396                 "$a {x} > | @ \\U00010400;" +
1397         "{$a} [^\\u0000-\\uFFFF] > y;"),
1398         CharsToUnicodeString("kax\\U00010300xm"),
1399         CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1400 
1401         expect(Transliterator.getInstance("Any-Name"),
1402                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1403         "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1404 
1405         expect(Transliterator.getInstance("Name-Any"),
1406                 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1407                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1408 
1409         expect(Transliterator.getInstance("Any-Hex/Unicode"),
1410                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1411         "U+10330U+10FF00U+E0061U+00A0");
1412 
1413         expect(Transliterator.getInstance("Any-Hex/C"),
1414                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1415         "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1416 
1417         expect(Transliterator.getInstance("Any-Hex/Perl"),
1418                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1419         "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1420 
1421         expect(Transliterator.getInstance("Any-Hex/Java"),
1422                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1423         "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1424 
1425         expect(Transliterator.getInstance("Any-Hex/XML"),
1426                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1427         "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1428 
1429         expect(Transliterator.getInstance("Any-Hex/XML10"),
1430                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1431         "&#66352;&#1113856;&#917601;&#160;");
1432 
1433         expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1434                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1435                 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1436     }
1437 
1438     @Test
TestQuantifier()1439     public void TestQuantifier() {
1440 
1441         // Make sure @ in a quantified anteContext works
1442         expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1443                 "AAAAAb",
1444         "aaa(aac)");
1445 
1446         // Make sure @ in a quantified postContext works
1447         expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1448                 "baaaaa",
1449         "caa(aaa)");
1450 
1451         // Make sure @ in a quantified postContext with seg ref works
1452         expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1453                 "baaaaa",
1454         "baa(aaa)");
1455 
1456         // Make sure @ past ante context doesn't enter ante context
1457         Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1458         expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1459                 "xxxab",
1460                 "xxx(ac)",
1461                 pos);
1462 
1463         // Make sure @ past post context doesn't pass limit
1464         Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1465         expect("{b} a+ > c @@ |; x > y; a > A;",
1466                 "baxx",
1467                 "caxx",
1468                 pos2);
1469 
1470         // Make sure @ past post context doesn't enter post context
1471         expect("{b} a+ > c @@ |; x > y; a > A;",
1472                 "baxx",
1473         "cayy");
1474 
1475         expect("(ab)? c > d;",
1476                 "c abc ababc",
1477         "d d abd");
1478 
1479         // NOTE: The (ab)+ when referenced just yields a single "ab",
1480         // not the full sequence of them.  This accords with perl behavior.
1481         expect("(ab)+ {x} > '(' $1 ')';",
1482                 "x abx ababxy",
1483         "x ab(ab) abab(ab)y");
1484 
1485         expect("b+ > x;",
1486                 "ac abc abbc abbbc",
1487         "ac axc axc axc");
1488 
1489         expect("[abc]+ > x;",
1490                 "qac abrc abbcs abtbbc",
1491         "qx xrx xs xtx");
1492 
1493         expect("q{(ab)+} > x;",
1494                 "qa qab qaba qababc qaba",
1495         "qa qx qxa qxc qxa");
1496 
1497         expect("q(ab)* > x;",
1498                 "qa qab qaba qababc",
1499         "xa x xa xc");
1500 
1501         // NOTE: The (ab)+ when referenced just yields a single "ab",
1502         // not the full sequence of them.  This accords with perl behavior.
1503         expect("q(ab)* > '(' $1 ')';",
1504                 "qa qab qaba qababc",
1505         "()a (ab) (ab)a (ab)c");
1506 
1507         // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1508         // quoted string
1509         expect("'ab'+ > x;",
1510                 "bb ab ababb",
1511         "bb x xb");
1512 
1513         // $foo+ and $foo* -- the quantifier should apply to the entire
1514         // variable reference
1515         expect("$var = ab; $var+ > x;",
1516                 "bb ab ababb",
1517         "bb x xb");
1518     }
1519 
1520     static class TestFact implements Transliterator.Factory {
1521         static class NameableNullTrans extends Transliterator {
NameableNullTrans(String id)1522             public NameableNullTrans(String id) {
1523                 super(id, null);
1524             }
1525             @Override
handleTransliterate(Replaceable text, Position offsets, boolean incremental)1526             protected void handleTransliterate(Replaceable text,
1527                     Position offsets, boolean incremental) {
1528                 offsets.start = offsets.limit;
1529             }
1530         }
1531         String id;
TestFact(String theID)1532         public TestFact(String theID) {
1533             id = theID;
1534         }
1535         @Override
getInstance(String ignoredID)1536         public Transliterator getInstance(String ignoredID) {
1537             return new NameableNullTrans(id);
1538         }
1539     }
1540 
1541     @Test
TestSTV()1542     public void TestSTV() {
1543         Enumeration es = Transliterator.getAvailableSources();
1544         for (int i=0; es.hasMoreElements(); ++i) {
1545             String source = (String) es.nextElement();
1546             logln("" + i + ": " + source);
1547             if (source.length() == 0) {
1548                 errln("FAIL: empty source");
1549                 continue;
1550             }
1551             Enumeration et = Transliterator.getAvailableTargets(source);
1552             for (int j=0; et.hasMoreElements(); ++j) {
1553                 String target = (String) et.nextElement();
1554                 logln(" " + j + ": " + target);
1555                 if (target.length() == 0) {
1556                     errln("FAIL: empty target");
1557                     continue;
1558                 }
1559                 Enumeration ev = Transliterator.getAvailableVariants(source, target);
1560                 for (int k=0; ev.hasMoreElements(); ++k) {
1561                     String variant = (String) ev.nextElement();
1562                     if (variant.length() == 0) {
1563                         logln("  " + k + ": <empty>");
1564                     } else {
1565                         logln("  " + k + ": " + variant);
1566                     }
1567                 }
1568             }
1569         }
1570 
1571         // Test registration
1572         String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1573         String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1574         String[] SOURCES = { null, "Seoridf", "Oewoir" };
1575         for (int i=0; i<3; ++i) {
1576             Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1577             try {
1578                 Transliterator t = Transliterator.getInstance(IDS[i]);
1579                 if (t.getID().equals(IDS[i])) {
1580                     logln("Ok: Registration/creation succeeded for ID " +
1581                             IDS[i]);
1582                 } else {
1583                     errln("FAIL: Registration of ID " +
1584                             IDS[i] + " creates ID " + t.getID());
1585                 }
1586                 Transliterator.unregister(IDS[i]);
1587                 try {
1588                     t = Transliterator.getInstance(IDS[i]);
1589                     errln("FAIL: Unregistration failed for ID " +
1590                             IDS[i] + "; still receiving ID " + t.getID());
1591                 } catch (IllegalArgumentException e2) {
1592                     // Good; this is what we expect
1593                     logln("Ok; Unregistered " + IDS[i]);
1594                 }
1595             } catch (IllegalArgumentException e) {
1596                 errln("FAIL: Registration/creation failed for ID " +
1597                         IDS[i]);
1598             } finally {
1599                 Transliterator.unregister(IDS[i]);
1600             }
1601         }
1602 
1603         // Make sure getAvailable API reflects removal
1604         for (Enumeration e = Transliterator.getAvailableIDs();
1605         e.hasMoreElements(); ) {
1606             String id = (String) e.nextElement();
1607             for (int i=0; i<3; ++i) {
1608                 if (id.equals(FULL_IDS[i])) {
1609                     errln("FAIL: unregister(" + id + ") failed");
1610                 }
1611             }
1612         }
1613         for (Enumeration e = Transliterator.getAvailableTargets("Any");
1614         e.hasMoreElements(); ) {
1615             String t = (String) e.nextElement();
1616             if (t.equals(IDS[0])) {
1617                 errln("FAIL: unregister(Any-" + t + ") failed");
1618             }
1619         }
1620         for (Enumeration e = Transliterator.getAvailableSources();
1621         e.hasMoreElements(); ) {
1622             String s = (String) e.nextElement();
1623             for (int i=0; i<3; ++i) {
1624                 if (SOURCES[i] == null) continue;
1625                 if (s.equals(SOURCES[i])) {
1626                     errln("FAIL: unregister(" + s + "-*) failed");
1627                 }
1628             }
1629         }
1630     }
1631 
1632     /**
1633      * Test inverse of Greek-Latin; Title()
1634      */
1635     @Test
TestCompoundInverse()1636     public void TestCompoundInverse() {
1637         Transliterator t = Transliterator.getInstance
1638         ("Greek-Latin; Title()", Transliterator.REVERSE);
1639         if (t == null) {
1640             errln("FAIL: createInstance");
1641             return;
1642         }
1643         String exp = "(Title);Latin-Greek";
1644         if (t.getID().equals(exp)) {
1645             logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1646                     t.getID());
1647         } else {
1648             errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1649                     t.getID() + "\", expected \"" + exp + "\"");
1650         }
1651     }
1652 
1653     /**
1654      * Test NFD chaining with RBT
1655      */
1656     @Test
TestNFDChainRBT()1657     public void TestNFDChainRBT() {
1658         Transliterator t = Transliterator.createFromRules(
1659                 "TEST", "::NFD; aa > Q; a > q;",
1660                 Transliterator.FORWARD);
1661         logln(t.toRules(true));
1662         expect(t, "aa", "Q");
1663     }
1664 
1665     /**
1666      * Inverse of "Null" should be "Null". (J21)
1667      */
1668     @Test
TestNullInverse()1669     public void TestNullInverse() {
1670         Transliterator t = Transliterator.getInstance("Null");
1671         Transliterator u = t.getInverse();
1672         if (!u.getID().equals("Null")) {
1673             errln("FAIL: Inverse of Null should be Null");
1674         }
1675     }
1676 
1677     /**
1678      * Check ID of inverse of alias. (J22)
1679      */
1680     @Test
TestAliasInverseID()1681     public void TestAliasInverseID() {
1682         String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1683         Transliterator t = Transliterator.getInstance(ID);
1684         Transliterator u = t.getInverse();
1685         String exp = "Hangul-Latin";
1686         String got = u.getID();
1687         if (!got.equals(exp)) {
1688             errln("FAIL: Inverse of " + ID + " is " + got +
1689                     ", expected " + exp);
1690         }
1691     }
1692 
1693     /**
1694      * Test IDs of inverses of compound transliterators. (J20)
1695      */
1696     @Test
TestCompoundInverseID()1697     public void TestCompoundInverseID() {
1698         String ID = "Latin-Jamo;NFC(NFD)";
1699         Transliterator t = Transliterator.getInstance(ID);
1700         Transliterator u = t.getInverse();
1701         String exp = "NFD(NFC);Jamo-Latin";
1702         String got = u.getID();
1703         if (!got.equals(exp)) {
1704             errln("FAIL: Inverse of " + ID + " is " + got +
1705                     ", expected " + exp);
1706         }
1707     }
1708 
1709     /**
1710      * Test undefined variable.
1711      */
1712     @Test
TestUndefinedVariable()1713     public void TestUndefinedVariable() {
1714         String rule = "$initial } a <> \u1161;";
1715         try {
1716             Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1717         } catch (IllegalArgumentException e) {
1718             logln("OK: Got exception for " + rule + ", as expected: " +
1719                     e.getMessage());
1720             return;
1721         }
1722         errln("Fail: bogus rule " + rule + " compiled without error");
1723     }
1724 
1725     /**
1726      * Test empty context.
1727      */
1728     @Test
TestEmptyContext()1729     public void TestEmptyContext() {
1730         expect(" { a } > b;", "xay a ", "xby b ");
1731     }
1732 
1733     /**
1734      * Test compound filter ID syntax
1735      */
1736     @Test
TestCompoundFilterID()1737     public void TestCompoundFilterID() {
1738         String[] DATA = {
1739                 // Col. 1 = ID or rule set (latter must start with #)
1740 
1741                 // = columns > 1 are null if expect col. 1 to be illegal =
1742 
1743                 // Col. 2 = direction, "F..." or "R..."
1744                 // Col. 3 = source string
1745                 // Col. 4 = exp result
1746 
1747                 "[abc]; [abc]", null, null, null, // multiple filters
1748                 "Latin-Greek; [abc];", null, null, null, // misplaced filter
1749                 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1750                 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1751                 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1752                 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1753         };
1754 
1755         for (int i=0; i<DATA.length; i+=4) {
1756             String id = DATA[i];
1757             int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1758                     Transliterator.REVERSE : Transliterator.FORWARD;
1759             String source = DATA[i+2];
1760             String exp = DATA[i+3];
1761             boolean expOk = (DATA[i+1] != null);
1762             Transliterator t = null;
1763             IllegalArgumentException e = null;
1764             try {
1765                 if (id.charAt(0) == '#') {
1766                     t = Transliterator.createFromRules("ID", id, direction);
1767                 } else {
1768                     t = Transliterator.getInstance(id, direction);
1769                 }
1770             } catch (IllegalArgumentException ee) {
1771                 e = ee;
1772             }
1773             boolean ok = (t != null && e == null);
1774             if (ok == expOk) {
1775                 logln("Ok: " + id + " => " + t +
1776                         (e != null ? (", " + e.getMessage()) : ""));
1777                 if (source != null) {
1778                     expect(t, source, exp);
1779                 }
1780             } else {
1781                 errln("FAIL: " + id + " => " + t +
1782                         (e != null ? (", " + e.getMessage()) : ""));
1783             }
1784         }
1785     }
1786 
1787     /**
1788      * Test new property set syntax
1789      */
1790     @Test
TestPropertySet()1791     public void TestPropertySet() {
1792         expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1793         expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1794         "[ a stitch ]\n[ in time ]\r[ saves 9]");
1795     }
1796 
1797     /**
1798      * Test various failure points of the new 2.0 engine.
1799      */
1800     @Test
TestNewEngine()1801     public void TestNewEngine() {
1802         Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1803         // Katakana should be untouched
1804         expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1805 
1806         if (true) {
1807             // This test will only work if Transliterator.ROLLBACK is
1808             // true.  Otherwise, this test will fail, revealing a
1809             // limitation of global filters in incremental mode.
1810 
1811             Transliterator a =
1812                 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1813             Transliterator A =
1814                 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1815 
1816             //Transliterator array[] = new Transliterator[] {
1817             //    a,
1818             //    Transliterator.getInstance("NFD"),
1819             //    A };
1820             //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1821 
1822             try {
1823                 Transliterator.registerInstance(a);
1824                 Transliterator.registerInstance(A);
1825 
1826                 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1827                 expect(t, "aAaA", "bAbA");
1828 
1829                 Transliterator[] u = t.getElements();
1830                 assertTrue("getElements().length", u.length == 3);
1831                 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1832                 assertEquals("getElements()[1]", u[1].getID(), "NFD");
1833                 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1834 
1835                 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1836                 t.setFilter(new UnicodeSet("[:Ll:]"));
1837                 expect(t, "aAaA", "bAbA");
1838             } finally {
1839                 Transliterator.unregister("a_to_A");
1840                 Transliterator.unregister("A_to_b");
1841             }
1842         }
1843 
1844         expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1845                 "a",
1846         "ax");
1847 
1848         String gr =
1849             "$ddot = \u0308 ;" +
1850             "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1851             "$rough = \u0314 ;" +
1852             "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
1853             "\u03b1 <> a ;" +
1854             "$rough <> h ;";
1855 
1856         expect(gr, "\u03B1\u0314", "ha");
1857     }
1858 
1859     /**
1860      * Test quantified segment behavior.  We want:
1861      * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1862      */
1863     @Test
TestQuantifiedSegment()1864     public void TestQuantifiedSegment() {
1865         // The normal case
1866         expect("([abc]+) > x $1 x;", "cba", "xcbax");
1867 
1868         // The tricky case; the quantifier is around the segment
1869         expect("([abc])+ > x $1 x;", "cba", "xax");
1870 
1871         // Tricky case in reverse direction
1872         expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1873 
1874         // Check post-context segment
1875         expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1876 
1877         // Test toRule/toPattern for non-quantified segment.
1878         // Careful with spacing here.
1879         String r = "([a-c]){q} > x $1 x;";
1880         Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1881         String rr = t.toRules(true);
1882         if (!r.equals(rr)) {
1883             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1884         } else {
1885             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1886         }
1887 
1888         // Test toRule/toPattern for quantified segment.
1889         // Careful with spacing here.
1890         r = "([a-c])+{q} > x $1 x;";
1891         t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1892         rr = t.toRules(true);
1893         if (!r.equals(rr)) {
1894             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1895         } else {
1896             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1897         }
1898     }
1899 
1900     //======================================================================
1901     // Ram's tests
1902     //======================================================================
1903     /* this test performs  test of rules in ISO 15915 */
1904     @Test
TestDevanagariLatinRT()1905     public void  TestDevanagariLatinRT(){
1906         String[]  source = {
1907                 "bh\u0101rata",
1908                 "kra",
1909                 "k\u1E63a",
1910                 "khra",
1911                 "gra",
1912                 "\u1E45ra",
1913                 "cra",
1914                 "chra",
1915                 "j\u00F1a",
1916                 "jhra",
1917                 "\u00F1ra",
1918                 "\u1E6Dya",
1919                 "\u1E6Dhra",
1920                 "\u1E0Dya",
1921                 //"r\u0323ya", // \u095c is not valid in Devanagari
1922                 "\u1E0Dhya",
1923                 "\u1E5Bhra",
1924                 "\u1E47ra",
1925                 "tta",
1926                 "thra",
1927                 "dda",
1928                 "dhra",
1929                 "nna",
1930                 "pra",
1931                 "phra",
1932                 "bra",
1933                 "bhra",
1934                 "mra",
1935                 "\u1E49ra",
1936                 //"l\u0331ra",
1937                 "yra",
1938                 "\u1E8Fra",
1939                 //"l-",
1940                 "vra",
1941                 "\u015Bra",
1942                 "\u1E63ra",
1943                 "sra",
1944                 "hma",
1945                 "\u1E6D\u1E6Da",
1946                 "\u1E6D\u1E6Dha",
1947                 "\u1E6Dh\u1E6Dha",
1948                 "\u1E0D\u1E0Da",
1949                 "\u1E0D\u1E0Dha",
1950                 "\u1E6Dya",
1951                 "\u1E6Dhya",
1952                 "\u1E0Dya",
1953                 "\u1E0Dhya",
1954                 // Not roundtrippable --
1955                 // \u0939\u094d\u094d\u092E  - hma
1956                 // \u0939\u094d\u092E         - hma
1957                 // CharsToUnicodeString("hma"),
1958                 "hya",
1959                 "\u015Br\u0325",
1960                 "\u015Bca",
1961                 "\u0115",
1962                 "san\u0304j\u012Bb s\u0113nagupta",
1963                 "\u0101nand vaddir\u0101ju",
1964         };
1965         String[]  expected = {
1966                 "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */
1967                 "\u0915\u094D\u0930",          /* kra         */
1968                 "\u0915\u094D\u0937",          /* ks\u0323a  */
1969                 "\u0916\u094D\u0930",          /* khra        */
1970                 "\u0917\u094D\u0930",          /* gra         */
1971                 "\u0919\u094D\u0930",          /* n\u0307ra  */
1972                 "\u091A\u094D\u0930",          /* cra         */
1973                 "\u091B\u094D\u0930",          /* chra        */
1974                 "\u091C\u094D\u091E",          /* jn\u0303a  */
1975                 "\u091D\u094D\u0930",          /* jhra        */
1976                 "\u091E\u094D\u0930",          /* n\u0303ra  */
1977                 "\u091F\u094D\u092F",          /* t\u0323ya  */
1978                 "\u0920\u094D\u0930",          /* t\u0323hra */
1979                 "\u0921\u094D\u092F",          /* d\u0323ya  */
1980                 //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari
1981                 "\u0922\u094D\u092F",          /* d\u0323hya */
1982                 "\u0922\u093C\u094D\u0930",    /* r\u0323hra */
1983                 "\u0923\u094D\u0930",          /* n\u0323ra  */
1984                 "\u0924\u094D\u0924",          /* tta         */
1985                 "\u0925\u094D\u0930",          /* thra        */
1986                 "\u0926\u094D\u0926",          /* dda         */
1987                 "\u0927\u094D\u0930",          /* dhra        */
1988                 "\u0928\u094D\u0928",          /* nna         */
1989                 "\u092A\u094D\u0930",          /* pra         */
1990                 "\u092B\u094D\u0930",          /* phra        */
1991                 "\u092C\u094D\u0930",          /* bra         */
1992                 "\u092D\u094D\u0930",          /* bhra        */
1993                 "\u092E\u094D\u0930",          /* mra         */
1994                 "\u0929\u094D\u0930",          /* n\u0331ra  */
1995                 //"\u0934\u094D\u0930",          /* l\u0331ra  */
1996                 "\u092F\u094D\u0930",          /* yra         */
1997                 "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */
1998                 //"l-",
1999                 "\u0935\u094D\u0930",          /* vra         */
2000                 "\u0936\u094D\u0930",          /* s\u0301ra  */
2001                 "\u0937\u094D\u0930",          /* s\u0323ra  */
2002                 "\u0938\u094D\u0930",          /* sra         */
2003                 "\u0939\u094d\u092E",          /* hma         */
2004                 "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */
2005                 "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */
2006                 "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/
2007                 "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */
2008                 "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */
2009                 "\u091F\u094D\u092F",          /* t\u0323ya  */
2010                 "\u0920\u094D\u092F",          /* t\u0323hya */
2011                 "\u0921\u094D\u092F",          /* d\u0323ya  */
2012                 "\u0922\u094D\u092F",          /* d\u0323hya */
2013                 // "hma",                         /* hma         */
2014                 "\u0939\u094D\u092F",          /* hya         */
2015                 "\u0936\u0943",                /* s\u0301r\u0325a  */
2016                 "\u0936\u094D\u091A",          /* s\u0301ca  */
2017                 "\u090d",                      /* e\u0306    */
2018                 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
2019                 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
2020         };
2021 
2022         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
2023         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2024 
2025         for(int i= 0; i<source.length; i++){
2026             expect(latinToDev,(source[i]),(expected[i]));
2027             expect(devToLatin,(expected[i]),(source[i]));
2028         }
2029 
2030     }
2031     @Test
TestTeluguLatinRT()2032     public void  TestTeluguLatinRT(){
2033         String[]  source = {
2034                 "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */
2035                 "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */
2036                 "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */
2037                 "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */
2038                 "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */
2039                 "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */
2040                 "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */
2041                 "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */
2042                 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */
2043                 "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */
2044         };
2045 
2046         String[]  expected = {
2047                 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2048                 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2049                 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2050                 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2051                 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2052                 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2053                 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2054                 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2055                 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2056                 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2057         };
2058 
2059 
2060         Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2061         Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2062 
2063         for(int i= 0; i<source.length; i++){
2064             expect(latinToDev,(source[i]),(expected[i]));
2065             expect(devToLatin,(expected[i]),(source[i]));
2066         }
2067     }
2068 
2069     @Test
TestSanskritLatinRT()2070     public void  TestSanskritLatinRT(){
2071         int MAX_LEN =15;
2072         String[]  source = {
2073                 "rmk\u1E63\u0113t",
2074                 "\u015Br\u012Bmad",
2075                 "bhagavadg\u012Bt\u0101",
2076                 "adhy\u0101ya",
2077                 "arjuna",
2078                 "vi\u1E63\u0101da",
2079                 "y\u014Dga",
2080                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2081                 "uv\u0101cr\u0325",
2082                 "dharmak\u1E63\u0113tr\u0113",
2083                 "kuruk\u1E63\u0113tr\u0113",
2084                 "samav\u0113t\u0101",
2085                 "yuyutsava\u1E25",
2086                 "m\u0101mak\u0101\u1E25",
2087                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2088                 "kimakurvata",
2089                 "san\u0304java",
2090         };
2091         String[]  expected = {
2092                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2093                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2094                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2095                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2096                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2097                 "\u0935\u093f\u0937\u093e\u0926",
2098                 "\u092f\u094b\u0917",
2099                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2100                 "\u0909\u0935\u093E\u091A\u0943",
2101                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2102                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2103                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2104                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2105                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2106                 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2107                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2108                 "\u0938\u0902\u091c\u0935",
2109         };
2110 
2111         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2112         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2113         for(int i= 0; i<MAX_LEN; i++){
2114             expect(latinToDev,(source[i]),(expected[i]));
2115             expect(devToLatin,(expected[i]),(source[i]));
2116         }
2117     }
2118 
2119     @Test
TestCompoundLatinRT()2120     public void  TestCompoundLatinRT(){
2121         int MAX_LEN =15;
2122         String[]  source = {
2123                 "rmk\u1E63\u0113t",
2124                 "\u015Br\u012Bmad",
2125                 "bhagavadg\u012Bt\u0101",
2126                 "adhy\u0101ya",
2127                 "arjuna",
2128                 "vi\u1E63\u0101da",
2129                 "y\u014Dga",
2130                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2131                 "uv\u0101cr\u0325",
2132                 "dharmak\u1E63\u0113tr\u0113",
2133                 "kuruk\u1E63\u0113tr\u0113",
2134                 "samav\u0113t\u0101",
2135                 "yuyutsava\u1E25",
2136                 "m\u0101mak\u0101\u1E25",
2137                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2138                 "kimakurvata",
2139                 "san\u0304java"
2140         };
2141         String[]  expected = {
2142                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2143                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2144                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2145                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2146                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2147                 "\u0935\u093f\u0937\u093e\u0926",
2148                 "\u092f\u094b\u0917",
2149                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2150                 "\u0909\u0935\u093E\u091A\u0943",
2151                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2152                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2153                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2154                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2155                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2156                 //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2157                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2158                 "\u0938\u0902\u091c\u0935"
2159         };
2160 
2161         Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2162         Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
2163         for(int i= 0; i<MAX_LEN; i++){
2164             expect(latinToDevToLatin,(source[i]),(source[i]));
2165             expect(devToLatinToDev,(expected[i]),(expected[i]));
2166         }
2167     }
2168     /**
2169      * Test Gurmukhi-Devanagari Tippi and Bindi
2170      */
2171     @Test
TestGurmukhiDevanagari()2172     public void TestGurmukhiDevanagari(){
2173         // the rule says:
2174         // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2175         // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2176 
2177         UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2178         UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2179 
2180         UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2181         UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2182         Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
2183         StringBuffer src = new StringBuffer(" \u0902");
2184         StringBuffer expect = new StringBuffer(" \u0A02");
2185         while(vIter.next()){
2186             src.setCharAt(0,(char) vIter.codepoint);
2187             expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2188             expect(trans,src.toString(),expect.toString());
2189         }
2190 
2191         expect.setCharAt(1,'\u0A70');
2192         while(nvIter.next()){
2193             //src.setCharAt(0,(char) nvIter.codepoint);
2194             src.setCharAt(0,(char)nvIter.codepoint);
2195             expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2196             expect(trans,src.toString(),expect.toString());
2197         }
2198     }
2199     /**
2200      * Test instantiation from a locale.
2201      */
2202     @Test
TestLocaleInstantiation()2203     public void TestLocaleInstantiation() {
2204         Transliterator t;
2205         try{
2206             t = Transliterator.getInstance("te_IN-Latin");
2207             //expect(t, "\u0430", "a");
2208         }catch(IllegalArgumentException ex){
2209             warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
2210         }
2211         try{
2212             t = Transliterator.getInstance("ru_RU-Latin");
2213             expect(t, "\u0430", "a");
2214         }catch(IllegalArgumentException ex){
2215             warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
2216         }
2217         try{
2218             t = Transliterator.getInstance("en-el");
2219             expect(t, "a", "\u03B1");
2220         }catch(IllegalArgumentException ex){
2221             warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2222         }
2223     }
2224 
2225     /**
2226      * Test title case handling of accent (should ignore accents)
2227      */
2228     @Test
TestTitleAccents()2229     public void TestTitleAccents() {
2230         Transliterator t = Transliterator.getInstance("Title");
2231         expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2232     }
2233 
2234     /**
2235      * Basic test of a locale resource based rule.
2236      */
2237     @Test
TestLocaleResource()2238     public void TestLocaleResource() {
2239         String DATA[] = {
2240                 // id                    from             to
2241                 "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",
2242                 "Latin-el",              "b",             "\u03bc\u03c0",
2243                 "Latin-Greek",           "b",             "\u03B2",
2244                 "Greek-Latin/UNGEGN",    "\u03B2",        "v",
2245                 "el-Latin",              "\u03B2",        "v",
2246                 "Greek-Latin",           "\u03B2",        "b",
2247         };
2248         for (int i=0; i<DATA.length; i+=3) {
2249             Transliterator t = Transliterator.getInstance(DATA[i]);
2250             expect(t, DATA[i+1], DATA[i+2]);
2251         }
2252     }
2253 
2254     /**
2255      * Make sure parse errors reference the right line.
2256      */
2257     @Test
TestParseError()2258     public void TestParseError() {
2259         String rule =
2260             "a > b;\n" +
2261             "# more stuff\n" +
2262             "d << b;";
2263         try {
2264             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2265             if(t!=null){
2266                 errln("FAIL: Did not get expected exception");
2267             }
2268         } catch (IllegalArgumentException e) {
2269             String err = e.getMessage();
2270             if (err.indexOf("d << b") >= 0) {
2271                 logln("Ok: " + err);
2272             } else {
2273                 errln("FAIL: " + err);
2274             }
2275             return;
2276         }
2277         errln("FAIL: no syntax error");
2278     }
2279 
2280     /**
2281      * Make sure sets on output are disallowed.
2282      */
2283     @Test
TestOutputSet()2284     public void TestOutputSet() {
2285         String rule = "$set = [a-cm-n]; b > $set;";
2286         Transliterator t = null;
2287         try {
2288             t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2289             if(t!=null){
2290                 errln("FAIL: Did not get the expected exception");
2291             }
2292         } catch (IllegalArgumentException e) {
2293             logln("Ok: " + e.getMessage());
2294             return;
2295         }
2296         errln("FAIL: No syntax error");
2297     }
2298 
2299     /**
2300      * Test the use variable range pragma, making sure that use of
2301      * variable range characters is detected and flagged as an error.
2302      */
2303     @Test
TestVariableRange()2304     public void TestVariableRange() {
2305         String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2306         try {
2307             Transliterator t =
2308                 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2309             if(t!=null){
2310                 errln("FAIL: Did not get the expected exception");
2311             }
2312         } catch (IllegalArgumentException e) {
2313             logln("Ok: " + e.getMessage());
2314             return;
2315         }
2316         errln("FAIL: No syntax error");
2317     }
2318 
2319     /**
2320      * Test invalid post context error handling
2321      */
2322     @Test
TestInvalidPostContext()2323     public void TestInvalidPostContext() {
2324         try {
2325             Transliterator t =
2326                 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2327             if(t!=null){
2328                 errln("FAIL: Did not get the expected exception");
2329             }
2330         } catch (IllegalArgumentException e) {
2331             String msg = e.getMessage();
2332             if (msg.indexOf("a}b{c") >= 0) {
2333                 logln("Ok: " + msg);
2334             } else {
2335                 errln("FAIL: " + msg);
2336             }
2337             return;
2338         }
2339         errln("FAIL: No syntax error");
2340     }
2341 
2342     /**
2343      * Test ID form variants
2344      */
2345     @Test
TestIDForms()2346     public void TestIDForms() {
2347         String DATA[] = {
2348                 "NFC", null, "NFD",
2349                 "nfd", null, "NFC", // make sure case is ignored
2350                 "Any-NFKD", null, "Any-NFKC",
2351                 "Null", null, "Null",
2352                 "-nfkc", "nfkc", "NFKD",
2353                 "-nfkc/", "nfkc", "NFKD",
2354                 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2355                 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2356                 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2357                 "Source-", null, null,
2358                 "Source/Variant-", null, null,
2359                 "Source-/Variant", null, null,
2360                 "/Variant", null, null,
2361                 "/Variant-", null, null,
2362                 "-/Variant", null, null,
2363                 "-/", null, null,
2364                 "-", null, null,
2365                 "/", null, null,
2366         };
2367 
2368         for (int i=0; i<DATA.length; i+=3) {
2369             String ID = DATA[i];
2370             String expID = DATA[i+1];
2371             String expInvID = DATA[i+2];
2372             boolean expValid = (expInvID != null);
2373             if (expID == null) {
2374                 expID = ID;
2375             }
2376             try {
2377                 Transliterator t =
2378                     Transliterator.getInstance(ID);
2379                 Transliterator u = t.getInverse();
2380                 if (t.getID().equals(expID) &&
2381                         u.getID().equals(expInvID)) {
2382                     logln("Ok: " + ID + ".getInverse() => " + expInvID);
2383                 } else {
2384                     errln("FAIL: getInstance(" + ID + ") => " +
2385                             t.getID() + " x getInverse() => " + u.getID() +
2386                             ", expected " + expInvID);
2387                 }
2388             } catch (IllegalArgumentException e) {
2389                 if (!expValid) {
2390                     logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2391                 } else {
2392                     errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
2393                 }
2394             }
2395         }
2396     }
2397 
checkRules(String label, Transliterator t2, String testRulesForward)2398     void checkRules(String label, Transliterator t2, String testRulesForward) {
2399         String rules2 = t2.toRules(true);
2400         //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2401         rules2 = TestUtility.replace(rules2, " ", "");
2402         rules2 = TestUtility.replace(rules2, "\n", "");
2403         rules2 = TestUtility.replace(rules2, "\r", "");
2404         testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2405 
2406         if (!rules2.equals(testRulesForward)) {
2407             errln(label);
2408             logln("GENERATED RULES: " + rules2);
2409             logln("SHOULD BE:       " + testRulesForward);
2410         }
2411     }
2412 
2413     /**
2414      * Mark's toRules test.
2415      */
2416     @Test
TestToRulesMark()2417     public void TestToRulesMark() {
2418 
2419         String testRules =
2420             "::[[:Latin:][:Mark:]];"
2421             + "::NFKD (NFC);"
2422             + "::Lower (Lower);"
2423             + "a <> \\u03B1;" // alpha
2424             + "::NFKC (NFD);"
2425             + "::Upper (Lower);"
2426             + "::Lower ();"
2427             + "::([[:Greek:][:Mark:]]);"
2428             ;
2429         String testRulesForward =
2430             "::[[:Latin:][:Mark:]];"
2431             + "::NFKD(NFC);"
2432             + "::Lower(Lower);"
2433             + "a > \\u03B1;"
2434             + "::NFKC(NFD);"
2435             + "::Upper (Lower);"
2436             + "::Lower ();"
2437             ;
2438         String testRulesBackward =
2439             "::[[:Greek:][:Mark:]];"
2440             + "::Lower (Upper);"
2441             + "::NFD(NFKC);"
2442             + "\\u03B1 > a;"
2443             + "::Lower(Lower);"
2444             + "::NFC(NFKD);"
2445             ;
2446         String source = "\u00E1"; // a-acute
2447         String target = "\u03AC"; // alpha-acute
2448 
2449         Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2450         Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
2451 
2452         expect(t2, source, target);
2453         expect(t3, target, source);
2454 
2455         checkRules("Failed toRules FORWARD", t2, testRulesForward);
2456         checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2457     }
2458 
2459     /**
2460      * Test Escape and Unescape transliterators.
2461      */
2462     @Test
TestEscape()2463     public void TestEscape() {
2464         expect(Transliterator.getInstance("Hex-Any"),
2465                 "\\x{40}\\U00000031&#x32;&#81;",
2466         "@12Q");
2467         expect(Transliterator.getInstance("Any-Hex/C"),
2468                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2469         "\\u0041\\U0010BEEF\\uFEED");
2470         expect(Transliterator.getInstance("Any-Hex/Java"),
2471                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2472         "\\u0041\\uDBEF\\uDEEF\\uFEED");
2473         expect(Transliterator.getInstance("Any-Hex/Perl"),
2474                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2475         "\\x{41}\\x{10BEEF}\\x{FEED}");
2476     }
2477 
2478     /**
2479      * Make sure display names of variants look reasonable.
2480      */
2481     @Test
TestDisplayName()2482     public void TestDisplayName() {
2483         String DATA[] = {
2484                 // ID, forward name, reverse name
2485                 // Update the text as necessary -- the important thing is
2486                 // not the text itself, but how various cases are handled.
2487 
2488                 // Basic test
2489                 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2490 
2491                 // Variants
2492                 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2493 
2494                 // Target-only IDs
2495                 "NFC", "Any to NFC", "Any to NFD",
2496         };
2497 
2498         Locale US = Locale.US;
2499 
2500         for (int i=0; i<DATA.length; i+=3) {
2501             String name = Transliterator.getDisplayName(DATA[i], US);
2502             if (!name.equals(DATA[i+1])) {
2503                 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2504                         name + ", expected " + DATA[i+1]);
2505             } else {
2506                 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2507             }
2508             Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2509             name = Transliterator.getDisplayName(t.getID(), US);
2510             if (!name.equals(DATA[i+2])) {
2511                 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2512                         name + ", expected " + DATA[i+2]);
2513             } else {
2514                 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2515             }
2516 
2517             // Cover getDisplayName(String)
2518             ULocale save = ULocale.getDefault();
2519             ULocale.setDefault(ULocale.US);
2520             String name2 = Transliterator.getDisplayName(t.getID());
2521             if (!name.equals(name2))
2522                 errln("FAIL: getDisplayName with default locale failed");
2523             ULocale.setDefault(save);
2524         }
2525     }
2526 
2527     /**
2528      * Test anchor masking
2529      */
2530     @Test
TestAnchorMasking()2531     public void TestAnchorMasking() {
2532         String rule = "^a > Q; a > q;";
2533         try {
2534             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2535             if(t==null){
2536                 errln("FAIL: Did not get the expected exception");
2537             }
2538         } catch (IllegalArgumentException e) {
2539             errln("FAIL: " + rule + " => " + e);
2540         }
2541     }
2542 
2543     /**
2544      * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2545      * during ICU4J modularization to remove dependency of tests on Transliterator.
2546      */
2547     @Test
TestScriptAllCodepoints()2548     public void TestScriptAllCodepoints(){
2549         int code;
2550         HashSet  scriptIdsChecked   = new HashSet();
2551         HashSet  scriptAbbrsChecked = new HashSet();
2552         for( int i =0; i <= 0x10ffff; i++){
2553             code = UScript.getScript(i);
2554             if(code==UScript.INVALID_CODE){
2555                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2556             }
2557             String id =UScript.getName(code);
2558             String abbr = UScript.getShortName(code);
2559             if (!scriptIdsChecked.contains(id)) {
2560                 scriptIdsChecked.add(id);
2561                 String newId ="[:"+id+":];NFD";
2562                 try{
2563                     Transliterator t = Transliterator.getInstance(newId);
2564                     if(t==null){
2565                         errln("Failed to create transliterator for "+hex(i)+
2566                                 " script code: " +id);
2567                     }
2568                 }catch(Exception e){
2569                     errln("Failed to create transliterator for "+hex(i)
2570                             +" script code: " +id
2571                             + " Exception: "+e.getMessage());
2572                 }
2573             }
2574             if (!scriptAbbrsChecked.contains(abbr)) {
2575                 scriptAbbrsChecked.add(abbr);
2576                 String newAbbrId ="[:"+abbr+":];NFD";
2577                 try{
2578                     Transliterator t = Transliterator.getInstance(newAbbrId);
2579                     if(t==null){
2580                         errln("Failed to create transliterator for "+hex(i)+
2581                                 " script code: " +abbr);
2582                     }
2583                 }catch(Exception e){
2584                     errln("Failed to create transliterator for "+hex(i)
2585                             +" script code: " +abbr
2586                             + " Exception: "+e.getMessage());
2587                 }
2588             }
2589         }
2590     }
2591 
2592 
2593     static final String[][] registerRules = {
2594         {"Any-Dev1", "x > X; y > Y;"},
2595         {"Any-Dev2", "XY > Z"},
2596         {"Greek-Latin/FAKE",
2597             "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
2598             "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
2599             "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
2600             "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
2601         },
2602     };
2603 
2604     static final String DESERET_DEE = UTF16.valueOf(0x10414);
2605     static final String DESERET_dee = UTF16.valueOf(0x1043C);
2606 
2607     static final String[][] testCases = {
2608 
2609         // NORMALIZATION
2610         // should add more test cases
2611         {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2612         {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2613         {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2614         {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2615 
2616         // mp -> b BUG
2617         {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
2618         {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},
2619 
2620         // check for devanagari bug
2621         {"nfd;Dev1;Dev2;nfc", "xy", "Z"},
2622 
2623         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
2624         {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2625             "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2626             //TODO: enable this test once Titlecase works right
2627             //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2628             //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2629 
2630             {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2631                 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
2632                 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2633                     "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},
2634 
2635                     {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2636                     {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2637 
2638                     // FORMS OF S
2639                     {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2640                     {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2641                     {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2642                     {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2643 
2644                     // Tatiana bug
2645                     // Upper: TAT\u02B9\u00C2NA
2646                     // Lower: tat\u02B9\u00E2na
2647                     // Title: Tat\u02B9\u00E2na
2648                     {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
2649                     {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
2650                     {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
2651     };
2652 
2653     @Test
TestSpecialCases()2654     public void TestSpecialCases() {
2655 
2656         for (int i = 0; i < registerRules.length; ++i) {
2657             Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2658                     registerRules[i][1], Transliterator.FORWARD);
2659             DummyFactory.add(registerRules[i][0], t);
2660         }
2661         for (int i = 0; i < testCases.length; ++i) {
2662             String name = testCases[i][0];
2663             Transliterator t = Transliterator.getInstance(name);
2664             String id = t.getID();
2665             String source = testCases[i][1];
2666             String target = null;
2667 
2668             // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2669 
2670             if (testCases[i].length > 2)    target = testCases[i][2];
2671             else if (id.equalsIgnoreCase("NFD"))    target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFD);
2672             else if (id.equalsIgnoreCase("NFC"))    target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFC);
2673             else if (id.equalsIgnoreCase("NFKD"))   target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFKD);
2674             else if (id.equalsIgnoreCase("NFKC"))   target = ohos.global.icu.text.Normalizer.normalize(source, ohos.global.icu.text.Normalizer.NFKC);
2675             else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
2676             else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);
2677 
2678             expect(t, source, target);
2679         }
2680         for (int i = 0; i < registerRules.length; ++i) {
2681             Transliterator.unregister(registerRules[i][0]);
2682         }
2683     }
2684 
2685     // seems like there should be an easier way to just register an instance of a transliterator
2686 
2687     static class DummyFactory implements Transliterator.Factory {
2688         static DummyFactory singleton = new DummyFactory();
2689         static HashMap m = new HashMap();
2690 
2691         // Since Transliterators are immutable, we don't have to clone on set & get
add(String ID, Transliterator t)2692         static void add(String ID, Transliterator t) {
2693             m.put(ID, t);
2694             //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2695             Transliterator.registerFactory(ID, singleton);
2696         }
2697         @Override
getInstance(String ID)2698         public Transliterator getInstance(String ID) {
2699             return (Transliterator) m.get(ID);
2700         }
2701     }
2702 
2703     @Test
TestCasing()2704     public void TestCasing() {
2705         Transliterator toLower = Transliterator.getInstance("lower");
2706         Transliterator toCasefold = Transliterator.getInstance("casefold");
2707         Transliterator toUpper = Transliterator.getInstance("upper");
2708         Transliterator toTitle = Transliterator.getInstance("title");
2709         for (int i = 0; i < 0x600; ++i) {
2710             String s = UTF16.valueOf(i);
2711 
2712             String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2713             assertEquals("Lowercase", lower, toLower.transform(s));
2714 
2715             String casefold = UCharacter.foldCase(s, true);
2716             assertEquals("Casefold", casefold, toCasefold.transform(s));
2717 
2718             if (i != 0x0345) {
2719                 // ICU 60 changes the default titlecasing index adjustment.
2720                 // For word breaks it is mostly the same as before,
2721                 // but it is different for the iota subscript (the only cased combining mark).
2722                 // This should be ok because the iota subscript is not supposed to appear
2723                 // at the start of a word.
2724                 // The title Transliterator is far below feature parity with the
2725                 // UCharacter and CaseMap titlecasing functions.
2726                 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2727                 assertEquals("Title", title, toTitle.transform(s));
2728             }
2729 
2730             String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2731             assertEquals("Upper", upper, toUpper.transform(s));
2732         }
2733     }
2734 
2735     @Test
TestSurrogateCasing()2736     public void TestSurrogateCasing () {
2737         // check that casing handles surrogates
2738         // titlecase is currently defective
2739         int dee = UTF16.charAt(DESERET_dee,0);
2740         int DEE = UCharacter.toTitleCase(dee);
2741         if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2742             errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2743         }
2744 
2745         if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2746             errln("Fails uppercase of surrogates");
2747         }
2748 
2749         if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2750             errln("Fails lowercase of surrogates");
2751         }
2752     }
2753 
2754 
2755     @Test
TestFunction()2756     public void TestFunction() {
2757         // Careful with spacing and ';' here:  Phrase this exactly
2758         // as toRules() is going to return it.  If toRules() changes
2759         // with regard to spacing or ';', then adjust this string.
2760         String rule =
2761             "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2762 
2763         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2764         if (t == null) {
2765             errln("FAIL: createFromRules failed");
2766             return;
2767         }
2768 
2769         String r = t.toRules(true);
2770         if (r.equals(rule)) {
2771             logln("OK: toRules() => " + r);
2772         } else {
2773             errln("FAIL: toRules() => " + r +
2774                     ", expected " + rule);
2775         }
2776 
2777         expect(t, "The Quick Brown Fox",
2778         "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2779         rule =
2780             "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2781 
2782         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2783         if (t == null) {
2784             errln("FAIL: createFromRules failed");
2785             return;
2786         }
2787 
2788         r = t.toRules(true);
2789         if (r.equals(rule)) {
2790             logln("OK: toRules() => " + r);
2791         } else {
2792             errln("FAIL: toRules() => " + r +
2793                     ", expected " + rule);
2794         }
2795 
2796         expect(t, "\u0301",
2797         "U+0301 \\N{COMBINING ACUTE ACCENT}");
2798     }
2799 
2800     @Test
TestInvalidBackRef()2801     public void TestInvalidBackRef() {
2802         String rule =  ". > $1;";
2803         String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2804         try {
2805             Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2806             if (t != null) {
2807                 errln("FAIL: createFromRules should have returned NULL");
2808             }
2809             errln("FAIL: Ok: . > $1; => no error");
2810             Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2811             if (t2 != null) {
2812                 errln("FAIL: createFromRules should have returned NULL");
2813             }
2814             errln("FAIL: Ok: . > $1; => no error");
2815         } catch (IllegalArgumentException e) {
2816             logln("Ok: . > $1; => " + e.getMessage());
2817         }
2818     }
2819 
2820     @Test
TestMulticharStringSet()2821     public void TestMulticharStringSet() {
2822         // Basic testing
2823         String rule =
2824             "       [{aa}]       > x;" +
2825             "         a          > y;" +
2826             "       [b{bc}]      > z;" +
2827             "[{gd}] { e          > q;" +
2828             "         e } [{fg}] > r;" ;
2829 
2830         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2831         if (t == null) {
2832             errln("FAIL: createFromRules failed");
2833             return;
2834         }
2835 
2836         expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2837         "y x yz z d gd de gdq gdqfg ddrfg");
2838 
2839         // Overlapped string test.  Make sure that when multiple
2840         // strings can match that the longest one is matched.
2841         rule =
2842             "    [a {ab} {abc}]    > x;" +
2843             "           b          > y;" +
2844             "           c          > z;" +
2845             " q [t {st} {rst}] { e > p;" ;
2846 
2847         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2848         if (t == null) {
2849             errln("FAIL: createFromRules failed");
2850             return;
2851         }
2852 
2853         expect(t, "a ab abc qte qste qrste",
2854         "x x x qtp qstp qrstp");
2855     }
2856 
2857     /**
2858      * Test that user-registered transliterators can be used under function
2859      * syntax.
2860      */
2861     @Test
TestUserFunction()2862     public void TestUserFunction() {
2863         Transliterator t;
2864 
2865         // There's no need to register inverses if we don't use them
2866         TestUserFunctionFactory.add("Any-gif",
2867                 Transliterator.createFromRules("gif",
2868                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2869                         Transliterator.FORWARD));
2870         //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2871 
2872         TestUserFunctionFactory.add("Any-RemoveCurly",
2873                 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2874         //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2875 
2876         logln("Trying &hex");
2877         t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2878         logln("Registering");
2879         TestUserFunctionFactory.add("Any-hex2", t);
2880         t = Transliterator.getInstance("Any-hex2");
2881         expect(t, "abc", "\\u0061\\u0062\\u0063");
2882 
2883         logln("Trying &gif");
2884         t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2885         logln("Registering");
2886         TestUserFunctionFactory.add("Any-gif2", t);
2887         t = Transliterator.getInstance("Any-gif2");
2888         expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2889         "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2890 
2891         // Test that filters are allowed after &
2892         t = Transliterator.createFromRules("test",
2893                 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2894         expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2895 
2896         // Unregister our test stuff
2897         TestUserFunctionFactory.unregister();
2898     }
2899 
2900     static class TestUserFunctionFactory implements Transliterator.Factory {
2901         static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2902         static HashMap m = new HashMap();
2903 
add(String ID, Transliterator t)2904         static void add(String ID, Transliterator t) {
2905             m.put(new CaseInsensitiveString(ID), t);
2906             Transliterator.registerFactory(ID, singleton);
2907         }
2908 
2909         @Override
getInstance(String ID)2910         public Transliterator getInstance(String ID) {
2911             return (Transliterator) m.get(new CaseInsensitiveString(ID));
2912         }
2913 
unregister()2914         static void unregister() {
2915             Iterator ids = m.keySet().iterator();
2916             while (ids.hasNext()) {
2917                 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
2918                 Transliterator.unregister(id.getString());
2919                 ids.remove(); // removes pair from m
2920             }
2921         }
2922     }
2923 
2924     /**
2925      * Test the Any-X transliterators.
2926      */
2927     @Test
TestAnyX()2928     public void TestAnyX() {
2929         Transliterator anyLatin =
2930             Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
2931 
2932         expect(anyLatin,
2933                 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2934         "greek:abkABK hiragana:abuku cyrillic:abc");
2935     }
2936 
2937     /**
2938      * Test Any-X transliterators with sample letters from all scripts.
2939      */
2940     @Test
TestAny()2941     public void TestAny() {
2942         UnicodeSet alphabetic = new UnicodeSet("[:alphabetic:]").freeze();
2943         StringBuffer testString = new StringBuffer();
2944         for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
2945             UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
2946             int count = 5;
2947             for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
2948                 testString.append(it.getString());
2949                 if (--count < 0) break;
2950             }
2951         }
2952         logln("Sample set for Any-Latin: " + testString);
2953         Transliterator anyLatin = Transliterator.getInstance("any-Latn");
2954         String result = anyLatin.transliterate(testString.toString());
2955         logln("Sample result for Any-Latin: " + result);
2956     }
2957 
2958 
2959     /**
2960      * Test the source and target set API.  These are only implemented
2961      * for RBT and CompoundTransliterator at this time.
2962      */
2963     @Test
TestSourceTargetSet()2964     public void TestSourceTargetSet() {
2965         // Rules
2966         String r =
2967             "a > b; " +
2968             "r [x{lu}] > q;";
2969 
2970         // Expected source
2971         UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
2972 
2973         // Expected target
2974         UnicodeSet expTrg = new UnicodeSet("[bq]");
2975 
2976         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
2977         UnicodeSet src = t.getSourceSet();
2978         UnicodeSet trg = t.getTargetSet();
2979 
2980         if (src.equals(expSrc) && trg.equals(expTrg)) {
2981             logln("Ok: " + r + " => source = " + src.toPattern(true) +
2982                     ", target = " + trg.toPattern(true));
2983         } else {
2984             errln("FAIL: " + r + " => source = " + src.toPattern(true) +
2985                     ", expected " + expSrc.toPattern(true) +
2986                     "; target = " + trg.toPattern(true) +
2987                     ", expected " + expTrg.toPattern(true));
2988         }
2989     }
2990 
2991     @Test
TestSourceTargetSetFilter()2992     public void TestSourceTargetSetFilter() {
2993         String[][] tests = {
2994                 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
2995                 {"[] Latin-Greek", null, "[\']"},
2996                 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
2997                 {"[] Any-Latin"},
2998                 {"[] casefold"},
2999                 {"[] NFKD;"},
3000                 {"[] NFKC;"},
3001                 {"[] hex"},
3002                 {"[] lower"},
3003                 {"[] null"},
3004                 {"[] remove"},
3005                 {"[] title"},
3006                 {"[] upper"},
3007         };
3008         UnicodeSet expectedSource = UnicodeSet.EMPTY;
3009         for (String[] testPair : tests) {
3010             String test = testPair[0];
3011             Transliterator t0;
3012             try {
3013                 t0 = Transliterator.getInstance(test);
3014             } catch (Exception e) {
3015                 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
3016             }
3017             Transliterator t1;
3018             try {
3019                 t1 = t0.getInverse();
3020             } catch (Exception e) {
3021                 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
3022             }
3023             int targetIndex = 0;
3024             for (Transliterator t : new Transliterator[]{t0, t1}) {
3025                 boolean ok;
3026                 UnicodeSet source = t.getSourceSet();
3027                 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3028                 targetIndex++;
3029                 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3030                         : testPair[targetIndex] == null ? expectedSource
3031                                 : testPair[targetIndex].length() == 0 ? expectedSource
3032                                         : new UnicodeSet(testPair[targetIndex]);
3033                 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
3034                 if (!ok) { // for debugging
3035                     source = t.getSourceSet();
3036                 }
3037                 UnicodeSet target = t.getTargetSet();
3038                 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3039                 if (!ok) { // for debugging
3040                     target = t.getTargetSet();
3041                 }
3042             }
3043         }
3044     }
3045 
isAtomic(String s, String t, Transliterator trans)3046     static boolean isAtomic(String s, String t, Transliterator trans) {
3047         for (int i = 1; i < s.length(); ++i) {
3048             if (!CharSequences.onCharacterBoundary(s, i)) {
3049                 continue;
3050             }
3051             String q = trans.transform(s.substring(0,i));
3052             if (t.startsWith(q)) {
3053                 String r = trans.transform(s.substring(i));
3054                 if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3055                     return false;
3056                 }
3057             }
3058         }
3059         return true;
3060         //        // make sure that every part is different
3061         //        if (s.codePointCount(0, s.length()) > 1) {
3062         //            int[] codePoints = It.codePoints(s);
3063         //            for (int k = 0; k < codePoints.length; ++k) {
3064         //                int pos = indexOf(t,codePoints[k]);
3065         //                if (pos >= 0) {
3066         //                    int x;
3067         //                }
3068         //            }
3069         //            if (s.contains("\u00C0")) {
3070         //                logln("\u00C0");
3071         //            }
3072         //        }
3073     }
3074 
addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3075     static void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3076         expectedSource.addAll(s);
3077         if (t.length() > 0) {
3078             expectedTarget.addAll(t);
3079         }
3080     }
3081 
3082 //    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3083 //        disorderedMarks.add(s);
3084 //        for (int j = 1; j < s.length(); ++j) {
3085 //            if (CharSequences.onCharacterBoundary(s, j)) {
3086 //                String shorter = s.substring(0,j);
3087 //                disorderedMarks.add(shorter);
3088 //                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
3089 //            }
3090 //        }
3091 //    }
3092 
3093     @Test
TestCharUtils()3094     public void TestCharUtils() {
3095         String[][] startTests = {
3096                 {"1", "a", "ab"},
3097                 {"0", "a", "xb"},
3098                 {"0", "\uD800", "\uD800\uDC01"},
3099                 {"1", "\uD800a", "\uD800b"},
3100                 {"0", "\uD800\uDC00", "\uD800\uDC01"},
3101         };
3102         for (String[] row : startTests) {
3103             int actual = findSharedStartLength(row[1], row[2]);
3104             assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3105                     Integer.parseInt(row[0]),
3106                     actual);
3107         }
3108         String[][] endTests = {
3109                 {"0", "\uDC00", "\uD801\uDC00"},
3110                 {"1", "a", "ba"},
3111                 {"0", "a", "bx"},
3112                 {"1", "a\uDC00", "b\uDC00"},
3113                 {"0", "\uD800\uDC00", "\uD801\uDC00"},
3114         };
3115         for (String[] row : endTests) {
3116             int actual = findSharedEndLength(row[1], row[2]);
3117             assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3118                     Integer.parseInt(row[0]),
3119                     actual);
3120         }
3121     }
3122 
3123     /**
3124      * @param s
3125      * @param t
3126      * @return
3127      */
3128     // TODO make generally available
findSharedStartLength(CharSequence s, CharSequence t)3129     private static int findSharedStartLength(CharSequence s, CharSequence t) {
3130         int min = Math.min(s.length(), t.length());
3131         int i;
3132         char sch, tch;
3133         for (i = 0; i < min; ++i) {
3134             sch = s.charAt(i);
3135             tch = t.charAt(i);
3136             if (sch != tch) {
3137                 break;
3138             }
3139         }
3140         return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3141     }
3142 
3143     /**
3144      * @param s
3145      * @param t
3146      * @return
3147      */
3148     // TODO make generally available
findSharedEndLength(CharSequence s, CharSequence t)3149     private static int findSharedEndLength(CharSequence s, CharSequence t) {
3150         int slength = s.length();
3151         int tlength = t.length();
3152         int min = Math.min(slength, tlength);
3153         int i;
3154         char sch, tch;
3155         // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3156         for (i = 0; i < min; ++i) {
3157             sch = s.charAt(slength - i - 1);
3158             tch = t.charAt(tlength - i - 1);
3159             if (sch != tch) {
3160                 break;
3161             }
3162         }
3163         return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
3164     }
3165 
/**
 * Comparison modes for the UnicodeSet flavour of {@code assertEquals}.
 * Going by the names, presumably exact equality, tolerating missing
 * elements, and tolerating extra elements respectively.
 */
enum SetAssert {
    EQUALS,
    MISSING_OK,
    EXTRA_OK
}
3167 
assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3168     static void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3169         boolean haveError = false;
3170         if (!actual.containsAll(empirical)) {
3171             UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3172             errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3173             haveError = true;
3174         }
3175         if (!empirical.containsAll(actual)) {
3176             UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3177             logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3178             haveError = true;
3179         }
3180         if (!haveError) {
3181             logln("OK " + message + ' ' + toPattern(empirical));
3182         }
3183     }
3184 
toPattern(UnicodeSet missing)3185     private static String toPattern(UnicodeSet missing) {
3186         String result = missing.toPattern(false);
3187         if (result.length() < 200) {
3188             return result;
3189         }
3190         return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3191     }
3192 
3193 
3194     /**
3195      * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3196      */
3197     @Test
TestPatternWhitespace()3198     public void TestPatternWhitespace() {
3199         // Rules
3200         String r = "a > \u200E b;";
3201 
3202         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3203 
3204         expect(t, "a", "b");
3205 
3206         // UnicodeSet
3207         UnicodeSet set = new UnicodeSet("[a \u200E]");
3208 
3209         if (set.contains(0x200E)) {
3210             errln("FAIL: U+200E not being ignored by UnicodeSet");
3211         }
3212     }
3213 
3214     @Test
TestAlternateSyntax()3215     public void TestAlternateSyntax() {
3216         // U+2206 == &
3217         // U+2190 == <
3218         // U+2192 == >
3219         // U+2194 == <>
3220         expect("a \u2192 x; b \u2190 y; c \u2194 z",
3221                 "abc",
3222         "xbz");
3223         expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3224                 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3225         "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3226     }
3227 
3228     @Test
TestPositionAPI()3229     public void TestPositionAPI() {
3230         Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3231         Transliterator.Position b = new Transliterator.Position(a);
3232         Transliterator.Position c = new Transliterator.Position();
3233         c.set(a);
3234         // Call the toString() API:
3235         if (a.equals(b) && a.equals(c)) {
3236             logln("Ok: " + a + " == " + b + " == " + c);
3237         } else {
3238             errln("FAIL: " + a + " != " + b + " != " + c);
3239         }
3240     }
3241 
3242     //======================================================================
3243     // New tests for the ::BEGIN/::END syntax
3244     //======================================================================
3245 
3246     private static final String[] BEGIN_END_RULES = new String[] {
3247         // [0]
3248         "abc > xy;"
3249         + "aba > z;",
3250 
3251         // [1]
3252         /*
3253         "::BEGIN;"
3254         + "abc > xy;"
3255         + "::END;"
3256         + "::BEGIN;"
3257         + "aba > z;"
3258         + "::END;",
3259          */
3260         "", // test case commented out below, this is here to keep from messing up the indexes
3261 
3262         // [2]
3263         /*
3264         "abc > xy;"
3265         + "::BEGIN;"
3266         + "aba > z;"
3267         + "::END;",
3268          */
3269         "", // test case commented out below, this is here to keep from messing up the indexes
3270 
3271         // [3]
3272         /*
3273         "::BEGIN;"
3274         + "abc > xy;"
3275         + "::END;"
3276         + "aba > z;",
3277          */
3278         "", // test case commented out below, this is here to keep from messing up the indexes
3279 
3280         // [4]
3281         "abc > xy;"
3282         + "::Null;"
3283         + "aba > z;",
3284 
3285         // [5]
3286         "::Upper;"
3287         + "ABC > xy;"
3288         + "AB > x;"
3289         + "C > z;"
3290         + "::Upper;"
3291         + "XYZ > p;"
3292         + "XY > q;"
3293         + "Z > r;"
3294         + "::Upper;",
3295 
3296         // [6]
3297         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3298         + "$delim = [\\-$ws];"
3299         + "$ws $delim* > ' ';"
3300         + "'-' $delim* > '-';",
3301 
3302         // [7]
3303         "::Null;"
3304         + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3305         + "$delim = [\\-$ws];"
3306         + "$ws $delim* > ' ';"
3307         + "'-' $delim* > '-';",
3308 
3309         // [8]
3310         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3311         + "$delim = [\\-$ws];"
3312         + "$ws $delim* > ' ';"
3313         + "'-' $delim* > '-';"
3314         + "::Null;",
3315 
3316         // [9]
3317         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3318         + "$delim = [\\-$ws];"
3319         + "::Null;"
3320         + "$ws $delim* > ' ';"
3321         + "'-' $delim* > '-';",
3322 
3323         // [10]
3324         /*
3325         "::BEGIN;"
3326         + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3327         + "$delim = [\\-$ws];"
3328         + "::END;"
3329         + "$ws $delim* > ' ';"
3330         + "'-' $delim* > '-';",
3331          */
3332         "", // test case commented out below, this is here to keep from messing up the indexes
3333 
3334         // [11]
3335         /*
3336         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3337         + "$delim = [\\-$ws];"
3338         + "::BEGIN;"
3339         + "$ws $delim* > ' ';"
3340         + "'-' $delim* > '-';"
3341         + "::END;",
3342          */
3343         "", // test case commented out below, this is here to keep from messing up the indexes
3344 
3345         // [12]
3346         /*
3347         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3348         + "$delim = [\\-$ws];"
3349         + "$ab = [ab];"
3350         + "::BEGIN;"
3351         + "$ws $delim* > ' ';"
3352         + "'-' $delim* > '-';"
3353         + "::END;"
3354         + "::BEGIN;"
3355         + "$ab { ' ' } $ab > '-';"
3356         + "c { ' ' > ;"
3357         + "::END;"
3358         + "::BEGIN;"
3359         + "'a-a' > a\\%|a;"
3360         + "::END;",
3361          */
3362         "", // test case commented out below, this is here to keep from messing up the indexes
3363 
3364         // [13]
3365         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3366         + "$delim = [\\-$ws];"
3367         + "$ab = [ab];"
3368         + "::Null;"
3369         + "$ws $delim* > ' ';"
3370         + "'-' $delim* > '-';"
3371         + "::Null;"
3372         + "$ab { ' ' } $ab > '-';"
3373         + "c { ' ' > ;"
3374         + "::Null;"
3375         + "'a-a' > a\\%|a;",
3376 
3377         // [14]
3378         /*
3379         "::[abc];"
3380         + "::BEGIN;"
3381         + "abc > xy;"
3382         + "::END;"
3383         + "::BEGIN;"
3384         + "aba > yz;"
3385         + "::END;"
3386         + "::Upper;",
3387          */
3388         "", // test case commented out below, this is here to keep from messing up the indexes
3389 
3390         // [15]
3391         "::[abc];"
3392         + "abc > xy;"
3393         + "::Null;"
3394         + "aba > yz;"
3395         + "::Upper;",
3396 
3397         // [16]
3398         /*
3399         "::[abc];"
3400         + "::BEGIN;"
3401         + "abc <> xy;"
3402         + "::END;"
3403         + "::BEGIN;"
3404         + "aba <> yz;"
3405         + "::END;"
3406         + "::Upper(Lower);"
3407         + "::([XYZ]);",
3408          */
3409         "", // test case commented out below, this is here to keep from messing up the indexes
3410 
3411         // [17]
3412         "::[abc];"
3413         + "abc <> xy;"
3414         + "::Null;"
3415         + "aba <> yz;"
3416         + "::Upper(Lower);"
3417         + "::([XYZ]);"
3418     };
3419 
3420     /*
3421 (This entire test is commented out below and will need some heavy revision when we re-add
3422 the ::BEGIN/::END stuff)
3423     private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3424         // [7]
3425         "::BEGIN;"
3426         + "abc > xy;"
3427         + "::BEGIN;"
3428         + "aba > z;"
3429         + "::END;"
3430         + "::END;",
3431 
3432         // [8]
3433         "abc > xy;"
3434         + " aba > z;"
3435         + "::END;",
3436 
3437         // [9]
3438         "::BEGIN;"
3439         + "::Upper;"
3440         + "::END;"
3441     };
3442      */
3443 
3444     private static final String[] BEGIN_END_TEST_CASES = new String[] {
3445         BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
3446         //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
3447         //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
3448         //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
3449         BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
3450         BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
3451 
3452         BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
3453         BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
3454         BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
3455         BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
3456         //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
3457         //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
3458         //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
3459         //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
3460         //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
3461         BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
3462         BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
3463         BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
3464 
3465         //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3466         BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3467         //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3468         BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
3469     };
3470 
3471     @Test
TestBeginEnd()3472     public void TestBeginEnd() {
3473         // run through the list of test cases above
3474         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3475             expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3476         }
3477 
3478         // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3479         Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3480                 Transliterator.REVERSE);
3481         expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3482 
3483         // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3484         // that all of them cause errors
3485         /*
3486 (commented out until we have the real ::BEGIN/::END stuff in place
3487         for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3488             try {
3489                 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3490                         Transliterator.FORWARD);
3491                 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3492             }
3493             catch (IllegalArgumentException e) {
3494                 // this is supposed to happen; do nothing here
3495             }
3496         }
3497          */
3498     }
3499 
3500     @Test
TestBeginEndToRules()3501     public void TestBeginEndToRules() {
3502         // run through the same list of test cases we used above, but this time, instead of just
3503         // instantiating a Transliterator from the rules and running the test against it, we instantiate
3504         // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3505         // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3506         // to (i.e., does the same thing as) the original rule set
3507         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3508             Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3509                     Transliterator.FORWARD);
3510             String rules = t.toRules(false);
3511             Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3512             expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3513         }
3514 
3515         // do the same thing for the reversible test case
3516         Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3517                 Transliterator.REVERSE);
3518         String rules = reversed.toRules(false);
3519         Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3520         expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3521     }
3522 
3523     @Test
TestRegisterAlias()3524     public void TestRegisterAlias() {
3525         String longID = "Lower;[aeiou]Upper";
3526         String shortID = "Any-CapVowels";
3527         String reallyShortID = "CapVowels";
3528 
3529         Transliterator.registerAlias(shortID, longID);
3530 
3531         Transliterator t1 = Transliterator.getInstance(longID);
3532         Transliterator t2 = Transliterator.getInstance(reallyShortID);
3533 
3534         if (!t1.getID().equals(longID))
3535             errln("Transliterator instantiated with long ID doesn't have long ID");
3536         if (!t2.getID().equals(reallyShortID))
3537             errln("Transliterator instantiated with short ID doesn't have short ID");
3538 
3539         if (!t1.toRules(true).equals(t2.toRules(true)))
3540             errln("Alias transliterators aren't the same");
3541 
3542         Transliterator.unregister(shortID);
3543 
3544         try {
3545             t1 = Transliterator.getInstance(shortID);
3546             errln("Instantiation with short ID succeeded after short ID was unregistered");
3547         }
3548         catch (IllegalArgumentException e) {
3549         }
3550 
3551         // try the same thing again, but this time with something other than
3552         // an instance of CompoundTransliterator
3553         String realID = "Latin-Greek";
3554         String fakeID = "Latin-dlgkjdflkjdl";
3555         Transliterator.registerAlias(fakeID, realID);
3556 
3557         t1 = Transliterator.getInstance(realID);
3558         t2 = Transliterator.getInstance(fakeID);
3559 
3560         if (!t1.toRules(true).equals(t2.toRules(true)))
3561             errln("Alias transliterators aren't the same");
3562 
3563         Transliterator.unregister(fakeID);
3564     }
3565 
3566     /**
3567      * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3568      */
3569     @Test
TestHalfwidthFullwidth()3570     public void TestHalfwidthFullwidth() {
3571         Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3572         Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3573 
3574         // Array of 3n items
3575         // Each item is
3576         //   "hf"|"fh"|"both",
3577         //   <Halfwidth>,
3578         //   <Fullwidth>
3579         String[] DATA = {
3580                 "both",
3581                 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3582                 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3583         };
3584 
3585         for (int i=0; i<DATA.length; i+=3) {
3586             switch (DATA[i].charAt(0)) {
3587             case 'h': // Halfwidth-Fullwidth only
3588                 expect(hf, DATA[i+1], DATA[i+2]);
3589                 break;
3590             case 'f': // Fullwidth-Halfwidth only
3591                 expect(fh, DATA[i+2], DATA[i+1]);
3592                 break;
3593             case 'b': // both directions
3594                 expect(hf, DATA[i+1], DATA[i+2]);
3595                 expect(fh, DATA[i+2], DATA[i+1]);
3596                 break;
3597             }
3598         }
3599 
3600     }
3601 
3602     /**
3603      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3604      *              TODO: confirm that the expected results are correct.
3605      *              For now, test just confirms that C++ and Java give identical results.
3606      */
3607     @Test
TestThai()3608     public void TestThai() {
3609         Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3610         String thaiText =
3611             "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3612             "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3613             "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3614             "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3615             "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3616             "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3617             "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3618             "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3619             "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3620             "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3621             "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3622             "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3623             "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3624             "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3625             "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3626             "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3627             "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3628             "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3629             "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3630             "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3631             "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3632             "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3633             "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3634             "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3635             " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3636             "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3637             "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3638             " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3639             "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3640             "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3641 
3642         String latinText =
3643             "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3644             "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3645             "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3646             "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3647             "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3648             " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3649             "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3650             "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3651             "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3652             "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3653             "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3654             "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3655             " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3656             "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3657             " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3658             "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3659             "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3660             "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3661 
3662         expect(tr, thaiText, latinText);
3663     }
3664 
3665 
3666     //======================================================================
3667     // These tests are not mirrored (yet) in icu4c at
3668     // source/test/intltest/transtst.cpp
3669     //======================================================================
3670 
3671     /**
3672      * Improve code coverage.
3673      */
3674     @Test
TestCoverage()3675     public void TestCoverage() {
3676         // NullTransliterator
3677         Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3678         expect(t, "a", "a");
3679 
3680         // Source, target set
3681         t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3682         t.setFilter(new UnicodeSet("[A-Z]"));
3683         logln("source = " + t.getSourceSet());
3684         logln("target = " + t.getTargetSet());
3685 
3686         t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3687         logln("source = " + t.getSourceSet());
3688         logln("target = " + t.getTargetSet());
3689     }
3690     /*
3691      * Test case for threading problem in NormalizationTransliterator
3692      * reported by ticket#5160
3693      */
3694     @Test
TestT5160()3695     public void TestT5160() {
3696         final String[] testData = {
3697                 "a",
3698                 "b",
3699                 "\u09BE",
3700                 "A\u0301",
3701         };
3702         final String[] expected = {
3703                 "a",
3704                 "b",
3705                 "\u09BE",
3706                 "\u00C1",
3707         };
3708         Transliterator translit = Transliterator.getInstance("NFC");
3709         NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
3710         for (int i = 0; i < tasks.length; i++) {
3711             tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
3712         }
3713         TestUtil.runUntilDone(tasks);
3714 
3715         for (int i = 0; i < tasks.length; i++) {
3716             if (tasks[i].getErrorMessage() != null) {
3717                 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
3718                 break;
3719             }
3720         }
3721     }
3722 
3723     static class NormTranslitTask implements Runnable {
3724         Transliterator translit;
3725         String testData;
3726         String expectedData;
3727         String errorMsg;
3728 
NormTranslitTask(Transliterator translit, String testData, String expectedData)3729         NormTranslitTask(Transliterator translit, String testData, String expectedData) {
3730             this.translit = translit;
3731             this.testData = testData;
3732             this.expectedData = expectedData;
3733         }
3734 
3735         @Override
run()3736         public void run() {
3737             errorMsg = null;
3738             StringBuffer inBuf = new StringBuffer(testData);
3739             StringBuffer expectedBuf = new StringBuffer(expectedData);
3740 
3741             for(int i = 0; i < 1000; i++) {
3742                 String in = inBuf.toString();
3743                 String out = translit.transliterate(in);
3744                 String expected = expectedBuf.toString();
3745                 if (!out.equals(expected)) {
3746                     errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
3747                     break;
3748                 }
3749                 inBuf.append(testData);
3750                 expectedBuf.append(expectedData);
3751             }
3752         }
3753 
getErrorMessage()3754         public String getErrorMessage() {
3755             return errorMsg;
3756         }
3757     }
3758 
3759     //======================================================================
3760     // Support methods
3761     //======================================================================
expect(String rules, String source, String expectedResult, Transliterator.Position pos)3762     static void expect(String rules,
3763             String source,
3764             String expectedResult,
3765             Transliterator.Position pos) {
3766         Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
3767         expect(t, source, expectedResult, pos);
3768     }
3769 
expect(String rules, String source, String expectedResult)3770     static void expect(String rules, String source, String expectedResult) {
3771         expect(rules, source, expectedResult, null);
3772     }
3773 
expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3774     static void expect(Transliterator t, String source, String expectedResult,
3775             Transliterator reverseTransliterator) {
3776         expect(t, source, expectedResult);
3777         if (reverseTransliterator != null) {
3778             expect(reverseTransliterator, expectedResult, source);
3779         }
3780     }
3781 
expect(Transliterator t, String source, String expectedResult)3782     static void expect(Transliterator t, String source, String expectedResult) {
3783         expect(t, source, expectedResult, (Transliterator.Position) null);
3784     }
3785 
expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)3786     static void expect(Transliterator t, String source, String expectedResult,
3787             Transliterator.Position pos) {
3788         if (pos == null) {
3789             String result = t.transliterate(source);
3790             if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
3791         }
3792 
3793         Transliterator.Position index = null;
3794         if (pos == null) {
3795             index = new Transliterator.Position(0, source.length(), 0, source.length());
3796         } else {
3797             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
3798                     pos.start, pos.limit);
3799         }
3800 
3801         ReplaceableString rsource = new ReplaceableString(source);
3802 
3803         t.finishTransliteration(rsource, index);
3804         // Do it all at once -- below we do it incrementally
3805 
3806         if (index.start != index.limit) {
3807             expectAux(t.getID() + ":UNFINISHED", source,
3808                     "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
3809             return;
3810         }
3811         String result = rsource.toString();
3812         if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
3813 
3814 
3815         if (pos == null) {
3816             index = new Transliterator.Position();
3817         } else {
3818             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
3819                     pos.start, pos.limit);
3820         }
3821 
3822         // Test incremental transliteration -- this result
3823         // must be the same after we finalize (see below).
3824         List<String> v = new ArrayList<String>();
3825         v.add(source);
3826         rsource.replace(0, rsource.length(), "");
3827         if (pos != null) {
3828             rsource.replace(0, 0, source);
3829             v.add(UtilityExtensions.formatInput(rsource, index));
3830             t.transliterate(rsource, index);
3831             v.add(UtilityExtensions.formatInput(rsource, index));
3832         } else {
3833             for (int i=0; i<source.length(); ++i) {
3834                 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
3835                 //log.append(source.charAt(i)).append(" -> "));
3836                 t.transliterate(rsource, index, source.charAt(i));
3837                 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
3838                 v.add(UtilityExtensions.formatInput(rsource, index) +
3839                         ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
3840             }
3841         }
3842 
3843         // As a final step in keyboard transliteration, we must call
3844         // transliterate to finish off any pending partial matches that
3845         // were waiting for more input.
3846         t.finishTransliteration(rsource, index);
3847         result = rsource.toString();
3848         //log.append(" => ").append(rsource.toString());
3849         v.add(result);
3850 
3851         String[] results = new String[v.size()];
3852         v.toArray(results);
3853         expectAux(t.getID() + ":Incremental", results,
3854                 result.equals(expectedResult),
3855                 expectedResult);
3856     }
3857 
3858     static boolean expectAux(String tag, String source,
3859             String result, String expectedResult) {
3860         return expectAux(tag, new String[] {source, result},
3861                 result.equals(expectedResult),
3862                 expectedResult);
3863     }
3864 
3865     static boolean expectAux(String tag, String source,
3866             String result, boolean pass,
3867             String expectedResult) {
3868         return expectAux(tag, new String[] {source, result},
3869                 pass,
3870                 expectedResult);
3871     }
3872 
3873     static boolean expectAux(String tag, String source,
3874             boolean pass,
3875             String expectedResult) {
3876         return expectAux(tag, new String[] {source},
3877                 pass,
3878                 expectedResult);
3879     }
3880 
3881     static boolean expectAux(String tag, String[] results, boolean pass,
3882             String expectedResult) {
3883         msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
3884 
3885         for (int i = 0; i < results.length; ++i) {
3886             String label;
3887             if (i == 0) {
3888                 label = "source:   ";
3889             } else if (i == results.length - 1) {
3890                 label = "result:   ";
3891             } else {
3892                 if (!isVerbose() && pass) continue;
3893                 label = "interm" + i + ":  ";
3894             }
3895             msg("    " + label + results[i], pass ? LOG : ERR, false, true);
3896         }
3897 
3898         if (!pass) {
3899             msg(  "    expected: " + expectedResult, ERR, false, true);
3900         }
3901 
3902         return pass;
3903     }
3904 
3905     static private void assertTransform(String message, String expected, StringTransform t, String source) {
3906         assertEquals(message + " " + source, expected, t.transform(source));
3907     }
3908 
3909 
3910     static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
3911         assertEquals(message + " " +source, expected, t.transform(source));
3912         assertEquals(message + " " +source2, expected, t.transform(source2));
3913         assertEquals(message + " " + expected, source, back.transform(expected));
3914     }
3915 
3916     /*
3917      * Tests the method public Enumeration<String> getAvailableTargets(String source)
3918      */
3919     @Test
3920     public void TestGetAvailableTargets() {
3921         try {
3922             // Tests when if (targets == null) is true
3923             Transliterator.getAvailableTargets("");
3924         } catch (Exception e) {
3925             errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
3926         }
3927     }
3928 
3929     /*
3930      * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
3931      */
3932     @Test
3933     public void TestGetAvailableVariants() {
3934         try {
3935             // Tests when if (targets == null) is true
3936             Transliterator.getAvailableVariants("", "");
3937         } catch (Exception e) {
3938             errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
3939         }
3940     }
3941 
3942     /*
3943      * Tests the mehtod String nextLine() in RuleBody
3944      */
3945     @Test
3946     public void TestNextLine() {
3947         // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
3948         try{
3949             Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
3950         } catch(Exception e){
3951             errln("TransliteratorParser.nextLine() was not suppose to return an " +
3952             "exception for a rule of '\\'");
3953         }
3954     }
3955 
3956     /**
3957      * Tests equals and hashCode implementation of Transliterator.Position
3958      */
3959     @Test
3960     public void TestPositionEquals() {
3961         Transliterator.Position position1 = new Transliterator.Position(1, 0, 0, 0);
3962         Transliterator.Position position2 = new Transliterator.Position(0, 0, 0, 0);
3963         assertNotEquals("2 different positions are not equal", position1, position2);
3964         assertNotEquals("2 different positions have different hash codes", position1.hashCode(), position2.hashCode());
3965         Transliterator.Position position3 = new Transliterator.Position(1, 0, 0, 0);
3966         assertEquals("2 positions are equal", position1, position3);
3967         assertEquals("2 positions have the same hash codes", position1.hashCode(), position3.hashCode());
3968     }
3969 }
3970