• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.test.translit;
10 
11 import java.util.ArrayList;
12 import java.util.Enumeration;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.List;
17 import java.util.Locale;
18 
19 import org.junit.Test;
20 import org.junit.runner.RunWith;
21 import org.junit.runners.JUnit4;
22 
23 import com.ibm.icu.dev.test.TestFmwk;
24 import com.ibm.icu.dev.test.TestUtil;
25 import com.ibm.icu.impl.Utility;
26 import com.ibm.icu.impl.UtilityExtensions;
27 import com.ibm.icu.lang.CharSequences;
28 import com.ibm.icu.lang.UCharacter;
29 import com.ibm.icu.lang.UScript;
30 import com.ibm.icu.text.Replaceable;
31 import com.ibm.icu.text.ReplaceableString;
32 import com.ibm.icu.text.StringTransform;
33 import com.ibm.icu.text.Transliterator;
34 import com.ibm.icu.text.UTF16;
35 import com.ibm.icu.text.UnicodeFilter;
36 import com.ibm.icu.text.UnicodeSet;
37 import com.ibm.icu.text.UnicodeSetIterator;
38 import com.ibm.icu.util.CaseInsensitiveString;
39 import com.ibm.icu.util.ULocale;
40 
41 /***********************************************************************
42 
43                      HOW TO USE THIS TEST FILE
44                                -or-
45                   How I developed on two platforms
46                 without losing (too much of) my mind
47 
48 
49 1. Add new tests by copying/pasting/changing existing tests.  On Java,
50    any public void method named Test...() taking no parameters becomes
51    a test.  On C++, you need to modify the header and add a line to
52    the runIndexedTest() dispatch method.
53 
54 2. Make liberal use of the expect() method; it is your friend.
55 
56 3. The tests in this file exactly match those in a sister file on the
57    other side.  The two files are:
58 
59    icu4j:  src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
60    icu4c:  source/test/intltest/transtst.cpp
61 
62                   ==> THIS IS THE IMPORTANT PART <==
63 
64    When you add a test in this file, add it in transtst.cpp too.
65    Give it the same name and put it in the same relative place.  This
66    makes maintenance a lot simpler for any poor soul who ends up
67    trying to synchronize the tests between icu4j and icu4c.
68 
69 4. If you MUST enter a test that is NOT paralleled in the sister file,
70    then add it in the special non-mirrored section.  These are
71    labeled
72 
73      "icu4j ONLY"
74 
75    or
76 
77      "icu4c ONLY"
78 
79    Make sure you document the reason the test is here and not there.
80 
81 
82 Thank you.
83 The Management
84  ***********************************************************************/
85 
86 /**
87  * @test
88  * @summary General test of Transliterator
89  */
90 @RunWith(JUnit4.class)
91 public class TransliteratorTest extends TestFmwk {
92     @Test
TestHangul()93     public void TestHangul() {
94 
95         Transliterator lh = Transliterator.getInstance("Latin-Hangul");
96         Transliterator hl = lh.getInverse();
97 
98         assertTransform("Transform", "\uCE20", lh, "ch");
99 
100         assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
101         assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
102         assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
103         assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
104         assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
105         assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
106         assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
107         assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
108         assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
109         assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
110         assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
111         assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
112         assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
113         assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
114         assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
115         assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
116         assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
117         assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
118         assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
119         assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
120         assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
121         assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
122         assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
123         assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
124         assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
125         assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
126 
127     }
128 
129     @Test
TestChinese()130     public void TestChinese() {
131         Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
132         assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
133         assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
134     }
135 
136     @Test
TestRegistry()137     public void TestRegistry() {
138         checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
139         checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
140         checkRegistry("foo1", "[:letter:] a > b;");
141         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
142             String id = (String) e.nextElement();
143             checkRegistry(id);
144         }
145         // Need to remove these test-specific transliterators in order not to interfere with other tests.
146         Transliterator.unregister("foo3");
147         Transliterator.unregister("foo2");
148         Transliterator.unregister("foo1");
149     }
150 
checkRegistry(String id, String rules)151     private void checkRegistry (String id, String rules) {
152         Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
153         Transliterator.registerInstance(foo);
154         checkRegistry(id);
155     }
156 
checkRegistry(String id)157     private void checkRegistry(String id) {
158         Transliterator fie = Transliterator.getInstance(id);
159         final UnicodeSet fae = new UnicodeSet("[a-z5]");
160         fie.setFilter(fae);
161         Transliterator foe = Transliterator.getInstance(id);
162         UnicodeFilter fee = foe.getFilter();
163         if (fae.equals(fee)) {
164             errln("Changed what is in registry for " + id);
165         }
166     }
167 
168     @Test
TestInstantiationError()169     public void TestInstantiationError() {
170         try {
171             String ID = "<Not a valid Transliterator ID>";
172             Transliterator t = Transliterator.getInstance(ID);
173             errln("FAIL: " + ID + " returned " + t);
174         } catch (IllegalArgumentException ex) {
175             logln("OK: Bogus ID handled properly");
176         }
177     }
178 
179     @Test
TestSimpleRules()180     public void TestSimpleRules() {
181         /* Example: rules 1. ab>x|y
182          *                2. yc>z
183          *
184          * []|eabcd  start - no match, copy e to tranlated buffer
185          * [e]|abcd  match rule 1 - copy output & adjust cursor
186          * [ex|y]cd  match rule 2 - copy output & adjust cursor
187          * [exz]|d   no match, copy d to transliterated buffer
188          * [exzd]|   done
189          */
190         expect("ab>x|y;" +
191                 "yc>z",
192                 "eabcd", "exzd");
193 
194         /* Another set of rules:
195          *    1. ab>x|yzacw
196          *    2. za>q
197          *    3. qc>r
198          *    4. cw>n
199          *
200          * []|ab       Rule 1
201          * [x|yzacw]   No match
202          * [xy|zacw]   Rule 2
203          * [xyq|cw]    Rule 4
204          * [xyqn]|     Done
205          */
206         expect("ab>x|yzacw;" +
207                 "za>q;" +
208                 "qc>r;" +
209                 "cw>n",
210                 "ab", "xyqn");
211 
212         /* Test categories
213          */
214         Transliterator t = Transliterator.createFromRules("<ID>",
215                 "$dummy=\uE100;" +
216                 "$vowel=[aeiouAEIOU];" +
217                 "$lu=[:Lu:];" +
218                 "$vowel } $lu > '!';" +
219                 "$vowel > '&';" +
220                 "'!' { $lu > '^';" +
221                 "$lu > '*';" +
222                 "a>ERROR",
223                 Transliterator.FORWARD);
224         expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
225     }
226 
227     /**
228      * Test inline set syntax and set variable syntax.
229      */
230     @Test
TestInlineSet()231     public void TestInlineSet() {
232         expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
233         expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
234 
235         expect("$digit = [0-9];" +
236                 "$alpha = [a-zA-Z];" +
237                 "$alphanumeric = [$digit $alpha];" + // ***
238                 "$special = [^$alphanumeric];" +     // ***
239                 "$alphanumeric > '-';" +
240                 "$special > '*';",
241 
242                 "thx-1138", "---*----");
243     }
244 
245     /**
246      * Create some inverses and confirm that they work.  We have to be
247      * careful how we do this, since the inverses will not be true
248      * inverses -- we can't throw any random string at the composition
249      * of the transliterators and expect the identity function.  F x
250      * F' != I.  However, if we are careful about the input, we will
251      * get the expected results.
252      */
253     @Test
TestRuleBasedInverse()254     public void TestRuleBasedInverse() {
255         String RULES =
256             "abc>zyx;" +
257             "ab>yz;" +
258             "bc>zx;" +
259             "ca>xy;" +
260             "a>x;" +
261             "b>y;" +
262             "c>z;" +
263 
264             "abc<zyx;" +
265             "ab<yz;" +
266             "bc<zx;" +
267             "ca<xy;" +
268             "a<x;" +
269             "b<y;" +
270             "c<z;" +
271 
272             "";
273 
274         String[] DATA = {
275                 // Careful here -- random strings will not work.  If we keep
276                 // the left side to the domain and the right side to the range
277                 // we will be okay though (left, abc; right xyz).
278                 "a", "x",
279                 "abcacab", "zyxxxyy",
280                 "caccb", "xyzzy",
281         };
282 
283         Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
284         Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
285         for (int i=0; i<DATA.length; i+=2) {
286             expect(fwd, DATA[i], DATA[i+1]);
287             expect(rev, DATA[i+1], DATA[i]);
288         }
289     }
290 
291     /**
292      * Basic test of keyboard.
293      */
294     @Test
TestKeyboard()295     public void TestKeyboard() {
296         Transliterator t = Transliterator.createFromRules("<ID>",
297                 "psch>Y;"
298                 +"ps>y;"
299                 +"ch>x;"
300                 +"a>A;", Transliterator.FORWARD);
301         String DATA[] = {
302                 // insertion, buffer
303                 "a", "A",
304                 "p", "Ap",
305                 "s", "Aps",
306                 "c", "Apsc",
307                 "a", "AycA",
308                 "psch", "AycAY",
309                 null, "AycAY", // null means finishKeyboardTransliteration
310         };
311 
312         keyboardAux(t, DATA);
313     }
314 
315     /**
316      * Basic test of keyboard with cursor.
317      */
318     @Test
TestKeyboard2()319     public void TestKeyboard2() {
320         Transliterator t = Transliterator.createFromRules("<ID>",
321                 "ych>Y;"
322                 +"ps>|y;"
323                 +"ch>x;"
324                 +"a>A;", Transliterator.FORWARD);
325         String DATA[] = {
326                 // insertion, buffer
327                 "a", "A",
328                 "p", "Ap",
329                 "s", "Aps", // modified for rollback - "Ay",
330                 "c", "Apsc", // modified for rollback - "Ayc",
331                 "a", "AycA",
332                 "p", "AycAp",
333                 "s", "AycAps", // modified for rollback - "AycAy",
334                 "c", "AycApsc", // modified for rollback - "AycAyc",
335                 "h", "AycAY",
336                 null, "AycAY", // null means finishKeyboardTransliteration
337         };
338 
339         keyboardAux(t, DATA);
340     }
341 
342     /**
343      * Test keyboard transliteration with back-replacement.
344      */
345     @Test
TestKeyboard3()346     public void TestKeyboard3() {
347         // We want th>z but t>y.  Furthermore, during keyboard
348         // transliteration we want t>y then yh>z if t, then h are
349         // typed.
350         String RULES =
351             "t>|y;" +
352             "yh>z;" +
353             "";
354 
355         String[] DATA = {
356                 // Column 1: characters to add to buffer (as if typed)
357                 // Column 2: expected appearance of buffer after
358                 //           keyboard xliteration.
359                 "a", "a",
360                 "b", "ab",
361                 "t", "abt", // modified for rollback - "aby",
362                 "c", "abyc",
363                 "t", "abyct", // modified for rollback - "abycy",
364                 "h", "abycz",
365                 null, "abycz", // null means finishKeyboardTransliteration
366         };
367 
368         Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
369         keyboardAux(t, DATA);
370     }
371 
keyboardAux(Transliterator t, String[] DATA)372     private void keyboardAux(Transliterator t, String[] DATA) {
373         Transliterator.Position index = new Transliterator.Position();
374         ReplaceableString s = new ReplaceableString();
375         for (int i=0; i<DATA.length; i+=2) {
376             StringBuffer log;
377             if (DATA[i] != null) {
378                 log = new StringBuffer(s.toString() + " + "
379                         + DATA[i]
380                                + " -> ");
381                 t.transliterate(s, index, DATA[i]);
382             } else {
383                 log = new StringBuffer(s.toString() + " => ");
384                 t.finishTransliteration(s, index);
385             }
386             UtilityExtensions.formatInput(log, s, index);
387             if (s.toString().equals(DATA[i+1])) {
388                 logln(log.toString());
389             } else {
390                 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
391             }
392         }
393     }
394 
395     // Latin-Arabic has been temporarily removed until it can be
396     // done correctly.
397 
398     //  public void TestArabic() {
399     //      String DATA[] = {
400     //          "Arabic",
401     //              "\u062a\u062a\u0645\u062a\u0639 "+
402     //              "\u0627\u0644\u0644\u063a\u0629 "+
403     //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
404     //              "\u0628\u0628\u0646\u0638\u0645 "+
405     //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
406     //              "\u062c\u0645\u064a\u0644\u0629"
407     //      };
408 
409     //      Transliterator t = Transliterator.getInstance("Latin-Arabic");
410     //      for (int i=0; i<DATA.length; i+=2) {
411     //          expect(t, DATA[i], DATA[i+1]);
412     //      }
413     //  }
414 
415     /**
416      * Compose the Kana transliterator forward and reverse and try
417      * some strings that should come out unchanged.
418      */
419     @Test
TestCompoundKana()420     public void TestCompoundKana() {
421         Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
422         expect(t, "aaaaa", "aaaaa");
423     }
424 
425     /**
426      * Compose the hex transliterators forward and reverse.
427      */
428     @Test
TestCompoundHex()429     public void TestCompoundHex() {
430         Transliterator a = Transliterator.getInstance("Any-Hex");
431         Transliterator b = Transliterator.getInstance("Hex-Any");
432         // Transliterator[] trans = { a, b };
433         // Transliterator ab = Transliterator.getInstance(trans);
434         Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
435 
436         // Do some basic tests of b
437         expect(b, "\\u0030\\u0031", "01");
438 
439         String s = "abcde";
440         expect(ab, s, s);
441 
442         // trans = new Transliterator[] { b, a };
443         // Transliterator ba = Transliterator.getInstance(trans);
444         Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
445         ReplaceableString str = new ReplaceableString(s);
446         a.transliterate(str);
447         expect(ba, str.toString(), str.toString());
448     }
449 
450     /**
451      * Do some basic tests of filtering.
452      */
453     @Test
TestFiltering()454     public void TestFiltering() {
455 
456         Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
457         tempTrans.setFilter(new UnicodeSet("[a]"));
458         String tempResult = tempTrans.transform("xa");
459         assertEquals("context should not be filtered ", "xb", tempResult);
460 
461         tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
462         tempResult = tempTrans.transform("xa");
463         assertEquals("context should not be filtered ", "xb", tempResult);
464 
465         Transliterator hex = Transliterator.getInstance("Any-Hex");
466         hex.setFilter(new UnicodeFilter() {
467             @Override
468             public boolean contains(int c) {
469                 return c != 'c';
470             }
471             @Override
472             public String toPattern(boolean escapeUnprintable) {
473                 return "";
474             }
475             @Override
476             public boolean matchesIndexValue(int v) {
477                 return false;
478             }
479             @Override
480             public void addMatchSetTo(UnicodeSet toUnionTo) {}
481         });
482         String s = "abcde";
483         String out = hex.transliterate(s);
484         String exp = "\\u0061\\u0062c\\u0064\\u0065";
485         if (out.equals(exp)) {
486             logln("Ok:   \"" + exp + "\"");
487         } else {
488             logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
489         }
490     }
491 
492     /**
493      * Test anchors
494      */
495     @Test
TestAnchors()496     public void TestAnchors() {
497         expect("^ab  > 01 ;" +
498                 " ab  > |8 ;" +
499                 "  b  > k ;" +
500                 " 8x$ > 45 ;" +
501                 " 8x  > 77 ;",
502 
503                 "ababbabxabx",
504         "018k7745");
505         expect("$s = [z$] ;" +
506                 "$s{ab    > 01 ;" +
507                 "   ab    > |8 ;" +
508                 "    b    > k ;" +
509                 "   8x}$s > 45 ;" +
510                 "   8x    > 77 ;",
511 
512                 "abzababbabxzabxabx",
513         "01z018k45z01x45");
514     }
515 
516     /**
517      * Test pattern quoting and escape mechanisms.
518      */
519     @Test
TestPatternQuoting()520     public void TestPatternQuoting() {
521         // Array of 3n items
522         // Each item is <rules>, <input>, <expected output>
523         String[] DATA = {
524                 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
525         };
526 
527         for (int i=0; i<DATA.length; i+=3) {
528             logln("Pattern: " + Utility.escape(DATA[i]));
529             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
530             expect(t, DATA[i+1], DATA[i+2]);
531         }
532     }
533 
534     @Test
TestVariableNames()535     public void TestVariableNames() {
536         Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
537         if (gl == null) {
538             errln("FAIL: null Transliterator returned.");
539         }
540     }
541 
542     /**
543      * Regression test for bugs found in Greek transliteration.
544      */
545     @Test
TestJ277()546     public void TestJ277() {
547         Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
548 
549         char sigma = (char)0x3C3;
550         char upsilon = (char)0x3C5;
551         char nu = (char)0x3BD;
552         // not used char PHI = (char)0x3A6;
553         char alpha = (char)0x3B1;
554         // not used char omega = (char)0x3C9;
555         // not used char omicron = (char)0x3BF;
556         // not used char epsilon = (char)0x3B5;
557 
558         // sigma upsilon nu -> syn
559         StringBuffer buf = new StringBuffer();
560         buf.append(sigma).append(upsilon).append(nu);
561         String syn = buf.toString();
562         expect(gl, syn, "syn");
563 
564         // sigma alpha upsilon nu -> saun
565         buf.setLength(0);
566         buf.append(sigma).append(alpha).append(upsilon).append(nu);
567         String sayn = buf.toString();
568         expect(gl, sayn, "saun");
569 
570         // Again, using a smaller rule set
571         String rules =
572             "$alpha   = \u03B1;" +
573             "$nu      = \u03BD;" +
574             "$sigma   = \u03C3;" +
575             "$ypsilon = \u03C5;" +
576             "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +
577             "s <>           $sigma;" +
578             "a <>           $alpha;" +
579             "u <>  $vowel { $ypsilon;" +
580             "y <>           $ypsilon;" +
581             "n <>           $nu;";
582         Transliterator mini = Transliterator.createFromRules
583         ("mini", rules, Transliterator.REVERSE);
584         expect(mini, syn, "syn");
585         expect(mini, sayn, "saun");
586 
587         //|    // Transliterate the Greek locale data
588         //|    Locale el("el");
589         //|    DateFormatSymbols syms(el, status);
590         //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
591         //|    int32_t i, count;
592         //|    const UnicodeString* data = syms.getMonths(count);
593         //|    for (i=0; i<count; ++i) {
594         //|        if (data[i].length() == 0) {
595         //|            continue;
596         //|        }
597         //|        UnicodeString out(data[i]);
598         //|        gl->transliterate(out);
599         //|        bool_t ok = TRUE;
600         //|        if (data[i].length() >= 2 && out.length() >= 2 &&
601         //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
602         //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
603         //|                ok = FALSE;
604         //|            }
605         //|        }
606         //|        if (ok) {
607         //|            logln(prettify(data[i] + " -> " + out));
608         //|        } else {
609         //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
610         //|        }
611         //|    }
612     }
613 
614     //    /**
615     //     * Prefix, suffix support in hex transliterators
616     //     */
617     //    public void TestJ243() {
618     //        // Test default Hex-Any, which should handle
619     //        // \\u, \\U, u+, and U+
620     //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
621     //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
622     //
623     //        // Try a custom Hex-Any
624     //        // \\uXXXX and &#xXXXX;
625     //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
626     //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
627     //               "abcd5fx012&#x00033;");
628     //
629     //        // Try custom Any-Hex (default is tested elsewhere)
630     //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
631     //        expect(hex3, "012", "&#x30;&#x31;&#x32;");
632     //    }
633 
634     @Test
TestJ329()635     public void TestJ329() {
636 
637         Object[] DATA = {
638                 Boolean.FALSE, "a > b; c > d",
639                 Boolean.TRUE,  "a > b; no operator; c > d",
640         };
641 
642         for (int i=0; i<DATA.length; i+=2) {
643             String err = null;
644             try {
645                 Transliterator.createFromRules("<ID>",
646                         (String) DATA[i+1],
647                         Transliterator.FORWARD);
648             } catch (IllegalArgumentException e) {
649                 err = e.getMessage();
650             }
651             boolean gotError = (err != null);
652             String desc = (String) DATA[i+1] +
653             (gotError ? (" -> error: " + err) : " -> no error");
654             if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
655                 logln("Ok:   " + desc);
656             } else {
657                 errln("FAIL: " + desc);
658             }
659         }
660     }
661 
662     /**
663      * Test segments and segment references.
664      */
665     @Test
TestSegments()666     public void TestSegments() {
667         // Array of 3n items
668         // Each item is <rules>, <input>, <expected output>
669         String[] DATA = {
670                 "([a-z]) '.' ([0-9]) > $2 '-' $1",
671                 "abc.123.xyz.456",
672                 "ab1-c23.xy4-z56",
673         };
674 
675         for (int i=0; i<DATA.length; i+=3) {
676             logln("Pattern: " + Utility.escape(DATA[i]));
677             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
678             expect(t, DATA[i+1], DATA[i+2]);
679         }
680     }
681 
682     /**
683      * Test cursor positioning outside of the key
684      */
685     @Test
TestCursorOffset()686     public void TestCursorOffset() {
687         // Array of 3n items
688         // Each item is <rules>, <input>, <expected output>
689         String[] DATA = {
690                 "pre {alpha} post > | @ ALPHA ;" +
691                 "eALPHA > beta ;" +
692                 "pre {beta} post > BETA @@ | ;" +
693                 "post > xyz",
694 
695                 "prealphapost prebetapost",
696                 "prbetaxyz preBETApost",
697         };
698 
699         for (int i=0; i<DATA.length; i+=3) {
700             logln("Pattern: " + Utility.escape(DATA[i]));
701             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
702             expect(t, DATA[i+1], DATA[i+2]);
703         }
704     }
705 
706     /**
707      * Test zero length and > 1 char length variable values.  Test
708      * use of variable refs in UnicodeSets.
709      */
710     @Test
TestArbitraryVariableValues()711     public void TestArbitraryVariableValues() {
712         // Array of 3n items
713         // Each item is <rules>, <input>, <expected output>
714         String[] DATA = {
715                 "$abe = ab;" +
716                 "$pat = x[yY]z;" +
717                 "$ll  = 'a-z';" +
718                 "$llZ = [$ll];" +
719                 "$llY = [$ll$pat];" +
720                 "$emp = ;" +
721 
722                 "$abe > ABE;" +
723                 "$pat > END;" +
724                 "$llZ > 1;" +
725                 "$llY > 2;" +
726                 "7$emp 8 > 9;" +
727                 "",
728 
729                 "ab xYzxyz stY78",
730                 "ABE ENDEND 1129",
731         };
732 
733         for (int i=0; i<DATA.length; i+=3) {
734             logln("Pattern: " + Utility.escape(DATA[i]));
735             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
736             expect(t, DATA[i+1], DATA[i+2]);
737         }
738     }
739 
740     /**
741      * Confirm that the contextStart, contextLimit, start, and limit
742      * behave correctly.
743      */
744     @Test
TestPositionHandling()745     public void TestPositionHandling() {
746         // Array of 3n items
747         // Each item is <rules>, <input>, <expected output>
748         String[] DATA = {
749                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
750                 "xtat txtb", // pos 0,9,0,9
751                 "xTTaSS TTxUUb",
752 
753                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
754                 "xtat txtb", // pos 2,9,3,8
755                 "xtaSS TTxUUb",
756 
757                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
758                 "xtat txtb", // pos 3,8,3,8
759                 "xtaTT TTxTTb",
760         };
761 
762         // Array of 4n positions -- these go with the DATA array
763         // They are: contextStart, contextLimit, start, limit
764         int[] POS = {
765                 0, 9, 0, 9,
766                 2, 9, 3, 8,
767                 3, 8, 3, 8,
768         };
769 
770         int n = DATA.length/3;
771         for (int i=0; i<n; i++) {
772             Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
773             Transliterator.Position pos = new Transliterator.Position(
774                     POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
775             ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
776             t.transliterate(rsource, pos);
777             t.finishTransliteration(rsource, pos);
778             String result = rsource.toString();
779             String exp = DATA[3*i+2];
780             expectAux(Utility.escape(DATA[3*i]),
781                     DATA[3*i+1],
782                     result,
783                     result.equals(exp),
784                     exp);
785         }
786     }
787 
788     /**
789      * Test the Hiragana-Katakana transliterator.
790      */
791     @Test
TestHiraganaKatakana()792     public void TestHiraganaKatakana() {
793         Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
794         Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
795 
796         // Array of 3n items
797         // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
798         String[] DATA = {
799                 "both",
800                 "\u3042\u3090\u3099\u3092\u3050",
801                 "\u30A2\u30F8\u30F2\u30B0",
802 
803                 "kh",
804                 "\u307C\u3051\u3060\u3042\u3093\u30FC",
805                 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
806         };
807 
808         for (int i=0; i<DATA.length; i+=3) {
809             switch (DATA[i].charAt(0)) {
810             case 'h': // Hiragana-Katakana
811                 expect(hk, DATA[i+1], DATA[i+2]);
812                 break;
813             case 'k': // Katakana-Hiragana
814                 expect(kh, DATA[i+2], DATA[i+1]);
815                 break;
816             case 'b': // both
817                 expect(hk, DATA[i+1], DATA[i+2]);
818                 expect(kh, DATA[i+2], DATA[i+1]);
819                 break;
820             }
821         }
822 
823     }
824 
    /**
     * Intentionally empty placeholder. Per the note in the body, the J476
     * copy-constructor test exists only in the C++ test suite; this method
     * performs no checks.
     */
    @Test
    public void TestCopyJ476() {
        // This is a C++-only copy constructor test
    }
829 
830     /**
831      * Test inter-Indic transliterators.  These are composed.
832      */
833     @Test
TestInterIndic()834     public void TestInterIndic() {
835         String ID = "Devanagari-Gujarati";
836         Transliterator dg = Transliterator.getInstance(ID);
837         if (dg == null) {
838             errln("FAIL: getInstance(" + ID + ") returned null");
839             return;
840         }
841         String id = dg.getID();
842         if (!id.equals(ID)) {
843             errln("FAIL: getInstance(" + ID + ").getID() => " + id);
844         }
845         String dev = "\u0901\u090B\u0925";
846         String guj = "\u0A81\u0A8B\u0AA5";
847         expect(dg, dev, guj);
848     }
849 
850     /**
851      * Test filter syntax in IDs. (J23)
852      */
853     @Test
TestFilterIDs()854     public void TestFilterIDs() {
855         String[] DATA = {
856                 "[aeiou]Any-Hex", // ID
857                 "[aeiou]Hex-Any", // expected inverse ID
858                 "quizzical",      // src
859                 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
860 
861                 "[aeiou]Any-Hex;[^5]Hex-Any",
862                 "[^5]Any-Hex;[aeiou]Hex-Any",
863                 "quizzical",
864                 "q\\u0075izzical",
865 
866                 "[abc]Null",
867                 "[abc]Null",
868                 "xyz",
869                 "xyz",
870         };
871 
872         for (int i=0; i<DATA.length; i+=4) {
873             String ID = DATA[i];
874             Transliterator t = Transliterator.getInstance(ID);
875             expect(t, DATA[i+2], DATA[i+3]);
876 
877             // Check the ID
878             if (!ID.equals(t.getID())) {
879                 errln("FAIL: getInstance(" + ID + ").getID() => " +
880                         t.getID());
881             }
882 
883             // Check the inverse
884             String uID = DATA[i+1];
885             Transliterator u = t.getInverse();
886             if (u == null) {
887                 errln("FAIL: " + ID + ".getInverse() returned NULL");
888             } else if (!u.getID().equals(uID)) {
889                 errln("FAIL: " + ID + ".getInverse().getID() => " +
890                         u.getID() + ", expected " + uID);
891             }
892         }
893     }
894 
895     /**
896      * Test the case mapping transliterators.
897      */
898     @Test
TestCaseMap()899     public void TestCaseMap() {
900         Transliterator toUpper =
901             Transliterator.getInstance("Any-Upper[^xyzXYZ]");
902         Transliterator toLower =
903             Transliterator.getInstance("Any-Lower[^xyzXYZ]");
904         Transliterator toTitle =
905             Transliterator.getInstance("Any-Title[^xyzXYZ]");
906 
907         expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
908         "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
909         expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
910         "the quick brown foX jumped over the lazY dogs.");
911         expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
912         "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
913     }
914 
915     /**
916      * Test the name mapping transliterators.
917      */
918     @Test
TestNameMap()919     public void TestNameMap() {
920         Transliterator uni2name =
921             Transliterator.getInstance("Any-Name[^abc]");
922         Transliterator name2uni =
923             Transliterator.getInstance("Name-Any");
924 
925         expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
926         "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
927         expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
928         "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
929 
930         // round trip
931         Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
932 
933         String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
934         expect(t, s, s);
935     }
936 
937     /**
938      * Test liberalized ID syntax.  1006c
939      */
940     @Test
TestLiberalizedID()941     public void TestLiberalizedID() {
942         // Some test cases have an expected getID() value of NULL.  This
943         // means I have disabled the test case for now.  This stuff is
944         // still under development, and I haven't decided whether to make
945         // getID() return canonical case yet.  It will all get rewritten
946         // with the move to Source-Target/Variant IDs anyway. [aliu]
947         String DATA[] = {
948                 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
949                 "  Null  ", "Null", "whitespace",
950                 " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
951                 "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",
952         };
953 
954         for (int i=0; i<DATA.length; i+=3) {
955             try {
956                 Transliterator t = Transliterator.getInstance(DATA[i]);
957                 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
958                     logln("Ok: " + DATA[i+2] +
959                             " create ID \"" + DATA[i] + "\" => \"" +
960                             t.getID() + "\"");
961                 } else {
962                     errln("FAIL: " + DATA[i+2] +
963                             " create ID \"" + DATA[i] + "\" => \"" +
964                             t.getID() + "\", exp \"" + DATA[i+1] + "\"");
965                 }
966             } catch (IllegalArgumentException e) {
967                 errln("FAIL: " + DATA[i+2] +
968                         " create ID \"" + DATA[i] + "\"");
969             }
970         }
971     }
972 
973     @Test
TestCreateInstance()974     public void TestCreateInstance() {
975         String FORWARD = "F";
976         String REVERSE = "R";
977         String DATA[] = {
978                 // Column 1: id
979                 // Column 2: direction
980                 // Column 3: expected ID, or "" if expect failure
981                 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
982 
983                 // JB#2689: bad compound causes crash
984                 "InvalidSource-InvalidTarget", FORWARD, "",
985                 "InvalidSource-InvalidTarget", REVERSE, "",
986                 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
987                 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
988                 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
989                 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
990 
991                 null
992         };
993 
994         for (int i=0; DATA[i]!=null; i+=3) {
995             String id=DATA[i];
996             int dir = (DATA[i+1]==FORWARD)?
997                     Transliterator.FORWARD:Transliterator.REVERSE;
998             String expID=DATA[i+2];
999             Exception e = null;
1000             Transliterator t;
1001             try {
1002                 t = Transliterator.getInstance(id,dir);
1003             } catch (Exception e1) {
1004                 e = e1;
1005                 t = null;
1006             }
1007             String newID = (t!=null)?t.getID():"";
1008             boolean ok = (newID.equals(expID));
1009             if (t==null) {
1010                 newID = e.getMessage();
1011             }
1012             if (ok) {
1013                 logln("Ok: createInstance(" +
1014                         id + "," + DATA[i+1] + ") => " + newID);
1015             } else {
1016                 errln("FAIL: createInstance(" +
1017                         id + "," + DATA[i+1] + ") => " + newID +
1018                         ", expected " + expID);
1019             }
1020         }
1021     }
1022 
1023     /**
1024      * Test the normalization transliterator.
1025      */
1026     @Test
TestNormalizationTransliterator()1027     public void TestNormalizationTransliterator() {
1028         // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
1029         // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1030         String[][] CANON = {
1031                 // Input               Decomposed            Composed
1032                 {"cat",                "cat",                "cat"               },
1033                 {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },
1034 
1035                 {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above
1036                 {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above
1037 
1038                 {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above
1039                 {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below
1040                 {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above
1041 
1042                 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1043                 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1044 
1045                 {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave
1046                 {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave
1047                 {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron
1048 
1049                 {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign
1050                 {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring
1051 
1052                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0
1053                 {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0
1054 
1055                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1056                 {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },
1057 
1058                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1059                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1060                 {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten
1061                 {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten
1062                 {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten
1063 
1064                 {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },
1065         };
1066 
1067         String[][] COMPAT = {
1068                 // Input               Decomposed            Composed
1069                 {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed
1070 
1071                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0
1072                 {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i
1073 
1074                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1075                 {"Henry \u2163",       "Henry IV",           "Henry IV"          },
1076 
1077                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1078                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1079 
1080                 {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten
1081         };
1082 
1083         Transliterator NFD = Transliterator.getInstance("NFD");
1084         Transliterator NFC = Transliterator.getInstance("NFC");
1085         for (int i=0; i<CANON.length; ++i) {
1086             String in = CANON[i][0];
1087             String expd = CANON[i][1];
1088             String expc = CANON[i][2];
1089             expect(NFD, in, expd);
1090             expect(NFC, in, expc);
1091         }
1092 
1093         Transliterator NFKD = Transliterator.getInstance("NFKD");
1094         Transliterator NFKC = Transliterator.getInstance("NFKC");
1095         for (int i=0; i<COMPAT.length; ++i) {
1096             String in = COMPAT[i][0];
1097             String expkd = COMPAT[i][1];
1098             String expkc = COMPAT[i][2];
1099             expect(NFKD, in, expkd);
1100             expect(NFKC, in, expkc);
1101         }
1102 
1103         Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1104         expect(t, "\u010dx", "c\u030C");
1105     }
1106 
1107     /**
1108      * Test compound RBT rules.
1109      */
1110     @Test
TestCompoundRBT()1111     public void TestCompoundRBT() {
1112         // Careful with spacing and ';' here:  Phrase this exactly
1113         // as toRules() is going to return it.  If toRules() changes
1114         // with regard to spacing or ';', then adjust this string.
1115         String rule = "::Hex-Any;\n" +
1116         "::Any-Lower;\n" +
1117         "a > '.A.';\n" +
1118         "b > '.B.';\n" +
1119         "::[^t]Any-Upper;";
1120         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1121         if (t == null) {
1122             errln("FAIL: createFromRules failed");
1123             return;
1124         }
1125         expect(t, "\u0043at in the hat, bat on the mat",
1126         "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1127         String r = t.toRules(true);
1128         if (r.equals(rule)) {
1129             logln("OK: toRules() => " + r);
1130         } else {
1131             errln("FAIL: toRules() => " + r +
1132                     ", expected " + rule);
1133         }
1134 
1135         // Now test toRules
1136         t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1137         if (t == null) {
1138             errln("FAIL: createInstance failed");
1139             return;
1140         }
1141         String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1142         r = t.toRules(true);
1143         if (!r.equals(exp)) {
1144             errln("FAIL: toRules() => " + r +
1145                     ", expected " + exp);
1146         } else {
1147             logln("OK: toRules() => " + r);
1148         }
1149 
1150         // Round trip the result of toRules
1151         t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1152         if (t == null) {
1153             errln("FAIL: createFromRules #2 failed");
1154             return;
1155         } else {
1156             logln("OK: createFromRules(" + r + ") succeeded");
1157         }
1158 
1159         // Test toRules again
1160         r = t.toRules(true);
1161         if (!r.equals(exp)) {
1162             errln("FAIL: toRules() => " + r +
1163                     ", expected " + exp);
1164         } else {
1165             logln("OK: toRules() => " + r);
1166         }
1167 
1168         // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1169         // to what the regenerated ID will look like.
1170         String id = "Upper(Lower);(NFKC)";
1171         t = Transliterator.getInstance(id, Transliterator.FORWARD);
1172         if (t == null) {
1173             errln("FAIL: createInstance #2 failed");
1174             return;
1175         }
1176         if (t.getID().equals(id)) {
1177             logln("OK: created " + id);
1178         } else {
1179             errln("FAIL: createInstance(" + id +
1180                     ").getID() => " + t.getID());
1181         }
1182 
1183         Transliterator u = t.getInverse();
1184         if (u == null) {
1185             errln("FAIL: createInverse failed");
1186             return;
1187         }
1188         exp = "NFKC();Lower(Upper)";
1189         if (u.getID().equals(exp)) {
1190             logln("OK: createInverse(" + id + ") => " +
1191                     u.getID());
1192         } else {
1193             errln("FAIL: createInverse(" + id + ") => " +
1194                     u.getID());
1195         }
1196     }
1197 
1198     /**
1199      * Compound filter semantics were orginially not implemented
1200      * correctly.  Originally, each component filter f(i) is replaced by
1201      * f'(i) = f(i) && g, where g is the filter for the compound
1202      * transliterator.
1203      *
1204      * From Mark:
1205      *
1206      * Suppose and I have a transliterator X. Internally X is
1207      * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1208      *
1209      * The compound should convert all greek characters (through latin) to
1210      * cyrillic, then lowercase the result. The filter should say "don't
1211      * touch 'A' in the original". But because an intermediate result
1212      * happens to go through "A", the Greek Alpha gets hung up.
1213      */
1214     @Test
TestCompoundFilter()1215     public void TestCompoundFilter() {
1216         Transliterator t = Transliterator.getInstance
1217         ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
1218         t.setFilter(new UnicodeSet("[^A]"));
1219 
1220         // Only the 'A' at index 1 should remain unchanged
1221         expect(t,
1222                 CharsToUnicodeString("BA\\u039A\\u0391"),
1223                 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1224     }
1225 
1226     /**
1227      * Test the "Remove" transliterator.
1228      */
1229     @Test
TestRemove()1230     public void TestRemove() {
1231         Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1232         expect(t, "The quick brown fox.",
1233         "Th qck brwn fx.");
1234     }
1235 
1236     @Test
TestToRules()1237     public void TestToRules() {
1238         String RBT = "rbt";
1239         String SET = "set";
1240         String[] DATA = {
1241                 RBT,
1242                 "$a=\\u4E61; [$a] > A;",
1243                 "[\\u4E61] > A;",
1244 
1245                 RBT,
1246                 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1247                 "[[:Zs:][:Zl:]]{a} > A;",
1248 
1249                 SET,
1250                 "[[:Zs:][:Zl:]]",
1251                 "[[:Zs:][:Zl:]]",
1252 
1253                 SET,
1254                 "[:Ps:]",
1255                 "[:Ps:]",
1256 
1257                 SET,
1258                 "[:L:]",
1259                 "[:L:]",
1260 
1261                 SET,
1262                 "[[:L:]-[A]]",
1263                 "[[:L:]-[A]]",
1264 
1265                 SET,
1266                 "[~[:Lu:][:Ll:]]",
1267                 "[~[:Lu:][:Ll:]]",
1268 
1269                 SET,
1270                 "[~[a-z]]",
1271                 "[~[a-z]]",
1272 
1273                 RBT,
1274                 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1275                 "[^[:Zs:]]{a} > A;",
1276 
1277                 RBT,
1278                 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1279                 "[[a-z]-[:Zs:]]{a} > A;",
1280 
1281                 RBT,
1282                 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1283                 "[[:Zs:]&[a-z]]{a} > A;",
1284 
1285                 RBT,
1286                 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1287                 "[x[:Zs:]]{a} > A;",
1288 
1289                 RBT,
1290                 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1291                 "$macron = \\u0304 ;"+
1292                 "$evowel = [aeiouyAEIOUY] ;"+
1293                 "$iotasub = \\u0345 ;"+
1294                 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1295                 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1296 
1297                 RBT,
1298                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1299                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1300         };
1301 
1302         for (int d=0; d < DATA.length; d+=3) {
1303             if (DATA[d] == RBT) {
1304                 // Transliterator test
1305                 Transliterator t = Transliterator.createFromRules("ID",
1306                         DATA[d+1], Transliterator.FORWARD);
1307                 if (t == null) {
1308                     errln("FAIL: createFromRules failed");
1309                     return;
1310                 }
1311                 String rules, escapedRules;
1312                 rules = t.toRules(false);
1313                 escapedRules = t.toRules(true);
1314                 String expRules = Utility.unescape(DATA[d+2]);
1315                 String expEscapedRules = DATA[d+2];
1316                 if (rules.equals(expRules)) {
1317                     logln("Ok: " + DATA[d+1] +
1318                             " => " + Utility.escape(rules));
1319                 } else {
1320                     errln("FAIL: " + DATA[d+1] +
1321                             " => " + Utility.escape(rules + ", exp " + expRules));
1322                 }
1323                 if (escapedRules.equals(expEscapedRules)) {
1324                     logln("Ok: " + DATA[d+1] +
1325                             " => " + escapedRules);
1326                 } else {
1327                     errln("FAIL: " + DATA[d+1] +
1328                             " => " + escapedRules + ", exp " + expEscapedRules);
1329                 }
1330 
1331             } else {
1332                 // UnicodeSet test
1333                 String pat = DATA[d+1];
1334                 String expToPat = DATA[d+2];
1335                 UnicodeSet set = new UnicodeSet(pat);
1336 
1337                 // Adjust spacing etc. as necessary.
1338                 String toPat;
1339                 toPat = set.toPattern(true);
1340                 if (expToPat.equals(toPat)) {
1341                     logln("Ok: " + pat +
1342                             " => " + toPat);
1343                 } else {
1344                     errln("FAIL: " + pat +
1345                             " => " + Utility.escape(toPat) +
1346                             ", exp " + Utility.escape(pat));
1347                 }
1348             }
1349         }
1350     }
1351 
1352     @Test
TestContext()1353     public void TestContext() {
1354         Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
1355 
1356         expect("de > x; {d}e > y;",
1357                 "de",
1358                 "ye",
1359                 pos);
1360 
1361         expect("ab{c} > z;",
1362                 "xadabdabcy",
1363         "xadabdabzy");
1364     }
1365 
CharsToUnicodeString(String s)1366     static final String CharsToUnicodeString(String s) {
1367         return Utility.unescape(s);
1368     }
1369 
1370     @Test
TestSupplemental()1371     public void TestSupplemental() {
1372 
1373         expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1374         "a > $a; $s > i;"),
1375         CharsToUnicodeString("ab\\U0001030Fx"),
1376         CharsToUnicodeString("\\U00010300bix"));
1377 
1378         expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1379                 "$b=[A-Z\\U00010400-\\U0001044D];" +
1380         "($a)($b) > $2 $1;"),
1381         CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1382         CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1383 
1384         // k|ax\\U00010300xm
1385 
1386         // k|a\\U00010400\\U00010300xm
1387         // ky|\\U00010400\\U00010300xm
1388         // ky\\U00010400|\\U00010300xm
1389 
1390         // ky\\U00010400|\\U00010300\\U00010400m
1391         // ky\\U00010400y|\\U00010400m
1392         expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1393                 "$a {x} > | @ \\U00010400;" +
1394         "{$a} [^\\u0000-\\uFFFF] > y;"),
1395         CharsToUnicodeString("kax\\U00010300xm"),
1396         CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1397 
1398         expect(Transliterator.getInstance("Any-Name"),
1399                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1400         "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1401 
1402         expect(Transliterator.getInstance("Name-Any"),
1403                 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1404                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1405 
1406         expect(Transliterator.getInstance("Any-Hex/Unicode"),
1407                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1408         "U+10330U+10FF00U+E0061U+00A0");
1409 
1410         expect(Transliterator.getInstance("Any-Hex/C"),
1411                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1412         "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1413 
1414         expect(Transliterator.getInstance("Any-Hex/Perl"),
1415                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1416         "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1417 
1418         expect(Transliterator.getInstance("Any-Hex/Java"),
1419                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1420         "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1421 
1422         expect(Transliterator.getInstance("Any-Hex/XML"),
1423                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1424         "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1425 
1426         expect(Transliterator.getInstance("Any-Hex/XML10"),
1427                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1428         "&#66352;&#1113856;&#917601;&#160;");
1429 
1430         expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1431                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1432                 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1433     }
1434 
1435     @Test
TestQuantifier()1436     public void TestQuantifier() {
1437 
1438         // Make sure @ in a quantified anteContext works
1439         expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1440                 "AAAAAb",
1441         "aaa(aac)");
1442 
1443         // Make sure @ in a quantified postContext works
1444         expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1445                 "baaaaa",
1446         "caa(aaa)");
1447 
1448         // Make sure @ in a quantified postContext with seg ref works
1449         expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1450                 "baaaaa",
1451         "baa(aaa)");
1452 
1453         // Make sure @ past ante context doesn't enter ante context
1454         Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1455         expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1456                 "xxxab",
1457                 "xxx(ac)",
1458                 pos);
1459 
1460         // Make sure @ past post context doesn't pass limit
1461         Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1462         expect("{b} a+ > c @@ |; x > y; a > A;",
1463                 "baxx",
1464                 "caxx",
1465                 pos2);
1466 
1467         // Make sure @ past post context doesn't enter post context
1468         expect("{b} a+ > c @@ |; x > y; a > A;",
1469                 "baxx",
1470         "cayy");
1471 
1472         expect("(ab)? c > d;",
1473                 "c abc ababc",
1474         "d d abd");
1475 
1476         // NOTE: The (ab)+ when referenced just yields a single "ab",
1477         // not the full sequence of them.  This accords with perl behavior.
1478         expect("(ab)+ {x} > '(' $1 ')';",
1479                 "x abx ababxy",
1480         "x ab(ab) abab(ab)y");
1481 
1482         expect("b+ > x;",
1483                 "ac abc abbc abbbc",
1484         "ac axc axc axc");
1485 
1486         expect("[abc]+ > x;",
1487                 "qac abrc abbcs abtbbc",
1488         "qx xrx xs xtx");
1489 
1490         expect("q{(ab)+} > x;",
1491                 "qa qab qaba qababc qaba",
1492         "qa qx qxa qxc qxa");
1493 
1494         expect("q(ab)* > x;",
1495                 "qa qab qaba qababc",
1496         "xa x xa xc");
1497 
1498         // NOTE: The (ab)+ when referenced just yields a single "ab",
1499         // not the full sequence of them.  This accords with perl behavior.
1500         expect("q(ab)* > '(' $1 ')';",
1501                 "qa qab qaba qababc",
1502         "()a (ab) (ab)a (ab)c");
1503 
1504         // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1505         // quoted string
1506         expect("'ab'+ > x;",
1507                 "bb ab ababb",
1508         "bb x xb");
1509 
1510         // $foo+ and $foo* -- the quantifier should apply to the entire
1511         // variable reference
1512         expect("$var = ab; $var+ > x;",
1513                 "bb ab ababb",
1514         "bb x xb");
1515     }
1516 
1517     static class TestFact implements Transliterator.Factory {
1518         static class NameableNullTrans extends Transliterator {
NameableNullTrans(String id)1519             public NameableNullTrans(String id) {
1520                 super(id, null);
1521             }
1522             @Override
handleTransliterate(Replaceable text, Position offsets, boolean incremental)1523             protected void handleTransliterate(Replaceable text,
1524                     Position offsets, boolean incremental) {
1525                 offsets.start = offsets.limit;
1526             }
1527         }
1528         String id;
TestFact(String theID)1529         public TestFact(String theID) {
1530             id = theID;
1531         }
1532         @Override
getInstance(String ignoredID)1533         public Transliterator getInstance(String ignoredID) {
1534             return new NameableNullTrans(id);
1535         }
1536     }
1537 
1538     @Test
TestSTV()1539     public void TestSTV() {
1540         Enumeration es = Transliterator.getAvailableSources();
1541         for (int i=0; es.hasMoreElements(); ++i) {
1542             String source = (String) es.nextElement();
1543             logln("" + i + ": " + source);
1544             if (source.length() == 0) {
1545                 errln("FAIL: empty source");
1546                 continue;
1547             }
1548             Enumeration et = Transliterator.getAvailableTargets(source);
1549             for (int j=0; et.hasMoreElements(); ++j) {
1550                 String target = (String) et.nextElement();
1551                 logln(" " + j + ": " + target);
1552                 if (target.length() == 0) {
1553                     errln("FAIL: empty target");
1554                     continue;
1555                 }
1556                 Enumeration ev = Transliterator.getAvailableVariants(source, target);
1557                 for (int k=0; ev.hasMoreElements(); ++k) {
1558                     String variant = (String) ev.nextElement();
1559                     if (variant.length() == 0) {
1560                         logln("  " + k + ": <empty>");
1561                     } else {
1562                         logln("  " + k + ": " + variant);
1563                     }
1564                 }
1565             }
1566         }
1567 
1568         // Test registration
1569         String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1570         String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1571         String[] SOURCES = { null, "Seoridf", "Oewoir" };
1572         for (int i=0; i<3; ++i) {
1573             Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1574             try {
1575                 Transliterator t = Transliterator.getInstance(IDS[i]);
1576                 if (t.getID().equals(IDS[i])) {
1577                     logln("Ok: Registration/creation succeeded for ID " +
1578                             IDS[i]);
1579                 } else {
1580                     errln("FAIL: Registration of ID " +
1581                             IDS[i] + " creates ID " + t.getID());
1582                 }
1583                 Transliterator.unregister(IDS[i]);
1584                 try {
1585                     t = Transliterator.getInstance(IDS[i]);
1586                     errln("FAIL: Unregistration failed for ID " +
1587                             IDS[i] + "; still receiving ID " + t.getID());
1588                 } catch (IllegalArgumentException e2) {
1589                     // Good; this is what we expect
1590                     logln("Ok; Unregistered " + IDS[i]);
1591                 }
1592             } catch (IllegalArgumentException e) {
1593                 errln("FAIL: Registration/creation failed for ID " +
1594                         IDS[i]);
1595             } finally {
1596                 Transliterator.unregister(IDS[i]);
1597             }
1598         }
1599 
1600         // Make sure getAvailable API reflects removal
1601         for (Enumeration e = Transliterator.getAvailableIDs();
1602         e.hasMoreElements(); ) {
1603             String id = (String) e.nextElement();
1604             for (int i=0; i<3; ++i) {
1605                 if (id.equals(FULL_IDS[i])) {
1606                     errln("FAIL: unregister(" + id + ") failed");
1607                 }
1608             }
1609         }
1610         for (Enumeration e = Transliterator.getAvailableTargets("Any");
1611         e.hasMoreElements(); ) {
1612             String t = (String) e.nextElement();
1613             if (t.equals(IDS[0])) {
1614                 errln("FAIL: unregister(Any-" + t + ") failed");
1615             }
1616         }
1617         for (Enumeration e = Transliterator.getAvailableSources();
1618         e.hasMoreElements(); ) {
1619             String s = (String) e.nextElement();
1620             for (int i=0; i<3; ++i) {
1621                 if (SOURCES[i] == null) continue;
1622                 if (s.equals(SOURCES[i])) {
1623                     errln("FAIL: unregister(" + s + "-*) failed");
1624                 }
1625             }
1626         }
1627     }
1628 
1629     /**
1630      * Test inverse of Greek-Latin; Title()
1631      */
1632     @Test
TestCompoundInverse()1633     public void TestCompoundInverse() {
1634         Transliterator t = Transliterator.getInstance
1635         ("Greek-Latin; Title()", Transliterator.REVERSE);
1636         if (t == null) {
1637             errln("FAIL: createInstance");
1638             return;
1639         }
1640         String exp = "(Title);Latin-Greek";
1641         if (t.getID().equals(exp)) {
1642             logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1643                     t.getID());
1644         } else {
1645             errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1646                     t.getID() + "\", expected \"" + exp + "\"");
1647         }
1648     }
1649 
1650     /**
1651      * Test NFD chaining with RBT
1652      */
1653     @Test
TestNFDChainRBT()1654     public void TestNFDChainRBT() {
1655         Transliterator t = Transliterator.createFromRules(
1656                 "TEST", "::NFD; aa > Q; a > q;",
1657                 Transliterator.FORWARD);
1658         logln(t.toRules(true));
1659         expect(t, "aa", "Q");
1660     }
1661 
1662     /**
1663      * Inverse of "Null" should be "Null". (J21)
1664      */
1665     @Test
TestNullInverse()1666     public void TestNullInverse() {
1667         Transliterator t = Transliterator.getInstance("Null");
1668         Transliterator u = t.getInverse();
1669         if (!u.getID().equals("Null")) {
1670             errln("FAIL: Inverse of Null should be Null");
1671         }
1672     }
1673 
1674     /**
1675      * Check ID of inverse of alias. (J22)
1676      */
1677     @Test
TestAliasInverseID()1678     public void TestAliasInverseID() {
1679         String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1680         Transliterator t = Transliterator.getInstance(ID);
1681         Transliterator u = t.getInverse();
1682         String exp = "Hangul-Latin";
1683         String got = u.getID();
1684         if (!got.equals(exp)) {
1685             errln("FAIL: Inverse of " + ID + " is " + got +
1686                     ", expected " + exp);
1687         }
1688     }
1689 
1690     /**
1691      * Test IDs of inverses of compound transliterators. (J20)
1692      */
1693     @Test
TestCompoundInverseID()1694     public void TestCompoundInverseID() {
1695         String ID = "Latin-Jamo;NFC(NFD)";
1696         Transliterator t = Transliterator.getInstance(ID);
1697         Transliterator u = t.getInverse();
1698         String exp = "NFD(NFC);Jamo-Latin";
1699         String got = u.getID();
1700         if (!got.equals(exp)) {
1701             errln("FAIL: Inverse of " + ID + " is " + got +
1702                     ", expected " + exp);
1703         }
1704     }
1705 
1706     /**
1707      * Test undefined variable.
1708      */
1709     @Test
TestUndefinedVariable()1710     public void TestUndefinedVariable() {
1711         String rule = "$initial } a <> \u1161;";
1712         try {
1713             Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1714         } catch (IllegalArgumentException e) {
1715             logln("OK: Got exception for " + rule + ", as expected: " +
1716                     e.getMessage());
1717             return;
1718         }
1719         errln("Fail: bogus rule " + rule + " compiled without error");
1720     }
1721 
1722     /**
1723      * Test empty context.
1724      */
1725     @Test
TestEmptyContext()1726     public void TestEmptyContext() {
1727         expect(" { a } > b;", "xay a ", "xby b ");
1728     }
1729 
1730     /**
1731      * Test compound filter ID syntax
1732      */
1733     @Test
TestCompoundFilterID()1734     public void TestCompoundFilterID() {
1735         String[] DATA = {
1736                 // Col. 1 = ID or rule set (latter must start with #)
1737 
1738                 // = columns > 1 are null if expect col. 1 to be illegal =
1739 
1740                 // Col. 2 = direction, "F..." or "R..."
1741                 // Col. 3 = source string
1742                 // Col. 4 = exp result
1743 
1744                 "[abc]; [abc]", null, null, null, // multiple filters
1745                 "Latin-Greek; [abc];", null, null, null, // misplaced filter
1746                 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1747                 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1748                 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1749                 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1750         };
1751 
1752         for (int i=0; i<DATA.length; i+=4) {
1753             String id = DATA[i];
1754             int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1755                     Transliterator.REVERSE : Transliterator.FORWARD;
1756             String source = DATA[i+2];
1757             String exp = DATA[i+3];
1758             boolean expOk = (DATA[i+1] != null);
1759             Transliterator t = null;
1760             IllegalArgumentException e = null;
1761             try {
1762                 if (id.charAt(0) == '#') {
1763                     t = Transliterator.createFromRules("ID", id, direction);
1764                 } else {
1765                     t = Transliterator.getInstance(id, direction);
1766                 }
1767             } catch (IllegalArgumentException ee) {
1768                 e = ee;
1769             }
1770             boolean ok = (t != null && e == null);
1771             if (ok == expOk) {
1772                 logln("Ok: " + id + " => " + t +
1773                         (e != null ? (", " + e.getMessage()) : ""));
1774                 if (source != null) {
1775                     expect(t, source, exp);
1776                 }
1777             } else {
1778                 errln("FAIL: " + id + " => " + t +
1779                         (e != null ? (", " + e.getMessage()) : ""));
1780             }
1781         }
1782     }
1783 
1784     /**
1785      * Test new property set syntax
1786      */
1787     @Test
TestPropertySet()1788     public void TestPropertySet() {
1789         expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1790         expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1791         "[ a stitch ]\n[ in time ]\r[ saves 9]");
1792     }
1793 
1794     /**
1795      * Test various failure points of the new 2.0 engine.
1796      */
1797     @Test
TestNewEngine()1798     public void TestNewEngine() {
1799         Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1800         // Katakana should be untouched
1801         expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1802 
1803         if (true) {
1804             // This test will only work if Transliterator.ROLLBACK is
1805             // true.  Otherwise, this test will fail, revealing a
1806             // limitation of global filters in incremental mode.
1807 
1808             Transliterator a =
1809                 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1810             Transliterator A =
1811                 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1812 
1813             //Transliterator array[] = new Transliterator[] {
1814             //    a,
1815             //    Transliterator.getInstance("NFD"),
1816             //    A };
1817             //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1818 
1819             try {
1820                 Transliterator.registerInstance(a);
1821                 Transliterator.registerInstance(A);
1822 
1823                 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1824                 expect(t, "aAaA", "bAbA");
1825 
1826                 Transliterator[] u = t.getElements();
1827                 assertTrue("getElements().length", u.length == 3);
1828                 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1829                 assertEquals("getElements()[1]", u[1].getID(), "NFD");
1830                 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1831 
1832                 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1833                 t.setFilter(new UnicodeSet("[:Ll:]"));
1834                 expect(t, "aAaA", "bAbA");
1835             } finally {
1836                 Transliterator.unregister("a_to_A");
1837                 Transliterator.unregister("A_to_b");
1838             }
1839         }
1840 
1841         expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1842                 "a",
1843         "ax");
1844 
1845         String gr =
1846             "$ddot = \u0308 ;" +
1847             "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1848             "$rough = \u0314 ;" +
1849             "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
1850             "\u03b1 <> a ;" +
1851             "$rough <> h ;";
1852 
1853         expect(gr, "\u03B1\u0314", "ha");
1854     }
1855 
1856     /**
1857      * Test quantified segment behavior.  We want:
1858      * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1859      */
1860     @Test
TestQuantifiedSegment()1861     public void TestQuantifiedSegment() {
1862         // The normal case
1863         expect("([abc]+) > x $1 x;", "cba", "xcbax");
1864 
1865         // The tricky case; the quantifier is around the segment
1866         expect("([abc])+ > x $1 x;", "cba", "xax");
1867 
1868         // Tricky case in reverse direction
1869         expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1870 
1871         // Check post-context segment
1872         expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1873 
1874         // Test toRule/toPattern for non-quantified segment.
1875         // Careful with spacing here.
1876         String r = "([a-c]){q} > x $1 x;";
1877         Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1878         String rr = t.toRules(true);
1879         if (!r.equals(rr)) {
1880             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1881         } else {
1882             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1883         }
1884 
1885         // Test toRule/toPattern for quantified segment.
1886         // Careful with spacing here.
1887         r = "([a-c])+{q} > x $1 x;";
1888         t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1889         rr = t.toRules(true);
1890         if (!r.equals(rr)) {
1891             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1892         } else {
1893             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1894         }
1895     }
1896 
1897     //======================================================================
1898     // Ram's tests
1899     //======================================================================
1900     /* this test performs  test of rules in ISO 15915 */
1901     @Test
TestDevanagariLatinRT()1902     public void  TestDevanagariLatinRT(){
1903         String[]  source = {
1904                 "bh\u0101rata",
1905                 "kra",
1906                 "k\u1E63a",
1907                 "khra",
1908                 "gra",
1909                 "\u1E45ra",
1910                 "cra",
1911                 "chra",
1912                 "j\u00F1a",
1913                 "jhra",
1914                 "\u00F1ra",
1915                 "\u1E6Dya",
1916                 "\u1E6Dhra",
1917                 "\u1E0Dya",
1918                 //"r\u0323ya", // \u095c is not valid in Devanagari
1919                 "\u1E0Dhya",
1920                 "\u1E5Bhra",
1921                 "\u1E47ra",
1922                 "tta",
1923                 "thra",
1924                 "dda",
1925                 "dhra",
1926                 "nna",
1927                 "pra",
1928                 "phra",
1929                 "bra",
1930                 "bhra",
1931                 "mra",
1932                 "\u1E49ra",
1933                 //"l\u0331ra",
1934                 "yra",
1935                 "\u1E8Fra",
1936                 //"l-",
1937                 "vra",
1938                 "\u015Bra",
1939                 "\u1E63ra",
1940                 "sra",
1941                 "hma",
1942                 "\u1E6D\u1E6Da",
1943                 "\u1E6D\u1E6Dha",
1944                 "\u1E6Dh\u1E6Dha",
1945                 "\u1E0D\u1E0Da",
1946                 "\u1E0D\u1E0Dha",
1947                 "\u1E6Dya",
1948                 "\u1E6Dhya",
1949                 "\u1E0Dya",
1950                 "\u1E0Dhya",
1951                 // Not roundtrippable --
1952                 // \u0939\u094d\u094d\u092E  - hma
1953                 // \u0939\u094d\u092E         - hma
1954                 // CharsToUnicodeString("hma"),
1955                 "hya",
1956                 "\u015Br\u0325",
1957                 "\u015Bca",
1958                 "\u0115",
1959                 "san\u0304j\u012Bb s\u0113nagupta",
1960                 "\u0101nand vaddir\u0101ju",
1961         };
1962         String[]  expected = {
1963                 "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */
1964                 "\u0915\u094D\u0930",          /* kra         */
1965                 "\u0915\u094D\u0937",          /* ks\u0323a  */
1966                 "\u0916\u094D\u0930",          /* khra        */
1967                 "\u0917\u094D\u0930",          /* gra         */
1968                 "\u0919\u094D\u0930",          /* n\u0307ra  */
1969                 "\u091A\u094D\u0930",          /* cra         */
1970                 "\u091B\u094D\u0930",          /* chra        */
1971                 "\u091C\u094D\u091E",          /* jn\u0303a  */
1972                 "\u091D\u094D\u0930",          /* jhra        */
1973                 "\u091E\u094D\u0930",          /* n\u0303ra  */
1974                 "\u091F\u094D\u092F",          /* t\u0323ya  */
1975                 "\u0920\u094D\u0930",          /* t\u0323hra */
1976                 "\u0921\u094D\u092F",          /* d\u0323ya  */
1977                 //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari
1978                 "\u0922\u094D\u092F",          /* d\u0323hya */
1979                 "\u0922\u093C\u094D\u0930",    /* r\u0323hra */
1980                 "\u0923\u094D\u0930",          /* n\u0323ra  */
1981                 "\u0924\u094D\u0924",          /* tta         */
1982                 "\u0925\u094D\u0930",          /* thra        */
1983                 "\u0926\u094D\u0926",          /* dda         */
1984                 "\u0927\u094D\u0930",          /* dhra        */
1985                 "\u0928\u094D\u0928",          /* nna         */
1986                 "\u092A\u094D\u0930",          /* pra         */
1987                 "\u092B\u094D\u0930",          /* phra        */
1988                 "\u092C\u094D\u0930",          /* bra         */
1989                 "\u092D\u094D\u0930",          /* bhra        */
1990                 "\u092E\u094D\u0930",          /* mra         */
1991                 "\u0929\u094D\u0930",          /* n\u0331ra  */
1992                 //"\u0934\u094D\u0930",          /* l\u0331ra  */
1993                 "\u092F\u094D\u0930",          /* yra         */
1994                 "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */
1995                 //"l-",
1996                 "\u0935\u094D\u0930",          /* vra         */
1997                 "\u0936\u094D\u0930",          /* s\u0301ra  */
1998                 "\u0937\u094D\u0930",          /* s\u0323ra  */
1999                 "\u0938\u094D\u0930",          /* sra         */
2000                 "\u0939\u094d\u092E",          /* hma         */
2001                 "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */
2002                 "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */
2003                 "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/
2004                 "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */
2005                 "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */
2006                 "\u091F\u094D\u092F",          /* t\u0323ya  */
2007                 "\u0920\u094D\u092F",          /* t\u0323hya */
2008                 "\u0921\u094D\u092F",          /* d\u0323ya  */
2009                 "\u0922\u094D\u092F",          /* d\u0323hya */
2010                 // "hma",                         /* hma         */
2011                 "\u0939\u094D\u092F",          /* hya         */
2012                 "\u0936\u0943",                /* s\u0301r\u0325a  */
2013                 "\u0936\u094D\u091A",          /* s\u0301ca  */
2014                 "\u090d",                      /* e\u0306    */
2015                 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
2016                 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
2017         };
2018 
2019         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
2020         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2021 
2022         for(int i= 0; i<source.length; i++){
2023             expect(latinToDev,(source[i]),(expected[i]));
2024             expect(devToLatin,(expected[i]),(source[i]));
2025         }
2026 
2027     }
2028     @Test
TestTeluguLatinRT()2029     public void  TestTeluguLatinRT(){
2030         String[]  source = {
2031                 "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */
2032                 "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */
2033                 "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */
2034                 "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */
2035                 "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */
2036                 "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */
2037                 "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */
2038                 "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */
2039                 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */
2040                 "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */
2041         };
2042 
2043         String[]  expected = {
2044                 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2045                 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2046                 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2047                 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2048                 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2049                 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2050                 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2051                 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2052                 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2053                 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2054         };
2055 
2056 
2057         Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2058         Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2059 
2060         for(int i= 0; i<source.length; i++){
2061             expect(latinToDev,(source[i]),(expected[i]));
2062             expect(devToLatin,(expected[i]),(source[i]));
2063         }
2064     }
2065 
2066     @Test
TestSanskritLatinRT()2067     public void  TestSanskritLatinRT(){
2068         int MAX_LEN =15;
2069         String[]  source = {
2070                 "rmk\u1E63\u0113t",
2071                 "\u015Br\u012Bmad",
2072                 "bhagavadg\u012Bt\u0101",
2073                 "adhy\u0101ya",
2074                 "arjuna",
2075                 "vi\u1E63\u0101da",
2076                 "y\u014Dga",
2077                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2078                 "uv\u0101cr\u0325",
2079                 "dharmak\u1E63\u0113tr\u0113",
2080                 "kuruk\u1E63\u0113tr\u0113",
2081                 "samav\u0113t\u0101",
2082                 "yuyutsava\u1E25",
2083                 "m\u0101mak\u0101\u1E25",
2084                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2085                 "kimakurvata",
2086                 "san\u0304java",
2087         };
2088         String[]  expected = {
2089                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2090                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2091                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2092                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2093                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2094                 "\u0935\u093f\u0937\u093e\u0926",
2095                 "\u092f\u094b\u0917",
2096                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2097                 "\u0909\u0935\u093E\u091A\u0943",
2098                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2099                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2100                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2101                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2102                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2103                 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2104                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2105                 "\u0938\u0902\u091c\u0935",
2106         };
2107 
2108         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2109         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2110         for(int i= 0; i<MAX_LEN; i++){
2111             expect(latinToDev,(source[i]),(expected[i]));
2112             expect(devToLatin,(expected[i]),(source[i]));
2113         }
2114     }
2115 
2116     @Test
TestCompoundLatinRT()2117     public void  TestCompoundLatinRT(){
2118         int MAX_LEN =15;
2119         String[]  source = {
2120                 "rmk\u1E63\u0113t",
2121                 "\u015Br\u012Bmad",
2122                 "bhagavadg\u012Bt\u0101",
2123                 "adhy\u0101ya",
2124                 "arjuna",
2125                 "vi\u1E63\u0101da",
2126                 "y\u014Dga",
2127                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2128                 "uv\u0101cr\u0325",
2129                 "dharmak\u1E63\u0113tr\u0113",
2130                 "kuruk\u1E63\u0113tr\u0113",
2131                 "samav\u0113t\u0101",
2132                 "yuyutsava\u1E25",
2133                 "m\u0101mak\u0101\u1E25",
2134                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2135                 "kimakurvata",
2136                 "san\u0304java"
2137         };
2138         String[]  expected = {
2139                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2140                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2141                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2142                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2143                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2144                 "\u0935\u093f\u0937\u093e\u0926",
2145                 "\u092f\u094b\u0917",
2146                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2147                 "\u0909\u0935\u093E\u091A\u0943",
2148                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2149                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2150                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2151                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2152                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2153                 //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2154                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2155                 "\u0938\u0902\u091c\u0935"
2156         };
2157 
2158         Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2159         Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
2160         for(int i= 0; i<MAX_LEN; i++){
2161             expect(latinToDevToLatin,(source[i]),(source[i]));
2162             expect(devToLatinToDev,(expected[i]),(expected[i]));
2163         }
2164     }
2165     /**
2166      * Test Gurmukhi-Devanagari Tippi and Bindi
2167      */
2168     @Test
TestGurmukhiDevanagari()2169     public void TestGurmukhiDevanagari(){
2170         // the rule says:
2171         // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2172         // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2173 
2174         UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2175         UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2176 
2177         UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2178         UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2179         Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
2180         StringBuffer src = new StringBuffer(" \u0902");
2181         StringBuffer expect = new StringBuffer(" \u0A02");
2182         while(vIter.next()){
2183             src.setCharAt(0,(char) vIter.codepoint);
2184             expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2185             expect(trans,src.toString(),expect.toString());
2186         }
2187 
2188         expect.setCharAt(1,'\u0A70');
2189         while(nvIter.next()){
2190             //src.setCharAt(0,(char) nvIter.codepoint);
2191             src.setCharAt(0,(char)nvIter.codepoint);
2192             expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2193             expect(trans,src.toString(),expect.toString());
2194         }
2195     }
2196     /**
2197      * Test instantiation from a locale.
2198      */
2199     @Test
TestLocaleInstantiation()2200     public void TestLocaleInstantiation() {
2201         Transliterator t;
2202         try{
2203             t = Transliterator.getInstance("te_IN-Latin");
2204             //expect(t, "\u0430", "a");
2205         }catch(IllegalArgumentException ex){
2206             warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
2207         }
2208         try{
2209             t = Transliterator.getInstance("ru_RU-Latin");
2210             expect(t, "\u0430", "a");
2211         }catch(IllegalArgumentException ex){
2212             warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
2213         }
2214         try{
2215             t = Transliterator.getInstance("en-el");
2216             expect(t, "a", "\u03B1");
2217         }catch(IllegalArgumentException ex){
2218             warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2219         }
2220     }
2221 
2222     /**
2223      * Test title case handling of accent (should ignore accents)
2224      */
2225     @Test
TestTitleAccents()2226     public void TestTitleAccents() {
2227         Transliterator t = Transliterator.getInstance("Title");
2228         expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2229     }
2230 
2231     /**
2232      * Basic test of a locale resource based rule.
2233      */
2234     @Test
TestLocaleResource()2235     public void TestLocaleResource() {
2236         String DATA[] = {
2237                 // id                    from             to
2238                 "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",
2239                 "Latin-el",              "b",             "\u03bc\u03c0",
2240                 "Latin-Greek",           "b",             "\u03B2",
2241                 "Greek-Latin/UNGEGN",    "\u03B2",        "v",
2242                 "el-Latin",              "\u03B2",        "v",
2243                 "Greek-Latin",           "\u03B2",        "b",
2244         };
2245         for (int i=0; i<DATA.length; i+=3) {
2246             Transliterator t = Transliterator.getInstance(DATA[i]);
2247             expect(t, DATA[i+1], DATA[i+2]);
2248         }
2249     }
2250 
2251     /**
2252      * Make sure parse errors reference the right line.
2253      */
2254     @Test
TestParseError()2255     public void TestParseError() {
2256         String rule =
2257             "a > b;\n" +
2258             "# more stuff\n" +
2259             "d << b;";
2260         try {
2261             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2262             if(t!=null){
2263                 errln("FAIL: Did not get expected exception");
2264             }
2265         } catch (IllegalArgumentException e) {
2266             String err = e.getMessage();
2267             if (err.indexOf("d << b") >= 0) {
2268                 logln("Ok: " + err);
2269             } else {
2270                 errln("FAIL: " + err);
2271             }
2272             return;
2273         }
2274         errln("FAIL: no syntax error");
2275     }
2276 
2277     /**
2278      * Make sure sets on output are disallowed.
2279      */
2280     @Test
TestOutputSet()2281     public void TestOutputSet() {
2282         String rule = "$set = [a-cm-n]; b > $set;";
2283         Transliterator t = null;
2284         try {
2285             t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2286             if(t!=null){
2287                 errln("FAIL: Did not get the expected exception");
2288             }
2289         } catch (IllegalArgumentException e) {
2290             logln("Ok: " + e.getMessage());
2291             return;
2292         }
2293         errln("FAIL: No syntax error");
2294     }
2295 
2296     /**
2297      * Test the use variable range pragma, making sure that use of
2298      * variable range characters is detected and flagged as an error.
2299      */
2300     @Test
TestVariableRange()2301     public void TestVariableRange() {
2302         String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2303         try {
2304             Transliterator t =
2305                 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2306             if(t!=null){
2307                 errln("FAIL: Did not get the expected exception");
2308             }
2309         } catch (IllegalArgumentException e) {
2310             logln("Ok: " + e.getMessage());
2311             return;
2312         }
2313         errln("FAIL: No syntax error");
2314     }
2315 
2316     /**
2317      * Test invalid post context error handling
2318      */
2319     @Test
TestInvalidPostContext()2320     public void TestInvalidPostContext() {
2321         try {
2322             Transliterator t =
2323                 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2324             if(t!=null){
2325                 errln("FAIL: Did not get the expected exception");
2326             }
2327         } catch (IllegalArgumentException e) {
2328             String msg = e.getMessage();
2329             if (msg.indexOf("a}b{c") >= 0) {
2330                 logln("Ok: " + msg);
2331             } else {
2332                 errln("FAIL: " + msg);
2333             }
2334             return;
2335         }
2336         errln("FAIL: No syntax error");
2337     }
2338 
2339     /**
2340      * Test ID form variants
2341      */
2342     @Test
TestIDForms()2343     public void TestIDForms() {
2344         String DATA[] = {
2345                 "NFC", null, "NFD",
2346                 "nfd", null, "NFC", // make sure case is ignored
2347                 "Any-NFKD", null, "Any-NFKC",
2348                 "Null", null, "Null",
2349                 "-nfkc", "nfkc", "NFKD",
2350                 "-nfkc/", "nfkc", "NFKD",
2351                 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2352                 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2353                 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2354                 "Source-", null, null,
2355                 "Source/Variant-", null, null,
2356                 "Source-/Variant", null, null,
2357                 "/Variant", null, null,
2358                 "/Variant-", null, null,
2359                 "-/Variant", null, null,
2360                 "-/", null, null,
2361                 "-", null, null,
2362                 "/", null, null,
2363         };
2364 
2365         for (int i=0; i<DATA.length; i+=3) {
2366             String ID = DATA[i];
2367             String expID = DATA[i+1];
2368             String expInvID = DATA[i+2];
2369             boolean expValid = (expInvID != null);
2370             if (expID == null) {
2371                 expID = ID;
2372             }
2373             try {
2374                 Transliterator t =
2375                     Transliterator.getInstance(ID);
2376                 Transliterator u = t.getInverse();
2377                 if (t.getID().equals(expID) &&
2378                         u.getID().equals(expInvID)) {
2379                     logln("Ok: " + ID + ".getInverse() => " + expInvID);
2380                 } else {
2381                     errln("FAIL: getInstance(" + ID + ") => " +
2382                             t.getID() + " x getInverse() => " + u.getID() +
2383                             ", expected " + expInvID);
2384                 }
2385             } catch (IllegalArgumentException e) {
2386                 if (!expValid) {
2387                     logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2388                 } else {
2389                     errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
2390                 }
2391             }
2392         }
2393     }
2394 
checkRules(String label, Transliterator t2, String testRulesForward)2395     void checkRules(String label, Transliterator t2, String testRulesForward) {
2396         String rules2 = t2.toRules(true);
2397         //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2398         rules2 = TestUtility.replace(rules2, " ", "");
2399         rules2 = TestUtility.replace(rules2, "\n", "");
2400         rules2 = TestUtility.replace(rules2, "\r", "");
2401         testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2402 
2403         if (!rules2.equals(testRulesForward)) {
2404             errln(label);
2405             logln("GENERATED RULES: " + rules2);
2406             logln("SHOULD BE:       " + testRulesForward);
2407         }
2408     }
2409 
2410     /**
2411      * Mark's toRules test.
2412      */
2413     @Test
TestToRulesMark()2414     public void TestToRulesMark() {
2415 
2416         String testRules =
2417             "::[[:Latin:][:Mark:]];"
2418             + "::NFKD (NFC);"
2419             + "::Lower (Lower);"
2420             + "a <> \\u03B1;" // alpha
2421             + "::NFKC (NFD);"
2422             + "::Upper (Lower);"
2423             + "::Lower ();"
2424             + "::([[:Greek:][:Mark:]]);"
2425             ;
2426         String testRulesForward =
2427             "::[[:Latin:][:Mark:]];"
2428             + "::NFKD(NFC);"
2429             + "::Lower(Lower);"
2430             + "a > \\u03B1;"
2431             + "::NFKC(NFD);"
2432             + "::Upper (Lower);"
2433             + "::Lower ();"
2434             ;
2435         String testRulesBackward =
2436             "::[[:Greek:][:Mark:]];"
2437             + "::Lower (Upper);"
2438             + "::NFD(NFKC);"
2439             + "\\u03B1 > a;"
2440             + "::Lower(Lower);"
2441             + "::NFC(NFKD);"
2442             ;
2443         String source = "\u00E1"; // a-acute
2444         String target = "\u03AC"; // alpha-acute
2445 
2446         Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2447         Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
2448 
2449         expect(t2, source, target);
2450         expect(t3, target, source);
2451 
2452         checkRules("Failed toRules FORWARD", t2, testRulesForward);
2453         checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2454     }
2455 
2456     /**
2457      * Test Escape and Unescape transliterators.
2458      */
2459     @Test
TestEscape()2460     public void TestEscape() {
2461         expect(Transliterator.getInstance("Hex-Any"),
2462                 "\\x{40}\\U00000031&#x32;&#81;",
2463         "@12Q");
2464         expect(Transliterator.getInstance("Any-Hex/C"),
2465                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2466         "\\u0041\\U0010BEEF\\uFEED");
2467         expect(Transliterator.getInstance("Any-Hex/Java"),
2468                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2469         "\\u0041\\uDBEF\\uDEEF\\uFEED");
2470         expect(Transliterator.getInstance("Any-Hex/Perl"),
2471                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2472         "\\x{41}\\x{10BEEF}\\x{FEED}");
2473     }
2474 
2475     /**
2476      * Make sure display names of variants look reasonable.
2477      */
2478     @Test
TestDisplayName()2479     public void TestDisplayName() {
2480         String DATA[] = {
2481                 // ID, forward name, reverse name
2482                 // Update the text as necessary -- the important thing is
2483                 // not the text itself, but how various cases are handled.
2484 
2485                 // Basic test
2486                 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2487 
2488                 // Variants
2489                 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2490 
2491                 // Target-only IDs
2492                 "NFC", "Any to NFC", "Any to NFD",
2493         };
2494 
2495         Locale US = Locale.US;
2496 
2497         for (int i=0; i<DATA.length; i+=3) {
2498             String name = Transliterator.getDisplayName(DATA[i], US);
2499             if (!name.equals(DATA[i+1])) {
2500                 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2501                         name + ", expected " + DATA[i+1]);
2502             } else {
2503                 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2504             }
2505             Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2506             name = Transliterator.getDisplayName(t.getID(), US);
2507             if (!name.equals(DATA[i+2])) {
2508                 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2509                         name + ", expected " + DATA[i+2]);
2510             } else {
2511                 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2512             }
2513 
2514             // Cover getDisplayName(String)
2515             ULocale save = ULocale.getDefault();
2516             ULocale.setDefault(ULocale.US);
2517             String name2 = Transliterator.getDisplayName(t.getID());
2518             if (!name.equals(name2))
2519                 errln("FAIL: getDisplayName with default locale failed");
2520             ULocale.setDefault(save);
2521         }
2522     }
2523 
2524     /**
2525      * Test anchor masking
2526      */
2527     @Test
TestAnchorMasking()2528     public void TestAnchorMasking() {
2529         String rule = "^a > Q; a > q;";
2530         try {
2531             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2532             if(t==null){
2533                 errln("FAIL: Did not get the expected exception");
2534             }
2535         } catch (IllegalArgumentException e) {
2536             errln("FAIL: " + rule + " => " + e);
2537         }
2538     }
2539 
2540     /**
2541      * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2542      * during ICU4J modularization to remove dependency of tests on Transliterator.
2543      */
2544     @Test
TestScriptAllCodepoints()2545     public void TestScriptAllCodepoints(){
2546         int code;
2547         HashSet  scriptIdsChecked   = new HashSet();
2548         HashSet  scriptAbbrsChecked = new HashSet();
2549         for( int i =0; i <= 0x10ffff; i++){
2550             code = UScript.getScript(i);
2551             if(code==UScript.INVALID_CODE){
2552                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2553             }
2554             String id =UScript.getName(code);
2555             String abbr = UScript.getShortName(code);
2556             if (!scriptIdsChecked.contains(id)) {
2557                 scriptIdsChecked.add(id);
2558                 String newId ="[:"+id+":];NFD";
2559                 try{
2560                     Transliterator t = Transliterator.getInstance(newId);
2561                     if(t==null){
2562                         errln("Failed to create transliterator for "+hex(i)+
2563                                 " script code: " +id);
2564                     }
2565                 }catch(Exception e){
2566                     errln("Failed to create transliterator for "+hex(i)
2567                             +" script code: " +id
2568                             + " Exception: "+e.getMessage());
2569                 }
2570             }
2571             if (!scriptAbbrsChecked.contains(abbr)) {
2572                 scriptAbbrsChecked.add(abbr);
2573                 String newAbbrId ="[:"+abbr+":];NFD";
2574                 try{
2575                     Transliterator t = Transliterator.getInstance(newAbbrId);
2576                     if(t==null){
2577                         errln("Failed to create transliterator for "+hex(i)+
2578                                 " script code: " +abbr);
2579                     }
2580                 }catch(Exception e){
2581                     errln("Failed to create transliterator for "+hex(i)
2582                             +" script code: " +abbr
2583                             + " Exception: "+e.getMessage());
2584                 }
2585             }
2586         }
2587     }
2588 
2589 
2590     static final String[][] registerRules = {
2591         {"Any-Dev1", "x > X; y > Y;"},
2592         {"Any-Dev2", "XY > Z"},
2593         {"Greek-Latin/FAKE",
2594             "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
2595             "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
2596             "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
2597             "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
2598         },
2599     };
2600 
2601     static final String DESERET_DEE = UTF16.valueOf(0x10414);
2602     static final String DESERET_dee = UTF16.valueOf(0x1043C);
2603 
2604     static final String[][] testCases = {
2605 
2606         // NORMALIZATION
2607         // should add more test cases
2608         {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2609         {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2610         {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2611         {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2612 
2613         // mp -> b BUG
2614         {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
2615         {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},
2616 
2617         // check for devanagari bug
2618         {"nfd;Dev1;Dev2;nfc", "xy", "Z"},
2619 
2620         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
2621         {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2622             "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2623             //TODO: enable this test once Titlecase works right
2624             //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2625             //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2626 
2627             {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2628                 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
2629                 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2630                     "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},
2631 
2632                     {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2633                     {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2634 
2635                     // FORMS OF S
2636                     {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2637                     {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2638                     {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2639                     {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2640 
2641                     // Tatiana bug
2642                     // Upper: TAT\u02B9\u00C2NA
2643                     // Lower: tat\u02B9\u00E2na
2644                     // Title: Tat\u02B9\u00E2na
2645                     {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
2646                     {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
2647                     {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
2648     };
2649 
2650     @Test
TestSpecialCases()2651     public void TestSpecialCases() {
2652 
2653         for (int i = 0; i < registerRules.length; ++i) {
2654             Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2655                     registerRules[i][1], Transliterator.FORWARD);
2656             DummyFactory.add(registerRules[i][0], t);
2657         }
2658         for (int i = 0; i < testCases.length; ++i) {
2659             String name = testCases[i][0];
2660             Transliterator t = Transliterator.getInstance(name);
2661             String id = t.getID();
2662             String source = testCases[i][1];
2663             String target = null;
2664 
2665             // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2666 
2667             if (testCases[i].length > 2)    target = testCases[i][2];
2668             else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
2669             else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
2670             else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
2671             else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
2672             else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
2673             else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);
2674 
2675             expect(t, source, target);
2676         }
2677         for (int i = 0; i < registerRules.length; ++i) {
2678             Transliterator.unregister(registerRules[i][0]);
2679         }
2680     }
2681 
2682     // seems like there should be an easier way to just register an instance of a transliterator
2683 
2684     static class DummyFactory implements Transliterator.Factory {
2685         static DummyFactory singleton = new DummyFactory();
2686         static HashMap m = new HashMap();
2687 
2688         // Since Transliterators are immutable, we don't have to clone on set & get
add(String ID, Transliterator t)2689         static void add(String ID, Transliterator t) {
2690             m.put(ID, t);
2691             //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2692             Transliterator.registerFactory(ID, singleton);
2693         }
2694         @Override
getInstance(String ID)2695         public Transliterator getInstance(String ID) {
2696             return (Transliterator) m.get(ID);
2697         }
2698     }
2699 
2700     @Test
TestCasing()2701     public void TestCasing() {
2702         Transliterator toLower = Transliterator.getInstance("lower");
2703         Transliterator toCasefold = Transliterator.getInstance("casefold");
2704         Transliterator toUpper = Transliterator.getInstance("upper");
2705         Transliterator toTitle = Transliterator.getInstance("title");
2706         for (int i = 0; i < 0x600; ++i) {
2707             String s = UTF16.valueOf(i);
2708 
2709             String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2710             assertEquals("Lowercase", lower, toLower.transform(s));
2711 
2712             String casefold = UCharacter.foldCase(s, true);
2713             assertEquals("Casefold", casefold, toCasefold.transform(s));
2714 
2715             if (i != 0x0345) {
2716                 // ICU 60 changes the default titlecasing index adjustment.
2717                 // For word breaks it is mostly the same as before,
2718                 // but it is different for the iota subscript (the only cased combining mark).
2719                 // This should be ok because the iota subscript is not supposed to appear
2720                 // at the start of a word.
2721                 // The title Transliterator is far below feature parity with the
2722                 // UCharacter and CaseMap titlecasing functions.
2723                 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2724                 assertEquals("Title", title, toTitle.transform(s));
2725             }
2726 
2727             String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2728             assertEquals("Upper", upper, toUpper.transform(s));
2729         }
2730     }
2731 
2732     @Test
TestSurrogateCasing()2733     public void TestSurrogateCasing () {
2734         // check that casing handles surrogates
2735         // titlecase is currently defective
2736         int dee = UTF16.charAt(DESERET_dee,0);
2737         int DEE = UCharacter.toTitleCase(dee);
2738         if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2739             errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2740         }
2741 
2742         if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2743             errln("Fails uppercase of surrogates");
2744         }
2745 
2746         if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2747             errln("Fails lowercase of surrogates");
2748         }
2749     }
2750 
2751 
2752     @Test
TestFunction()2753     public void TestFunction() {
2754         // Careful with spacing and ';' here:  Phrase this exactly
2755         // as toRules() is going to return it.  If toRules() changes
2756         // with regard to spacing or ';', then adjust this string.
2757         String rule =
2758             "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2759 
2760         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2761         if (t == null) {
2762             errln("FAIL: createFromRules failed");
2763             return;
2764         }
2765 
2766         String r = t.toRules(true);
2767         if (r.equals(rule)) {
2768             logln("OK: toRules() => " + r);
2769         } else {
2770             errln("FAIL: toRules() => " + r +
2771                     ", expected " + rule);
2772         }
2773 
2774         expect(t, "The Quick Brown Fox",
2775         "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2776         rule =
2777             "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2778 
2779         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2780         if (t == null) {
2781             errln("FAIL: createFromRules failed");
2782             return;
2783         }
2784 
2785         r = t.toRules(true);
2786         if (r.equals(rule)) {
2787             logln("OK: toRules() => " + r);
2788         } else {
2789             errln("FAIL: toRules() => " + r +
2790                     ", expected " + rule);
2791         }
2792 
2793         expect(t, "\u0301",
2794         "U+0301 \\N{COMBINING ACUTE ACCENT}");
2795     }
2796 
2797     @Test
TestInvalidBackRef()2798     public void TestInvalidBackRef() {
2799         String rule =  ". > $1;";
2800         String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2801         try {
2802             Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2803             if (t != null) {
2804                 errln("FAIL: createFromRules should have returned NULL");
2805             }
2806             errln("FAIL: Ok: . > $1; => no error");
2807             Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2808             if (t2 != null) {
2809                 errln("FAIL: createFromRules should have returned NULL");
2810             }
2811             errln("FAIL: Ok: . > $1; => no error");
2812         } catch (IllegalArgumentException e) {
2813             logln("Ok: . > $1; => " + e.getMessage());
2814         }
2815     }
2816 
2817     @Test
TestMulticharStringSet()2818     public void TestMulticharStringSet() {
2819         // Basic testing
2820         String rule =
2821             "       [{aa}]       > x;" +
2822             "         a          > y;" +
2823             "       [b{bc}]      > z;" +
2824             "[{gd}] { e          > q;" +
2825             "         e } [{fg}] > r;" ;
2826 
2827         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2828         if (t == null) {
2829             errln("FAIL: createFromRules failed");
2830             return;
2831         }
2832 
2833         expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2834         "y x yz z d gd de gdq gdqfg ddrfg");
2835 
2836         // Overlapped string test.  Make sure that when multiple
2837         // strings can match that the longest one is matched.
2838         rule =
2839             "    [a {ab} {abc}]    > x;" +
2840             "           b          > y;" +
2841             "           c          > z;" +
2842             " q [t {st} {rst}] { e > p;" ;
2843 
2844         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2845         if (t == null) {
2846             errln("FAIL: createFromRules failed");
2847             return;
2848         }
2849 
2850         expect(t, "a ab abc qte qste qrste",
2851         "x x x qtp qstp qrstp");
2852     }
2853 
2854     /**
2855      * Test that user-registered transliterators can be used under function
2856      * syntax.
2857      */
2858     @Test
TestUserFunction()2859     public void TestUserFunction() {
2860         Transliterator t;
2861 
2862         // There's no need to register inverses if we don't use them
2863         TestUserFunctionFactory.add("Any-gif",
2864                 Transliterator.createFromRules("gif",
2865                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2866                         Transliterator.FORWARD));
2867         //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2868 
2869         TestUserFunctionFactory.add("Any-RemoveCurly",
2870                 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2871         //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2872 
2873         logln("Trying &hex");
2874         t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2875         logln("Registering");
2876         TestUserFunctionFactory.add("Any-hex2", t);
2877         t = Transliterator.getInstance("Any-hex2");
2878         expect(t, "abc", "\\u0061\\u0062\\u0063");
2879 
2880         logln("Trying &gif");
2881         t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2882         logln("Registering");
2883         TestUserFunctionFactory.add("Any-gif2", t);
2884         t = Transliterator.getInstance("Any-gif2");
2885         expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2886         "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2887 
2888         // Test that filters are allowed after &
2889         t = Transliterator.createFromRules("test",
2890                 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2891         expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2892 
2893         // Unregister our test stuff
2894         TestUserFunctionFactory.unregister();
2895     }
2896 
2897     static class TestUserFunctionFactory implements Transliterator.Factory {
2898         static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2899         static HashMap m = new HashMap();
2900 
add(String ID, Transliterator t)2901         static void add(String ID, Transliterator t) {
2902             m.put(new CaseInsensitiveString(ID), t);
2903             Transliterator.registerFactory(ID, singleton);
2904         }
2905 
2906         @Override
getInstance(String ID)2907         public Transliterator getInstance(String ID) {
2908             return (Transliterator) m.get(new CaseInsensitiveString(ID));
2909         }
2910 
unregister()2911         static void unregister() {
2912             Iterator ids = m.keySet().iterator();
2913             while (ids.hasNext()) {
2914                 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
2915                 Transliterator.unregister(id.getString());
2916                 ids.remove(); // removes pair from m
2917             }
2918         }
2919     }
2920 
2921     /**
2922      * Test the Any-X transliterators.
2923      */
2924     @Test
TestAnyX()2925     public void TestAnyX() {
2926         Transliterator anyLatin =
2927             Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
2928 
2929         expect(anyLatin,
2930                 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2931         "greek:abkABK hiragana:abuku cyrillic:abc");
2932     }
2933 
2934     /**
2935      * Test Any-X transliterators with sample letters from all scripts.
2936      */
2937     @Test
TestAny()2938     public void TestAny() {
2939         UnicodeSet alphabetic = new UnicodeSet("[:alphabetic:]").freeze();
2940         StringBuffer testString = new StringBuffer();
2941         for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
2942             UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
2943             int count = 5;
2944             for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
2945                 testString.append(it.getString());
2946                 if (--count < 0) break;
2947             }
2948         }
2949         logln("Sample set for Any-Latin: " + testString);
2950         Transliterator anyLatin = Transliterator.getInstance("any-Latn");
2951         String result = anyLatin.transliterate(testString.toString());
2952         logln("Sample result for Any-Latin: " + result);
2953     }
2954 
2955 
2956     /**
2957      * Test the source and target set API.  These are only implemented
2958      * for RBT and CompoundTransliterator at this time.
2959      */
2960     @Test
TestSourceTargetSet()2961     public void TestSourceTargetSet() {
2962         // Rules
2963         String r =
2964             "a > b; " +
2965             "r [x{lu}] > q;";
2966 
2967         // Expected source
2968         UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
2969 
2970         // Expected target
2971         UnicodeSet expTrg = new UnicodeSet("[bq]");
2972 
2973         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
2974         UnicodeSet src = t.getSourceSet();
2975         UnicodeSet trg = t.getTargetSet();
2976 
2977         if (src.equals(expSrc) && trg.equals(expTrg)) {
2978             logln("Ok: " + r + " => source = " + src.toPattern(true) +
2979                     ", target = " + trg.toPattern(true));
2980         } else {
2981             errln("FAIL: " + r + " => source = " + src.toPattern(true) +
2982                     ", expected " + expSrc.toPattern(true) +
2983                     "; target = " + trg.toPattern(true) +
2984                     ", expected " + expTrg.toPattern(true));
2985         }
2986     }
2987 
2988     @Test
TestSourceTargetSetFilter()2989     public void TestSourceTargetSetFilter() {
2990         String[][] tests = {
2991                 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
2992                 {"[] Latin-Greek", null, "[\']"},
2993                 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
2994                 {"[] Any-Latin"},
2995                 {"[] casefold"},
2996                 {"[] NFKD;"},
2997                 {"[] NFKC;"},
2998                 {"[] hex"},
2999                 {"[] lower"},
3000                 {"[] null"},
3001                 {"[] remove"},
3002                 {"[] title"},
3003                 {"[] upper"},
3004         };
3005         UnicodeSet expectedSource = UnicodeSet.EMPTY;
3006         for (String[] testPair : tests) {
3007             String test = testPair[0];
3008             Transliterator t0;
3009             try {
3010                 t0 = Transliterator.getInstance(test);
3011             } catch (Exception e) {
3012                 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
3013             }
3014             Transliterator t1;
3015             try {
3016                 t1 = t0.getInverse();
3017             } catch (Exception e) {
3018                 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
3019             }
3020             int targetIndex = 0;
3021             for (Transliterator t : new Transliterator[]{t0, t1}) {
3022                 boolean ok;
3023                 UnicodeSet source = t.getSourceSet();
3024                 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3025                 targetIndex++;
3026                 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3027                         : testPair[targetIndex] == null ? expectedSource
3028                                 : testPair[targetIndex].length() == 0 ? expectedSource
3029                                         : new UnicodeSet(testPair[targetIndex]);
3030                 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
3031                 if (!ok) { // for debugging
3032                     source = t.getSourceSet();
3033                 }
3034                 UnicodeSet target = t.getTargetSet();
3035                 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3036                 if (!ok) { // for debugging
3037                     target = t.getTargetSet();
3038                 }
3039             }
3040         }
3041     }
3042 
isAtomic(String s, String t, Transliterator trans)3043     static boolean isAtomic(String s, String t, Transliterator trans) {
3044         for (int i = 1; i < s.length(); ++i) {
3045             if (!CharSequences.onCharacterBoundary(s, i)) {
3046                 continue;
3047             }
3048             String q = trans.transform(s.substring(0,i));
3049             if (t.startsWith(q)) {
3050                 String r = trans.transform(s.substring(i));
3051                 if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3052                     return false;
3053                 }
3054             }
3055         }
3056         return true;
3057         //        // make sure that every part is different
3058         //        if (s.codePointCount(0, s.length()) > 1) {
3059         //            int[] codePoints = It.codePoints(s);
3060         //            for (int k = 0; k < codePoints.length; ++k) {
3061         //                int pos = indexOf(t,codePoints[k]);
3062         //                if (pos >= 0) {
3063         //                    int x;
3064         //                }
3065         //            }
3066         //            if (s.contains("\u00C0")) {
3067         //                logln("\u00C0");
3068         //            }
3069         //        }
3070     }
3071 
addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3072     static void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3073         expectedSource.addAll(s);
3074         if (t.length() > 0) {
3075             expectedTarget.addAll(t);
3076         }
3077     }
3078 
3079 //    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3080 //        disorderedMarks.add(s);
3081 //        for (int j = 1; j < s.length(); ++j) {
3082 //            if (CharSequences.onCharacterBoundary(s, j)) {
3083 //                String shorter = s.substring(0,j);
3084 //                disorderedMarks.add(shorter);
3085 //                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
3086 //            }
3087 //        }
3088 //    }
3089 
3090     @Test
TestCharUtils()3091     public void TestCharUtils() {
3092         String[][] startTests = {
3093                 {"1", "a", "ab"},
3094                 {"0", "a", "xb"},
3095                 {"0", "\uD800", "\uD800\uDC01"},
3096                 {"1", "\uD800a", "\uD800b"},
3097                 {"0", "\uD800\uDC00", "\uD800\uDC01"},
3098         };
3099         for (String[] row : startTests) {
3100             int actual = findSharedStartLength(row[1], row[2]);
3101             assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3102                     Integer.parseInt(row[0]),
3103                     actual);
3104         }
3105         String[][] endTests = {
3106                 {"0", "\uDC00", "\uD801\uDC00"},
3107                 {"1", "a", "ba"},
3108                 {"0", "a", "bx"},
3109                 {"1", "a\uDC00", "b\uDC00"},
3110                 {"0", "\uD800\uDC00", "\uD801\uDC00"},
3111         };
3112         for (String[] row : endTests) {
3113             int actual = findSharedEndLength(row[1], row[2]);
3114             assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3115                     Integer.parseInt(row[0]),
3116                     actual);
3117         }
3118     }
3119 
3120     /**
3121      * @param s
3122      * @param t
3123      * @return
3124      */
3125     // TODO make generally available
findSharedStartLength(CharSequence s, CharSequence t)3126     private static int findSharedStartLength(CharSequence s, CharSequence t) {
3127         int min = Math.min(s.length(), t.length());
3128         int i;
3129         char sch, tch;
3130         for (i = 0; i < min; ++i) {
3131             sch = s.charAt(i);
3132             tch = t.charAt(i);
3133             if (sch != tch) {
3134                 break;
3135             }
3136         }
3137         return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3138     }
3139 
3140     /**
3141      * @param s
3142      * @param t
3143      * @return
3144      */
3145     // TODO make generally available
findSharedEndLength(CharSequence s, CharSequence t)3146     private static int findSharedEndLength(CharSequence s, CharSequence t) {
3147         int slength = s.length();
3148         int tlength = t.length();
3149         int min = Math.min(slength, tlength);
3150         int i;
3151         char sch, tch;
3152         // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3153         for (i = 0; i < min; ++i) {
3154             sch = s.charAt(slength - i - 1);
3155             tch = t.charAt(tlength - i - 1);
3156             if (sch != tch) {
3157                 break;
3158             }
3159         }
3160         return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
3161     }
3162 
3163     enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK}
3164 
assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3165     static void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3166         boolean haveError = false;
3167         if (!actual.containsAll(empirical)) {
3168             UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3169             errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3170             haveError = true;
3171         }
3172         if (!empirical.containsAll(actual)) {
3173             UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3174             logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3175             haveError = true;
3176         }
3177         if (!haveError) {
3178             logln("OK " + message + ' ' + toPattern(empirical));
3179         }
3180     }
3181 
toPattern(UnicodeSet missing)3182     private static String toPattern(UnicodeSet missing) {
3183         String result = missing.toPattern(false);
3184         if (result.length() < 200) {
3185             return result;
3186         }
3187         return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3188     }
3189 
3190 
3191     /**
3192      * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3193      */
3194     @Test
TestPatternWhitespace()3195     public void TestPatternWhitespace() {
3196         // Rules
3197         String r = "a > \u200E b;";
3198 
3199         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3200 
3201         expect(t, "a", "b");
3202 
3203         // UnicodeSet
3204         UnicodeSet set = new UnicodeSet("[a \u200E]");
3205 
3206         if (set.contains(0x200E)) {
3207             errln("FAIL: U+200E not being ignored by UnicodeSet");
3208         }
3209     }
3210 
3211     @Test
TestAlternateSyntax()3212     public void TestAlternateSyntax() {
3213         // U+2206 == &
3214         // U+2190 == <
3215         // U+2192 == >
3216         // U+2194 == <>
3217         expect("a \u2192 x; b \u2190 y; c \u2194 z",
3218                 "abc",
3219         "xbz");
3220         expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3221                 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3222         "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3223     }
3224 
3225     @Test
TestPositionAPI()3226     public void TestPositionAPI() {
3227         Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3228         Transliterator.Position b = new Transliterator.Position(a);
3229         Transliterator.Position c = new Transliterator.Position();
3230         c.set(a);
3231         // Call the toString() API:
3232         if (a.equals(b) && a.equals(c)) {
3233             logln("Ok: " + a + " == " + b + " == " + c);
3234         } else {
3235             errln("FAIL: " + a + " != " + b + " != " + c);
3236         }
3237     }
3238 
3239     //======================================================================
3240     // New tests for the ::BEGIN/::END syntax
3241     //======================================================================
3242 
3243     private static final String[] BEGIN_END_RULES = new String[] {
3244         // [0]
3245         "abc > xy;"
3246         + "aba > z;",
3247 
3248         // [1]
3249         /*
3250         "::BEGIN;"
3251         + "abc > xy;"
3252         + "::END;"
3253         + "::BEGIN;"
3254         + "aba > z;"
3255         + "::END;",
3256          */
3257         "", // test case commented out below, this is here to keep from messing up the indexes
3258 
3259         // [2]
3260         /*
3261         "abc > xy;"
3262         + "::BEGIN;"
3263         + "aba > z;"
3264         + "::END;",
3265          */
3266         "", // test case commented out below, this is here to keep from messing up the indexes
3267 
3268         // [3]
3269         /*
3270         "::BEGIN;"
3271         + "abc > xy;"
3272         + "::END;"
3273         + "aba > z;",
3274          */
3275         "", // test case commented out below, this is here to keep from messing up the indexes
3276 
3277         // [4]
3278         "abc > xy;"
3279         + "::Null;"
3280         + "aba > z;",
3281 
3282         // [5]
3283         "::Upper;"
3284         + "ABC > xy;"
3285         + "AB > x;"
3286         + "C > z;"
3287         + "::Upper;"
3288         + "XYZ > p;"
3289         + "XY > q;"
3290         + "Z > r;"
3291         + "::Upper;",
3292 
3293         // [6]
3294         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3295         + "$delim = [\\-$ws];"
3296         + "$ws $delim* > ' ';"
3297         + "'-' $delim* > '-';",
3298 
3299         // [7]
3300         "::Null;"
3301         + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3302         + "$delim = [\\-$ws];"
3303         + "$ws $delim* > ' ';"
3304         + "'-' $delim* > '-';",
3305 
3306         // [8]
3307         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3308         + "$delim = [\\-$ws];"
3309         + "$ws $delim* > ' ';"
3310         + "'-' $delim* > '-';"
3311         + "::Null;",
3312 
3313         // [9]
3314         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3315         + "$delim = [\\-$ws];"
3316         + "::Null;"
3317         + "$ws $delim* > ' ';"
3318         + "'-' $delim* > '-';",
3319 
3320         // [10]
3321         /*
3322         "::BEGIN;"
3323         + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3324         + "$delim = [\\-$ws];"
3325         + "::END;"
3326         + "$ws $delim* > ' ';"
3327         + "'-' $delim* > '-';",
3328          */
3329         "", // test case commented out below, this is here to keep from messing up the indexes
3330 
3331         // [11]
3332         /*
3333         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3334         + "$delim = [\\-$ws];"
3335         + "::BEGIN;"
3336         + "$ws $delim* > ' ';"
3337         + "'-' $delim* > '-';"
3338         + "::END;",
3339          */
3340         "", // test case commented out below, this is here to keep from messing up the indexes
3341 
3342         // [12]
3343         /*
3344         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3345         + "$delim = [\\-$ws];"
3346         + "$ab = [ab];"
3347         + "::BEGIN;"
3348         + "$ws $delim* > ' ';"
3349         + "'-' $delim* > '-';"
3350         + "::END;"
3351         + "::BEGIN;"
3352         + "$ab { ' ' } $ab > '-';"
3353         + "c { ' ' > ;"
3354         + "::END;"
3355         + "::BEGIN;"
3356         + "'a-a' > a\\%|a;"
3357         + "::END;",
3358          */
3359         "", // test case commented out below, this is here to keep from messing up the indexes
3360 
3361         // [13]
3362         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3363         + "$delim = [\\-$ws];"
3364         + "$ab = [ab];"
3365         + "::Null;"
3366         + "$ws $delim* > ' ';"
3367         + "'-' $delim* > '-';"
3368         + "::Null;"
3369         + "$ab { ' ' } $ab > '-';"
3370         + "c { ' ' > ;"
3371         + "::Null;"
3372         + "'a-a' > a\\%|a;",
3373 
3374         // [14]
3375         /*
3376         "::[abc];"
3377         + "::BEGIN;"
3378         + "abc > xy;"
3379         + "::END;"
3380         + "::BEGIN;"
3381         + "aba > yz;"
3382         + "::END;"
3383         + "::Upper;",
3384          */
3385         "", // test case commented out below, this is here to keep from messing up the indexes
3386 
3387         // [15]
3388         "::[abc];"
3389         + "abc > xy;"
3390         + "::Null;"
3391         + "aba > yz;"
3392         + "::Upper;",
3393 
3394         // [16]
3395         /*
3396         "::[abc];"
3397         + "::BEGIN;"
3398         + "abc <> xy;"
3399         + "::END;"
3400         + "::BEGIN;"
3401         + "aba <> yz;"
3402         + "::END;"
3403         + "::Upper(Lower);"
3404         + "::([XYZ]);",
3405          */
3406         "", // test case commented out below, this is here to keep from messing up the indexes
3407 
3408         // [17]
3409         "::[abc];"
3410         + "abc <> xy;"
3411         + "::Null;"
3412         + "aba <> yz;"
3413         + "::Upper(Lower);"
3414         + "::([XYZ]);"
3415     };
3416 
3417     /*
3418 (This entire test is commented out below and will need some heavy revision when we re-add
3419 the ::BEGIN/::END stuff)
3420     private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3421         // [7]
3422         "::BEGIN;"
3423         + "abc > xy;"
3424         + "::BEGIN;"
3425         + "aba > z;"
3426         + "::END;"
3427         + "::END;",
3428 
3429         // [8]
3430         "abc > xy;"
3431         + " aba > z;"
3432         + "::END;",
3433 
3434         // [9]
3435         "::BEGIN;"
3436         + "::Upper;"
3437         + "::END;"
3438     };
3439      */
3440 
3441     private static final String[] BEGIN_END_TEST_CASES = new String[] {
3442         BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
3443         //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
3444         //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
3445         //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
3446         BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
3447         BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
3448 
3449         BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
3450         BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
3451         BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
3452         BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
3453         //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
3454         //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
3455         //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
3456         //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
3457         //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
3458         BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
3459         BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
3460         BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
3461 
3462         //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3463         BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3464         //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3465         BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
3466     };
3467 
3468     @Test
TestBeginEnd()3469     public void TestBeginEnd() {
3470         // run through the list of test cases above
3471         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3472             expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3473         }
3474 
3475         // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3476         Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3477                 Transliterator.REVERSE);
3478         expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3479 
3480         // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3481         // that all of them cause errors
3482         /*
3483 (commented out until we have the real ::BEGIN/::END stuff in place
3484         for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3485             try {
3486                 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3487                         Transliterator.FORWARD);
3488                 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3489             }
3490             catch (IllegalArgumentException e) {
3491                 // this is supposed to happen; do nothing here
3492             }
3493         }
3494          */
3495     }
3496 
3497     @Test
TestBeginEndToRules()3498     public void TestBeginEndToRules() {
3499         // run through the same list of test cases we used above, but this time, instead of just
3500         // instantiating a Transliterator from the rules and running the test against it, we instantiate
3501         // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3502         // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3503         // to (i.e., does the same thing as) the original rule set
3504         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3505             Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3506                     Transliterator.FORWARD);
3507             String rules = t.toRules(false);
3508             Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3509             expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3510         }
3511 
3512         // do the same thing for the reversible test case
3513         Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3514                 Transliterator.REVERSE);
3515         String rules = reversed.toRules(false);
3516         Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3517         expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3518     }
3519 
3520     @Test
TestRegisterAlias()3521     public void TestRegisterAlias() {
3522         String longID = "Lower;[aeiou]Upper";
3523         String shortID = "Any-CapVowels";
3524         String reallyShortID = "CapVowels";
3525 
3526         Transliterator.registerAlias(shortID, longID);
3527 
3528         Transliterator t1 = Transliterator.getInstance(longID);
3529         Transliterator t2 = Transliterator.getInstance(reallyShortID);
3530 
3531         if (!t1.getID().equals(longID))
3532             errln("Transliterator instantiated with long ID doesn't have long ID");
3533         if (!t2.getID().equals(reallyShortID))
3534             errln("Transliterator instantiated with short ID doesn't have short ID");
3535 
3536         if (!t1.toRules(true).equals(t2.toRules(true)))
3537             errln("Alias transliterators aren't the same");
3538 
3539         Transliterator.unregister(shortID);
3540 
3541         try {
3542             t1 = Transliterator.getInstance(shortID);
3543             errln("Instantiation with short ID succeeded after short ID was unregistered");
3544         }
3545         catch (IllegalArgumentException e) {
3546         }
3547 
3548         // try the same thing again, but this time with something other than
3549         // an instance of CompoundTransliterator
3550         String realID = "Latin-Greek";
3551         String fakeID = "Latin-dlgkjdflkjdl";
3552         Transliterator.registerAlias(fakeID, realID);
3553 
3554         t1 = Transliterator.getInstance(realID);
3555         t2 = Transliterator.getInstance(fakeID);
3556 
3557         if (!t1.toRules(true).equals(t2.toRules(true)))
3558             errln("Alias transliterators aren't the same");
3559 
3560         Transliterator.unregister(fakeID);
3561     }
3562 
3563     /**
3564      * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3565      */
3566     @Test
TestHalfwidthFullwidth()3567     public void TestHalfwidthFullwidth() {
3568         Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3569         Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3570 
3571         // Array of 3n items
3572         // Each item is
3573         //   "hf"|"fh"|"both",
3574         //   <Halfwidth>,
3575         //   <Fullwidth>
3576         String[] DATA = {
3577                 "both",
3578                 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3579                 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3580         };
3581 
3582         for (int i=0; i<DATA.length; i+=3) {
3583             switch (DATA[i].charAt(0)) {
3584             case 'h': // Halfwidth-Fullwidth only
3585                 expect(hf, DATA[i+1], DATA[i+2]);
3586                 break;
3587             case 'f': // Fullwidth-Halfwidth only
3588                 expect(fh, DATA[i+2], DATA[i+1]);
3589                 break;
3590             case 'b': // both directions
3591                 expect(hf, DATA[i+1], DATA[i+2]);
3592                 expect(fh, DATA[i+2], DATA[i+1]);
3593                 break;
3594             }
3595         }
3596 
3597     }
3598 
3599     /**
3600      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3601      *              TODO: confirm that the expected results are correct.
3602      *              For now, test just confirms that C++ and Java give identical results.
3603      */
3604     @Test
TestThai()3605     public void TestThai() {
3606         Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3607         String thaiText =
3608             "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3609             "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3610             "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3611             "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3612             "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3613             "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3614             "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3615             "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3616             "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3617             "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3618             "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3619             "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3620             "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3621             "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3622             "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3623             "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3624             "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3625             "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3626             "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3627             "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3628             "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3629             "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3630             "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3631             "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3632             " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3633             "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3634             "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3635             " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3636             "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3637             "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3638 
3639         String latinText =
3640             "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3641             "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3642             "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3643             "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3644             "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3645             " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3646             "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3647             "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3648             "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3649             "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3650             "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3651             "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3652             " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3653             "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3654             " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3655             "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3656             "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3657             "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3658 
3659         expect(tr, thaiText, latinText);
3660     }
3661 
3662 
3663     //======================================================================
3664     // These tests are not mirrored (yet) in icu4c at
3665     // source/test/intltest/transtst.cpp
3666     //======================================================================
3667 
3668     /**
3669      * Improve code coverage.
3670      */
3671     @Test
TestCoverage()3672     public void TestCoverage() {
3673         // NullTransliterator
3674         Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3675         expect(t, "a", "a");
3676 
3677         // Source, target set
3678         t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3679         t.setFilter(new UnicodeSet("[A-Z]"));
3680         logln("source = " + t.getSourceSet());
3681         logln("target = " + t.getTargetSet());
3682 
3683         t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3684         logln("source = " + t.getSourceSet());
3685         logln("target = " + t.getTargetSet());
3686     }
3687     /*
3688      * Test case for threading problem in NormalizationTransliterator
3689      * reported by ticket#5160
3690      */
3691     @Test
TestT5160()3692     public void TestT5160() {
3693         final String[] testData = {
3694                 "a",
3695                 "b",
3696                 "\u09BE",
3697                 "A\u0301",
3698         };
3699         final String[] expected = {
3700                 "a",
3701                 "b",
3702                 "\u09BE",
3703                 "\u00C1",
3704         };
3705         Transliterator translit = Transliterator.getInstance("NFC");
3706         NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
3707         for (int i = 0; i < tasks.length; i++) {
3708             tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
3709         }
3710         TestUtil.runUntilDone(tasks);
3711 
3712         for (int i = 0; i < tasks.length; i++) {
3713             if (tasks[i].getErrorMessage() != null) {
3714                 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
3715                 break;
3716             }
3717         }
3718     }
3719 
3720     static class NormTranslitTask implements Runnable {
3721         Transliterator translit;
3722         String testData;
3723         String expectedData;
3724         String errorMsg;
3725 
NormTranslitTask(Transliterator translit, String testData, String expectedData)3726         NormTranslitTask(Transliterator translit, String testData, String expectedData) {
3727             this.translit = translit;
3728             this.testData = testData;
3729             this.expectedData = expectedData;
3730         }
3731 
3732         @Override
run()3733         public void run() {
3734             errorMsg = null;
3735             StringBuffer inBuf = new StringBuffer(testData);
3736             StringBuffer expectedBuf = new StringBuffer(expectedData);
3737 
3738             for(int i = 0; i < 1000; i++) {
3739                 String in = inBuf.toString();
3740                 String out = translit.transliterate(in);
3741                 String expected = expectedBuf.toString();
3742                 if (!out.equals(expected)) {
3743                     errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
3744                     break;
3745                 }
3746                 inBuf.append(testData);
3747                 expectedBuf.append(expectedData);
3748             }
3749         }
3750 
getErrorMessage()3751         public String getErrorMessage() {
3752             return errorMsg;
3753         }
3754     }
3755 
3756     //======================================================================
3757     // Support methods
3758     //======================================================================
expect(String rules, String source, String expectedResult, Transliterator.Position pos)3759     static void expect(String rules,
3760             String source,
3761             String expectedResult,
3762             Transliterator.Position pos) {
3763         Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
3764         expect(t, source, expectedResult, pos);
3765     }
3766 
expect(String rules, String source, String expectedResult)3767     static void expect(String rules, String source, String expectedResult) {
3768         expect(rules, source, expectedResult, null);
3769     }
3770 
expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3771     static void expect(Transliterator t, String source, String expectedResult,
3772             Transliterator reverseTransliterator) {
3773         expect(t, source, expectedResult);
3774         if (reverseTransliterator != null) {
3775             expect(reverseTransliterator, expectedResult, source);
3776         }
3777     }
3778 
expect(Transliterator t, String source, String expectedResult)3779     static void expect(Transliterator t, String source, String expectedResult) {
3780         expect(t, source, expectedResult, (Transliterator.Position) null);
3781     }
3782 
expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)3783     static void expect(Transliterator t, String source, String expectedResult,
3784             Transliterator.Position pos) {
3785         if (pos == null) {
3786             String result = t.transliterate(source);
3787             if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
3788         }
3789 
3790         Transliterator.Position index = null;
3791         if (pos == null) {
3792             index = new Transliterator.Position(0, source.length(), 0, source.length());
3793         } else {
3794             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
3795                     pos.start, pos.limit);
3796         }
3797 
3798         ReplaceableString rsource = new ReplaceableString(source);
3799 
3800         t.finishTransliteration(rsource, index);
3801         // Do it all at once -- below we do it incrementally
3802 
3803         if (index.start != index.limit) {
3804             expectAux(t.getID() + ":UNFINISHED", source,
3805                     "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
3806             return;
3807         }
3808         String result = rsource.toString();
3809         if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
3810 
3811 
3812         if (pos == null) {
3813             index = new Transliterator.Position();
3814         } else {
3815             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
3816                     pos.start, pos.limit);
3817         }
3818 
3819         // Test incremental transliteration -- this result
3820         // must be the same after we finalize (see below).
3821         List<String> v = new ArrayList<String>();
3822         v.add(source);
3823         rsource.replace(0, rsource.length(), "");
3824         if (pos != null) {
3825             rsource.replace(0, 0, source);
3826             v.add(UtilityExtensions.formatInput(rsource, index));
3827             t.transliterate(rsource, index);
3828             v.add(UtilityExtensions.formatInput(rsource, index));
3829         } else {
3830             for (int i=0; i<source.length(); ++i) {
3831                 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
3832                 //log.append(source.charAt(i)).append(" -> "));
3833                 t.transliterate(rsource, index, source.charAt(i));
3834                 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
3835                 v.add(UtilityExtensions.formatInput(rsource, index) +
3836                         ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
3837             }
3838         }
3839 
3840         // As a final step in keyboard transliteration, we must call
3841         // transliterate to finish off any pending partial matches that
3842         // were waiting for more input.
3843         t.finishTransliteration(rsource, index);
3844         result = rsource.toString();
3845         //log.append(" => ").append(rsource.toString());
3846         v.add(result);
3847 
3848         String[] results = new String[v.size()];
3849         v.toArray(results);
3850         expectAux(t.getID() + ":Incremental", results,
3851                 result.equals(expectedResult),
3852                 expectedResult);
3853     }
3854 
3855     static boolean expectAux(String tag, String source,
3856             String result, String expectedResult) {
3857         return expectAux(tag, new String[] {source, result},
3858                 result.equals(expectedResult),
3859                 expectedResult);
3860     }
3861 
3862     static boolean expectAux(String tag, String source,
3863             String result, boolean pass,
3864             String expectedResult) {
3865         return expectAux(tag, new String[] {source, result},
3866                 pass,
3867                 expectedResult);
3868     }
3869 
3870     static boolean expectAux(String tag, String source,
3871             boolean pass,
3872             String expectedResult) {
3873         return expectAux(tag, new String[] {source},
3874                 pass,
3875                 expectedResult);
3876     }
3877 
3878     static boolean expectAux(String tag, String[] results, boolean pass,
3879             String expectedResult) {
3880         msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
3881 
3882         for (int i = 0; i < results.length; ++i) {
3883             String label;
3884             if (i == 0) {
3885                 label = "source:   ";
3886             } else if (i == results.length - 1) {
3887                 label = "result:   ";
3888             } else {
3889                 if (!isVerbose() && pass) continue;
3890                 label = "interm" + i + ":  ";
3891             }
3892             msg("    " + label + results[i], pass ? LOG : ERR, false, true);
3893         }
3894 
3895         if (!pass) {
3896             msg(  "    expected: " + expectedResult, ERR, false, true);
3897         }
3898 
3899         return pass;
3900     }
3901 
3902     static private void assertTransform(String message, String expected, StringTransform t, String source) {
3903         assertEquals(message + " " + source, expected, t.transform(source));
3904     }
3905 
3906 
3907     static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
3908         assertEquals(message + " " +source, expected, t.transform(source));
3909         assertEquals(message + " " +source2, expected, t.transform(source2));
3910         assertEquals(message + " " + expected, source, back.transform(expected));
3911     }
3912 
3913     /*
3914      * Tests the method public Enumeration<String> getAvailableTargets(String source)
3915      */
3916     @Test
3917     public void TestGetAvailableTargets() {
3918         try {
3919             // Tests when if (targets == null) is true
3920             Transliterator.getAvailableTargets("");
3921         } catch (Exception e) {
3922             errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
3923         }
3924     }
3925 
3926     /*
3927      * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
3928      */
3929     @Test
3930     public void TestGetAvailableVariants() {
3931         try {
3932             // Tests when if (targets == null) is true
3933             Transliterator.getAvailableVariants("", "");
3934         } catch (Exception e) {
3935             errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
3936         }
3937     }
3938 
3939     /*
3940      * Tests the mehtod String nextLine() in RuleBody
3941      */
3942     @Test
3943     public void TestNextLine() {
3944         // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
3945         try{
3946             Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
3947         } catch(Exception e){
3948             errln("TransliteratorParser.nextLine() was not suppose to return an " +
3949             "exception for a rule of '\\'");
3950         }
3951     }
3952 
3953     /**
3954      * Tests equals and hashCode implementation of Transliterator.Position
3955      */
3956     @Test
3957     public void TestPositionEquals() {
3958         Transliterator.Position position1 = new Transliterator.Position(1, 0, 0, 0);
3959         Transliterator.Position position2 = new Transliterator.Position(0, 0, 0, 0);
3960         assertNotEquals("2 different positions are not equal", position1, position2);
3961         assertNotEquals("2 different positions have different hash codes", position1.hashCode(), position2.hashCode());
3962         Transliterator.Position position3 = new Transliterator.Position(1, 0, 0, 0);
3963         assertEquals("2 positions are equal", position1, position3);
3964         assertEquals("2 positions have the same hash codes", position1.hashCode(), position3.hashCode());
3965     }
3966 }
3967