• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.test.translit;
10 
11 import java.util.ArrayList;
12 import java.util.Enumeration;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.List;
17 import java.util.Locale;
18 
19 import org.junit.Test;
20 import org.junit.runner.RunWith;
21 import org.junit.runners.JUnit4;
22 
23 import com.ibm.icu.dev.test.TestFmwk;
24 import com.ibm.icu.dev.test.TestUtil;
25 import com.ibm.icu.dev.test.rbbi.RBBITstUtils;
26 import com.ibm.icu.impl.Utility;
27 import com.ibm.icu.impl.UtilityExtensions;
28 import com.ibm.icu.lang.CharSequences;
29 import com.ibm.icu.lang.UCharacter;
30 import com.ibm.icu.lang.UScript;
31 import com.ibm.icu.text.Replaceable;
32 import com.ibm.icu.text.ReplaceableString;
33 import com.ibm.icu.text.StringTransform;
34 import com.ibm.icu.text.Transliterator;
35 import com.ibm.icu.text.UTF16;
36 import com.ibm.icu.text.UnicodeFilter;
37 import com.ibm.icu.text.UnicodeSet;
38 import com.ibm.icu.text.UnicodeSetIterator;
39 import com.ibm.icu.util.CaseInsensitiveString;
40 import com.ibm.icu.util.ULocale;
41 
42 /***********************************************************************
43 
44                      HOW TO USE THIS TEST FILE
45                                -or-
46                   How I developed on two platforms
47                 without losing (too much of) my mind
48 
49 
50 1. Add new tests by copying/pasting/changing existing tests.  On Java,
51    any public void method named Test...() taking no parameters becomes
52    a test.  On C++, you need to modify the header and add a line to
53    the runIndexedTest() dispatch method.
54 
55 2. Make liberal use of the expect() method; it is your friend.
56 
57 3. The tests in this file exactly match those in a sister file on the
58    other side.  The two files are:
59 
60    icu4j:  src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
61    icu4c:  source/test/intltest/transtst.cpp
62 
63                   ==> THIS IS THE IMPORTANT PART <==
64 
65    When you add a test in this file, add it in transtst.cpp too.
66    Give it the same name and put it in the same relative place.  This
67    makes maintenance a lot simpler for any poor soul who ends up
68    trying to synchronize the tests between icu4j and icu4c.
69 
70 4. If you MUST enter a test that is NOT paralleled in the sister file,
71    then add it in the special non-mirrored section.  These are
72    labeled
73 
74      "icu4j ONLY"
75 
76    or
77 
78      "icu4c ONLY"
79 
80    Make sure you document the reason the test is here and not there.
81 
82 
83 Thank you.
84 The Management
85  ***********************************************************************/
86 
87 /**
88  * @test
89  * @summary General test of Transliterator
90  */
91 @RunWith(JUnit4.class)
92 public class TransliteratorTest extends TestFmwk {
93     @Test
TestHangul()94     public void TestHangul() {
95 
96         Transliterator lh = Transliterator.getInstance("Latin-Hangul");
97         Transliterator hl = lh.getInverse();
98 
99         assertTransform("Transform", "\uCE20", lh, "ch");
100 
101         assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
102         assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
103         assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
104         assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
105         assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
106         assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
107         assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
108         assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
109         assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
110         assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
111         assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
112         assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
113         assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
114         assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
115         assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
116         assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
117         assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
118         assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
119         assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
120         assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
121         assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
122         assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
123         assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
124         assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
125         assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
126         assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
127 
128     }
129 
130     @Test
TestChinese()131     public void TestChinese() {
132         Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
133         assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
134         assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
135     }
136 
137     @Test
TestRegistry()138     public void TestRegistry() {
139         checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
140         checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
141         checkRegistry("foo1", "[:letter:] a > b;");
142         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
143             String id = (String) e.nextElement();
144             checkRegistry(id);
145         }
146         // Need to remove these test-specific transliterators in order not to interfere with other tests.
147         Transliterator.unregister("foo3");
148         Transliterator.unregister("foo2");
149         Transliterator.unregister("foo1");
150     }
151 
checkRegistry(String id, String rules)152     private void checkRegistry (String id, String rules) {
153         Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
154         Transliterator.registerInstance(foo);
155         checkRegistry(id);
156     }
157 
checkRegistry(String id)158     private void checkRegistry(String id) {
159         Transliterator fie = Transliterator.getInstance(id);
160         final UnicodeSet fae = new UnicodeSet("[a-z5]");
161         fie.setFilter(fae);
162         Transliterator foe = Transliterator.getInstance(id);
163         UnicodeFilter fee = foe.getFilter();
164         if (fae.equals(fee)) {
165             errln("Changed what is in registry for " + id);
166         }
167     }
168 
169     @Test
TestInstantiationError()170     public void TestInstantiationError() {
171         try {
172             String ID = "<Not a valid Transliterator ID>";
173             Transliterator t = Transliterator.getInstance(ID);
174             errln("FAIL: " + ID + " returned " + t);
175         } catch (IllegalArgumentException ex) {
176             logln("OK: Bogus ID handled properly");
177         }
178     }
179 
180     @Test
TestSimpleRules()181     public void TestSimpleRules() {
182         /* Example: rules 1. ab>x|y
183          *                2. yc>z
184          *
185          * []|eabcd  start - no match, copy e to translated buffer
186          * [e]|abcd  match rule 1 - copy output & adjust cursor
187          * [ex|y]cd  match rule 2 - copy output & adjust cursor
188          * [exz]|d   no match, copy d to transliterated buffer
189          * [exzd]|   done
190          */
191         expect("ab>x|y;" +
192                 "yc>z",
193                 "eabcd", "exzd");
194 
195         /* Another set of rules:
196          *    1. ab>x|yzacw
197          *    2. za>q
198          *    3. qc>r
199          *    4. cw>n
200          *
201          * []|ab       Rule 1
202          * [x|yzacw]   No match
203          * [xy|zacw]   Rule 2
204          * [xyq|cw]    Rule 4
205          * [xyqn]|     Done
206          */
207         expect("ab>x|yzacw;" +
208                 "za>q;" +
209                 "qc>r;" +
210                 "cw>n",
211                 "ab", "xyqn");
212 
213         /* Test categories
214          */
215         Transliterator t = Transliterator.createFromRules("<ID>",
216                 "$dummy=\uE100;" +
217                 "$vowel=[aeiouAEIOU];" +
218                 "$lu=[:Lu:];" +
219                 "$vowel } $lu > '!';" +
220                 "$vowel > '&';" +
221                 "'!' { $lu > '^';" +
222                 "$lu > '*';" +
223                 "a>ERROR",
224                 Transliterator.FORWARD);
225         expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
226     }
227 
228     /**
229      * Test inline set syntax and set variable syntax.
230      */
231     @Test
TestInlineSet()232     public void TestInlineSet() {
233         expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
234         expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
235 
236         expect("$digit = [0-9];" +
237                 "$alpha = [a-zA-Z];" +
238                 "$alphanumeric = [$digit $alpha];" + // ***
239                 "$special = [^$alphanumeric];" +     // ***
240                 "$alphanumeric > '-';" +
241                 "$special > '*';",
242 
243                 "thx-1138", "---*----");
244     }
245 
246     /**
247      * Create some inverses and confirm that they work.  We have to be
248      * careful how we do this, since the inverses will not be true
249      * inverses -- we can't throw any random string at the composition
250      * of the transliterators and expect the identity function.  F x
251      * F' != I.  However, if we are careful about the input, we will
252      * get the expected results.
253      */
254     @Test
TestRuleBasedInverse()255     public void TestRuleBasedInverse() {
256         String RULES =
257             "abc>zyx;" +
258             "ab>yz;" +
259             "bc>zx;" +
260             "ca>xy;" +
261             "a>x;" +
262             "b>y;" +
263             "c>z;" +
264 
265             "abc<zyx;" +
266             "ab<yz;" +
267             "bc<zx;" +
268             "ca<xy;" +
269             "a<x;" +
270             "b<y;" +
271             "c<z;" +
272 
273             "";
274 
275         String[] DATA = {
276                 // Careful here -- random strings will not work.  If we keep
277                 // the left side to the domain and the right side to the range
278                 // we will be okay though (left, abc; right xyz).
279                 "a", "x",
280                 "abcacab", "zyxxxyy",
281                 "caccb", "xyzzy",
282         };
283 
284         Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
285         Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
286         for (int i=0; i<DATA.length; i+=2) {
287             expect(fwd, DATA[i], DATA[i+1]);
288             expect(rev, DATA[i+1], DATA[i]);
289         }
290     }
291 
292     /**
293      * Basic test of keyboard.
294      */
295     @Test
TestKeyboard()296     public void TestKeyboard() {
297         Transliterator t = Transliterator.createFromRules("<ID>",
298                 "psch>Y;"
299                 +"ps>y;"
300                 +"ch>x;"
301                 +"a>A;", Transliterator.FORWARD);
302         String DATA[] = {
303                 // insertion, buffer
304                 "a", "A",
305                 "p", "Ap",
306                 "s", "Aps",
307                 "c", "Apsc",
308                 "a", "AycA",
309                 "psch", "AycAY",
310                 null, "AycAY", // null means finishKeyboardTransliteration
311         };
312 
313         keyboardAux(t, DATA);
314     }
315 
316     /**
317      * Basic test of keyboard with cursor.
318      */
319     @Test
TestKeyboard2()320     public void TestKeyboard2() {
321         Transliterator t = Transliterator.createFromRules("<ID>",
322                 "ych>Y;"
323                 +"ps>|y;"
324                 +"ch>x;"
325                 +"a>A;", Transliterator.FORWARD);
326         String DATA[] = {
327                 // insertion, buffer
328                 "a", "A",
329                 "p", "Ap",
330                 "s", "Aps", // modified for rollback - "Ay",
331                 "c", "Apsc", // modified for rollback - "Ayc",
332                 "a", "AycA",
333                 "p", "AycAp",
334                 "s", "AycAps", // modified for rollback - "AycAy",
335                 "c", "AycApsc", // modified for rollback - "AycAyc",
336                 "h", "AycAY",
337                 null, "AycAY", // null means finishKeyboardTransliteration
338         };
339 
340         keyboardAux(t, DATA);
341     }
342 
343     /**
344      * Test keyboard transliteration with back-replacement.
345      */
346     @Test
TestKeyboard3()347     public void TestKeyboard3() {
348         // We want th>z but t>y.  Furthermore, during keyboard
349         // transliteration we want t>y then yh>z if t, then h are
350         // typed.
351         String RULES =
352             "t>|y;" +
353             "yh>z;" +
354             "";
355 
356         String[] DATA = {
357                 // Column 1: characters to add to buffer (as if typed)
358                 // Column 2: expected appearance of buffer after
359                 //           keyboard xliteration.
360                 "a", "a",
361                 "b", "ab",
362                 "t", "abt", // modified for rollback - "aby",
363                 "c", "abyc",
364                 "t", "abyct", // modified for rollback - "abycy",
365                 "h", "abycz",
366                 null, "abycz", // null means finishKeyboardTransliteration
367         };
368 
369         Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
370         keyboardAux(t, DATA);
371     }
372 
keyboardAux(Transliterator t, String[] DATA)373     private void keyboardAux(Transliterator t, String[] DATA) {
374         Transliterator.Position index = new Transliterator.Position();
375         ReplaceableString s = new ReplaceableString();
376         for (int i=0; i<DATA.length; i+=2) {
377             StringBuffer log;
378             if (DATA[i] != null) {
379                 log = new StringBuffer(s.toString() + " + "
380                         + DATA[i]
381                                + " -> ");
382                 t.transliterate(s, index, DATA[i]);
383             } else {
384                 log = new StringBuffer(s.toString() + " => ");
385                 t.finishTransliteration(s, index);
386             }
387             UtilityExtensions.formatInput(log, s, index);
388             if (s.toString().equals(DATA[i+1])) {
389                 logln(log.toString());
390             } else {
391                 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
392             }
393         }
394     }
395 
396     // Latin-Arabic has been temporarily removed until it can be
397     // done correctly.
398 
399     //  public void TestArabic() {
400     //      String DATA[] = {
401     //          "Arabic",
402     //              "\u062a\u062a\u0645\u062a\u0639 "+
403     //              "\u0627\u0644\u0644\u063a\u0629 "+
404     //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
405     //              "\u0628\u0628\u0646\u0638\u0645 "+
406     //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
407     //              "\u062c\u0645\u064a\u0644\u0629"
408     //      };
409 
410     //      Transliterator t = Transliterator.getInstance("Latin-Arabic");
411     //      for (int i=0; i<DATA.length; i+=2) {
412     //          expect(t, DATA[i], DATA[i+1]);
413     //      }
414     //  }
415 
416     /**
417      * Compose the Kana transliterator forward and reverse and try
418      * some strings that should come out unchanged.
419      */
420     @Test
TestCompoundKana()421     public void TestCompoundKana() {
422         Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
423         expect(t, "aaaaa", "aaaaa");
424     }
425 
426     /**
427      * Compose the hex transliterators forward and reverse.
428      */
429     @Test
TestCompoundHex()430     public void TestCompoundHex() {
431         Transliterator a = Transliterator.getInstance("Any-Hex");
432         Transliterator b = Transliterator.getInstance("Hex-Any");
433         // Transliterator[] trans = { a, b };
434         // Transliterator ab = Transliterator.getInstance(trans);
435         Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
436 
437         // Do some basic tests of b
438         expect(b, "\\u0030\\u0031", "01");
439 
440         String s = "abcde";
441         expect(ab, s, s);
442 
443         // trans = new Transliterator[] { b, a };
444         // Transliterator ba = Transliterator.getInstance(trans);
445         Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
446         ReplaceableString str = new ReplaceableString(s);
447         a.transliterate(str);
448         expect(ba, str.toString(), str.toString());
449     }
450 
451     /**
452      * Do some basic tests of filtering.
453      */
454     @Test
TestFiltering()455     public void TestFiltering() {
456 
457         Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
458         tempTrans.setFilter(new UnicodeSet("[a]"));
459         String tempResult = tempTrans.transform("xa");
460         assertEquals("context should not be filtered ", "xb", tempResult);
461 
462         tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
463         tempResult = tempTrans.transform("xa");
464         assertEquals("context should not be filtered ", "xb", tempResult);
465 
466         Transliterator hex = Transliterator.getInstance("Any-Hex");
467         hex.setFilter(new UnicodeFilter() {
468             @Override
469             public boolean contains(int c) {
470                 return c != 'c';
471             }
472             @Override
473             public String toPattern(boolean escapeUnprintable) {
474                 return "";
475             }
476             @Override
477             public boolean matchesIndexValue(int v) {
478                 return false;
479             }
480             @Override
481             public void addMatchSetTo(UnicodeSet toUnionTo) {}
482         });
483         String s = "abcde";
484         String out = hex.transliterate(s);
485         String exp = "\\u0061\\u0062c\\u0064\\u0065";
486         if (out.equals(exp)) {
487             logln("Ok:   \"" + exp + "\"");
488         } else {
489             logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
490         }
491     }
492 
493     /**
494      * Test anchors
495      */
496     @Test
TestAnchors()497     public void TestAnchors() {
498         expect("^ab  > 01 ;" +
499                 " ab  > |8 ;" +
500                 "  b  > k ;" +
501                 " 8x$ > 45 ;" +
502                 " 8x  > 77 ;",
503 
504                 "ababbabxabx",
505         "018k7745");
506         expect("$s = [z$] ;" +
507                 "$s{ab    > 01 ;" +
508                 "   ab    > |8 ;" +
509                 "    b    > k ;" +
510                 "   8x}$s > 45 ;" +
511                 "   8x    > 77 ;",
512 
513                 "abzababbabxzabxabx",
514         "01z018k45z01x45");
515     }
516 
517     /**
518      * Test pattern quoting and escape mechanisms.
519      */
520     @Test
TestPatternQuoting()521     public void TestPatternQuoting() {
522         // Array of 3n items
523         // Each item is <rules>, <input>, <expected output>
524         String[] DATA = {
525                 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
526         };
527 
528         for (int i=0; i<DATA.length; i+=3) {
529             logln("Pattern: " + Utility.escape(DATA[i]));
530             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
531             expect(t, DATA[i+1], DATA[i+2]);
532         }
533     }
534 
535     @Test
TestVariableNames()536     public void TestVariableNames() {
537         Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
538         if (gl == null) {
539             errln("FAIL: null Transliterator returned.");
540         }
541     }
542 
543     /**
544      * Regression test for bugs found in Greek transliteration.
545      */
546     @Test
TestJ277()547     public void TestJ277() {
548         Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
549 
550         char sigma = (char)0x3C3;
551         char upsilon = (char)0x3C5;
552         char nu = (char)0x3BD;
553         // not used char PHI = (char)0x3A6;
554         char alpha = (char)0x3B1;
555         // not used char omega = (char)0x3C9;
556         // not used char omicron = (char)0x3BF;
557         // not used char epsilon = (char)0x3B5;
558 
559         // sigma upsilon nu -> syn
560         StringBuffer buf = new StringBuffer();
561         buf.append(sigma).append(upsilon).append(nu);
562         String syn = buf.toString();
563         expect(gl, syn, "syn");
564 
565         // sigma alpha upsilon nu -> saun
566         buf.setLength(0);
567         buf.append(sigma).append(alpha).append(upsilon).append(nu);
568         String sayn = buf.toString();
569         expect(gl, sayn, "saun");
570 
571         // Again, using a smaller rule set
572         String rules =
573             "$alpha   = \u03B1;" +
574             "$nu      = \u03BD;" +
575             "$sigma   = \u03C3;" +
576             "$ypsilon = \u03C5;" +
577             "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +
578             "s <>           $sigma;" +
579             "a <>           $alpha;" +
580             "u <>  $vowel { $ypsilon;" +
581             "y <>           $ypsilon;" +
582             "n <>           $nu;";
583         Transliterator mini = Transliterator.createFromRules
584         ("mini", rules, Transliterator.REVERSE);
585         expect(mini, syn, "syn");
586         expect(mini, sayn, "saun");
587 
588         //|    // Transliterate the Greek locale data
589         //|    Locale el("el");
590         //|    DateFormatSymbols syms(el, status);
591         //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
592         //|    int32_t i, count;
593         //|    const UnicodeString* data = syms.getMonths(count);
594         //|    for (i=0; i<count; ++i) {
595         //|        if (data[i].length() == 0) {
596         //|            continue;
597         //|        }
598         //|        UnicodeString out(data[i]);
599         //|        gl->transliterate(out);
600         //|        bool_t ok = true;
601         //|        if (data[i].length() >= 2 && out.length() >= 2 &&
602         //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
603         //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
604         //|                ok = false;
605         //|            }
606         //|        }
607         //|        if (ok) {
608         //|            logln(prettify(data[i] + " -> " + out));
609         //|        } else {
610         //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
611         //|        }
612         //|    }
613     }
614 
615     //    /**
616     //     * Prefix, suffix support in hex transliterators
617     //     */
618     //    public void TestJ243() {
619     //        // Test default Hex-Any, which should handle
620     //        // \\u, \\U, u+, and U+
621     //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
622     //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
623     //
624     //        // Try a custom Hex-Any
625     //        // \\uXXXX and &#xXXXX;
626     //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
627     //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
628     //               "abcd5fx012&#x00033;");
629     //
630     //        // Try custom Any-Hex (default is tested elsewhere)
631     //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
632     //        expect(hex3, "012", "&#x30;&#x31;&#x32;");
633     //    }
634 
635     @Test
TestJ329()636     public void TestJ329() {
637 
638         Object[] DATA = {
639                 Boolean.FALSE, "a > b; c > d",
640                 Boolean.TRUE,  "a > b; no operator; c > d",
641         };
642 
643         for (int i=0; i<DATA.length; i+=2) {
644             String err = null;
645             try {
646                 Transliterator.createFromRules("<ID>",
647                         (String) DATA[i+1],
648                         Transliterator.FORWARD);
649             } catch (IllegalArgumentException e) {
650                 err = e.getMessage();
651             }
652             boolean gotError = (err != null);
653             String desc = (String) DATA[i+1] +
654             (gotError ? (" -> error: " + err) : " -> no error");
655             if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
656                 logln("Ok:   " + desc);
657             } else {
658                 errln("FAIL: " + desc);
659             }
660         }
661     }
662 
663     /**
664      * Test segments and segment references.
665      */
666     @Test
TestSegments()667     public void TestSegments() {
668         // Array of 3n items
669         // Each item is <rules>, <input>, <expected output>
670         String[] DATA = {
671                 "([a-z]) '.' ([0-9]) > $2 '-' $1",
672                 "abc.123.xyz.456",
673                 "ab1-c23.xy4-z56",
674         };
675 
676         for (int i=0; i<DATA.length; i+=3) {
677             logln("Pattern: " + Utility.escape(DATA[i]));
678             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
679             expect(t, DATA[i+1], DATA[i+2]);
680         }
681     }
682 
683     /**
684      * Test cursor positioning outside of the key
685      */
686     @Test
TestCursorOffset()687     public void TestCursorOffset() {
688         // Array of 3n items
689         // Each item is <rules>, <input>, <expected output>
690         String[] DATA = {
691                 "pre {alpha} post > | @ ALPHA ;" +
692                 "eALPHA > beta ;" +
693                 "pre {beta} post > BETA @@ | ;" +
694                 "post > xyz",
695 
696                 "prealphapost prebetapost",
697                 "prbetaxyz preBETApost",
698         };
699 
700         for (int i=0; i<DATA.length; i+=3) {
701             logln("Pattern: " + Utility.escape(DATA[i]));
702             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
703             expect(t, DATA[i+1], DATA[i+2]);
704         }
705     }
706 
707     /**
708      * Test zero length and > 1 char length variable values.  Test
709      * use of variable refs in UnicodeSets.
710      */
711     @Test
TestArbitraryVariableValues()712     public void TestArbitraryVariableValues() {
713         // Array of 3n items
714         // Each item is <rules>, <input>, <expected output>
715         String[] DATA = {
716                 "$abe = ab;" +
717                 "$pat = x[yY]z;" +
718                 "$ll  = 'a-z';" +
719                 "$llZ = [$ll];" +
720                 "$llY = [$ll$pat];" +
721                 "$emp = ;" +
722 
723                 "$abe > ABE;" +
724                 "$pat > END;" +
725                 "$llZ > 1;" +
726                 "$llY > 2;" +
727                 "7$emp 8 > 9;" +
728                 "",
729 
730                 "ab xYzxyz stY78",
731                 "ABE ENDEND 1129",
732         };
733 
734         for (int i=0; i<DATA.length; i+=3) {
735             logln("Pattern: " + Utility.escape(DATA[i]));
736             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
737             expect(t, DATA[i+1], DATA[i+2]);
738         }
739     }
740 
741     /**
742      * Confirm that the contextStart, contextLimit, start, and limit
743      * behave correctly.
744      */
745     @Test
TestPositionHandling()746     public void TestPositionHandling() {
747         // Array of 3n items
748         // Each item is <rules>, <input>, <expected output>
749         String[] DATA = {
750                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
751                 "xtat txtb", // pos 0,9,0,9
752                 "xTTaSS TTxUUb",
753 
754                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
755                 "xtat txtb", // pos 2,9,3,8
756                 "xtaSS TTxUUb",
757 
758                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
759                 "xtat txtb", // pos 3,8,3,8
760                 "xtaTT TTxTTb",
761         };
762 
763         // Array of 4n positions -- these go with the DATA array
764         // They are: contextStart, contextLimit, start, limit
765         int[] POS = {
766                 0, 9, 0, 9,
767                 2, 9, 3, 8,
768                 3, 8, 3, 8,
769         };
770 
771         int n = DATA.length/3;
772         for (int i=0; i<n; i++) {
773             Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
774             Transliterator.Position pos = new Transliterator.Position(
775                     POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
776             ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
777             t.transliterate(rsource, pos);
778             t.finishTransliteration(rsource, pos);
779             String result = rsource.toString();
780             String exp = DATA[3*i+2];
781             expectAux(Utility.escape(DATA[3*i]),
782                     DATA[3*i+1],
783                     result,
784                     result.equals(exp),
785                     exp);
786         }
787     }
788 
789     /**
790      * Test the Hiragana-Katakana transliterator.
791      */
792     @Test
TestHiraganaKatakana()793     public void TestHiraganaKatakana() {
794         Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
795         Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
796 
797         // Array of 3n items
798         // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
799         String[] DATA = {
800                 "both",
801                 "\u3042\u3090\u3099\u3092\u3050",
802                 "\u30A2\u30F8\u30F2\u30B0",
803 
804                 "kh",
805                 "\u307C\u3051\u3060\u3042\u3093\u30FC",
806                 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
807         };
808 
809         for (int i=0; i<DATA.length; i+=3) {
810             switch (DATA[i].charAt(0)) {
811             case 'h': // Hiragana-Katakana
812                 expect(hk, DATA[i+1], DATA[i+2]);
813                 break;
814             case 'k': // Katakana-Hiragana
815                 expect(kh, DATA[i+2], DATA[i+1]);
816                 break;
817             case 'b': // both
818                 expect(hk, DATA[i+1], DATA[i+2]);
819                 expect(kh, DATA[i+2], DATA[i+1]);
820                 break;
821             }
822         }
823 
824     }
825 
826     @Test
TestCopyJ476()827     public void TestCopyJ476() {
828         // This is a C++-only copy constructor test
829     }
830 
831     /**
832      * Test inter-Indic transliterators.  These are composed.
833      */
834     @Test
TestInterIndic()835     public void TestInterIndic() {
836         String ID = "Devanagari-Gujarati";
837         Transliterator dg = Transliterator.getInstance(ID);
838         if (dg == null) {
839             errln("FAIL: getInstance(" + ID + ") returned null");
840             return;
841         }
842         String id = dg.getID();
843         if (!id.equals(ID)) {
844             errln("FAIL: getInstance(" + ID + ").getID() => " + id);
845         }
846         String dev = "\u0901\u090B\u0925";
847         String guj = "\u0A81\u0A8B\u0AA5";
848         expect(dg, dev, guj);
849     }
850 
851     /**
852      * Test filter syntax in IDs. (J23)
853      */
854     @Test
TestFilterIDs()855     public void TestFilterIDs() {
856         String[] DATA = {
857                 "[aeiou]Any-Hex", // ID
858                 "[aeiou]Hex-Any", // expected inverse ID
859                 "quizzical",      // src
860                 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
861 
862                 "[aeiou]Any-Hex;[^5]Hex-Any",
863                 "[^5]Any-Hex;[aeiou]Hex-Any",
864                 "quizzical",
865                 "q\\u0075izzical",
866 
867                 "[abc]Null",
868                 "[abc]Null",
869                 "xyz",
870                 "xyz",
871         };
872 
873         for (int i=0; i<DATA.length; i+=4) {
874             String ID = DATA[i];
875             Transliterator t = Transliterator.getInstance(ID);
876             expect(t, DATA[i+2], DATA[i+3]);
877 
878             // Check the ID
879             if (!ID.equals(t.getID())) {
880                 errln("FAIL: getInstance(" + ID + ").getID() => " +
881                         t.getID());
882             }
883 
884             // Check the inverse
885             String uID = DATA[i+1];
886             Transliterator u = t.getInverse();
887             if (u == null) {
888                 errln("FAIL: " + ID + ".getInverse() returned NULL");
889             } else if (!u.getID().equals(uID)) {
890                 errln("FAIL: " + ID + ".getInverse().getID() => " +
891                         u.getID() + ", expected " + uID);
892             }
893         }
894     }
895 
896     /**
897      * Test the case mapping transliterators.
898      */
899     @Test
TestCaseMap()900     public void TestCaseMap() {
901         Transliterator toUpper =
902             Transliterator.getInstance("Any-Upper[^xyzXYZ]");
903         Transliterator toLower =
904             Transliterator.getInstance("Any-Lower[^xyzXYZ]");
905         Transliterator toTitle =
906             Transliterator.getInstance("Any-Title[^xyzXYZ]");
907 
908         expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
909         "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
910         expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
911         "the quick brown foX jumped over the lazY dogs.");
912         expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
913         "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
914     }
915 
916     /**
917      * Test the name mapping transliterators.
918      */
919     @Test
TestNameMap()920     public void TestNameMap() {
921         Transliterator uni2name =
922             Transliterator.getInstance("Any-Name[^abc]");
923         Transliterator name2uni =
924             Transliterator.getInstance("Name-Any");
925 
926         expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
927         "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
928         expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
929         "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
930 
931         // round trip
932         Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
933 
934         String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
935         expect(t, s, s);
936     }
937 
938     /**
939      * Test liberalized ID syntax.  1006c
940      */
941     @Test
TestLiberalizedID()942     public void TestLiberalizedID() {
943         // Some test cases have an expected getID() value of NULL.  This
944         // means I have disabled the test case for now.  This stuff is
945         // still under development, and I haven't decided whether to make
946         // getID() return canonical case yet.  It will all get rewritten
947         // with the move to Source-Target/Variant IDs anyway. [aliu]
948         String DATA[] = {
949                 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
950                 "  Null  ", "Null", "whitespace",
951                 " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
952                 "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",
953         };
954 
955         for (int i=0; i<DATA.length; i+=3) {
956             try {
957                 Transliterator t = Transliterator.getInstance(DATA[i]);
958                 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
959                     logln("Ok: " + DATA[i+2] +
960                             " create ID \"" + DATA[i] + "\" => \"" +
961                             t.getID() + "\"");
962                 } else {
963                     errln("FAIL: " + DATA[i+2] +
964                             " create ID \"" + DATA[i] + "\" => \"" +
965                             t.getID() + "\", exp \"" + DATA[i+1] + "\"");
966                 }
967             } catch (IllegalArgumentException e) {
968                 errln("FAIL: " + DATA[i+2] +
969                         " create ID \"" + DATA[i] + "\"");
970             }
971         }
972     }
973 
974     @Test
TestCreateInstance()975     public void TestCreateInstance() {
976         String FORWARD = "F";
977         String REVERSE = "R";
978         String DATA[] = {
979                 // Column 1: id
980                 // Column 2: direction
981                 // Column 3: expected ID, or "" if expect failure
982                 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
983 
984                 // JB#2689: bad compound causes crash
985                 "InvalidSource-InvalidTarget", FORWARD, "",
986                 "InvalidSource-InvalidTarget", REVERSE, "",
987                 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
988                 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
989                 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
990                 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
991 
992                 null
993         };
994 
995         for (int i=0; DATA[i]!=null; i+=3) {
996             String id=DATA[i];
997             int dir = (DATA[i+1]==FORWARD)?
998                     Transliterator.FORWARD:Transliterator.REVERSE;
999             String expID=DATA[i+2];
1000             Exception e = null;
1001             Transliterator t;
1002             try {
1003                 t = Transliterator.getInstance(id,dir);
1004             } catch (Exception e1) {
1005                 e = e1;
1006                 t = null;
1007             }
1008             String newID = (t!=null)?t.getID():"";
1009             boolean ok = (newID.equals(expID));
1010             if (t==null) {
1011                 newID = e.getMessage();
1012             }
1013             if (ok) {
1014                 logln("Ok: createInstance(" +
1015                         id + "," + DATA[i+1] + ") => " + newID);
1016             } else {
1017                 errln("FAIL: createInstance(" +
1018                         id + "," + DATA[i+1] + ") => " + newID +
1019                         ", expected " + expID);
1020             }
1021         }
1022     }
1023 
1024     /**
1025      * Test the normalization transliterator.
1026      */
1027     @Test
TestNormalizationTransliterator()1028     public void TestNormalizationTransliterator() {
1029         // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
1030         // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1031         String[][] CANON = {
1032                 // Input               Decomposed            Composed
1033                 {"cat",                "cat",                "cat"               },
1034                 {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },
1035 
1036                 {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above
1037                 {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above
1038 
1039                 {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above
1040                 {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below
1041                 {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above
1042 
1043                 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1044                 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1045 
1046                 {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave
1047                 {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave
1048                 {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron
1049 
1050                 {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign
1051                 {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring
1052 
1053                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0
1054                 {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0
1055 
1056                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1057                 {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },
1058 
1059                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1060                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1061                 {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten
1062                 {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten
1063                 {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten
1064 
1065                 {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },
1066         };
1067 
1068         String[][] COMPAT = {
1069                 // Input               Decomposed            Composed
1070                 {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed
1071 
1072                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0
1073                 {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i
1074 
1075                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1076                 {"Henry \u2163",       "Henry IV",           "Henry IV"          },
1077 
1078                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1079                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1080 
1081                 {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten
1082         };
1083 
1084         Transliterator NFD = Transliterator.getInstance("NFD");
1085         Transliterator NFC = Transliterator.getInstance("NFC");
1086         for (int i=0; i<CANON.length; ++i) {
1087             String in = CANON[i][0];
1088             String expd = CANON[i][1];
1089             String expc = CANON[i][2];
1090             expect(NFD, in, expd);
1091             expect(NFC, in, expc);
1092         }
1093 
1094         Transliterator NFKD = Transliterator.getInstance("NFKD");
1095         Transliterator NFKC = Transliterator.getInstance("NFKC");
1096         for (int i=0; i<COMPAT.length; ++i) {
1097             String in = COMPAT[i][0];
1098             String expkd = COMPAT[i][1];
1099             String expkc = COMPAT[i][2];
1100             expect(NFKD, in, expkd);
1101             expect(NFKC, in, expkc);
1102         }
1103 
1104         Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1105         expect(t, "\u010dx", "c\u030C");
1106     }
1107 
1108     /**
1109      * Test compound RBT rules.
1110      */
1111     @Test
TestCompoundRBT()1112     public void TestCompoundRBT() {
1113         // Careful with spacing and ';' here:  Phrase this exactly
1114         // as toRules() is going to return it.  If toRules() changes
1115         // with regard to spacing or ';', then adjust this string.
1116         String rule = "::Hex-Any;\n" +
1117         "::Any-Lower;\n" +
1118         "a > '.A.';\n" +
1119         "b > '.B.';\n" +
1120         "::[^t]Any-Upper;";
1121         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1122         if (t == null) {
1123             errln("FAIL: createFromRules failed");
1124             return;
1125         }
1126         expect(t, "\u0043at in the hat, bat on the mat",
1127         "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1128         String r = t.toRules(true);
1129         if (r.equals(rule)) {
1130             logln("OK: toRules() => " + r);
1131         } else {
1132             errln("FAIL: toRules() => " + r +
1133                     ", expected " + rule);
1134         }
1135 
1136         // Now test toRules
1137         t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1138         if (t == null) {
1139             errln("FAIL: createInstance failed");
1140             return;
1141         }
1142         String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1143         r = t.toRules(true);
1144         if (!r.equals(exp)) {
1145             errln("FAIL: toRules() => " + r +
1146                     ", expected " + exp);
1147         } else {
1148             logln("OK: toRules() => " + r);
1149         }
1150 
1151         // Round trip the result of toRules
1152         t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1153         if (t == null) {
1154             errln("FAIL: createFromRules #2 failed");
1155             return;
1156         } else {
1157             logln("OK: createFromRules(" + r + ") succeeded");
1158         }
1159 
1160         // Test toRules again
1161         r = t.toRules(true);
1162         if (!r.equals(exp)) {
1163             errln("FAIL: toRules() => " + r +
1164                     ", expected " + exp);
1165         } else {
1166             logln("OK: toRules() => " + r);
1167         }
1168 
1169         // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1170         // to what the regenerated ID will look like.
1171         String id = "Upper(Lower);(NFKC)";
1172         t = Transliterator.getInstance(id, Transliterator.FORWARD);
1173         if (t == null) {
1174             errln("FAIL: createInstance #2 failed");
1175             return;
1176         }
1177         if (t.getID().equals(id)) {
1178             logln("OK: created " + id);
1179         } else {
1180             errln("FAIL: createInstance(" + id +
1181                     ").getID() => " + t.getID());
1182         }
1183 
1184         Transliterator u = t.getInverse();
1185         if (u == null) {
1186             errln("FAIL: createInverse failed");
1187             return;
1188         }
1189         exp = "NFKC();Lower(Upper)";
1190         if (u.getID().equals(exp)) {
1191             logln("OK: createInverse(" + id + ") => " +
1192                     u.getID());
1193         } else {
1194             errln("FAIL: createInverse(" + id + ") => " +
1195                     u.getID());
1196         }
1197     }
1198 
1199     /**
1200      * Compound filter semantics were originally not implemented
1201      * correctly.  Originally, each component filter f(i) is replaced by
1202      * f'(i) = f(i) && g, where g is the filter for the compound
1203      * transliterator.
1204      *
1205      * From Mark:
1206      *
1207      * Suppose and I have a transliterator X. Internally X is
1208      * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1209      *
1210      * The compound should convert all greek characters (through latin) to
1211      * cyrillic, then lowercase the result. The filter should say "don't
1212      * touch 'A' in the original". But because an intermediate result
1213      * happens to go through "A", the Greek Alpha gets hung up.
1214      */
1215     @Test
TestCompoundFilter()1216     public void TestCompoundFilter() {
1217         Transliterator t = Transliterator.getInstance
1218         ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
1219         t.setFilter(new UnicodeSet("[^A]"));
1220 
1221         // Only the 'A' at index 1 should remain unchanged
1222         expect(t,
1223                 CharsToUnicodeString("BA\\u039A\\u0391"),
1224                 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1225     }
1226 
1227     /**
1228      * Test the "Remove" transliterator.
1229      */
1230     @Test
TestRemove()1231     public void TestRemove() {
1232         Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1233         expect(t, "The quick brown fox.",
1234         "Th qck brwn fx.");
1235     }
1236 
1237     @Test
TestToRules()1238     public void TestToRules() {
1239         String RBT = "rbt";
1240         String SET = "set";
1241         String[] DATA = {
1242                 RBT,
1243                 "$a=\\u4E61; [$a] > A;",
1244                 "[\\u4E61] > A;",
1245 
1246                 RBT,
1247                 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1248                 "[[:Zs:][:Zl:]]{a} > A;",
1249 
1250                 SET,
1251                 "[[:Zs:][:Zl:]]",
1252                 "[[:Zs:][:Zl:]]",
1253 
1254                 SET,
1255                 "[:Ps:]",
1256                 "[:Ps:]",
1257 
1258                 SET,
1259                 "[:L:]",
1260                 "[:L:]",
1261 
1262                 SET,
1263                 "[[:L:]-[A]]",
1264                 "[[:L:]-[A]]",
1265 
1266                 SET,
1267                 "[~[:Lu:][:Ll:]]",
1268                 "[~[:Lu:][:Ll:]]",
1269 
1270                 SET,
1271                 "[~[a-z]]",
1272                 "[~[a-z]]",
1273 
1274                 RBT,
1275                 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1276                 "[^[:Zs:]]{a} > A;",
1277 
1278                 RBT,
1279                 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1280                 "[[a-z]-[:Zs:]]{a} > A;",
1281 
1282                 RBT,
1283                 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1284                 "[[:Zs:]&[a-z]]{a} > A;",
1285 
1286                 RBT,
1287                 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1288                 "[x[:Zs:]]{a} > A;",
1289 
1290                 RBT,
1291                 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1292                 "$macron = \\u0304 ;"+
1293                 "$evowel = [aeiouyAEIOUY] ;"+
1294                 "$iotasub = \\u0345 ;"+
1295                 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1296                 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1297 
1298                 RBT,
1299                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1300                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1301         };
1302 
1303         for (int d=0; d < DATA.length; d+=3) {
1304             if (DATA[d] == RBT) {
1305                 // Transliterator test
1306                 Transliterator t = Transliterator.createFromRules("ID",
1307                         DATA[d+1], Transliterator.FORWARD);
1308                 if (t == null) {
1309                     errln("FAIL: createFromRules failed");
1310                     return;
1311                 }
1312                 String rules, escapedRules;
1313                 rules = t.toRules(false);
1314                 escapedRules = t.toRules(true);
1315                 String expRules = Utility.unescape(DATA[d+2]);
1316                 String expEscapedRules = DATA[d+2];
1317                 if (rules.equals(expRules)) {
1318                     logln("Ok: " + DATA[d+1] +
1319                             " => " + Utility.escape(rules));
1320                 } else {
1321                     errln("FAIL: " + DATA[d+1] +
1322                             " => " + Utility.escape(rules + ", exp " + expRules));
1323                 }
1324                 if (escapedRules.equals(expEscapedRules)) {
1325                     logln("Ok: " + DATA[d+1] +
1326                             " => " + escapedRules);
1327                 } else {
1328                     errln("FAIL: " + DATA[d+1] +
1329                             " => " + escapedRules + ", exp " + expEscapedRules);
1330                 }
1331 
1332             } else {
1333                 // UnicodeSet test
1334                 String pat = DATA[d+1];
1335                 String expToPat = DATA[d+2];
1336                 UnicodeSet set = new UnicodeSet(pat);
1337 
1338                 // Adjust spacing etc. as necessary.
1339                 String toPat;
1340                 toPat = set.toPattern(true);
1341                 if (expToPat.equals(toPat)) {
1342                     logln("Ok: " + pat +
1343                             " => " + toPat);
1344                 } else {
1345                     errln("FAIL: " + pat +
1346                             " => " + Utility.escape(toPat) +
1347                             ", exp " + Utility.escape(pat));
1348                 }
1349             }
1350         }
1351     }
1352 
1353     @Test
TestContext()1354     public void TestContext() {
1355         Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
1356 
1357         expect("de > x; {d}e > y;",
1358                 "de",
1359                 "ye",
1360                 pos);
1361 
1362         expect("ab{c} > z;",
1363                 "xadabdabcy",
1364         "xadabdabzy");
1365     }
1366 
CharsToUnicodeString(String s)1367     static final String CharsToUnicodeString(String s) {
1368         return Utility.unescape(s);
1369     }
1370 
1371     @Test
TestSupplemental()1372     public void TestSupplemental() {
1373 
1374         expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1375         "a > $a; $s > i;"),
1376         CharsToUnicodeString("ab\\U0001030Fx"),
1377         CharsToUnicodeString("\\U00010300bix"));
1378 
1379         expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1380                 "$b=[A-Z\\U00010400-\\U0001044D];" +
1381         "($a)($b) > $2 $1;"),
1382         CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1383         CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1384 
1385         // k|ax\\U00010300xm
1386 
1387         // k|a\\U00010400\\U00010300xm
1388         // ky|\\U00010400\\U00010300xm
1389         // ky\\U00010400|\\U00010300xm
1390 
1391         // ky\\U00010400|\\U00010300\\U00010400m
1392         // ky\\U00010400y|\\U00010400m
1393         expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1394                 "$a {x} > | @ \\U00010400;" +
1395         "{$a} [^\\u0000-\\uFFFF] > y;"),
1396         CharsToUnicodeString("kax\\U00010300xm"),
1397         CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1398 
1399         expect(Transliterator.getInstance("Any-Name"),
1400                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1401         "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1402 
1403         expect(Transliterator.getInstance("Name-Any"),
1404                 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1405                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1406 
1407         expect(Transliterator.getInstance("Any-Hex/Unicode"),
1408                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1409         "U+10330U+10FF00U+E0061U+00A0");
1410 
1411         expect(Transliterator.getInstance("Any-Hex/C"),
1412                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1413         "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1414 
1415         expect(Transliterator.getInstance("Any-Hex/Perl"),
1416                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1417         "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1418 
1419         expect(Transliterator.getInstance("Any-Hex/Java"),
1420                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1421         "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1422 
1423         expect(Transliterator.getInstance("Any-Hex/XML"),
1424                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1425         "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1426 
1427         expect(Transliterator.getInstance("Any-Hex/XML10"),
1428                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1429         "&#66352;&#1113856;&#917601;&#160;");
1430 
1431         expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1432                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1433                 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1434     }
1435 
1436     @Test
TestQuantifier()1437     public void TestQuantifier() {
1438 
1439         // Make sure @ in a quantified anteContext works
1440         expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1441                 "AAAAAb",
1442         "aaa(aac)");
1443 
1444         // Make sure @ in a quantified postContext works
1445         expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1446                 "baaaaa",
1447         "caa(aaa)");
1448 
1449         // Make sure @ in a quantified postContext with seg ref works
1450         expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1451                 "baaaaa",
1452         "baa(aaa)");
1453 
1454         // Make sure @ past ante context doesn't enter ante context
1455         Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1456         expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1457                 "xxxab",
1458                 "xxx(ac)",
1459                 pos);
1460 
1461         // Make sure @ past post context doesn't pass limit
1462         Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1463         expect("{b} a+ > c @@ |; x > y; a > A;",
1464                 "baxx",
1465                 "caxx",
1466                 pos2);
1467 
1468         // Make sure @ past post context doesn't enter post context
1469         expect("{b} a+ > c @@ |; x > y; a > A;",
1470                 "baxx",
1471         "cayy");
1472 
1473         expect("(ab)? c > d;",
1474                 "c abc ababc",
1475         "d d abd");
1476 
1477         // NOTE: The (ab)+ when referenced just yields a single "ab",
1478         // not the full sequence of them.  This accords with perl behavior.
1479         expect("(ab)+ {x} > '(' $1 ')';",
1480                 "x abx ababxy",
1481         "x ab(ab) abab(ab)y");
1482 
1483         expect("b+ > x;",
1484                 "ac abc abbc abbbc",
1485         "ac axc axc axc");
1486 
1487         expect("[abc]+ > x;",
1488                 "qac abrc abbcs abtbbc",
1489         "qx xrx xs xtx");
1490 
1491         expect("q{(ab)+} > x;",
1492                 "qa qab qaba qababc qaba",
1493         "qa qx qxa qxc qxa");
1494 
1495         expect("q(ab)* > x;",
1496                 "qa qab qaba qababc",
1497         "xa x xa xc");
1498 
1499         // NOTE: The (ab)+ when referenced just yields a single "ab",
1500         // not the full sequence of them.  This accords with perl behavior.
1501         expect("q(ab)* > '(' $1 ')';",
1502                 "qa qab qaba qababc",
1503         "()a (ab) (ab)a (ab)c");
1504 
1505         // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1506         // quoted string
1507         expect("'ab'+ > x;",
1508                 "bb ab ababb",
1509         "bb x xb");
1510 
1511         // $foo+ and $foo* -- the quantifier should apply to the entire
1512         // variable reference
1513         expect("$var = ab; $var+ > x;",
1514                 "bb ab ababb",
1515         "bb x xb");
1516     }
1517 
1518     static class TestFact implements Transliterator.Factory {
1519         static class NameableNullTrans extends Transliterator {
NameableNullTrans(String id)1520             public NameableNullTrans(String id) {
1521                 super(id, null);
1522             }
1523             @Override
handleTransliterate(Replaceable text, Position offsets, boolean incremental)1524             protected void handleTransliterate(Replaceable text,
1525                     Position offsets, boolean incremental) {
1526                 offsets.start = offsets.limit;
1527             }
1528         }
1529         String id;
TestFact(String theID)1530         public TestFact(String theID) {
1531             id = theID;
1532         }
1533         @Override
getInstance(String ignoredID)1534         public Transliterator getInstance(String ignoredID) {
1535             return new NameableNullTrans(id);
1536         }
1537     }
1538 
1539     @Test
TestSTV()1540     public void TestSTV() {
1541         Enumeration es = Transliterator.getAvailableSources();
1542         for (int i=0; es.hasMoreElements(); ++i) {
1543             String source = (String) es.nextElement();
1544             logln("" + i + ": " + source);
1545             if (source.length() == 0) {
1546                 errln("FAIL: empty source");
1547                 continue;
1548             }
1549             Enumeration et = Transliterator.getAvailableTargets(source);
1550             for (int j=0; et.hasMoreElements(); ++j) {
1551                 String target = (String) et.nextElement();
1552                 logln(" " + j + ": " + target);
1553                 if (target.length() == 0) {
1554                     errln("FAIL: empty target");
1555                     continue;
1556                 }
1557                 Enumeration ev = Transliterator.getAvailableVariants(source, target);
1558                 for (int k=0; ev.hasMoreElements(); ++k) {
1559                     String variant = (String) ev.nextElement();
1560                     if (variant.length() == 0) {
1561                         logln("  " + k + ": <empty>");
1562                     } else {
1563                         logln("  " + k + ": " + variant);
1564                     }
1565                 }
1566             }
1567         }
1568 
1569         // Test registration
1570         String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1571         String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1572         String[] SOURCES = { null, "Seoridf", "Oewoir" };
1573         for (int i=0; i<3; ++i) {
1574             Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1575             try {
1576                 Transliterator t = Transliterator.getInstance(IDS[i]);
1577                 if (t.getID().equals(IDS[i])) {
1578                     logln("Ok: Registration/creation succeeded for ID " +
1579                             IDS[i]);
1580                 } else {
1581                     errln("FAIL: Registration of ID " +
1582                             IDS[i] + " creates ID " + t.getID());
1583                 }
1584                 Transliterator.unregister(IDS[i]);
1585                 try {
1586                     t = Transliterator.getInstance(IDS[i]);
1587                     errln("FAIL: Unregistration failed for ID " +
1588                             IDS[i] + "; still receiving ID " + t.getID());
1589                 } catch (IllegalArgumentException e2) {
1590                     // Good; this is what we expect
1591                     logln("Ok; Unregistered " + IDS[i]);
1592                 }
1593             } catch (IllegalArgumentException e) {
1594                 errln("FAIL: Registration/creation failed for ID " +
1595                         IDS[i]);
1596             } finally {
1597                 Transliterator.unregister(IDS[i]);
1598             }
1599         }
1600 
1601         // Make sure getAvailable API reflects removal
1602         for (Enumeration e = Transliterator.getAvailableIDs();
1603         e.hasMoreElements(); ) {
1604             String id = (String) e.nextElement();
1605             for (int i=0; i<3; ++i) {
1606                 if (id.equals(FULL_IDS[i])) {
1607                     errln("FAIL: unregister(" + id + ") failed");
1608                 }
1609             }
1610         }
1611         for (Enumeration e = Transliterator.getAvailableTargets("Any");
1612         e.hasMoreElements(); ) {
1613             String t = (String) e.nextElement();
1614             if (t.equals(IDS[0])) {
1615                 errln("FAIL: unregister(Any-" + t + ") failed");
1616             }
1617         }
1618         for (Enumeration e = Transliterator.getAvailableSources();
1619         e.hasMoreElements(); ) {
1620             String s = (String) e.nextElement();
1621             for (int i=0; i<3; ++i) {
1622                 if (SOURCES[i] == null) continue;
1623                 if (s.equals(SOURCES[i])) {
1624                     errln("FAIL: unregister(" + s + "-*) failed");
1625                 }
1626             }
1627         }
1628     }
1629 
1630     /**
1631      * Test inverse of Greek-Latin; Title()
1632      */
1633     @Test
TestCompoundInverse()1634     public void TestCompoundInverse() {
1635         Transliterator t = Transliterator.getInstance
1636         ("Greek-Latin; Title()", Transliterator.REVERSE);
1637         if (t == null) {
1638             errln("FAIL: createInstance");
1639             return;
1640         }
1641         String exp = "(Title);Latin-Greek";
1642         if (t.getID().equals(exp)) {
1643             logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1644                     t.getID());
1645         } else {
1646             errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1647                     t.getID() + "\", expected \"" + exp + "\"");
1648         }
1649     }
1650 
1651     /**
1652      * Test NFD chaining with RBT
1653      */
1654     @Test
TestNFDChainRBT()1655     public void TestNFDChainRBT() {
1656         Transliterator t = Transliterator.createFromRules(
1657                 "TEST", "::NFD; aa > Q; a > q;",
1658                 Transliterator.FORWARD);
1659         logln(t.toRules(true));
1660         expect(t, "aa", "Q");
1661     }
1662 
1663     /**
1664      * Inverse of "Null" should be "Null". (J21)
1665      */
1666     @Test
TestNullInverse()1667     public void TestNullInverse() {
1668         Transliterator t = Transliterator.getInstance("Null");
1669         Transliterator u = t.getInverse();
1670         if (!u.getID().equals("Null")) {
1671             errln("FAIL: Inverse of Null should be Null");
1672         }
1673     }
1674 
1675     /**
1676      * Check ID of inverse of alias. (J22)
1677      */
1678     @Test
TestAliasInverseID()1679     public void TestAliasInverseID() {
1680         String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1681         Transliterator t = Transliterator.getInstance(ID);
1682         Transliterator u = t.getInverse();
1683         String exp = "Hangul-Latin";
1684         String got = u.getID();
1685         if (!got.equals(exp)) {
1686             errln("FAIL: Inverse of " + ID + " is " + got +
1687                     ", expected " + exp);
1688         }
1689     }
1690 
1691     /**
1692      * Test IDs of inverses of compound transliterators. (J20)
1693      */
1694     @Test
TestCompoundInverseID()1695     public void TestCompoundInverseID() {
1696         String ID = "Latin-Jamo;NFC(NFD)";
1697         Transliterator t = Transliterator.getInstance(ID);
1698         Transliterator u = t.getInverse();
1699         String exp = "NFD(NFC);Jamo-Latin";
1700         String got = u.getID();
1701         if (!got.equals(exp)) {
1702             errln("FAIL: Inverse of " + ID + " is " + got +
1703                     ", expected " + exp);
1704         }
1705     }
1706 
1707     /**
1708      * Test undefined variable.
1709      */
1710     @Test
TestUndefinedVariable()1711     public void TestUndefinedVariable() {
1712         String rule = "$initial } a <> \u1161;";
1713         try {
1714             Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1715         } catch (IllegalArgumentException e) {
1716             logln("OK: Got exception for " + rule + ", as expected: " +
1717                     e.getMessage());
1718             return;
1719         }
1720         errln("Fail: bogus rule " + rule + " compiled without error");
1721     }
1722 
1723     /**
1724      * Test empty context.
1725      */
1726     @Test
TestEmptyContext()1727     public void TestEmptyContext() {
1728         expect(" { a } > b;", "xay a ", "xby b ");
1729     }
1730 
1731     /**
1732      * Test compound filter ID syntax
1733      */
1734     @Test
TestCompoundFilterID()1735     public void TestCompoundFilterID() {
1736         String[] DATA = {
1737                 // Col. 1 = ID or rule set (latter must start with #)
1738 
1739                 // = columns > 1 are null if expect col. 1 to be illegal =
1740 
1741                 // Col. 2 = direction, "F..." or "R..."
1742                 // Col. 3 = source string
1743                 // Col. 4 = exp result
1744 
1745                 "[abc]; [abc]", null, null, null, // multiple filters
1746                 "Latin-Greek; [abc];", null, null, null, // misplaced filter
1747                 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1748                 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1749                 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1750                 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1751         };
1752 
1753         for (int i=0; i<DATA.length; i+=4) {
1754             String id = DATA[i];
1755             int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1756                     Transliterator.REVERSE : Transliterator.FORWARD;
1757             String source = DATA[i+2];
1758             String exp = DATA[i+3];
1759             boolean expOk = (DATA[i+1] != null);
1760             Transliterator t = null;
1761             IllegalArgumentException e = null;
1762             try {
1763                 if (id.charAt(0) == '#') {
1764                     t = Transliterator.createFromRules("ID", id, direction);
1765                 } else {
1766                     t = Transliterator.getInstance(id, direction);
1767                 }
1768             } catch (IllegalArgumentException ee) {
1769                 e = ee;
1770             }
1771             boolean ok = (t != null && e == null);
1772             if (ok == expOk) {
1773                 logln("Ok: " + id + " => " + t +
1774                         (e != null ? (", " + e.getMessage()) : ""));
1775                 if (source != null) {
1776                     expect(t, source, exp);
1777                 }
1778             } else {
1779                 errln("FAIL: " + id + " => " + t +
1780                         (e != null ? (", " + e.getMessage()) : ""));
1781             }
1782         }
1783     }
1784 
1785     /**
1786      * Test new property set syntax
1787      */
1788     @Test
TestPropertySet()1789     public void TestPropertySet() {
1790         expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1791         expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1792         "[ a stitch ]\n[ in time ]\r[ saves 9]");
1793     }
1794 
1795     /**
1796      * Test various failure points of the new 2.0 engine.
1797      */
1798     @Test
TestNewEngine()1799     public void TestNewEngine() {
1800         Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1801         // Katakana should be untouched
1802         expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1803 
1804         if (true) {
1805             // This test will only work if Transliterator.ROLLBACK is
1806             // true.  Otherwise, this test will fail, revealing a
1807             // limitation of global filters in incremental mode.
1808 
1809             Transliterator a =
1810                 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1811             Transliterator A =
1812                 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1813 
1814             //Transliterator array[] = new Transliterator[] {
1815             //    a,
1816             //    Transliterator.getInstance("NFD"),
1817             //    A };
1818             //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1819 
1820             try {
1821                 Transliterator.registerInstance(a);
1822                 Transliterator.registerInstance(A);
1823 
1824                 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1825                 expect(t, "aAaA", "bAbA");
1826 
1827                 Transliterator[] u = t.getElements();
1828                 assertTrue("getElements().length", u.length == 3);
1829                 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1830                 assertEquals("getElements()[1]", u[1].getID(), "NFD");
1831                 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1832 
1833                 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1834                 t.setFilter(new UnicodeSet("[:Ll:]"));
1835                 expect(t, "aAaA", "bAbA");
1836             } finally {
1837                 Transliterator.unregister("a_to_A");
1838                 Transliterator.unregister("A_to_b");
1839             }
1840         }
1841 
1842         expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1843                 "a",
1844         "ax");
1845 
1846         String gr =
1847             "$ddot = \u0308 ;" +
1848             "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1849             "$rough = \u0314 ;" +
1850             "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
1851             "\u03b1 <> a ;" +
1852             "$rough <> h ;";
1853 
1854         expect(gr, "\u03B1\u0314", "ha");
1855     }
1856 
1857     /**
1858      * Test quantified segment behavior.  We want:
1859      * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1860      */
1861     @Test
TestQuantifiedSegment()1862     public void TestQuantifiedSegment() {
1863         // The normal case
1864         expect("([abc]+) > x $1 x;", "cba", "xcbax");
1865 
1866         // The tricky case; the quantifier is around the segment
1867         expect("([abc])+ > x $1 x;", "cba", "xax");
1868 
1869         // Tricky case in reverse direction
1870         expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1871 
1872         // Check post-context segment
1873         expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1874 
1875         // Test toRule/toPattern for non-quantified segment.
1876         // Careful with spacing here.
1877         String r = "([a-c]){q} > x $1 x;";
1878         Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1879         String rr = t.toRules(true);
1880         if (!r.equals(rr)) {
1881             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1882         } else {
1883             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1884         }
1885 
1886         // Test toRule/toPattern for quantified segment.
1887         // Careful with spacing here.
1888         r = "([a-c])+{q} > x $1 x;";
1889         t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1890         rr = t.toRules(true);
1891         if (!r.equals(rr)) {
1892             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1893         } else {
1894             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1895         }
1896     }
1897 
1898     //======================================================================
1899     // Ram's tests
1900     //======================================================================
1901     /* this test performs  test of rules in ISO 15915 */
1902     @Test
TestDevanagariLatinRT()1903     public void  TestDevanagariLatinRT(){
1904         String[]  source = {
1905                 "bh\u0101rata",
1906                 "kra",
1907                 "k\u1E63a",
1908                 "khra",
1909                 "gra",
1910                 "\u1E45ra",
1911                 "cra",
1912                 "chra",
1913                 "j\u00F1a",
1914                 "jhra",
1915                 "\u00F1ra",
1916                 "\u1E6Dya",
1917                 "\u1E6Dhra",
1918                 "\u1E0Dya",
1919                 //"r\u0323ya", // \u095c is not valid in Devanagari
1920                 "\u1E0Dhya",
1921                 "\u1E5Bhra",
1922                 "\u1E47ra",
1923                 "tta",
1924                 "thra",
1925                 "dda",
1926                 "dhra",
1927                 "nna",
1928                 "pra",
1929                 "phra",
1930                 "bra",
1931                 "bhra",
1932                 "mra",
1933                 "\u1E49ra",
1934                 //"l\u0331ra",
1935                 "yra",
1936                 "\u1E8Fra",
1937                 //"l-",
1938                 "vra",
1939                 "\u015Bra",
1940                 "\u1E63ra",
1941                 "sra",
1942                 "hma",
1943                 "\u1E6D\u1E6Da",
1944                 "\u1E6D\u1E6Dha",
1945                 "\u1E6Dh\u1E6Dha",
1946                 "\u1E0D\u1E0Da",
1947                 "\u1E0D\u1E0Dha",
1948                 "\u1E6Dya",
1949                 "\u1E6Dhya",
1950                 "\u1E0Dya",
1951                 "\u1E0Dhya",
1952                 // Not roundtrippable --
1953                 // \u0939\u094d\u094d\u092E  - hma
1954                 // \u0939\u094d\u092E         - hma
1955                 // CharsToUnicodeString("hma"),
1956                 "hya",
1957                 "\u015Br\u0325",
1958                 "\u015Bca",
1959                 "\u0115",
1960                 "san\u0304j\u012Bb s\u0113nagupta",
1961                 "\u0101nand vaddir\u0101ju",
1962         };
1963         String[]  expected = {
1964                 "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */
1965                 "\u0915\u094D\u0930",          /* kra         */
1966                 "\u0915\u094D\u0937",          /* ks\u0323a  */
1967                 "\u0916\u094D\u0930",          /* khra        */
1968                 "\u0917\u094D\u0930",          /* gra         */
1969                 "\u0919\u094D\u0930",          /* n\u0307ra  */
1970                 "\u091A\u094D\u0930",          /* cra         */
1971                 "\u091B\u094D\u0930",          /* chra        */
1972                 "\u091C\u094D\u091E",          /* jn\u0303a  */
1973                 "\u091D\u094D\u0930",          /* jhra        */
1974                 "\u091E\u094D\u0930",          /* n\u0303ra  */
1975                 "\u091F\u094D\u092F",          /* t\u0323ya  */
1976                 "\u0920\u094D\u0930",          /* t\u0323hra */
1977                 "\u0921\u094D\u092F",          /* d\u0323ya  */
1978                 //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari
1979                 "\u0922\u094D\u092F",          /* d\u0323hya */
1980                 "\u0922\u093C\u094D\u0930",    /* r\u0323hra */
1981                 "\u0923\u094D\u0930",          /* n\u0323ra  */
1982                 "\u0924\u094D\u0924",          /* tta         */
1983                 "\u0925\u094D\u0930",          /* thra        */
1984                 "\u0926\u094D\u0926",          /* dda         */
1985                 "\u0927\u094D\u0930",          /* dhra        */
1986                 "\u0928\u094D\u0928",          /* nna         */
1987                 "\u092A\u094D\u0930",          /* pra         */
1988                 "\u092B\u094D\u0930",          /* phra        */
1989                 "\u092C\u094D\u0930",          /* bra         */
1990                 "\u092D\u094D\u0930",          /* bhra        */
1991                 "\u092E\u094D\u0930",          /* mra         */
1992                 "\u0929\u094D\u0930",          /* n\u0331ra  */
1993                 //"\u0934\u094D\u0930",          /* l\u0331ra  */
1994                 "\u092F\u094D\u0930",          /* yra         */
1995                 "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */
1996                 //"l-",
1997                 "\u0935\u094D\u0930",          /* vra         */
1998                 "\u0936\u094D\u0930",          /* s\u0301ra  */
1999                 "\u0937\u094D\u0930",          /* s\u0323ra  */
2000                 "\u0938\u094D\u0930",          /* sra         */
2001                 "\u0939\u094d\u092E",          /* hma         */
2002                 "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */
2003                 "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */
2004                 "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/
2005                 "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */
2006                 "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */
2007                 "\u091F\u094D\u092F",          /* t\u0323ya  */
2008                 "\u0920\u094D\u092F",          /* t\u0323hya */
2009                 "\u0921\u094D\u092F",          /* d\u0323ya  */
2010                 "\u0922\u094D\u092F",          /* d\u0323hya */
2011                 // "hma",                         /* hma         */
2012                 "\u0939\u094D\u092F",          /* hya         */
2013                 "\u0936\u0943",                /* s\u0301r\u0325a  */
2014                 "\u0936\u094D\u091A",          /* s\u0301ca  */
2015                 "\u090d",                      /* e\u0306    */
2016                 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
2017                 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
2018         };
2019 
2020         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
2021         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2022 
2023         for(int i= 0; i<source.length; i++){
2024             expect(latinToDev,(source[i]),(expected[i]));
2025             expect(devToLatin,(expected[i]),(source[i]));
2026         }
2027 
2028     }
2029     @Test
TestTeluguLatinRT()2030     public void  TestTeluguLatinRT(){
2031         String[]  source = {
2032                 "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */
2033                 "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */
2034                 "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */
2035                 "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */
2036                 "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */
2037                 "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */
2038                 "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */
2039                 "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */
2040                 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */
2041                 "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */
2042         };
2043 
2044         String[]  expected = {
2045                 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2046                 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2047                 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2048                 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2049                 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2050                 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2051                 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2052                 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2053                 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2054                 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2055         };
2056 
2057 
2058         Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2059         Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2060 
2061         for(int i= 0; i<source.length; i++){
2062             expect(latinToDev,(source[i]),(expected[i]));
2063             expect(devToLatin,(expected[i]),(source[i]));
2064         }
2065     }
2066 
2067     @Test
TestSanskritLatinRT()2068     public void  TestSanskritLatinRT(){
2069         int MAX_LEN =15;
2070         String[]  source = {
2071                 "rmk\u1E63\u0113t",
2072                 "\u015Br\u012Bmad",
2073                 "bhagavadg\u012Bt\u0101",
2074                 "adhy\u0101ya",
2075                 "arjuna",
2076                 "vi\u1E63\u0101da",
2077                 "y\u014Dga",
2078                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2079                 "uv\u0101cr\u0325",
2080                 "dharmak\u1E63\u0113tr\u0113",
2081                 "kuruk\u1E63\u0113tr\u0113",
2082                 "samav\u0113t\u0101",
2083                 "yuyutsava\u1E25",
2084                 "m\u0101mak\u0101\u1E25",
2085                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2086                 "kimakurvata",
2087                 "san\u0304java",
2088         };
2089         String[]  expected = {
2090                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2091                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2092                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2093                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2094                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2095                 "\u0935\u093f\u0937\u093e\u0926",
2096                 "\u092f\u094b\u0917",
2097                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2098                 "\u0909\u0935\u093E\u091A\u0943",
2099                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2100                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2101                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2102                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2103                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2104                 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2105                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2106                 "\u0938\u0902\u091c\u0935",
2107         };
2108 
2109         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2110         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2111         for(int i= 0; i<MAX_LEN; i++){
2112             expect(latinToDev,(source[i]),(expected[i]));
2113             expect(devToLatin,(expected[i]),(source[i]));
2114         }
2115     }
2116 
2117     @Test
TestCompoundLatinRT()2118     public void  TestCompoundLatinRT(){
2119         int MAX_LEN =15;
2120         String[]  source = {
2121                 "rmk\u1E63\u0113t",
2122                 "\u015Br\u012Bmad",
2123                 "bhagavadg\u012Bt\u0101",
2124                 "adhy\u0101ya",
2125                 "arjuna",
2126                 "vi\u1E63\u0101da",
2127                 "y\u014Dga",
2128                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2129                 "uv\u0101cr\u0325",
2130                 "dharmak\u1E63\u0113tr\u0113",
2131                 "kuruk\u1E63\u0113tr\u0113",
2132                 "samav\u0113t\u0101",
2133                 "yuyutsava\u1E25",
2134                 "m\u0101mak\u0101\u1E25",
2135                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2136                 "kimakurvata",
2137                 "san\u0304java"
2138         };
2139         String[]  expected = {
2140                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2141                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2142                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2143                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2144                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2145                 "\u0935\u093f\u0937\u093e\u0926",
2146                 "\u092f\u094b\u0917",
2147                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2148                 "\u0909\u0935\u093E\u091A\u0943",
2149                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2150                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2151                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2152                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2153                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2154                 //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2155                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2156                 "\u0938\u0902\u091c\u0935"
2157         };
2158 
2159         Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2160         Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
2161         for(int i= 0; i<MAX_LEN; i++){
2162             expect(latinToDevToLatin,(source[i]),(source[i]));
2163             expect(devToLatinToDev,(expected[i]),(expected[i]));
2164         }
2165     }
2166     /**
2167      * Test Gurmukhi-Devanagari Tippi and Bindi
2168      */
2169     @Test
TestGurmukhiDevanagari()2170     public void TestGurmukhiDevanagari(){
2171         // the rule says:
2172         // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2173         // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2174 
2175         UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2176         UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2177 
2178         UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2179         UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2180         Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
2181         StringBuffer src = new StringBuffer(" \u0902");
2182         StringBuffer expect = new StringBuffer(" \u0A02");
2183         while(vIter.next()){
2184             src.setCharAt(0,(char) vIter.codepoint);
2185             expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2186             expect(trans,src.toString(),expect.toString());
2187         }
2188 
2189         expect.setCharAt(1,'\u0A70');
2190         while(nvIter.next()){
2191             //src.setCharAt(0,(char) nvIter.codepoint);
2192             src.setCharAt(0,(char)nvIter.codepoint);
2193             expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2194             expect(trans,src.toString(),expect.toString());
2195         }
2196     }
2197     /**
2198      * Test instantiation from a locale.
2199      */
2200     @Test
TestLocaleInstantiation()2201     public void TestLocaleInstantiation() {
2202         Transliterator t;
2203         try{
2204             t = Transliterator.getInstance("te_IN-Latin");
2205             //expect(t, "\u0430", "a");
2206         }catch(IllegalArgumentException ex){
2207             warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
2208         }
2209         try{
2210             t = Transliterator.getInstance("ru_RU-Latin");
2211             expect(t, "\u0430", "a");
2212         }catch(IllegalArgumentException ex){
2213             warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
2214         }
2215         try{
2216             t = Transliterator.getInstance("en-el");
2217             expect(t, "a", "\u03B1");
2218         }catch(IllegalArgumentException ex){
2219             warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2220         }
2221     }
2222 
2223     /**
2224      * Test title case handling of accent (should ignore accents)
2225      */
2226     @Test
TestTitleAccents()2227     public void TestTitleAccents() {
2228         Transliterator t = Transliterator.getInstance("Title");
2229         expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2230     }
2231 
2232     /**
2233      * Basic test of a locale resource based rule.
2234      */
2235     @Test
TestLocaleResource()2236     public void TestLocaleResource() {
2237         String DATA[] = {
2238                 // id                    from             to
2239                 "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",
2240                 "Latin-el",              "b",             "\u03bc\u03c0",
2241                 "Latin-Greek",           "b",             "\u03B2",
2242                 "Greek-Latin/UNGEGN",    "\u03B2",        "v",
2243                 "el-Latin",              "\u03B2",        "v",
2244                 "Greek-Latin",           "\u03B2",        "b",
2245         };
2246         for (int i=0; i<DATA.length; i+=3) {
2247             Transliterator t = Transliterator.getInstance(DATA[i]);
2248             expect(t, DATA[i+1], DATA[i+2]);
2249         }
2250     }
2251 
2252     /**
2253      * Make sure parse errors reference the right line.
2254      */
2255     @Test
TestParseError()2256     public void TestParseError() {
2257         String rule =
2258             "a > b;\n" +
2259             "# more stuff\n" +
2260             "d << b;";
2261         try {
2262             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2263             if(t!=null){
2264                 errln("FAIL: Did not get expected exception");
2265             }
2266         } catch (IllegalArgumentException e) {
2267             String err = e.getMessage();
2268             if (err.indexOf("d << b") >= 0) {
2269                 logln("Ok: " + err);
2270             } else {
2271                 errln("FAIL: " + err);
2272             }
2273             return;
2274         }
2275         errln("FAIL: no syntax error");
2276     }
2277 
2278     /**
2279      * Make sure sets on output are disallowed.
2280      */
2281     @Test
TestOutputSet()2282     public void TestOutputSet() {
2283         String rule = "$set = [a-cm-n]; b > $set;";
2284         Transliterator t = null;
2285         try {
2286             t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2287             if(t!=null){
2288                 errln("FAIL: Did not get the expected exception");
2289             }
2290         } catch (IllegalArgumentException e) {
2291             logln("Ok: " + e.getMessage());
2292             return;
2293         }
2294         errln("FAIL: No syntax error");
2295     }
2296 
2297     /**
2298      * Test the use variable range pragma, making sure that use of
2299      * variable range characters is detected and flagged as an error.
2300      */
2301     @Test
TestVariableRange()2302     public void TestVariableRange() {
2303         String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2304         try {
2305             Transliterator t =
2306                 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2307             if(t!=null){
2308                 errln("FAIL: Did not get the expected exception");
2309             }
2310         } catch (IllegalArgumentException e) {
2311             logln("Ok: " + e.getMessage());
2312             return;
2313         }
2314         errln("FAIL: No syntax error");
2315     }
2316 
2317     /**
2318      * Test invalid post context error handling
2319      */
2320     @Test
TestInvalidPostContext()2321     public void TestInvalidPostContext() {
2322         try {
2323             Transliterator t =
2324                 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2325             if(t!=null){
2326                 errln("FAIL: Did not get the expected exception");
2327             }
2328         } catch (IllegalArgumentException e) {
2329             String msg = e.getMessage();
2330             if (msg.indexOf("a}b{c") >= 0) {
2331                 logln("Ok: " + msg);
2332             } else {
2333                 errln("FAIL: " + msg);
2334             }
2335             return;
2336         }
2337         errln("FAIL: No syntax error");
2338     }
2339 
2340     /**
2341      * Test ID form variants
2342      */
2343     @Test
TestIDForms()2344     public void TestIDForms() {
2345         String DATA[] = {
2346                 "NFC", null, "NFD",
2347                 "nfd", null, "NFC", // make sure case is ignored
2348                 "Any-NFKD", null, "Any-NFKC",
2349                 "Null", null, "Null",
2350                 "-nfkc", "nfkc", "NFKD",
2351                 "-nfkc/", "nfkc", "NFKD",
2352                 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2353                 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2354                 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2355                 "Source-", null, null,
2356                 "Source/Variant-", null, null,
2357                 "Source-/Variant", null, null,
2358                 "/Variant", null, null,
2359                 "/Variant-", null, null,
2360                 "-/Variant", null, null,
2361                 "-/", null, null,
2362                 "-", null, null,
2363                 "/", null, null,
2364         };
2365 
2366         for (int i=0; i<DATA.length; i+=3) {
2367             String ID = DATA[i];
2368             String expID = DATA[i+1];
2369             String expInvID = DATA[i+2];
2370             boolean expValid = (expInvID != null);
2371             if (expID == null) {
2372                 expID = ID;
2373             }
2374             try {
2375                 Transliterator t =
2376                     Transliterator.getInstance(ID);
2377                 Transliterator u = t.getInverse();
2378                 if (t.getID().equals(expID) &&
2379                         u.getID().equals(expInvID)) {
2380                     logln("Ok: " + ID + ".getInverse() => " + expInvID);
2381                 } else {
2382                     errln("FAIL: getInstance(" + ID + ") => " +
2383                             t.getID() + " x getInverse() => " + u.getID() +
2384                             ", expected " + expInvID);
2385                 }
2386             } catch (IllegalArgumentException e) {
2387                 if (!expValid) {
2388                     logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2389                 } else {
2390                     errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
2391                 }
2392             }
2393         }
2394     }
2395 
checkRules(String label, Transliterator t2, String testRulesForward)2396     void checkRules(String label, Transliterator t2, String testRulesForward) {
2397         String rules2 = t2.toRules(true);
2398         //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2399         rules2 = TestUtility.replace(rules2, " ", "");
2400         rules2 = TestUtility.replace(rules2, "\n", "");
2401         rules2 = TestUtility.replace(rules2, "\r", "");
2402         testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2403 
2404         if (!rules2.equals(testRulesForward)) {
2405             errln(label);
2406             logln("GENERATED RULES: " + rules2);
2407             logln("SHOULD BE:       " + testRulesForward);
2408         }
2409     }
2410 
2411     /**
2412      * Mark's toRules test.
2413      */
2414     @Test
TestToRulesMark()2415     public void TestToRulesMark() {
2416 
2417         String testRules =
2418             "::[[:Latin:][:Mark:]];"
2419             + "::NFKD (NFC);"
2420             + "::Lower (Lower);"
2421             + "a <> \\u03B1;" // alpha
2422             + "::NFKC (NFD);"
2423             + "::Upper (Lower);"
2424             + "::Lower ();"
2425             + "::([[:Greek:][:Mark:]]);"
2426             ;
2427         String testRulesForward =
2428             "::[[:Latin:][:Mark:]];"
2429             + "::NFKD(NFC);"
2430             + "::Lower(Lower);"
2431             + "a > \\u03B1;"
2432             + "::NFKC(NFD);"
2433             + "::Upper (Lower);"
2434             + "::Lower ();"
2435             ;
2436         String testRulesBackward =
2437             "::[[:Greek:][:Mark:]];"
2438             + "::Lower (Upper);"
2439             + "::NFD(NFKC);"
2440             + "\\u03B1 > a;"
2441             + "::Lower(Lower);"
2442             + "::NFC(NFKD);"
2443             ;
2444         String source = "\u00E1"; // a-acute
2445         String target = "\u03AC"; // alpha-acute
2446 
2447         Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2448         Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
2449 
2450         expect(t2, source, target);
2451         expect(t3, target, source);
2452 
2453         checkRules("Failed toRules FORWARD", t2, testRulesForward);
2454         checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2455     }
2456 
2457     /**
2458      * Test Escape and Unescape transliterators.
2459      */
2460     @Test
TestEscape()2461     public void TestEscape() {
2462         expect(Transliterator.getInstance("Hex-Any"),
2463                 "\\x{40}\\U00000031&#x32;&#81;",
2464         "@12Q");
2465         expect(Transliterator.getInstance("Any-Hex/C"),
2466                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2467         "\\u0041\\U0010BEEF\\uFEED");
2468         expect(Transliterator.getInstance("Any-Hex/Java"),
2469                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2470         "\\u0041\\uDBEF\\uDEEF\\uFEED");
2471         expect(Transliterator.getInstance("Any-Hex/Perl"),
2472                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2473         "\\x{41}\\x{10BEEF}\\x{FEED}");
2474     }
2475 
2476     /**
2477      * Make sure display names of variants look reasonable.
2478      */
2479     @Test
TestDisplayName()2480     public void TestDisplayName() {
2481         String DATA[] = {
2482                 // ID, forward name, reverse name
2483                 // Update the text as necessary -- the important thing is
2484                 // not the text itself, but how various cases are handled.
2485 
2486                 // Basic test
2487                 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2488 
2489                 // Variants
2490                 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2491 
2492                 // Target-only IDs
2493                 "NFC", "Any to NFC", "Any to NFD",
2494         };
2495 
2496         Locale US = Locale.US;
2497 
2498         for (int i=0; i<DATA.length; i+=3) {
2499             String name = Transliterator.getDisplayName(DATA[i], US);
2500             if (!name.equals(DATA[i+1])) {
2501                 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2502                         name + ", expected " + DATA[i+1]);
2503             } else {
2504                 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2505             }
2506             Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2507             name = Transliterator.getDisplayName(t.getID(), US);
2508             if (!name.equals(DATA[i+2])) {
2509                 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2510                         name + ", expected " + DATA[i+2]);
2511             } else {
2512                 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2513             }
2514 
2515             // Cover getDisplayName(String)
2516             ULocale save = ULocale.getDefault();
2517             ULocale.setDefault(ULocale.US);
2518             String name2 = Transliterator.getDisplayName(t.getID());
2519             if (!name.equals(name2))
2520                 errln("FAIL: getDisplayName with default locale failed");
2521             ULocale.setDefault(save);
2522         }
2523     }
2524 
2525     /**
2526      * Test anchor masking
2527      */
2528     @Test
TestAnchorMasking()2529     public void TestAnchorMasking() {
2530         String rule = "^a > Q; a > q;";
2531         try {
2532             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2533             if(t==null){
2534                 errln("FAIL: Did not get the expected exception");
2535             }
2536         } catch (IllegalArgumentException e) {
2537             errln("FAIL: " + rule + " => " + e);
2538         }
2539     }
2540 
2541     /**
2542      * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2543      * during ICU4J modularization to remove dependency of tests on Transliterator.
2544      */
2545     @Test
TestScriptAllCodepoints()2546     public void TestScriptAllCodepoints(){
2547         int code;
2548         HashSet  scriptIdsChecked   = new HashSet();
2549         HashSet  scriptAbbrsChecked = new HashSet();
2550         for( int i =0; i <= 0x10ffff; i++){
2551             code = UScript.getScript(i);
2552             if(code==UScript.INVALID_CODE){
2553                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2554             }
2555             String id =UScript.getName(code);
2556             String abbr = UScript.getShortName(code);
2557             if (!scriptIdsChecked.contains(id)) {
2558                 scriptIdsChecked.add(id);
2559                 String newId ="[:"+id+":];NFD";
2560                 try{
2561                     Transliterator t = Transliterator.getInstance(newId);
2562                     if(t==null){
2563                         errln("Failed to create transliterator for "+hex(i)+
2564                                 " script code: " +id);
2565                     }
2566                 }catch(Exception e){
2567                     errln("Failed to create transliterator for "+hex(i)
2568                             +" script code: " +id
2569                             + " Exception: "+e.getMessage());
2570                 }
2571             }
2572             if (!scriptAbbrsChecked.contains(abbr)) {
2573                 scriptAbbrsChecked.add(abbr);
2574                 String newAbbrId ="[:"+abbr+":];NFD";
2575                 try{
2576                     Transliterator t = Transliterator.getInstance(newAbbrId);
2577                     if(t==null){
2578                         errln("Failed to create transliterator for "+hex(i)+
2579                                 " script code: " +abbr);
2580                     }
2581                 }catch(Exception e){
2582                     errln("Failed to create transliterator for "+hex(i)
2583                             +" script code: " +abbr
2584                             + " Exception: "+e.getMessage());
2585                 }
2586             }
2587         }
2588     }
2589 
    /**
     * Rule-based transliterators that TestSpecialCases registers before it
     * runs and unregisters afterwards. Each row is {ID, rules}; the rules
     * are compiled in the forward direction.
     */
    static final String[][] registerRules = {
        {"Any-Dev1", "x > X; y > Y;"},
        {"Any-Dev2", "XY > Z"},
        // Four rules that map mu-pi (either case) next to a non-letter/non-mark to b or B.
        {"Greek-Latin/FAKE",
            "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
            "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
            "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
            "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
        },
    };
2600 
    // Deseret letter "dee" in capital (U+10414) and small (U+1043C) form.
    // Both are supplementary code points, so each string is a surrogate pair;
    // the casing tests use them to check handling of surrogates.
    static final String DESERET_DEE = UTF16.valueOf(0x10414);
    static final String DESERET_dee = UTF16.valueOf(0x1043C);
2603 
    /**
     * Data for TestSpecialCases. Each row is {ID, source} or
     * {ID, source, expected target}. When the target is omitted,
     * TestSpecialCases computes it from the ID (NFD/NFC/NFKD/NFKC via
     * Normalizer, Lower/Upper via UCharacter).
     */
    static final String[][] testCases = {

        // NORMALIZATION
        // should add more test cases
        {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
        {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
        {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
        {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},

        // mp -> b BUG
        {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
        {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},

        // check for devanagari bug
        // (Dev1 and Dev2 are the test-only rule sets listed in registerRules)
        {"nfd;Dev1;Dev2;nfc", "xy", "Z"},

        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
        {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
            "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
            //TODO: enable this test once Titlecase works right
            //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
            //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},

            {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
                "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
                {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
                    "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},

                    // Same sources without an explicit target: the expected
                    // value is computed by TestSpecialCases with UCharacter.
                    {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
                    {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},

                    // FORMS OF S
                    {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
                    {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
                    {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
                    {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},

                    // Tatiana bug
                    // Upper: TAT\u02B9\u00C2NA
                    // Lower: tat\u02B9\u00E2na
                    // Title: Tat\u02B9\u00E2na
                    {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
                    {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
                    {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
    };
2649 
2650     @Test
TestSpecialCases()2651     public void TestSpecialCases() {
2652 
2653         for (int i = 0; i < registerRules.length; ++i) {
2654             Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2655                     registerRules[i][1], Transliterator.FORWARD);
2656             DummyFactory.add(registerRules[i][0], t);
2657         }
2658         for (int i = 0; i < testCases.length; ++i) {
2659             String name = testCases[i][0];
2660             Transliterator t = Transliterator.getInstance(name);
2661             String id = t.getID();
2662             String source = testCases[i][1];
2663             String target = null;
2664 
2665             // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2666 
2667             if (testCases[i].length > 2)    target = testCases[i][2];
2668             else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
2669             else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
2670             else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
2671             else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
2672             else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
2673             else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);
2674 
2675             expect(t, source, target);
2676         }
2677         for (int i = 0; i < registerRules.length; ++i) {
2678             Transliterator.unregister(registerRules[i][0]);
2679         }
2680     }
2681 
2682     // seems like there should be an easier way to just register an instance of a transliterator
2683 
2684     static class DummyFactory implements Transliterator.Factory {
2685         static DummyFactory singleton = new DummyFactory();
2686         static HashMap m = new HashMap();
2687 
2688         // Since Transliterators are immutable, we don't have to clone on set & get
add(String ID, Transliterator t)2689         static void add(String ID, Transliterator t) {
2690             m.put(ID, t);
2691             //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2692             Transliterator.registerFactory(ID, singleton);
2693         }
2694         @Override
getInstance(String ID)2695         public Transliterator getInstance(String ID) {
2696             return (Transliterator) m.get(ID);
2697         }
2698     }
2699 
2700     @Test
TestCasing()2701     public void TestCasing() {
2702         Transliterator toLower = Transliterator.getInstance("lower");
2703         Transliterator toCasefold = Transliterator.getInstance("casefold");
2704         Transliterator toUpper = Transliterator.getInstance("upper");
2705         Transliterator toTitle = Transliterator.getInstance("title");
2706         for (int i = 0; i < 0x600; ++i) {
2707             String s = UTF16.valueOf(i);
2708 
2709             String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2710             assertEquals("Lowercase", lower, toLower.transform(s));
2711 
2712             String casefold = UCharacter.foldCase(s, true);
2713             assertEquals("Casefold", casefold, toCasefold.transform(s));
2714 
2715             if (i != 0x0345) {
2716                 // ICU 60 changes the default titlecasing index adjustment.
2717                 // For word breaks it is mostly the same as before,
2718                 // but it is different for the iota subscript (the only cased combining mark).
2719                 // This should be ok because the iota subscript is not supposed to appear
2720                 // at the start of a word.
2721                 // The title Transliterator is far below feature parity with the
2722                 // UCharacter and CaseMap titlecasing functions.
2723                 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2724                 assertEquals("Title", title, toTitle.transform(s));
2725             }
2726 
2727             String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2728             assertEquals("Upper", upper, toUpper.transform(s));
2729         }
2730     }
2731 
2732     @Test
TestSurrogateCasing()2733     public void TestSurrogateCasing () {
2734         // check that casing handles surrogates
2735         // titlecase is currently defective
2736         int dee = UTF16.charAt(DESERET_dee,0);
2737         int DEE = UCharacter.toTitleCase(dee);
2738         if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2739             errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2740         }
2741 
2742         if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2743             errln("Fails uppercase of surrogates");
2744         }
2745 
2746         if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2747             errln("Fails lowercase of surrogates");
2748         }
2749     }
2750 
2751 
2752     @Test
TestFunction()2753     public void TestFunction() {
2754         // Careful with spacing and ';' here:  Phrase this exactly
2755         // as toRules() is going to return it.  If toRules() changes
2756         // with regard to spacing or ';', then adjust this string.
2757         String rule =
2758             "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2759 
2760         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2761         if (t == null) {
2762             errln("FAIL: createFromRules failed");
2763             return;
2764         }
2765 
2766         String r = t.toRules(true);
2767         if (r.equals(rule)) {
2768             logln("OK: toRules() => " + r);
2769         } else {
2770             errln("FAIL: toRules() => " + r +
2771                     ", expected " + rule);
2772         }
2773 
2774         expect(t, "The Quick Brown Fox",
2775         "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2776         rule =
2777             "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2778 
2779         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2780         if (t == null) {
2781             errln("FAIL: createFromRules failed");
2782             return;
2783         }
2784 
2785         r = t.toRules(true);
2786         if (r.equals(rule)) {
2787             logln("OK: toRules() => " + r);
2788         } else {
2789             errln("FAIL: toRules() => " + r +
2790                     ", expected " + rule);
2791         }
2792 
2793         expect(t, "\u0301",
2794         "U+0301 \\N{COMBINING ACUTE ACCENT}");
2795     }
2796 
2797     @Test
TestInvalidBackRef()2798     public void TestInvalidBackRef() {
2799         String rule =  ". > $1;";
2800         String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2801         try {
2802             Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2803             if (t != null) {
2804                 errln("FAIL: createFromRules should have returned NULL");
2805             }
2806             errln("FAIL: Ok: . > $1; => no error");
2807             Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2808             if (t2 != null) {
2809                 errln("FAIL: createFromRules should have returned NULL");
2810             }
2811             errln("FAIL: Ok: . > $1; => no error");
2812         } catch (IllegalArgumentException e) {
2813             logln("Ok: . > $1; => " + e.getMessage());
2814         }
2815     }
2816 
2817     @Test
TestMulticharStringSet()2818     public void TestMulticharStringSet() {
2819         // Basic testing
2820         String rule =
2821             "       [{aa}]       > x;" +
2822             "         a          > y;" +
2823             "       [b{bc}]      > z;" +
2824             "[{gd}] { e          > q;" +
2825             "         e } [{fg}] > r;" ;
2826 
2827         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2828         if (t == null) {
2829             errln("FAIL: createFromRules failed");
2830             return;
2831         }
2832 
2833         expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2834         "y x yz z d gd de gdq gdqfg ddrfg");
2835 
2836         // Overlapped string test.  Make sure that when multiple
2837         // strings can match that the longest one is matched.
2838         rule =
2839             "    [a {ab} {abc}]    > x;" +
2840             "           b          > y;" +
2841             "           c          > z;" +
2842             " q [t {st} {rst}] { e > p;" ;
2843 
2844         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2845         if (t == null) {
2846             errln("FAIL: createFromRules failed");
2847             return;
2848         }
2849 
2850         expect(t, "a ab abc qte qste qrste",
2851         "x x x qtp qstp qrstp");
2852     }
2853 
2854     /**
2855      * Test that user-registered transliterators can be used under function
2856      * syntax.
2857      */
2858     @Test
TestUserFunction()2859     public void TestUserFunction() {
2860         Transliterator t;
2861 
2862         // There's no need to register inverses if we don't use them
2863         TestUserFunctionFactory.add("Any-gif",
2864                 Transliterator.createFromRules("gif",
2865                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2866                         Transliterator.FORWARD));
2867         //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2868 
2869         TestUserFunctionFactory.add("Any-RemoveCurly",
2870                 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2871         //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2872 
2873         logln("Trying &hex");
2874         t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2875         logln("Registering");
2876         TestUserFunctionFactory.add("Any-hex2", t);
2877         t = Transliterator.getInstance("Any-hex2");
2878         expect(t, "abc", "\\u0061\\u0062\\u0063");
2879 
2880         logln("Trying &gif");
2881         t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2882         logln("Registering");
2883         TestUserFunctionFactory.add("Any-gif2", t);
2884         t = Transliterator.getInstance("Any-gif2");
2885         expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2886         "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2887 
2888         // Test that filters are allowed after &
2889         t = Transliterator.createFromRules("test",
2890                 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2891         expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2892 
2893         // Unregister our test stuff
2894         TestUserFunctionFactory.unregister();
2895     }
2896 
2897     static class TestUserFunctionFactory implements Transliterator.Factory {
2898         static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2899         static HashMap m = new HashMap();
2900 
add(String ID, Transliterator t)2901         static void add(String ID, Transliterator t) {
2902             m.put(new CaseInsensitiveString(ID), t);
2903             Transliterator.registerFactory(ID, singleton);
2904         }
2905 
2906         @Override
getInstance(String ID)2907         public Transliterator getInstance(String ID) {
2908             return (Transliterator) m.get(new CaseInsensitiveString(ID));
2909         }
2910 
unregister()2911         static void unregister() {
2912             Iterator ids = m.keySet().iterator();
2913             while (ids.hasNext()) {
2914                 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
2915                 Transliterator.unregister(id.getString());
2916                 ids.remove(); // removes pair from m
2917             }
2918         }
2919     }
2920 
2921     /**
2922      * Test the Any-X transliterators.
2923      */
2924     @Test
TestAnyX()2925     public void TestAnyX() {
2926         Transliterator anyLatin =
2927             Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
2928 
2929         expect(anyLatin,
2930                 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2931         "greek:abkABK hiragana:abuku cyrillic:abc");
2932     }
2933 
2934     /**
2935      * Test Any-X transliterators with sample letters from all scripts.
2936      */
2937     @Test
TestAny()2938     public void TestAny() {
2939         UnicodeSet alphabetic = new UnicodeSet("[:alphabetic:]").freeze();
2940         StringBuffer testString = new StringBuffer();
2941         for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
2942             UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
2943             int count = 5;
2944             for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
2945                 testString.append(it.getString());
2946                 if (--count < 0) break;
2947             }
2948         }
2949         logln("Sample set for Any-Latin: " + testString);
2950         Transliterator anyLatin = Transliterator.getInstance("any-Latn");
2951         String result = anyLatin.transliterate(testString.toString());
2952         logln("Sample result for Any-Latin: " + result);
2953     }
2954 
2955 
2956     /**
2957      * Test the source and target set API.  These are only implemented
2958      * for RBT and CompoundTransliterator at this time.
2959      */
2960     @Test
TestSourceTargetSet()2961     public void TestSourceTargetSet() {
2962         // Rules
2963         String r =
2964             "a > b; " +
2965             "r [x{lu}] > q;";
2966 
2967         // Expected source
2968         UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
2969 
2970         // Expected target
2971         UnicodeSet expTrg = new UnicodeSet("[bq]");
2972 
2973         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
2974         UnicodeSet src = t.getSourceSet();
2975         UnicodeSet trg = t.getTargetSet();
2976 
2977         if (src.equals(expSrc) && trg.equals(expTrg)) {
2978             logln("Ok: " + r + " => source = " + src.toPattern(true) +
2979                     ", target = " + trg.toPattern(true));
2980         } else {
2981             errln("FAIL: " + r + " => source = " + src.toPattern(true) +
2982                     ", expected " + expSrc.toPattern(true) +
2983                     "; target = " + trg.toPattern(true) +
2984                     ", expected " + expTrg.toPattern(true));
2985         }
2986     }
2987 
2988     @Test
TestSourceTargetSetFilter()2989     public void TestSourceTargetSetFilter() {
2990         String[][] tests = {
2991                 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
2992                 {"[] Latin-Greek", null, "[\']"},
2993                 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
2994                 {"[] Any-Latin"},
2995                 {"[] casefold"},
2996                 {"[] NFKD;"},
2997                 {"[] NFKC;"},
2998                 {"[] hex"},
2999                 {"[] lower"},
3000                 {"[] null"},
3001                 {"[] remove"},
3002                 {"[] title"},
3003                 {"[] upper"},
3004         };
3005         UnicodeSet expectedSource = UnicodeSet.EMPTY;
3006         for (String[] testPair : tests) {
3007             String test = testPair[0];
3008             Transliterator t0;
3009             try {
3010                 t0 = Transliterator.getInstance(test);
3011             } catch (Exception e) {
3012                 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
3013             }
3014             Transliterator t1;
3015             try {
3016                 t1 = t0.getInverse();
3017             } catch (Exception e) {
3018                 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
3019             }
3020             int targetIndex = 0;
3021             for (Transliterator t : new Transliterator[]{t0, t1}) {
3022                 boolean ok;
3023                 UnicodeSet source = t.getSourceSet();
3024                 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3025                 targetIndex++;
3026                 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3027                         : testPair[targetIndex] == null ? expectedSource
3028                                 : testPair[targetIndex].length() == 0 ? expectedSource
3029                                         : new UnicodeSet(testPair[targetIndex]);
3030                 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
3031                 if (!ok) { // for debugging
3032                     source = t.getSourceSet();
3033                 }
3034                 UnicodeSet target = t.getTargetSet();
3035                 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3036                 if (!ok) { // for debugging
3037                     target = t.getTargetSet();
3038                 }
3039             }
3040         }
3041     }
3042 
isAtomic(String s, String t, Transliterator trans)3043     static boolean isAtomic(String s, String t, Transliterator trans) {
3044         for (int i = 1; i < s.length(); ++i) {
3045             if (!CharSequences.onCharacterBoundary(s, i)) {
3046                 continue;
3047             }
3048             String q = trans.transform(s.substring(0,i));
3049             if (t.startsWith(q)) {
3050                 String r = trans.transform(s.substring(i));
3051                 if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3052                     return false;
3053                 }
3054             }
3055         }
3056         return true;
3057         //        // make sure that every part is different
3058         //        if (s.codePointCount(0, s.length()) > 1) {
3059         //            int[] codePoints = It.codePoints(s);
3060         //            for (int k = 0; k < codePoints.length; ++k) {
3061         //                int pos = indexOf(t,codePoints[k]);
3062         //                if (pos >= 0) {
3063         //                    int x;
3064         //                }
3065         //            }
3066         //            if (s.contains("\u00C0")) {
3067         //                logln("\u00C0");
3068         //            }
3069         //        }
3070     }
3071 
addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3072     static void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3073         expectedSource.addAll(s);
3074         if (t.length() > 0) {
3075             expectedTarget.addAll(t);
3076         }
3077     }
3078 
3079 //    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3080 //        disorderedMarks.add(s);
3081 //        for (int j = 1; j < s.length(); ++j) {
3082 //            if (CharSequences.onCharacterBoundary(s, j)) {
3083 //                String shorter = s.substring(0,j);
3084 //                disorderedMarks.add(shorter);
3085 //                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
3086 //            }
3087 //        }
3088 //    }
3089 
3090     @Test
TestCharUtils()3091     public void TestCharUtils() {
3092         String[][] startTests = {
3093                 {"1", "a", "ab"},
3094                 {"0", "a", "xb"},
3095                 {"0", "\uD800", "\uD800\uDC01"},
3096                 {"1", "\uD800a", "\uD800b"},
3097                 {"0", "\uD800\uDC00", "\uD800\uDC01"},
3098         };
3099         for (String[] row : startTests) {
3100             int actual = findSharedStartLength(row[1], row[2]);
3101             assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3102                     Integer.parseInt(row[0]),
3103                     actual);
3104         }
3105         String[][] endTests = {
3106                 {"0", "\uDC00", "\uD801\uDC00"},
3107                 {"1", "a", "ba"},
3108                 {"0", "a", "bx"},
3109                 {"1", "a\uDC00", "b\uDC00"},
3110                 {"0", "\uD800\uDC00", "\uD801\uDC00"},
3111         };
3112         for (String[] row : endTests) {
3113             int actual = findSharedEndLength(row[1], row[2]);
3114             assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3115                     Integer.parseInt(row[0]),
3116                     actual);
3117         }
3118     }
3119 
3120     /**
3121      * @param s
3122      * @param t
3123      * @return
3124      */
3125     // TODO make generally available
findSharedStartLength(CharSequence s, CharSequence t)3126     private static int findSharedStartLength(CharSequence s, CharSequence t) {
3127         int min = Math.min(s.length(), t.length());
3128         int i;
3129         char sch, tch;
3130         for (i = 0; i < min; ++i) {
3131             sch = s.charAt(i);
3132             tch = t.charAt(i);
3133             if (sch != tch) {
3134                 break;
3135             }
3136         }
3137         return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3138     }
3139 
3140     /**
3141      * @param s
3142      * @param t
3143      * @return
3144      */
3145     // TODO make generally available
findSharedEndLength(CharSequence s, CharSequence t)3146     private static int findSharedEndLength(CharSequence s, CharSequence t) {
3147         int slength = s.length();
3148         int tlength = t.length();
3149         int min = Math.min(slength, tlength);
3150         int i;
3151         char sch, tch;
3152         // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3153         for (i = 0; i < min; ++i) {
3154             sch = s.charAt(slength - i - 1);
3155             tch = t.charAt(tlength - i - 1);
3156             if (sch != tch) {
3157                 break;
3158             }
3159         }
3160         return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
3161     }
3162 
/**
 * Comparison modes accepted by the UnicodeSet flavour of assertEquals.
 * NOTE(review): going by the names, these presumably select whether missing
 * or extra elements are tolerated; confirm against the callers.
 */
enum SetAssert {
    EQUALS,
    MISSING_OK,
    EXTRA_OK
}
3164 
assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3165     static void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3166         boolean haveError = false;
3167         if (!actual.containsAll(empirical)) {
3168             UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3169             errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3170             haveError = true;
3171         }
3172         if (!empirical.containsAll(actual)) {
3173             UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3174             logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3175             haveError = true;
3176         }
3177         if (!haveError) {
3178             logln("OK " + message + ' ' + toPattern(empirical));
3179         }
3180     }
3181 
toPattern(UnicodeSet missing)3182     private static String toPattern(UnicodeSet missing) {
3183         String result = missing.toPattern(false);
3184         if (result.length() < 200) {
3185             return result;
3186         }
3187         return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3188     }
3189 
3190 
3191     /**
3192      * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3193      */
3194     @Test
TestPatternWhitespace()3195     public void TestPatternWhitespace() {
3196         // Rules
3197         String r = "a > \u200E b;";
3198 
3199         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3200 
3201         expect(t, "a", "b");
3202 
3203         // UnicodeSet
3204         UnicodeSet set = new UnicodeSet("[a \u200E]");
3205 
3206         if (set.contains(0x200E)) {
3207             errln("FAIL: U+200E not being ignored by UnicodeSet");
3208         }
3209     }
3210 
3211     @Test
TestAlternateSyntax()3212     public void TestAlternateSyntax() {
3213         // U+2206 == &
3214         // U+2190 == <
3215         // U+2192 == >
3216         // U+2194 == <>
3217         expect("a \u2192 x; b \u2190 y; c \u2194 z",
3218                 "abc",
3219         "xbz");
3220         expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3221                 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3222         "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3223     }
3224 
3225     @Test
TestPositionAPI()3226     public void TestPositionAPI() {
3227         Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3228         Transliterator.Position b = new Transliterator.Position(a);
3229         Transliterator.Position c = new Transliterator.Position();
3230         c.set(a);
3231         // Call the toString() API:
3232         if (a.equals(b) && a.equals(c)) {
3233             logln("Ok: " + a + " == " + b + " == " + c);
3234         } else {
3235             errln("FAIL: " + a + " != " + b + " != " + c);
3236         }
3237     }
3238 
3239     //======================================================================
3240     // New tests for the ::BEGIN/::END syntax
3241     //======================================================================
3242 
3243     private static final String[] BEGIN_END_RULES = new String[] {
3244         // [0]
3245         "abc > xy;"
3246         + "aba > z;",
3247 
3248         // [1]
3249         /*
3250         "::BEGIN;"
3251         + "abc > xy;"
3252         + "::END;"
3253         + "::BEGIN;"
3254         + "aba > z;"
3255         + "::END;",
3256          */
3257         "", // test case commented out below, this is here to keep from messing up the indexes
3258 
3259         // [2]
3260         /*
3261         "abc > xy;"
3262         + "::BEGIN;"
3263         + "aba > z;"
3264         + "::END;",
3265          */
3266         "", // test case commented out below, this is here to keep from messing up the indexes
3267 
3268         // [3]
3269         /*
3270         "::BEGIN;"
3271         + "abc > xy;"
3272         + "::END;"
3273         + "aba > z;",
3274          */
3275         "", // test case commented out below, this is here to keep from messing up the indexes
3276 
3277         // [4]
3278         "abc > xy;"
3279         + "::Null;"
3280         + "aba > z;",
3281 
3282         // [5]
3283         "::Upper;"
3284         + "ABC > xy;"
3285         + "AB > x;"
3286         + "C > z;"
3287         + "::Upper;"
3288         + "XYZ > p;"
3289         + "XY > q;"
3290         + "Z > r;"
3291         + "::Upper;",
3292 
3293         // [6]
3294         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3295         + "$delim = [\\-$ws];"
3296         + "$ws $delim* > ' ';"
3297         + "'-' $delim* > '-';",
3298 
3299         // [7]
3300         "::Null;"
3301         + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3302         + "$delim = [\\-$ws];"
3303         + "$ws $delim* > ' ';"
3304         + "'-' $delim* > '-';",
3305 
3306         // [8]
3307         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3308         + "$delim = [\\-$ws];"
3309         + "$ws $delim* > ' ';"
3310         + "'-' $delim* > '-';"
3311         + "::Null;",
3312 
3313         // [9]
3314         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3315         + "$delim = [\\-$ws];"
3316         + "::Null;"
3317         + "$ws $delim* > ' ';"
3318         + "'-' $delim* > '-';",
3319 
3320         // [10]
3321         /*
3322         "::BEGIN;"
3323         + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3324         + "$delim = [\\-$ws];"
3325         + "::END;"
3326         + "$ws $delim* > ' ';"
3327         + "'-' $delim* > '-';",
3328          */
3329         "", // test case commented out below, this is here to keep from messing up the indexes
3330 
3331         // [11]
3332         /*
3333         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3334         + "$delim = [\\-$ws];"
3335         + "::BEGIN;"
3336         + "$ws $delim* > ' ';"
3337         + "'-' $delim* > '-';"
3338         + "::END;",
3339          */
3340         "", // test case commented out below, this is here to keep from messing up the indexes
3341 
3342         // [12]
3343         /*
3344         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3345         + "$delim = [\\-$ws];"
3346         + "$ab = [ab];"
3347         + "::BEGIN;"
3348         + "$ws $delim* > ' ';"
3349         + "'-' $delim* > '-';"
3350         + "::END;"
3351         + "::BEGIN;"
3352         + "$ab { ' ' } $ab > '-';"
3353         + "c { ' ' > ;"
3354         + "::END;"
3355         + "::BEGIN;"
3356         + "'a-a' > a\\%|a;"
3357         + "::END;",
3358          */
3359         "", // test case commented out below, this is here to keep from messing up the indexes
3360 
3361         // [13]
3362         "$ws = [[:Separator:][\\u0009-\\u000C]$];"
3363         + "$delim = [\\-$ws];"
3364         + "$ab = [ab];"
3365         + "::Null;"
3366         + "$ws $delim* > ' ';"
3367         + "'-' $delim* > '-';"
3368         + "::Null;"
3369         + "$ab { ' ' } $ab > '-';"
3370         + "c { ' ' > ;"
3371         + "::Null;"
3372         + "'a-a' > a\\%|a;",
3373 
3374         // [14]
3375         /*
3376         "::[abc];"
3377         + "::BEGIN;"
3378         + "abc > xy;"
3379         + "::END;"
3380         + "::BEGIN;"
3381         + "aba > yz;"
3382         + "::END;"
3383         + "::Upper;",
3384          */
3385         "", // test case commented out below, this is here to keep from messing up the indexes
3386 
3387         // [15]
3388         "::[abc];"
3389         + "abc > xy;"
3390         + "::Null;"
3391         + "aba > yz;"
3392         + "::Upper;",
3393 
3394         // [16]
3395         /*
3396         "::[abc];"
3397         + "::BEGIN;"
3398         + "abc <> xy;"
3399         + "::END;"
3400         + "::BEGIN;"
3401         + "aba <> yz;"
3402         + "::END;"
3403         + "::Upper(Lower);"
3404         + "::([XYZ]);",
3405          */
3406         "", // test case commented out below, this is here to keep from messing up the indexes
3407 
3408         // [17]
3409         "::[abc];"
3410         + "abc <> xy;"
3411         + "::Null;"
3412         + "aba <> yz;"
3413         + "::Upper(Lower);"
3414         + "::([XYZ]);"
3415     };
3416 
3417     /*
3418 (This entire test is commented out below and will need some heavy revision when we re-add
3419 the ::BEGIN/::END stuff)
3420     private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3421         // [7]
3422         "::BEGIN;"
3423         + "abc > xy;"
3424         + "::BEGIN;"
3425         + "aba > z;"
3426         + "::END;"
3427         + "::END;",
3428 
3429         // [8]
3430         "abc > xy;"
3431         + " aba > z;"
3432         + "::END;",
3433 
3434         // [9]
3435         "::BEGIN;"
3436         + "::Upper;"
3437         + "::END;"
3438     };
3439      */
3440 
3441     private static final String[] BEGIN_END_TEST_CASES = new String[] {
3442         BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
3443         //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
3444         //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
3445         //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
3446         BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
3447         BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
3448 
3449         BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
3450         BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
3451         BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
3452         BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
3453         //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
3454         //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
3455         //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
3456         //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
3457         //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
3458         BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
3459         BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
3460         BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
3461 
3462         //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3463         BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3464         //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3465         BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
3466     };
3467 
3468     @Test
TestBeginEnd()3469     public void TestBeginEnd() {
3470         // run through the list of test cases above
3471         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3472             expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3473         }
3474 
3475         // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3476         Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3477                 Transliterator.REVERSE);
3478         expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3479 
3480         // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3481         // that all of them cause errors
3482         /*
3483 (commented out until we have the real ::BEGIN/::END stuff in place
3484         for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3485             try {
3486                 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3487                         Transliterator.FORWARD);
3488                 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3489             }
3490             catch (IllegalArgumentException e) {
3491                 // this is supposed to happen; do nothing here
3492             }
3493         }
3494          */
3495     }
3496 
3497     @Test
TestBeginEndToRules()3498     public void TestBeginEndToRules() {
3499         // run through the same list of test cases we used above, but this time, instead of just
3500         // instantiating a Transliterator from the rules and running the test against it, we instantiate
3501         // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3502         // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3503         // to (i.e., does the same thing as) the original rule set
3504         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3505             Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3506                     Transliterator.FORWARD);
3507             String rules = t.toRules(false);
3508             Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3509             expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3510         }
3511 
3512         // do the same thing for the reversible test case
3513         Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3514                 Transliterator.REVERSE);
3515         String rules = reversed.toRules(false);
3516         Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3517         expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3518     }
3519 
3520     @Test
TestRegisterAlias()3521     public void TestRegisterAlias() {
3522         String longID = "Lower;[aeiou]Upper";
3523         String shortID = "Any-CapVowels";
3524         String reallyShortID = "CapVowels";
3525 
3526         Transliterator.registerAlias(shortID, longID);
3527 
3528         Transliterator t1 = Transliterator.getInstance(longID);
3529         Transliterator t2 = Transliterator.getInstance(reallyShortID);
3530 
3531         if (!t1.getID().equals(longID))
3532             errln("Transliterator instantiated with long ID doesn't have long ID");
3533         if (!t2.getID().equals(reallyShortID))
3534             errln("Transliterator instantiated with short ID doesn't have short ID");
3535 
3536         if (!t1.toRules(true).equals(t2.toRules(true)))
3537             errln("Alias transliterators aren't the same");
3538 
3539         Transliterator.unregister(shortID);
3540 
3541         try {
3542             t1 = Transliterator.getInstance(shortID);
3543             errln("Instantiation with short ID succeeded after short ID was unregistered");
3544         }
3545         catch (IllegalArgumentException e) {
3546         }
3547 
3548         // try the same thing again, but this time with something other than
3549         // an instance of CompoundTransliterator
3550         String realID = "Latin-Greek";
3551         String fakeID = "Latin-dlgkjdflkjdl";
3552         Transliterator.registerAlias(fakeID, realID);
3553 
3554         t1 = Transliterator.getInstance(realID);
3555         t2 = Transliterator.getInstance(fakeID);
3556 
3557         if (!t1.toRules(true).equals(t2.toRules(true)))
3558             errln("Alias transliterators aren't the same");
3559 
3560         Transliterator.unregister(fakeID);
3561     }
3562 
3563     /**
3564      * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3565      */
3566     @Test
TestHalfwidthFullwidth()3567     public void TestHalfwidthFullwidth() {
3568         Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3569         Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3570 
3571         // Array of 3n items
3572         // Each item is
3573         //   "hf"|"fh"|"both",
3574         //   <Halfwidth>,
3575         //   <Fullwidth>
3576         String[] DATA = {
3577                 "both",
3578                 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3579                 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3580         };
3581 
3582         for (int i=0; i<DATA.length; i+=3) {
3583             switch (DATA[i].charAt(0)) {
3584             case 'h': // Halfwidth-Fullwidth only
3585                 expect(hf, DATA[i+1], DATA[i+2]);
3586                 break;
3587             case 'f': // Fullwidth-Halfwidth only
3588                 expect(fh, DATA[i+2], DATA[i+1]);
3589                 break;
3590             case 'b': // both directions
3591                 expect(hf, DATA[i+1], DATA[i+2]);
3592                 expect(fh, DATA[i+2], DATA[i+1]);
3593                 break;
3594             }
3595         }
3596 
3597     }
3598 
3599     /**
3600      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3601      *              TODO: confirm that the expected results are correct.
3602      *              For now, test just confirms that C++ and Java give identical results.
3603      */
3604     @Test
TestThai()3605     public void TestThai() {
3606         // The expectations in this test heavily depends on the Thai dictionary.
3607         // Therefore, we skip this test under the LSTM configuration.
3608         org.junit.Assume.assumeTrue(!RBBITstUtils.skipDictionaryTest());
3609         Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3610         String thaiText =
3611             "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3612             "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3613             "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3614             "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3615             "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3616             "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3617             "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3618             "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3619             "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3620             "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3621             "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3622             "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3623             "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3624             "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3625             "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3626             "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3627             "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3628             "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3629             "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3630             "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3631             "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3632             "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3633             "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3634             "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3635             " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3636             "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3637             "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3638             " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3639             "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3640             "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3641 
3642         String latinText =
3643             "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3644             "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3645             "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3646             "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3647             "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3648             " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3649             "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3650             "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3651             "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3652             "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3653             "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3654             "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3655             " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3656             "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3657             " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3658             "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3659             "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3660             "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3661 
3662         expect(tr, thaiText, latinText);
3663     }
3664 
3665 
3666     //======================================================================
3667     // These tests are not mirrored (yet) in icu4c at
3668     // source/test/intltest/transtst.cpp
3669     //======================================================================
3670 
3671     /**
3672      * Improve code coverage.
3673      */
3674     @Test
TestCoverage()3675     public void TestCoverage() {
3676         // NullTransliterator
3677         Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3678         expect(t, "a", "a");
3679 
3680         // Source, target set
3681         t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3682         t.setFilter(new UnicodeSet("[A-Z]"));
3683         logln("source = " + t.getSourceSet());
3684         logln("target = " + t.getTargetSet());
3685 
3686         t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3687         logln("source = " + t.getSourceSet());
3688         logln("target = " + t.getTargetSet());
3689     }
3690     /*
3691      * Test case for threading problem in NormalizationTransliterator
3692      * reported by ticket#5160
3693      */
3694     @Test
TestT5160()3695     public void TestT5160() {
3696         final String[] testData = {
3697                 "a",
3698                 "b",
3699                 "\u09BE",
3700                 "A\u0301",
3701         };
3702         final String[] expected = {
3703                 "a",
3704                 "b",
3705                 "\u09BE",
3706                 "\u00C1",
3707         };
3708         Transliterator translit = Transliterator.getInstance("NFC");
3709         NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
3710         for (int i = 0; i < tasks.length; i++) {
3711             tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
3712         }
3713         TestUtil.runUntilDone(tasks);
3714 
3715         for (int i = 0; i < tasks.length; i++) {
3716             if (tasks[i].getErrorMessage() != null) {
3717                 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
3718                 break;
3719             }
3720         }
3721     }
3722 
3723     static class NormTranslitTask implements Runnable {
3724         Transliterator translit;
3725         String testData;
3726         String expectedData;
3727         String errorMsg;
3728 
NormTranslitTask(Transliterator translit, String testData, String expectedData)3729         NormTranslitTask(Transliterator translit, String testData, String expectedData) {
3730             this.translit = translit;
3731             this.testData = testData;
3732             this.expectedData = expectedData;
3733         }
3734 
3735         @Override
run()3736         public void run() {
3737             errorMsg = null;
3738             StringBuffer inBuf = new StringBuffer(testData);
3739             StringBuffer expectedBuf = new StringBuffer(expectedData);
3740 
3741             for(int i = 0; i < 1000; i++) {
3742                 String in = inBuf.toString();
3743                 String out = translit.transliterate(in);
3744                 String expected = expectedBuf.toString();
3745                 if (!out.equals(expected)) {
3746                     errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
3747                     break;
3748                 }
3749                 inBuf.append(testData);
3750                 expectedBuf.append(expectedData);
3751             }
3752         }
3753 
getErrorMessage()3754         public String getErrorMessage() {
3755             return errorMsg;
3756         }
3757     }
3758 
3759     //======================================================================
3760     // Support methods
3761     //======================================================================
expect(String rules, String source, String expectedResult, Transliterator.Position pos)3762     static void expect(String rules,
3763             String source,
3764             String expectedResult,
3765             Transliterator.Position pos) {
3766         Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
3767         expect(t, source, expectedResult, pos);
3768     }
3769 
expect(String rules, String source, String expectedResult)3770     static void expect(String rules, String source, String expectedResult) {
3771         expect(rules, source, expectedResult, null);
3772     }
3773 
expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)3774     static void expect(Transliterator t, String source, String expectedResult,
3775             Transliterator reverseTransliterator) {
3776         expect(t, source, expectedResult);
3777         if (reverseTransliterator != null) {
3778             expect(reverseTransliterator, expectedResult, source);
3779         }
3780     }
3781 
expect(Transliterator t, String source, String expectedResult)3782     static void expect(Transliterator t, String source, String expectedResult) {
3783         expect(t, source, expectedResult, (Transliterator.Position) null);
3784     }
3785 
expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)3786     static void expect(Transliterator t, String source, String expectedResult,
3787             Transliterator.Position pos) {
3788         if (pos == null) {
3789             String result = t.transliterate(source);
3790             if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
3791         }
3792 
3793         Transliterator.Position index = null;
3794         if (pos == null) {
3795             index = new Transliterator.Position(0, source.length(), 0, source.length());
3796         } else {
3797             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
3798                     pos.start, pos.limit);
3799         }
3800 
3801         ReplaceableString rsource = new ReplaceableString(source);
3802 
3803         t.finishTransliteration(rsource, index);
3804         // Do it all at once -- below we do it incrementally
3805 
3806         if (index.start != index.limit) {
3807             expectAux(t.getID() + ":UNFINISHED", source,
3808                     "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
3809             return;
3810         }
3811         String result = rsource.toString();
3812         if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
3813 
3814 
3815         if (pos == null) {
3816             index = new Transliterator.Position();
3817         } else {
3818             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
3819                     pos.start, pos.limit);
3820         }
3821 
3822         // Test incremental transliteration -- this result
3823         // must be the same after we finalize (see below).
3824         List<String> v = new ArrayList<String>();
3825         v.add(source);
3826         rsource.replace(0, rsource.length(), "");
3827         if (pos != null) {
3828             rsource.replace(0, 0, source);
3829             v.add(UtilityExtensions.formatInput(rsource, index));
3830             t.transliterate(rsource, index);
3831             v.add(UtilityExtensions.formatInput(rsource, index));
3832         } else {
3833             for (int i=0; i<source.length(); ++i) {
3834                 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
3835                 //log.append(source.charAt(i)).append(" -> "));
3836                 t.transliterate(rsource, index, source.charAt(i));
3837                 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
3838                 v.add(UtilityExtensions.formatInput(rsource, index) +
3839                         ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
3840             }
3841         }
3842 
3843         // As a final step in keyboard transliteration, we must call
3844         // transliterate to finish off any pending partial matches that
3845         // were waiting for more input.
3846         t.finishTransliteration(rsource, index);
3847         result = rsource.toString();
3848         //log.append(" => ").append(rsource.toString());
3849         v.add(result);
3850 
3851         String[] results = new String[v.size()];
3852         v.toArray(results);
3853         expectAux(t.getID() + ":Incremental", results,
3854                 result.equals(expectedResult),
3855                 expectedResult);
3856     }
3857 
3858     static boolean expectAux(String tag, String source,
3859             String result, String expectedResult) {
3860         return expectAux(tag, new String[] {source, result},
3861                 result.equals(expectedResult),
3862                 expectedResult);
3863     }
3864 
3865     static boolean expectAux(String tag, String source,
3866             String result, boolean pass,
3867             String expectedResult) {
3868         return expectAux(tag, new String[] {source, result},
3869                 pass,
3870                 expectedResult);
3871     }
3872 
3873     static boolean expectAux(String tag, String source,
3874             boolean pass,
3875             String expectedResult) {
3876         return expectAux(tag, new String[] {source},
3877                 pass,
3878                 expectedResult);
3879     }
3880 
3881     static boolean expectAux(String tag, String[] results, boolean pass,
3882             String expectedResult) {
3883         msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
3884 
3885         for (int i = 0; i < results.length; ++i) {
3886             String label;
3887             if (i == 0) {
3888                 label = "source:   ";
3889             } else if (i == results.length - 1) {
3890                 label = "result:   ";
3891             } else {
3892                 if (!isVerbose() && pass) continue;
3893                 label = "interm" + i + ":  ";
3894             }
3895             msg("    " + label + results[i], pass ? LOG : ERR, false, true);
3896         }
3897 
3898         if (!pass) {
3899             msg(  "    expected: " + expectedResult, ERR, false, true);
3900         }
3901 
3902         return pass;
3903     }
3904 
3905     static private void assertTransform(String message, String expected, StringTransform t, String source) {
3906         assertEquals(message + " " + source, expected, t.transform(source));
3907     }
3908 
3909 
3910     static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
3911         assertEquals(message + " " +source, expected, t.transform(source));
3912         assertEquals(message + " " +source2, expected, t.transform(source2));
3913         assertEquals(message + " " + expected, source, back.transform(expected));
3914     }
3915 
3916     /*
3917      * Tests the method public Enumeration<String> getAvailableTargets(String source)
3918      */
3919     @Test
3920     public void TestGetAvailableTargets() {
3921         try {
3922             // Tests when if (targets == null) is true
3923             Transliterator.getAvailableTargets("");
3924         } catch (Exception e) {
3925             errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
3926         }
3927     }
3928 
3929     /*
3930      * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
3931      */
3932     @Test
3933     public void TestGetAvailableVariants() {
3934         try {
3935             // Tests when if (targets == null) is true
3936             Transliterator.getAvailableVariants("", "");
3937         } catch (Exception e) {
3938             errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
3939         }
3940     }
3941 
3942     /*
3943      * Tests the method String nextLine() in RuleBody
3944      */
3945     @Test
3946     public void TestNextLine() {
3947         // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
3948         try{
3949             Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
3950         } catch(Exception e){
3951             errln("TransliteratorParser.nextLine() was not suppose to return an " +
3952             "exception for a rule of '\\'");
3953         }
3954     }
3955 
3956     /**
3957      * Tests equals and hashCode implementation of Transliterator.Position
3958      */
3959     @Test
3960     public void TestPositionEquals() {
3961         Transliterator.Position position1 = new Transliterator.Position(1, 0, 0, 0);
3962         Transliterator.Position position2 = new Transliterator.Position(0, 0, 0, 0);
3963         assertNotEquals("2 different positions are not equal", position1, position2);
3964         assertNotEquals("2 different positions have different hash codes", position1.hashCode(), position2.hashCode());
3965         Transliterator.Position position3 = new Transliterator.Position(1, 0, 0, 0);
3966         assertEquals("2 positions are equal", position1, position3);
3967         assertEquals("2 positions have the same hash codes", position1.hashCode(), position3.hashCode());
3968     }
3969 }
3970