• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.test.translit;
10 
11 import java.util.ArrayList;
12 import java.util.Enumeration;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.List;
17 import java.util.Locale;
18 import java.util.Map.Entry;
19 
20 import org.junit.Ignore;
21 import org.junit.Test;
22 
23 import com.ibm.icu.dev.test.TestFmwk;
24 import com.ibm.icu.dev.test.TestUtil;
25 import com.ibm.icu.dev.util.UnicodeMap;
26 import com.ibm.icu.impl.Utility;
27 import com.ibm.icu.impl.UtilityExtensions;
28 import com.ibm.icu.lang.CharSequences;
29 import com.ibm.icu.lang.UCharacter;
30 import com.ibm.icu.lang.UScript;
31 import com.ibm.icu.text.CanonicalIterator;
32 import com.ibm.icu.text.Normalizer2;
33 import com.ibm.icu.text.Replaceable;
34 import com.ibm.icu.text.ReplaceableString;
35 import com.ibm.icu.text.StringTransform;
36 import com.ibm.icu.text.Transliterator;
37 import com.ibm.icu.text.UTF16;
38 import com.ibm.icu.text.UnicodeFilter;
39 import com.ibm.icu.text.UnicodeSet;
40 import com.ibm.icu.text.UnicodeSetIterator;
41 import com.ibm.icu.util.CaseInsensitiveString;
42 import com.ibm.icu.util.ULocale;
43 
44 /***********************************************************************
45 
46                      HOW TO USE THIS TEST FILE
47                                -or-
48                   How I developed on two platforms
49                 without losing (too much of) my mind
50 
51 
52 1. Add new tests by copying/pasting/changing existing tests.  On Java,
53    any public void method named Test...() taking no parameters becomes
54    a test.  On C++, you need to modify the header and add a line to
55    the runIndexedTest() dispatch method.
56 
57 2. Make liberal use of the expect() method; it is your friend.
58 
59 3. The tests in this file exactly match those in a sister file on the
60    other side.  The two files are:
61 
62    icu4j:  src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
63    icu4c:  source/test/intltest/transtst.cpp
64 
65                   ==> THIS IS THE IMPORTANT PART <==
66 
67    When you add a test in this file, add it in transtst.cpp too.
68    Give it the same name and put it in the same relative place.  This
69    makes maintenance a lot simpler for any poor soul who ends up
70    trying to synchronize the tests between icu4j and icu4c.
71 
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73    then add it in the special non-mirrored section.  These are
74    labeled
75 
76      "icu4j ONLY"
77 
78    or
79 
80      "icu4c ONLY"
81 
82    Make sure you document the reason the test is here and not there.
83 
84 
85 Thank you.
86 The Management
87  ***********************************************************************/
88 
89 /**
90  * @test
91  * @summary General test of Transliterator
92  */
93 public class TransliteratorTest extends TestFmwk {
94     @Test
TestHangul()95     public void TestHangul() {
96 
97         Transliterator lh = Transliterator.getInstance("Latin-Hangul");
98         Transliterator hl = lh.getInverse();
99 
100         assertTransform("Transform", "\uCE20", lh, "ch");
101 
102         assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
103         assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
104         assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
105         assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
106         assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
107         assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
108         assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
109         assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
110         assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
111         assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
112         assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
113         assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
114         assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
115         assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
116         assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
117         assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
118         assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
119         assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
120         assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
121         assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
122         assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
123         assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
124         assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
125         assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
126         assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
127         assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
128 
129     }
130 
131     @Test
TestChinese()132     public void TestChinese() {
133         Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
134         assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
135         assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
136     }
137 
138     @Test
TestRegistry()139     public void TestRegistry() {
140         checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
141         checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
142         checkRegistry("foo1", "[:letter:] a > b;");
143         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
144             String id = (String) e.nextElement();
145             checkRegistry(id);
146         }
147     }
148 
checkRegistry(String id, String rules)149     private void checkRegistry (String id, String rules) {
150         Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
151         Transliterator.registerInstance(foo);
152         checkRegistry(id);
153     }
154 
checkRegistry(String id)155     private void checkRegistry(String id) {
156         Transliterator fie = Transliterator.getInstance(id);
157         final UnicodeSet fae = new UnicodeSet("[a-z5]");
158         fie.setFilter(fae);
159         Transliterator foe = Transliterator.getInstance(id);
160         UnicodeFilter fee = foe.getFilter();
161         if (fae.equals(fee)) {
162             errln("Changed what is in registry for " + id);
163         }
164     }
165 
166     // Android-changed: increase timeout.
167     @Test(timeout = 3000000L)
TestInstantiation()168     public void TestInstantiation() {
169         long ms = System.currentTimeMillis();
170         String ID;
171         for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
172             ID = (String) e.nextElement();
173             if (ID.equals("Latin-Han/definition")) {
174                 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later");
175                 continue;
176             }
177             Transliterator t = null;
178             try {
179                 t = Transliterator.getInstance(ID);
180                 // This is only true for some subclasses
181                 //                // We should get a new instance if we try again
182                 //                Transliterator t2 = Transliterator.getInstance(ID);
183                 //                if (t != t2) {
184                 //                    logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);
185                 //                } else {
186                 //                    errln("FAIL: " + ID + " returned identical instances");
187                 //                    t = null;
188                 //                }
189             } catch (IllegalArgumentException ex) {
190                 errln("FAIL: " + ID);
191                 throw ex;
192             }
193 
194             //            if (t.getFilter() != null) {
195             //                errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter());
196             //            }
197 
198             if (t != null) {
199                 // Now test toRules
200                 String rules = null;
201                 try {
202                     rules = t.toRules(true);
203 
204                     Transliterator.createFromRules("x", rules, Transliterator.FORWARD);
205                 } catch (IllegalArgumentException ex2) {
206                     errln("FAIL: " + ID + ".toRules() => bad rules: " +
207                             rules);
208                     throw ex2;
209                 }
210             }
211         }
212 
213         // Now test the failure path
214         try {
215             ID = "<Not a valid Transliterator ID>";
216             Transliterator t = Transliterator.getInstance(ID);
217             errln("FAIL: " + ID + " returned " + t);
218         } catch (IllegalArgumentException ex) {
219             logln("OK: Bogus ID handled properly");
220         }
221 
222         ms = System.currentTimeMillis() - ms;
223         logln("Elapsed time: " + ms + " ms");
224     }
225 
226     @Test
TestSimpleRules()227     public void TestSimpleRules() {
228         /* Example: rules 1. ab>x|y
229          *                2. yc>z
230          *
231          * []|eabcd  start - no match, copy e to tranlated buffer
232          * [e]|abcd  match rule 1 - copy output & adjust cursor
233          * [ex|y]cd  match rule 2 - copy output & adjust cursor
234          * [exz]|d   no match, copy d to transliterated buffer
235          * [exzd]|   done
236          */
237         expect("ab>x|y;" +
238                 "yc>z",
239                 "eabcd", "exzd");
240 
241         /* Another set of rules:
242          *    1. ab>x|yzacw
243          *    2. za>q
244          *    3. qc>r
245          *    4. cw>n
246          *
247          * []|ab       Rule 1
248          * [x|yzacw]   No match
249          * [xy|zacw]   Rule 2
250          * [xyq|cw]    Rule 4
251          * [xyqn]|     Done
252          */
253         expect("ab>x|yzacw;" +
254                 "za>q;" +
255                 "qc>r;" +
256                 "cw>n",
257                 "ab", "xyqn");
258 
259         /* Test categories
260          */
261         Transliterator t = Transliterator.createFromRules("<ID>",
262                 "$dummy=\uE100;" +
263                 "$vowel=[aeiouAEIOU];" +
264                 "$lu=[:Lu:];" +
265                 "$vowel } $lu > '!';" +
266                 "$vowel > '&';" +
267                 "'!' { $lu > '^';" +
268                 "$lu > '*';" +
269                 "a>ERROR",
270                 Transliterator.FORWARD);
271         expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
272     }
273 
274     /**
275      * Test inline set syntax and set variable syntax.
276      */
277     @Test
TestInlineSet()278     public void TestInlineSet() {
279         expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
280         expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
281 
282         expect("$digit = [0-9];" +
283                 "$alpha = [a-zA-Z];" +
284                 "$alphanumeric = [$digit $alpha];" + // ***
285                 "$special = [^$alphanumeric];" +     // ***
286                 "$alphanumeric > '-';" +
287                 "$special > '*';",
288 
289                 "thx-1138", "---*----");
290     }
291 
292     /**
293      * Create some inverses and confirm that they work.  We have to be
294      * careful how we do this, since the inverses will not be true
295      * inverses -- we can't throw any random string at the composition
296      * of the transliterators and expect the identity function.  F x
297      * F' != I.  However, if we are careful about the input, we will
298      * get the expected results.
299      */
300     @Test
TestRuleBasedInverse()301     public void TestRuleBasedInverse() {
302         String RULES =
303             "abc>zyx;" +
304             "ab>yz;" +
305             "bc>zx;" +
306             "ca>xy;" +
307             "a>x;" +
308             "b>y;" +
309             "c>z;" +
310 
311             "abc<zyx;" +
312             "ab<yz;" +
313             "bc<zx;" +
314             "ca<xy;" +
315             "a<x;" +
316             "b<y;" +
317             "c<z;" +
318 
319             "";
320 
321         String[] DATA = {
322                 // Careful here -- random strings will not work.  If we keep
323                 // the left side to the domain and the right side to the range
324                 // we will be okay though (left, abc; right xyz).
325                 "a", "x",
326                 "abcacab", "zyxxxyy",
327                 "caccb", "xyzzy",
328         };
329 
330         Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
331         Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
332         for (int i=0; i<DATA.length; i+=2) {
333             expect(fwd, DATA[i], DATA[i+1]);
334             expect(rev, DATA[i+1], DATA[i]);
335         }
336     }
337 
338     /**
339      * Basic test of keyboard.
340      */
341     @Test
TestKeyboard()342     public void TestKeyboard() {
343         Transliterator t = Transliterator.createFromRules("<ID>",
344                 "psch>Y;"
345                 +"ps>y;"
346                 +"ch>x;"
347                 +"a>A;", Transliterator.FORWARD);
348         String DATA[] = {
349                 // insertion, buffer
350                 "a", "A",
351                 "p", "Ap",
352                 "s", "Aps",
353                 "c", "Apsc",
354                 "a", "AycA",
355                 "psch", "AycAY",
356                 null, "AycAY", // null means finishKeyboardTransliteration
357         };
358 
359         keyboardAux(t, DATA);
360     }
361 
362     /**
363      * Basic test of keyboard with cursor.
364      */
365     @Test
TestKeyboard2()366     public void TestKeyboard2() {
367         Transliterator t = Transliterator.createFromRules("<ID>",
368                 "ych>Y;"
369                 +"ps>|y;"
370                 +"ch>x;"
371                 +"a>A;", Transliterator.FORWARD);
372         String DATA[] = {
373                 // insertion, buffer
374                 "a", "A",
375                 "p", "Ap",
376                 "s", "Aps", // modified for rollback - "Ay",
377                 "c", "Apsc", // modified for rollback - "Ayc",
378                 "a", "AycA",
379                 "p", "AycAp",
380                 "s", "AycAps", // modified for rollback - "AycAy",
381                 "c", "AycApsc", // modified for rollback - "AycAyc",
382                 "h", "AycAY",
383                 null, "AycAY", // null means finishKeyboardTransliteration
384         };
385 
386         keyboardAux(t, DATA);
387     }
388 
389     /**
390      * Test keyboard transliteration with back-replacement.
391      */
392     @Test
TestKeyboard3()393     public void TestKeyboard3() {
394         // We want th>z but t>y.  Furthermore, during keyboard
395         // transliteration we want t>y then yh>z if t, then h are
396         // typed.
397         String RULES =
398             "t>|y;" +
399             "yh>z;" +
400             "";
401 
402         String[] DATA = {
403                 // Column 1: characters to add to buffer (as if typed)
404                 // Column 2: expected appearance of buffer after
405                 //           keyboard xliteration.
406                 "a", "a",
407                 "b", "ab",
408                 "t", "abt", // modified for rollback - "aby",
409                 "c", "abyc",
410                 "t", "abyct", // modified for rollback - "abycy",
411                 "h", "abycz",
412                 null, "abycz", // null means finishKeyboardTransliteration
413         };
414 
415         Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
416         keyboardAux(t, DATA);
417     }
418 
keyboardAux(Transliterator t, String[] DATA)419     private void keyboardAux(Transliterator t, String[] DATA) {
420         Transliterator.Position index = new Transliterator.Position();
421         ReplaceableString s = new ReplaceableString();
422         for (int i=0; i<DATA.length; i+=2) {
423             StringBuffer log;
424             if (DATA[i] != null) {
425                 log = new StringBuffer(s.toString() + " + "
426                         + DATA[i]
427                                + " -> ");
428                 t.transliterate(s, index, DATA[i]);
429             } else {
430                 log = new StringBuffer(s.toString() + " => ");
431                 t.finishTransliteration(s, index);
432             }
433             UtilityExtensions.formatInput(log, s, index);
434             if (s.toString().equals(DATA[i+1])) {
435                 logln(log.toString());
436             } else {
437                 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
438             }
439         }
440     }
441 
442     // Latin-Arabic has been temporarily removed until it can be
443     // done correctly.
444 
445     //  public void TestArabic() {
446     //      String DATA[] = {
447     //          "Arabic",
448     //              "\u062a\u062a\u0645\u062a\u0639 "+
449     //              "\u0627\u0644\u0644\u063a\u0629 "+
450     //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
451     //              "\u0628\u0628\u0646\u0638\u0645 "+
452     //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
453     //              "\u062c\u0645\u064a\u0644\u0629"
454     //      };
455 
456     //      Transliterator t = Transliterator.getInstance("Latin-Arabic");
457     //      for (int i=0; i<DATA.length; i+=2) {
458     //          expect(t, DATA[i], DATA[i+1]);
459     //      }
460     //  }
461 
462     /**
463      * Compose the Kana transliterator forward and reverse and try
464      * some strings that should come out unchanged.
465      */
466     @Test
TestCompoundKana()467     public void TestCompoundKana() {
468         Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
469         expect(t, "aaaaa", "aaaaa");
470     }
471 
472     /**
473      * Compose the hex transliterators forward and reverse.
474      */
475     @Test
TestCompoundHex()476     public void TestCompoundHex() {
477         Transliterator a = Transliterator.getInstance("Any-Hex");
478         Transliterator b = Transliterator.getInstance("Hex-Any");
479         // Transliterator[] trans = { a, b };
480         // Transliterator ab = Transliterator.getInstance(trans);
481         Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
482 
483         // Do some basic tests of b
484         expect(b, "\\u0030\\u0031", "01");
485 
486         String s = "abcde";
487         expect(ab, s, s);
488 
489         // trans = new Transliterator[] { b, a };
490         // Transliterator ba = Transliterator.getInstance(trans);
491         Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
492         ReplaceableString str = new ReplaceableString(s);
493         a.transliterate(str);
494         expect(ba, str.toString(), str.toString());
495     }
496 
497     /**
498      * Do some basic tests of filtering.
499      */
500     @Test
TestFiltering()501     public void TestFiltering() {
502 
503         Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
504         tempTrans.setFilter(new UnicodeSet("[a]"));
505         String tempResult = tempTrans.transform("xa");
506         assertEquals("context should not be filtered ", "xb", tempResult);
507 
508         tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
509         tempResult = tempTrans.transform("xa");
510         assertEquals("context should not be filtered ", "xb", tempResult);
511 
512         Transliterator hex = Transliterator.getInstance("Any-Hex");
513         hex.setFilter(new UnicodeFilter() {
514             public boolean contains(int c) {
515                 return c != 'c';
516             }
517             public String toPattern(boolean escapeUnprintable) {
518                 return "";
519             }
520             public boolean matchesIndexValue(int v) {
521                 return false;
522             }
523             public void addMatchSetTo(UnicodeSet toUnionTo) {}
524         });
525         String s = "abcde";
526         String out = hex.transliterate(s);
527         String exp = "\\u0061\\u0062c\\u0064\\u0065";
528         if (out.equals(exp)) {
529             logln("Ok:   \"" + exp + "\"");
530         } else {
531             logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
532         }
533     }
534 
535     /**
536      * Test anchors
537      */
538     @Test
TestAnchors()539     public void TestAnchors() {
540         expect("^ab  > 01 ;" +
541                 " ab  > |8 ;" +
542                 "  b  > k ;" +
543                 " 8x$ > 45 ;" +
544                 " 8x  > 77 ;",
545 
546                 "ababbabxabx",
547         "018k7745");
548         expect("$s = [z$] ;" +
549                 "$s{ab    > 01 ;" +
550                 "   ab    > |8 ;" +
551                 "    b    > k ;" +
552                 "   8x}$s > 45 ;" +
553                 "   8x    > 77 ;",
554 
555                 "abzababbabxzabxabx",
556         "01z018k45z01x45");
557     }
558 
559     /**
560      * Test pattern quoting and escape mechanisms.
561      */
562     @Test
TestPatternQuoting()563     public void TestPatternQuoting() {
564         // Array of 3n items
565         // Each item is <rules>, <input>, <expected output>
566         String[] DATA = {
567                 "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
568         };
569 
570         for (int i=0; i<DATA.length; i+=3) {
571             logln("Pattern: " + Utility.escape(DATA[i]));
572             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
573             expect(t, DATA[i+1], DATA[i+2]);
574         }
575     }
576 
577     @Test
TestVariableNames()578     public void TestVariableNames() {
579         Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
580         if (gl == null) {
581             errln("FAIL: null Transliterator returned.");
582         }
583     }
584 
585     /**
586      * Regression test for bugs found in Greek transliteration.
587      */
588     @Test
TestJ277()589     public void TestJ277() {
590         Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
591 
592         char sigma = (char)0x3C3;
593         char upsilon = (char)0x3C5;
594         char nu = (char)0x3BD;
595         // not used char PHI = (char)0x3A6;
596         char alpha = (char)0x3B1;
597         // not used char omega = (char)0x3C9;
598         // not used char omicron = (char)0x3BF;
599         // not used char epsilon = (char)0x3B5;
600 
601         // sigma upsilon nu -> syn
602         StringBuffer buf = new StringBuffer();
603         buf.append(sigma).append(upsilon).append(nu);
604         String syn = buf.toString();
605         expect(gl, syn, "syn");
606 
607         // sigma alpha upsilon nu -> saun
608         buf.setLength(0);
609         buf.append(sigma).append(alpha).append(upsilon).append(nu);
610         String sayn = buf.toString();
611         expect(gl, sayn, "saun");
612 
613         // Again, using a smaller rule set
614         String rules =
615             "$alpha   = \u03B1;" +
616             "$nu      = \u03BD;" +
617             "$sigma   = \u03C3;" +
618             "$ypsilon = \u03C5;" +
619             "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +
620             "s <>           $sigma;" +
621             "a <>           $alpha;" +
622             "u <>  $vowel { $ypsilon;" +
623             "y <>           $ypsilon;" +
624             "n <>           $nu;";
625         Transliterator mini = Transliterator.createFromRules
626         ("mini", rules, Transliterator.REVERSE);
627         expect(mini, syn, "syn");
628         expect(mini, sayn, "saun");
629 
630         //|    // Transliterate the Greek locale data
631         //|    Locale el("el");
632         //|    DateFormatSymbols syms(el, status);
633         //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
634         //|    int32_t i, count;
635         //|    const UnicodeString* data = syms.getMonths(count);
636         //|    for (i=0; i<count; ++i) {
637         //|        if (data[i].length() == 0) {
638         //|            continue;
639         //|        }
640         //|        UnicodeString out(data[i]);
641         //|        gl->transliterate(out);
642         //|        bool_t ok = TRUE;
643         //|        if (data[i].length() >= 2 && out.length() >= 2 &&
644         //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
645         //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
646         //|                ok = FALSE;
647         //|            }
648         //|        }
649         //|        if (ok) {
650         //|            logln(prettify(data[i] + " -> " + out));
651         //|        } else {
652         //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
653         //|        }
654         //|    }
655     }
656 
657     //    /**
658     //     * Prefix, suffix support in hex transliterators
659     //     */
660     //    public void TestJ243() {
661     //        // Test default Hex-Any, which should handle
662     //        // \\u, \\U, u+, and U+
663     //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
664     //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
665     //
666     //        // Try a custom Hex-Any
667     //        // \\uXXXX and &#xXXXX;
668     //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
669     //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
670     //               "abcd5fx012&#x00033;");
671     //
672     //        // Try custom Any-Hex (default is tested elsewhere)
673     //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
674     //        expect(hex3, "012", "&#x30;&#x31;&#x32;");
675     //    }
676 
677     @Test
TestJ329()678     public void TestJ329() {
679 
680         Object[] DATA = {
681                 Boolean.FALSE, "a > b; c > d",
682                 Boolean.TRUE,  "a > b; no operator; c > d",
683         };
684 
685         for (int i=0; i<DATA.length; i+=2) {
686             String err = null;
687             try {
688                 Transliterator.createFromRules("<ID>",
689                         (String) DATA[i+1],
690                         Transliterator.FORWARD);
691             } catch (IllegalArgumentException e) {
692                 err = e.getMessage();
693             }
694             boolean gotError = (err != null);
695             String desc = (String) DATA[i+1] +
696             (gotError ? (" -> error: " + err) : " -> no error");
697             if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
698                 logln("Ok:   " + desc);
699             } else {
700                 errln("FAIL: " + desc);
701             }
702         }
703     }
704 
705     /**
706      * Test segments and segment references.
707      */
708     @Test
TestSegments()709     public void TestSegments() {
710         // Array of 3n items
711         // Each item is <rules>, <input>, <expected output>
712         String[] DATA = {
713                 "([a-z]) '.' ([0-9]) > $2 '-' $1",
714                 "abc.123.xyz.456",
715                 "ab1-c23.xy4-z56",
716         };
717 
718         for (int i=0; i<DATA.length; i+=3) {
719             logln("Pattern: " + Utility.escape(DATA[i]));
720             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
721             expect(t, DATA[i+1], DATA[i+2]);
722         }
723     }
724 
725     /**
726      * Test cursor positioning outside of the key
727      */
728     @Test
TestCursorOffset()729     public void TestCursorOffset() {
730         // Array of 3n items
731         // Each item is <rules>, <input>, <expected output>
732         String[] DATA = {
733                 "pre {alpha} post > | @ ALPHA ;" +
734                 "eALPHA > beta ;" +
735                 "pre {beta} post > BETA @@ | ;" +
736                 "post > xyz",
737 
738                 "prealphapost prebetapost",
739                 "prbetaxyz preBETApost",
740         };
741 
742         for (int i=0; i<DATA.length; i+=3) {
743             logln("Pattern: " + Utility.escape(DATA[i]));
744             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
745             expect(t, DATA[i+1], DATA[i+2]);
746         }
747     }
748 
749     /**
750      * Test zero length and > 1 char length variable values.  Test
751      * use of variable refs in UnicodeSets.
752      */
753     @Test
TestArbitraryVariableValues()754     public void TestArbitraryVariableValues() {
755         // Array of 3n items
756         // Each item is <rules>, <input>, <expected output>
757         String[] DATA = {
758                 "$abe = ab;" +
759                 "$pat = x[yY]z;" +
760                 "$ll  = 'a-z';" +
761                 "$llZ = [$ll];" +
762                 "$llY = [$ll$pat];" +
763                 "$emp = ;" +
764 
765                 "$abe > ABE;" +
766                 "$pat > END;" +
767                 "$llZ > 1;" +
768                 "$llY > 2;" +
769                 "7$emp 8 > 9;" +
770                 "",
771 
772                 "ab xYzxyz stY78",
773                 "ABE ENDEND 1129",
774         };
775 
776         for (int i=0; i<DATA.length; i+=3) {
777             logln("Pattern: " + Utility.escape(DATA[i]));
778             Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
779             expect(t, DATA[i+1], DATA[i+2]);
780         }
781     }
782 
783     /**
784      * Confirm that the contextStart, contextLimit, start, and limit
785      * behave correctly.
786      */
787     @Test
TestPositionHandling()788     public void TestPositionHandling() {
789         // Array of 3n items
790         // Each item is <rules>, <input>, <expected output>
791         String[] DATA = {
792                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
793                 "xtat txtb", // pos 0,9,0,9
794                 "xTTaSS TTxUUb",
795 
796                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
797                 "xtat txtb", // pos 2,9,3,8
798                 "xtaSS TTxUUb",
799 
800                 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
801                 "xtat txtb", // pos 3,8,3,8
802                 "xtaTT TTxTTb",
803         };
804 
805         // Array of 4n positions -- these go with the DATA array
806         // They are: contextStart, contextLimit, start, limit
807         int[] POS = {
808                 0, 9, 0, 9,
809                 2, 9, 3, 8,
810                 3, 8, 3, 8,
811         };
812 
813         int n = DATA.length/3;
814         for (int i=0; i<n; i++) {
815             Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
816             Transliterator.Position pos = new Transliterator.Position(
817                     POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
818             ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
819             t.transliterate(rsource, pos);
820             t.finishTransliteration(rsource, pos);
821             String result = rsource.toString();
822             String exp = DATA[3*i+2];
823             expectAux(Utility.escape(DATA[3*i]),
824                     DATA[3*i+1],
825                     result,
826                     result.equals(exp),
827                     exp);
828         }
829     }
830 
831     /**
832      * Test the Hiragana-Katakana transliterator.
833      */
834     @Test
TestHiraganaKatakana()835     public void TestHiraganaKatakana() {
836         Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
837         Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
838 
839         // Array of 3n items
840         // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
841         String[] DATA = {
842                 "both",
843                 "\u3042\u3090\u3099\u3092\u3050",
844                 "\u30A2\u30F8\u30F2\u30B0",
845 
846                 "kh",
847                 "\u307C\u3051\u3060\u3042\u3093\u30FC",
848                 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
849         };
850 
851         for (int i=0; i<DATA.length; i+=3) {
852             switch (DATA[i].charAt(0)) {
853             case 'h': // Hiragana-Katakana
854                 expect(hk, DATA[i+1], DATA[i+2]);
855                 break;
856             case 'k': // Katakana-Hiragana
857                 expect(kh, DATA[i+2], DATA[i+1]);
858                 break;
859             case 'b': // both
860                 expect(hk, DATA[i+1], DATA[i+2]);
861                 expect(kh, DATA[i+2], DATA[i+1]);
862                 break;
863             }
864         }
865 
866     }
867 
868     @Test
TestCopyJ476()869     public void TestCopyJ476() {
870         // This is a C++-only copy constructor test
871     }
872 
873     /**
874      * Test inter-Indic transliterators.  These are composed.
875      */
876     @Test
TestInterIndic()877     public void TestInterIndic() {
878         String ID = "Devanagari-Gujarati";
879         Transliterator dg = Transliterator.getInstance(ID);
880         if (dg == null) {
881             errln("FAIL: getInstance(" + ID + ") returned null");
882             return;
883         }
884         String id = dg.getID();
885         if (!id.equals(ID)) {
886             errln("FAIL: getInstance(" + ID + ").getID() => " + id);
887         }
888         String dev = "\u0901\u090B\u0925";
889         String guj = "\u0A81\u0A8B\u0AA5";
890         expect(dg, dev, guj);
891     }
892 
893     /**
894      * Test filter syntax in IDs. (J23)
895      */
896     @Test
TestFilterIDs()897     public void TestFilterIDs() {
898         String[] DATA = {
899                 "[aeiou]Any-Hex", // ID
900                 "[aeiou]Hex-Any", // expected inverse ID
901                 "quizzical",      // src
902                 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
903 
904                 "[aeiou]Any-Hex;[^5]Hex-Any",
905                 "[^5]Any-Hex;[aeiou]Hex-Any",
906                 "quizzical",
907                 "q\\u0075izzical",
908 
909                 "[abc]Null",
910                 "[abc]Null",
911                 "xyz",
912                 "xyz",
913         };
914 
915         for (int i=0; i<DATA.length; i+=4) {
916             String ID = DATA[i];
917             Transliterator t = Transliterator.getInstance(ID);
918             expect(t, DATA[i+2], DATA[i+3]);
919 
920             // Check the ID
921             if (!ID.equals(t.getID())) {
922                 errln("FAIL: getInstance(" + ID + ").getID() => " +
923                         t.getID());
924             }
925 
926             // Check the inverse
927             String uID = DATA[i+1];
928             Transliterator u = t.getInverse();
929             if (u == null) {
930                 errln("FAIL: " + ID + ".getInverse() returned NULL");
931             } else if (!u.getID().equals(uID)) {
932                 errln("FAIL: " + ID + ".getInverse().getID() => " +
933                         u.getID() + ", expected " + uID);
934             }
935         }
936     }
937 
938     /**
939      * Test the case mapping transliterators.
940      */
941     @Test
TestCaseMap()942     public void TestCaseMap() {
943         Transliterator toUpper =
944             Transliterator.getInstance("Any-Upper[^xyzXYZ]");
945         Transliterator toLower =
946             Transliterator.getInstance("Any-Lower[^xyzXYZ]");
947         Transliterator toTitle =
948             Transliterator.getInstance("Any-Title[^xyzXYZ]");
949 
950         expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
951         "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
952         expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
953         "the quick brown foX jumped over the lazY dogs.");
954         expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
955         "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
956     }
957 
958     /**
959      * Test the name mapping transliterators.
960      */
961     @Test
TestNameMap()962     public void TestNameMap() {
963         Transliterator uni2name =
964             Transliterator.getInstance("Any-Name[^abc]");
965         Transliterator name2uni =
966             Transliterator.getInstance("Name-Any");
967 
968         expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
969         "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
970         expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
971         "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
972 
973         // round trip
974         Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
975 
976         String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
977         expect(t, s, s);
978     }
979 
980     /**
981      * Test liberalized ID syntax.  1006c
982      */
983     @Test
TestLiberalizedID()984     public void TestLiberalizedID() {
985         // Some test cases have an expected getID() value of NULL.  This
986         // means I have disabled the test case for now.  This stuff is
987         // still under development, and I haven't decided whether to make
988         // getID() return canonical case yet.  It will all get rewritten
989         // with the move to Source-Target/Variant IDs anyway. [aliu]
990         String DATA[] = {
991                 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
992                 "  Null  ", "Null", "whitespace",
993                 " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
994                 "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",
995         };
996 
997         for (int i=0; i<DATA.length; i+=3) {
998             try {
999                 Transliterator t = Transliterator.getInstance(DATA[i]);
1000                 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
1001                     logln("Ok: " + DATA[i+2] +
1002                             " create ID \"" + DATA[i] + "\" => \"" +
1003                             t.getID() + "\"");
1004                 } else {
1005                     errln("FAIL: " + DATA[i+2] +
1006                             " create ID \"" + DATA[i] + "\" => \"" +
1007                             t.getID() + "\", exp \"" + DATA[i+1] + "\"");
1008                 }
1009             } catch (IllegalArgumentException e) {
1010                 errln("FAIL: " + DATA[i+2] +
1011                         " create ID \"" + DATA[i] + "\"");
1012             }
1013         }
1014     }
1015 
1016     @Test
TestCreateInstance()1017     public void TestCreateInstance() {
1018         String FORWARD = "F";
1019         String REVERSE = "R";
1020         String DATA[] = {
1021                 // Column 1: id
1022                 // Column 2: direction
1023                 // Column 3: expected ID, or "" if expect failure
1024                 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1025 
1026                 // JB#2689: bad compound causes crash
1027                 "InvalidSource-InvalidTarget", FORWARD, "",
1028                 "InvalidSource-InvalidTarget", REVERSE, "",
1029                 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1030                 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1031                 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1032                 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1033 
1034                 null
1035         };
1036 
1037         for (int i=0; DATA[i]!=null; i+=3) {
1038             String id=DATA[i];
1039             int dir = (DATA[i+1]==FORWARD)?
1040                     Transliterator.FORWARD:Transliterator.REVERSE;
1041             String expID=DATA[i+2];
1042             Exception e = null;
1043             Transliterator t;
1044             try {
1045                 t = Transliterator.getInstance(id,dir);
1046             } catch (Exception e1) {
1047                 e = e1;
1048                 t = null;
1049             }
1050             String newID = (t!=null)?t.getID():"";
1051             boolean ok = (newID.equals(expID));
1052             if (t==null) {
1053                 newID = e.getMessage();
1054             }
1055             if (ok) {
1056                 logln("Ok: createInstance(" +
1057                         id + "," + DATA[i+1] + ") => " + newID);
1058             } else {
1059                 errln("FAIL: createInstance(" +
1060                         id + "," + DATA[i+1] + ") => " + newID +
1061                         ", expected " + expID);
1062             }
1063         }
1064     }
1065 
1066     /**
1067      * Test the normalization transliterator.
1068      */
1069     @Test
TestNormalizationTransliterator()1070     public void TestNormalizationTransliterator() {
1071         // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
1072         // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1073         String[][] CANON = {
1074                 // Input               Decomposed            Composed
1075                 {"cat",                "cat",                "cat"               },
1076                 {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },
1077 
1078                 {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above
1079                 {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above
1080 
1081                 {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above
1082                 {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below
1083                 {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above
1084 
1085                 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1086                 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1087 
1088                 {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave
1089                 {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave
1090                 {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron
1091 
1092                 {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign
1093                 {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring
1094 
1095                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0
1096                 {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0
1097 
1098                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1099                 {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },
1100 
1101                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1102                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1103                 {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten
1104                 {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten
1105                 {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten
1106 
1107                 {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },
1108         };
1109 
1110         String[][] COMPAT = {
1111                 // Input               Decomposed            Composed
1112                 {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed
1113 
1114                 {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0
1115                 {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i
1116 
1117                 {"Henry IV",           "Henry IV",           "Henry IV"          },
1118                 {"Henry \u2163",       "Henry IV",           "Henry IV"          },
1119 
1120                 {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1121                 {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1122 
1123                 {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten
1124         };
1125 
1126         Transliterator NFD = Transliterator.getInstance("NFD");
1127         Transliterator NFC = Transliterator.getInstance("NFC");
1128         for (int i=0; i<CANON.length; ++i) {
1129             String in = CANON[i][0];
1130             String expd = CANON[i][1];
1131             String expc = CANON[i][2];
1132             expect(NFD, in, expd);
1133             expect(NFC, in, expc);
1134         }
1135 
1136         Transliterator NFKD = Transliterator.getInstance("NFKD");
1137         Transliterator NFKC = Transliterator.getInstance("NFKC");
1138         for (int i=0; i<COMPAT.length; ++i) {
1139             String in = COMPAT[i][0];
1140             String expkd = COMPAT[i][1];
1141             String expkc = COMPAT[i][2];
1142             expect(NFKD, in, expkd);
1143             expect(NFKC, in, expkc);
1144         }
1145 
1146         Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1147         expect(t, "\u010dx", "c\u030C");
1148     }
1149 
1150     /**
1151      * Test compound RBT rules.
1152      */
1153     @Test
TestCompoundRBT()1154     public void TestCompoundRBT() {
1155         // Careful with spacing and ';' here:  Phrase this exactly
1156         // as toRules() is going to return it.  If toRules() changes
1157         // with regard to spacing or ';', then adjust this string.
1158         String rule = "::Hex-Any;\n" +
1159         "::Any-Lower;\n" +
1160         "a > '.A.';\n" +
1161         "b > '.B.';\n" +
1162         "::[^t]Any-Upper;";
1163         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1164         if (t == null) {
1165             errln("FAIL: createFromRules failed");
1166             return;
1167         }
1168         expect(t, "\u0043at in the hat, bat on the mat",
1169         "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1170         String r = t.toRules(true);
1171         if (r.equals(rule)) {
1172             logln("OK: toRules() => " + r);
1173         } else {
1174             errln("FAIL: toRules() => " + r +
1175                     ", expected " + rule);
1176         }
1177 
1178         // Now test toRules
1179         t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1180         if (t == null) {
1181             errln("FAIL: createInstance failed");
1182             return;
1183         }
1184         String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1185         r = t.toRules(true);
1186         if (!r.equals(exp)) {
1187             errln("FAIL: toRules() => " + r +
1188                     ", expected " + exp);
1189         } else {
1190             logln("OK: toRules() => " + r);
1191         }
1192 
1193         // Round trip the result of toRules
1194         t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1195         if (t == null) {
1196             errln("FAIL: createFromRules #2 failed");
1197             return;
1198         } else {
1199             logln("OK: createFromRules(" + r + ") succeeded");
1200         }
1201 
1202         // Test toRules again
1203         r = t.toRules(true);
1204         if (!r.equals(exp)) {
1205             errln("FAIL: toRules() => " + r +
1206                     ", expected " + exp);
1207         } else {
1208             logln("OK: toRules() => " + r);
1209         }
1210 
1211         // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1212         // to what the regenerated ID will look like.
1213         String id = "Upper(Lower);(NFKC)";
1214         t = Transliterator.getInstance(id, Transliterator.FORWARD);
1215         if (t == null) {
1216             errln("FAIL: createInstance #2 failed");
1217             return;
1218         }
1219         if (t.getID().equals(id)) {
1220             logln("OK: created " + id);
1221         } else {
1222             errln("FAIL: createInstance(" + id +
1223                     ").getID() => " + t.getID());
1224         }
1225 
1226         Transliterator u = t.getInverse();
1227         if (u == null) {
1228             errln("FAIL: createInverse failed");
1229             return;
1230         }
1231         exp = "NFKC();Lower(Upper)";
1232         if (u.getID().equals(exp)) {
1233             logln("OK: createInverse(" + id + ") => " +
1234                     u.getID());
1235         } else {
1236             errln("FAIL: createInverse(" + id + ") => " +
1237                     u.getID());
1238         }
1239     }
1240 
1241     /**
1242      * Compound filter semantics were orginially not implemented
1243      * correctly.  Originally, each component filter f(i) is replaced by
1244      * f'(i) = f(i) && g, where g is the filter for the compound
1245      * transliterator.
1246      *
1247      * From Mark:
1248      *
1249      * Suppose and I have a transliterator X. Internally X is
1250      * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1251      *
1252      * The compound should convert all greek characters (through latin) to
1253      * cyrillic, then lowercase the result. The filter should say "don't
1254      * touch 'A' in the original". But because an intermediate result
1255      * happens to go through "A", the Greek Alpha gets hung up.
1256      */
1257     @Test
TestCompoundFilter()1258     public void TestCompoundFilter() {
1259         Transliterator t = Transliterator.getInstance
1260         ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
1261         t.setFilter(new UnicodeSet("[^A]"));
1262 
1263         // Only the 'A' at index 1 should remain unchanged
1264         expect(t,
1265                 CharsToUnicodeString("BA\\u039A\\u0391"),
1266                 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1267     }
1268 
1269     /**
1270      * Test the "Remove" transliterator.
1271      */
1272     @Test
TestRemove()1273     public void TestRemove() {
1274         Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1275         expect(t, "The quick brown fox.",
1276         "Th qck brwn fx.");
1277     }
1278 
1279     @Test
TestToRules()1280     public void TestToRules() {
1281         String RBT = "rbt";
1282         String SET = "set";
1283         String[] DATA = {
1284                 RBT,
1285                 "$a=\\u4E61; [$a] > A;",
1286                 "[\\u4E61] > A;",
1287 
1288                 RBT,
1289                 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1290                 "[[:Zs:][:Zl:]]{a} > A;",
1291 
1292                 SET,
1293                 "[[:Zs:][:Zl:]]",
1294                 "[[:Zs:][:Zl:]]",
1295 
1296                 SET,
1297                 "[:Ps:]",
1298                 "[:Ps:]",
1299 
1300                 SET,
1301                 "[:L:]",
1302                 "[:L:]",
1303 
1304                 SET,
1305                 "[[:L:]-[A]]",
1306                 "[[:L:]-[A]]",
1307 
1308                 SET,
1309                 "[~[:Lu:][:Ll:]]",
1310                 "[~[:Lu:][:Ll:]]",
1311 
1312                 SET,
1313                 "[~[a-z]]",
1314                 "[~[a-z]]",
1315 
1316                 RBT,
1317                 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1318                 "[^[:Zs:]]{a} > A;",
1319 
1320                 RBT,
1321                 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1322                 "[[a-z]-[:Zs:]]{a} > A;",
1323 
1324                 RBT,
1325                 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1326                 "[[:Zs:]&[a-z]]{a} > A;",
1327 
1328                 RBT,
1329                 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1330                 "[x[:Zs:]]{a} > A;",
1331 
1332                 RBT,
1333                 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1334                 "$macron = \\u0304 ;"+
1335                 "$evowel = [aeiouyAEIOUY] ;"+
1336                 "$iotasub = \\u0345 ;"+
1337                 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1338                 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1339 
1340                 RBT,
1341                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1342                 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1343         };
1344 
1345         for (int d=0; d < DATA.length; d+=3) {
1346             if (DATA[d] == RBT) {
1347                 // Transliterator test
1348                 Transliterator t = Transliterator.createFromRules("ID",
1349                         DATA[d+1], Transliterator.FORWARD);
1350                 if (t == null) {
1351                     errln("FAIL: createFromRules failed");
1352                     return;
1353                 }
1354                 String rules, escapedRules;
1355                 rules = t.toRules(false);
1356                 escapedRules = t.toRules(true);
1357                 String expRules = Utility.unescape(DATA[d+2]);
1358                 String expEscapedRules = DATA[d+2];
1359                 if (rules.equals(expRules)) {
1360                     logln("Ok: " + DATA[d+1] +
1361                             " => " + Utility.escape(rules));
1362                 } else {
1363                     errln("FAIL: " + DATA[d+1] +
1364                             " => " + Utility.escape(rules + ", exp " + expRules));
1365                 }
1366                 if (escapedRules.equals(expEscapedRules)) {
1367                     logln("Ok: " + DATA[d+1] +
1368                             " => " + escapedRules);
1369                 } else {
1370                     errln("FAIL: " + DATA[d+1] +
1371                             " => " + escapedRules + ", exp " + expEscapedRules);
1372                 }
1373 
1374             } else {
1375                 // UnicodeSet test
1376                 String pat = DATA[d+1];
1377                 String expToPat = DATA[d+2];
1378                 UnicodeSet set = new UnicodeSet(pat);
1379 
1380                 // Adjust spacing etc. as necessary.
1381                 String toPat;
1382                 toPat = set.toPattern(true);
1383                 if (expToPat.equals(toPat)) {
1384                     logln("Ok: " + pat +
1385                             " => " + toPat);
1386                 } else {
1387                     errln("FAIL: " + pat +
1388                             " => " + Utility.escape(toPat) +
1389                             ", exp " + Utility.escape(pat));
1390                 }
1391             }
1392         }
1393     }
1394 
1395     @Test
TestContext()1396     public void TestContext() {
1397         Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
1398 
1399         expect("de > x; {d}e > y;",
1400                 "de",
1401                 "ye",
1402                 pos);
1403 
1404         expect("ab{c} > z;",
1405                 "xadabdabcy",
1406         "xadabdabzy");
1407     }
1408 
CharsToUnicodeString(String s)1409     static final String CharsToUnicodeString(String s) {
1410         return Utility.unescape(s);
1411     }
1412 
1413     @Test
TestSupplemental()1414     public void TestSupplemental() {
1415 
1416         expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1417         "a > $a; $s > i;"),
1418         CharsToUnicodeString("ab\\U0001030Fx"),
1419         CharsToUnicodeString("\\U00010300bix"));
1420 
1421         expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1422                 "$b=[A-Z\\U00010400-\\U0001044D];" +
1423         "($a)($b) > $2 $1;"),
1424         CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1425         CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1426 
1427         // k|ax\\U00010300xm
1428 
1429         // k|a\\U00010400\\U00010300xm
1430         // ky|\\U00010400\\U00010300xm
1431         // ky\\U00010400|\\U00010300xm
1432 
1433         // ky\\U00010400|\\U00010300\\U00010400m
1434         // ky\\U00010400y|\\U00010400m
1435         expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1436                 "$a {x} > | @ \\U00010400;" +
1437         "{$a} [^\\u0000-\\uFFFF] > y;"),
1438         CharsToUnicodeString("kax\\U00010300xm"),
1439         CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1440 
1441         expect(Transliterator.getInstance("Any-Name"),
1442                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1443         "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1444 
1445         expect(Transliterator.getInstance("Name-Any"),
1446                 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1447                 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1448 
1449         expect(Transliterator.getInstance("Any-Hex/Unicode"),
1450                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1451         "U+10330U+10FF00U+E0061U+00A0");
1452 
1453         expect(Transliterator.getInstance("Any-Hex/C"),
1454                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1455         "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1456 
1457         expect(Transliterator.getInstance("Any-Hex/Perl"),
1458                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1459         "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1460 
1461         expect(Transliterator.getInstance("Any-Hex/Java"),
1462                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1463         "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1464 
1465         expect(Transliterator.getInstance("Any-Hex/XML"),
1466                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1467         "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1468 
1469         expect(Transliterator.getInstance("Any-Hex/XML10"),
1470                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1471         "&#66352;&#1113856;&#917601;&#160;");
1472 
1473         expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1474                 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1475                 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1476     }
1477 
1478     @Test
TestQuantifier()1479     public void TestQuantifier() {
1480 
1481         // Make sure @ in a quantified anteContext works
1482         expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1483                 "AAAAAb",
1484         "aaa(aac)");
1485 
1486         // Make sure @ in a quantified postContext works
1487         expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1488                 "baaaaa",
1489         "caa(aaa)");
1490 
1491         // Make sure @ in a quantified postContext with seg ref works
1492         expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1493                 "baaaaa",
1494         "baa(aaa)");
1495 
1496         // Make sure @ past ante context doesn't enter ante context
1497         Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1498         expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1499                 "xxxab",
1500                 "xxx(ac)",
1501                 pos);
1502 
1503         // Make sure @ past post context doesn't pass limit
1504         Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1505         expect("{b} a+ > c @@ |; x > y; a > A;",
1506                 "baxx",
1507                 "caxx",
1508                 pos2);
1509 
1510         // Make sure @ past post context doesn't enter post context
1511         expect("{b} a+ > c @@ |; x > y; a > A;",
1512                 "baxx",
1513         "cayy");
1514 
1515         expect("(ab)? c > d;",
1516                 "c abc ababc",
1517         "d d abd");
1518 
1519         // NOTE: The (ab)+ when referenced just yields a single "ab",
1520         // not the full sequence of them.  This accords with perl behavior.
1521         expect("(ab)+ {x} > '(' $1 ')';",
1522                 "x abx ababxy",
1523         "x ab(ab) abab(ab)y");
1524 
1525         expect("b+ > x;",
1526                 "ac abc abbc abbbc",
1527         "ac axc axc axc");
1528 
1529         expect("[abc]+ > x;",
1530                 "qac abrc abbcs abtbbc",
1531         "qx xrx xs xtx");
1532 
1533         expect("q{(ab)+} > x;",
1534                 "qa qab qaba qababc qaba",
1535         "qa qx qxa qxc qxa");
1536 
1537         expect("q(ab)* > x;",
1538                 "qa qab qaba qababc",
1539         "xa x xa xc");
1540 
1541         // NOTE: The (ab)+ when referenced just yields a single "ab",
1542         // not the full sequence of them.  This accords with perl behavior.
1543         expect("q(ab)* > '(' $1 ')';",
1544                 "qa qab qaba qababc",
1545         "()a (ab) (ab)a (ab)c");
1546 
1547         // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1548         // quoted string
1549         expect("'ab'+ > x;",
1550                 "bb ab ababb",
1551         "bb x xb");
1552 
1553         // $foo+ and $foo* -- the quantifier should apply to the entire
1554         // variable reference
1555         expect("$var = ab; $var+ > x;",
1556                 "bb ab ababb",
1557         "bb x xb");
1558     }
1559 
1560     static class TestFact implements Transliterator.Factory {
1561         static class NameableNullTrans extends Transliterator {
NameableNullTrans(String id)1562             public NameableNullTrans(String id) {
1563                 super(id, null);
1564             }
handleTransliterate(Replaceable text, Position offsets, boolean incremental)1565             protected void handleTransliterate(Replaceable text,
1566                     Position offsets, boolean incremental) {
1567                 offsets.start = offsets.limit;
1568             }
1569         }
1570         String id;
TestFact(String theID)1571         public TestFact(String theID) {
1572             id = theID;
1573         }
getInstance(String ignoredID)1574         public Transliterator getInstance(String ignoredID) {
1575             return new NameableNullTrans(id);
1576         }
1577     }
1578 
1579     @Test
TestSTV()1580     public void TestSTV() {
1581         Enumeration es = Transliterator.getAvailableSources();
1582         for (int i=0; es.hasMoreElements(); ++i) {
1583             String source = (String) es.nextElement();
1584             logln("" + i + ": " + source);
1585             if (source.length() == 0) {
1586                 errln("FAIL: empty source");
1587                 continue;
1588             }
1589             Enumeration et = Transliterator.getAvailableTargets(source);
1590             for (int j=0; et.hasMoreElements(); ++j) {
1591                 String target = (String) et.nextElement();
1592                 logln(" " + j + ": " + target);
1593                 if (target.length() == 0) {
1594                     errln("FAIL: empty target");
1595                     continue;
1596                 }
1597                 Enumeration ev = Transliterator.getAvailableVariants(source, target);
1598                 for (int k=0; ev.hasMoreElements(); ++k) {
1599                     String variant = (String) ev.nextElement();
1600                     if (variant.length() == 0) {
1601                         logln("  " + k + ": <empty>");
1602                     } else {
1603                         logln("  " + k + ": " + variant);
1604                     }
1605                 }
1606             }
1607         }
1608 
1609         // Test registration
1610         String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1611         String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1612         String[] SOURCES = { null, "Seoridf", "Oewoir" };
1613         for (int i=0; i<3; ++i) {
1614             Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1615             try {
1616                 Transliterator t = Transliterator.getInstance(IDS[i]);
1617                 if (t.getID().equals(IDS[i])) {
1618                     logln("Ok: Registration/creation succeeded for ID " +
1619                             IDS[i]);
1620                 } else {
1621                     errln("FAIL: Registration of ID " +
1622                             IDS[i] + " creates ID " + t.getID());
1623                 }
1624                 Transliterator.unregister(IDS[i]);
1625                 try {
1626                     t = Transliterator.getInstance(IDS[i]);
1627                     errln("FAIL: Unregistration failed for ID " +
1628                             IDS[i] + "; still receiving ID " + t.getID());
1629                 } catch (IllegalArgumentException e2) {
1630                     // Good; this is what we expect
1631                     logln("Ok; Unregistered " + IDS[i]);
1632                 }
1633             } catch (IllegalArgumentException e) {
1634                 errln("FAIL: Registration/creation failed for ID " +
1635                         IDS[i]);
1636             } finally {
1637                 Transliterator.unregister(IDS[i]);
1638             }
1639         }
1640 
1641         // Make sure getAvailable API reflects removal
1642         for (Enumeration e = Transliterator.getAvailableIDs();
1643         e.hasMoreElements(); ) {
1644             String id = (String) e.nextElement();
1645             for (int i=0; i<3; ++i) {
1646                 if (id.equals(FULL_IDS[i])) {
1647                     errln("FAIL: unregister(" + id + ") failed");
1648                 }
1649             }
1650         }
1651         for (Enumeration e = Transliterator.getAvailableTargets("Any");
1652         e.hasMoreElements(); ) {
1653             String t = (String) e.nextElement();
1654             if (t.equals(IDS[0])) {
1655                 errln("FAIL: unregister(Any-" + t + ") failed");
1656             }
1657         }
1658         for (Enumeration e = Transliterator.getAvailableSources();
1659         e.hasMoreElements(); ) {
1660             String s = (String) e.nextElement();
1661             for (int i=0; i<3; ++i) {
1662                 if (SOURCES[i] == null) continue;
1663                 if (s.equals(SOURCES[i])) {
1664                     errln("FAIL: unregister(" + s + "-*) failed");
1665                 }
1666             }
1667         }
1668     }
1669 
1670     /**
1671      * Test inverse of Greek-Latin; Title()
1672      */
1673     @Test
TestCompoundInverse()1674     public void TestCompoundInverse() {
1675         Transliterator t = Transliterator.getInstance
1676         ("Greek-Latin; Title()", Transliterator.REVERSE);
1677         if (t == null) {
1678             errln("FAIL: createInstance");
1679             return;
1680         }
1681         String exp = "(Title);Latin-Greek";
1682         if (t.getID().equals(exp)) {
1683             logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1684                     t.getID());
1685         } else {
1686             errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1687                     t.getID() + "\", expected \"" + exp + "\"");
1688         }
1689     }
1690 
1691     /**
1692      * Test NFD chaining with RBT
1693      */
1694     @Test
TestNFDChainRBT()1695     public void TestNFDChainRBT() {
1696         Transliterator t = Transliterator.createFromRules(
1697                 "TEST", "::NFD; aa > Q; a > q;",
1698                 Transliterator.FORWARD);
1699         logln(t.toRules(true));
1700         expect(t, "aa", "Q");
1701     }
1702 
1703     /**
1704      * Inverse of "Null" should be "Null". (J21)
1705      */
1706     @Test
TestNullInverse()1707     public void TestNullInverse() {
1708         Transliterator t = Transliterator.getInstance("Null");
1709         Transliterator u = t.getInverse();
1710         if (!u.getID().equals("Null")) {
1711             errln("FAIL: Inverse of Null should be Null");
1712         }
1713     }
1714 
1715     /**
1716      * Check ID of inverse of alias. (J22)
1717      */
1718     @Test
TestAliasInverseID()1719     public void TestAliasInverseID() {
1720         String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1721         Transliterator t = Transliterator.getInstance(ID);
1722         Transliterator u = t.getInverse();
1723         String exp = "Hangul-Latin";
1724         String got = u.getID();
1725         if (!got.equals(exp)) {
1726             errln("FAIL: Inverse of " + ID + " is " + got +
1727                     ", expected " + exp);
1728         }
1729     }
1730 
1731     /**
1732      * Test IDs of inverses of compound transliterators. (J20)
1733      */
1734     @Test
TestCompoundInverseID()1735     public void TestCompoundInverseID() {
1736         String ID = "Latin-Jamo;NFC(NFD)";
1737         Transliterator t = Transliterator.getInstance(ID);
1738         Transliterator u = t.getInverse();
1739         String exp = "NFD(NFC);Jamo-Latin";
1740         String got = u.getID();
1741         if (!got.equals(exp)) {
1742             errln("FAIL: Inverse of " + ID + " is " + got +
1743                     ", expected " + exp);
1744         }
1745     }
1746 
1747     /**
1748      * Test undefined variable.
1749      */
1750     @Test
TestUndefinedVariable()1751     public void TestUndefinedVariable() {
1752         String rule = "$initial } a <> \u1161;";
1753         try {
1754             Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1755         } catch (IllegalArgumentException e) {
1756             logln("OK: Got exception for " + rule + ", as expected: " +
1757                     e.getMessage());
1758             return;
1759         }
1760         errln("Fail: bogus rule " + rule + " compiled without error");
1761     }
1762 
1763     /**
1764      * Test empty context.
1765      */
1766     @Test
TestEmptyContext()1767     public void TestEmptyContext() {
1768         expect(" { a } > b;", "xay a ", "xby b ");
1769     }
1770 
1771     /**
1772      * Test compound filter ID syntax
1773      */
1774     @Test
TestCompoundFilterID()1775     public void TestCompoundFilterID() {
1776         String[] DATA = {
1777                 // Col. 1 = ID or rule set (latter must start with #)
1778 
1779                 // = columns > 1 are null if expect col. 1 to be illegal =
1780 
1781                 // Col. 2 = direction, "F..." or "R..."
1782                 // Col. 3 = source string
1783                 // Col. 4 = exp result
1784 
1785                 "[abc]; [abc]", null, null, null, // multiple filters
1786                 "Latin-Greek; [abc];", null, null, null, // misplaced filter
1787                 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1788                 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1789                 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1790                 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1791         };
1792 
1793         for (int i=0; i<DATA.length; i+=4) {
1794             String id = DATA[i];
1795             int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1796                     Transliterator.REVERSE : Transliterator.FORWARD;
1797             String source = DATA[i+2];
1798             String exp = DATA[i+3];
1799             boolean expOk = (DATA[i+1] != null);
1800             Transliterator t = null;
1801             IllegalArgumentException e = null;
1802             try {
1803                 if (id.charAt(0) == '#') {
1804                     t = Transliterator.createFromRules("ID", id, direction);
1805                 } else {
1806                     t = Transliterator.getInstance(id, direction);
1807                 }
1808             } catch (IllegalArgumentException ee) {
1809                 e = ee;
1810             }
1811             boolean ok = (t != null && e == null);
1812             if (ok == expOk) {
1813                 logln("Ok: " + id + " => " + t +
1814                         (e != null ? (", " + e.getMessage()) : ""));
1815                 if (source != null) {
1816                     expect(t, source, exp);
1817                 }
1818             } else {
1819                 errln("FAIL: " + id + " => " + t +
1820                         (e != null ? (", " + e.getMessage()) : ""));
1821             }
1822         }
1823     }
1824 
1825     /**
1826      * Test new property set syntax
1827      */
1828     @Test
TestPropertySet()1829     public void TestPropertySet() {
1830         expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1831         expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1832         "[ a stitch ]\n[ in time ]\r[ saves 9]");
1833     }
1834 
1835     /**
1836      * Test various failure points of the new 2.0 engine.
1837      */
1838     @Test
TestNewEngine()1839     public void TestNewEngine() {
1840         Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1841         // Katakana should be untouched
1842         expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1843 
1844         if (true) {
1845             // This test will only work if Transliterator.ROLLBACK is
1846             // true.  Otherwise, this test will fail, revealing a
1847             // limitation of global filters in incremental mode.
1848 
1849             Transliterator a =
1850                 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1851             Transliterator A =
1852                 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1853 
1854             //Transliterator array[] = new Transliterator[] {
1855             //    a,
1856             //    Transliterator.getInstance("NFD"),
1857             //    A };
1858             //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1859 
1860             try {
1861                 Transliterator.registerInstance(a);
1862                 Transliterator.registerInstance(A);
1863 
1864                 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1865                 expect(t, "aAaA", "bAbA");
1866 
1867                 Transliterator[] u = t.getElements();
1868                 assertTrue("getElements().length", u.length == 3);
1869                 assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1870                 assertEquals("getElements()[1]", u[1].getID(), "NFD");
1871                 assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1872 
1873                 t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1874                 t.setFilter(new UnicodeSet("[:Ll:]"));
1875                 expect(t, "aAaA", "bAbA");
1876             } finally {
1877                 Transliterator.unregister("a_to_A");
1878                 Transliterator.unregister("A_to_b");
1879             }
1880         }
1881 
1882         expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1883                 "a",
1884         "ax");
1885 
1886         String gr =
1887             "$ddot = \u0308 ;" +
1888             "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1889             "$rough = \u0314 ;" +
1890             "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
1891             "\u03b1 <> a ;" +
1892             "$rough <> h ;";
1893 
1894         expect(gr, "\u03B1\u0314", "ha");
1895     }
1896 
1897     /**
1898      * Test quantified segment behavior.  We want:
1899      * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1900      */
1901     @Test
TestQuantifiedSegment()1902     public void TestQuantifiedSegment() {
1903         // The normal case
1904         expect("([abc]+) > x $1 x;", "cba", "xcbax");
1905 
1906         // The tricky case; the quantifier is around the segment
1907         expect("([abc])+ > x $1 x;", "cba", "xax");
1908 
1909         // Tricky case in reverse direction
1910         expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1911 
1912         // Check post-context segment
1913         expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1914 
1915         // Test toRule/toPattern for non-quantified segment.
1916         // Careful with spacing here.
1917         String r = "([a-c]){q} > x $1 x;";
1918         Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1919         String rr = t.toRules(true);
1920         if (!r.equals(rr)) {
1921             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1922         } else {
1923             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1924         }
1925 
1926         // Test toRule/toPattern for quantified segment.
1927         // Careful with spacing here.
1928         r = "([a-c])+{q} > x $1 x;";
1929         t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1930         rr = t.toRules(true);
1931         if (!r.equals(rr)) {
1932             errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1933         } else {
1934             logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1935         }
1936     }
1937 
1938     //======================================================================
1939     // Ram's tests
1940     //======================================================================
1941     /* this test performs  test of rules in ISO 15915 */
1942     @Test
TestDevanagariLatinRT()1943     public void  TestDevanagariLatinRT(){
1944         String[]  source = {
1945                 "bh\u0101rata",
1946                 "kra",
1947                 "k\u1E63a",
1948                 "khra",
1949                 "gra",
1950                 "\u1E45ra",
1951                 "cra",
1952                 "chra",
1953                 "j\u00F1a",
1954                 "jhra",
1955                 "\u00F1ra",
1956                 "\u1E6Dya",
1957                 "\u1E6Dhra",
1958                 "\u1E0Dya",
1959                 //"r\u0323ya", // \u095c is not valid in Devanagari
1960                 "\u1E0Dhya",
1961                 "\u1E5Bhra",
1962                 "\u1E47ra",
1963                 "tta",
1964                 "thra",
1965                 "dda",
1966                 "dhra",
1967                 "nna",
1968                 "pra",
1969                 "phra",
1970                 "bra",
1971                 "bhra",
1972                 "mra",
1973                 "\u1E49ra",
1974                 //"l\u0331ra",
1975                 "yra",
1976                 "\u1E8Fra",
1977                 //"l-",
1978                 "vra",
1979                 "\u015Bra",
1980                 "\u1E63ra",
1981                 "sra",
1982                 "hma",
1983                 "\u1E6D\u1E6Da",
1984                 "\u1E6D\u1E6Dha",
1985                 "\u1E6Dh\u1E6Dha",
1986                 "\u1E0D\u1E0Da",
1987                 "\u1E0D\u1E0Dha",
1988                 "\u1E6Dya",
1989                 "\u1E6Dhya",
1990                 "\u1E0Dya",
1991                 "\u1E0Dhya",
1992                 // Not roundtrippable --
1993                 // \u0939\u094d\u094d\u092E  - hma
1994                 // \u0939\u094d\u092E         - hma
1995                 // CharsToUnicodeString("hma"),
1996                 "hya",
1997                 "\u015Br\u0325",
1998                 "\u015Bca",
1999                 "\u0115",
2000                 "san\u0304j\u012Bb s\u0113nagupta",
2001                 "\u0101nand vaddir\u0101ju",
2002         };
2003         String[]  expected = {
2004                 "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */
2005                 "\u0915\u094D\u0930",          /* kra         */
2006                 "\u0915\u094D\u0937",          /* ks\u0323a  */
2007                 "\u0916\u094D\u0930",          /* khra        */
2008                 "\u0917\u094D\u0930",          /* gra         */
2009                 "\u0919\u094D\u0930",          /* n\u0307ra  */
2010                 "\u091A\u094D\u0930",          /* cra         */
2011                 "\u091B\u094D\u0930",          /* chra        */
2012                 "\u091C\u094D\u091E",          /* jn\u0303a  */
2013                 "\u091D\u094D\u0930",          /* jhra        */
2014                 "\u091E\u094D\u0930",          /* n\u0303ra  */
2015                 "\u091F\u094D\u092F",          /* t\u0323ya  */
2016                 "\u0920\u094D\u0930",          /* t\u0323hra */
2017                 "\u0921\u094D\u092F",          /* d\u0323ya  */
2018                 //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari
2019                 "\u0922\u094D\u092F",          /* d\u0323hya */
2020                 "\u0922\u093C\u094D\u0930",    /* r\u0323hra */
2021                 "\u0923\u094D\u0930",          /* n\u0323ra  */
2022                 "\u0924\u094D\u0924",          /* tta         */
2023                 "\u0925\u094D\u0930",          /* thra        */
2024                 "\u0926\u094D\u0926",          /* dda         */
2025                 "\u0927\u094D\u0930",          /* dhra        */
2026                 "\u0928\u094D\u0928",          /* nna         */
2027                 "\u092A\u094D\u0930",          /* pra         */
2028                 "\u092B\u094D\u0930",          /* phra        */
2029                 "\u092C\u094D\u0930",          /* bra         */
2030                 "\u092D\u094D\u0930",          /* bhra        */
2031                 "\u092E\u094D\u0930",          /* mra         */
2032                 "\u0929\u094D\u0930",          /* n\u0331ra  */
2033                 //"\u0934\u094D\u0930",          /* l\u0331ra  */
2034                 "\u092F\u094D\u0930",          /* yra         */
2035                 "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */
2036                 //"l-",
2037                 "\u0935\u094D\u0930",          /* vra         */
2038                 "\u0936\u094D\u0930",          /* s\u0301ra  */
2039                 "\u0937\u094D\u0930",          /* s\u0323ra  */
2040                 "\u0938\u094D\u0930",          /* sra         */
2041                 "\u0939\u094d\u092E",          /* hma         */
2042                 "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */
2043                 "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */
2044                 "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/
2045                 "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */
2046                 "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */
2047                 "\u091F\u094D\u092F",          /* t\u0323ya  */
2048                 "\u0920\u094D\u092F",          /* t\u0323hya */
2049                 "\u0921\u094D\u092F",          /* d\u0323ya  */
2050                 "\u0922\u094D\u092F",          /* d\u0323hya */
2051                 // "hma",                         /* hma         */
2052                 "\u0939\u094D\u092F",          /* hya         */
2053                 "\u0936\u0943",                /* s\u0301r\u0325a  */
2054                 "\u0936\u094D\u091A",          /* s\u0301ca  */
2055                 "\u090d",                      /* e\u0306    */
2056                 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
2057                 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
2058         };
2059 
2060         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
2061         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2062 
2063         for(int i= 0; i<source.length; i++){
2064             expect(latinToDev,(source[i]),(expected[i]));
2065             expect(devToLatin,(expected[i]),(source[i]));
2066         }
2067 
2068     }
2069     @Test
TestTeluguLatinRT()2070     public void  TestTeluguLatinRT(){
2071         String[]  source = {
2072                 "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */
2073                 "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */
2074                 "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */
2075                 "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */
2076                 "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */
2077                 "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */
2078                 "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */
2079                 "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */
2080                 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */
2081                 "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */
2082         };
2083 
2084         String[]  expected = {
2085                 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2086                 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2087                 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2088                 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2089                 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2090                 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2091                 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2092                 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2093                 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2094                 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2095         };
2096 
2097 
2098         Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2099         Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2100 
2101         for(int i= 0; i<source.length; i++){
2102             expect(latinToDev,(source[i]),(expected[i]));
2103             expect(devToLatin,(expected[i]),(source[i]));
2104         }
2105     }
2106 
2107     @Test
TestSanskritLatinRT()2108     public void  TestSanskritLatinRT(){
2109         int MAX_LEN =15;
2110         String[]  source = {
2111                 "rmk\u1E63\u0113t",
2112                 "\u015Br\u012Bmad",
2113                 "bhagavadg\u012Bt\u0101",
2114                 "adhy\u0101ya",
2115                 "arjuna",
2116                 "vi\u1E63\u0101da",
2117                 "y\u014Dga",
2118                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2119                 "uv\u0101cr\u0325",
2120                 "dharmak\u1E63\u0113tr\u0113",
2121                 "kuruk\u1E63\u0113tr\u0113",
2122                 "samav\u0113t\u0101",
2123                 "yuyutsava\u1E25",
2124                 "m\u0101mak\u0101\u1E25",
2125                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2126                 "kimakurvata",
2127                 "san\u0304java",
2128         };
2129         String[]  expected = {
2130                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2131                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2132                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2133                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2134                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2135                 "\u0935\u093f\u0937\u093e\u0926",
2136                 "\u092f\u094b\u0917",
2137                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2138                 "\u0909\u0935\u093E\u091A\u0943",
2139                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2140                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2141                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2142                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2143                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2144                 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2145                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2146                 "\u0938\u0902\u091c\u0935",
2147         };
2148 
2149         Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2150         Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2151         for(int i= 0; i<MAX_LEN; i++){
2152             expect(latinToDev,(source[i]),(expected[i]));
2153             expect(devToLatin,(expected[i]),(source[i]));
2154         }
2155     }
2156 
2157     @Test
TestCompoundLatinRT()2158     public void  TestCompoundLatinRT(){
2159         int MAX_LEN =15;
2160         String[]  source = {
2161                 "rmk\u1E63\u0113t",
2162                 "\u015Br\u012Bmad",
2163                 "bhagavadg\u012Bt\u0101",
2164                 "adhy\u0101ya",
2165                 "arjuna",
2166                 "vi\u1E63\u0101da",
2167                 "y\u014Dga",
2168                 "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2169                 "uv\u0101cr\u0325",
2170                 "dharmak\u1E63\u0113tr\u0113",
2171                 "kuruk\u1E63\u0113tr\u0113",
2172                 "samav\u0113t\u0101",
2173                 "yuyutsava\u1E25",
2174                 "m\u0101mak\u0101\u1E25",
2175                 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2176                 "kimakurvata",
2177                 "san\u0304java"
2178         };
2179         String[]  expected = {
2180                 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2181                 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2182                 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2183                 "\u0905\u0927\u094d\u092f\u093e\u092f",
2184                 "\u0905\u0930\u094d\u091c\u0941\u0928",
2185                 "\u0935\u093f\u0937\u093e\u0926",
2186                 "\u092f\u094b\u0917",
2187                 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2188                 "\u0909\u0935\u093E\u091A\u0943",
2189                 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2190                 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2191                 "\u0938\u092e\u0935\u0947\u0924\u093e",
2192                 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2193                 "\u092e\u093e\u092e\u0915\u093e\u0903",
2194                 //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2195                 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2196                 "\u0938\u0902\u091c\u0935"
2197         };
2198 
2199         Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2200         Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
2201         for(int i= 0; i<MAX_LEN; i++){
2202             expect(latinToDevToLatin,(source[i]),(source[i]));
2203             expect(devToLatinToDev,(expected[i]),(expected[i]));
2204         }
2205     }
2206     /**
2207      * Test Gurmukhi-Devanagari Tippi and Bindi
2208      */
2209     @Test
TestGurmukhiDevanagari()2210     public void TestGurmukhiDevanagari(){
2211         // the rule says:
2212         // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2213         // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2214 
2215         UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2216         UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2217 
2218         UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2219         UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2220         Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
2221         StringBuffer src = new StringBuffer(" \u0902");
2222         StringBuffer expect = new StringBuffer(" \u0A02");
2223         while(vIter.next()){
2224             src.setCharAt(0,(char) vIter.codepoint);
2225             expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2226             expect(trans,src.toString(),expect.toString());
2227         }
2228 
2229         expect.setCharAt(1,'\u0A70');
2230         while(nvIter.next()){
2231             //src.setCharAt(0,(char) nvIter.codepoint);
2232             src.setCharAt(0,(char)nvIter.codepoint);
2233             expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2234             expect(trans,src.toString(),expect.toString());
2235         }
2236     }
2237     /**
2238      * Test instantiation from a locale.
2239      */
2240     @Test
TestLocaleInstantiation()2241     public void TestLocaleInstantiation() {
2242         Transliterator t;
2243         try{
2244             t = Transliterator.getInstance("te_IN-Latin");
2245             //expect(t, "\u0430", "a");
2246         }catch(IllegalArgumentException ex){
2247             warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
2248         }
2249         try{
2250             t = Transliterator.getInstance("ru_RU-Latin");
2251             expect(t, "\u0430", "a");
2252         }catch(IllegalArgumentException ex){
2253             warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
2254         }
2255         try{
2256             t = Transliterator.getInstance("en-el");
2257             expect(t, "a", "\u03B1");
2258         }catch(IllegalArgumentException ex){
2259             warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2260         }
2261     }
2262 
2263     /**
2264      * Test title case handling of accent (should ignore accents)
2265      */
2266     @Test
TestTitleAccents()2267     public void TestTitleAccents() {
2268         Transliterator t = Transliterator.getInstance("Title");
2269         expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2270     }
2271 
2272     /**
2273      * Basic test of a locale resource based rule.
2274      */
2275     @Test
TestLocaleResource()2276     public void TestLocaleResource() {
2277         String DATA[] = {
2278                 // id                    from             to
2279                 "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",
2280                 "Latin-el",              "b",             "\u03bc\u03c0",
2281                 "Latin-Greek",           "b",             "\u03B2",
2282                 "Greek-Latin/UNGEGN",    "\u03B2",        "v",
2283                 "el-Latin",              "\u03B2",        "v",
2284                 "Greek-Latin",           "\u03B2",        "b",
2285         };
2286         for (int i=0; i<DATA.length; i+=3) {
2287             Transliterator t = Transliterator.getInstance(DATA[i]);
2288             expect(t, DATA[i+1], DATA[i+2]);
2289         }
2290     }
2291 
2292     /**
2293      * Make sure parse errors reference the right line.
2294      */
2295     @Test
TestParseError()2296     public void TestParseError() {
2297         String rule =
2298             "a > b;\n" +
2299             "# more stuff\n" +
2300             "d << b;";
2301         try {
2302             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2303             if(t!=null){
2304                 errln("FAIL: Did not get expected exception");
2305             }
2306         } catch (IllegalArgumentException e) {
2307             String err = e.getMessage();
2308             if (err.indexOf("d << b") >= 0) {
2309                 logln("Ok: " + err);
2310             } else {
2311                 errln("FAIL: " + err);
2312             }
2313             return;
2314         }
2315         errln("FAIL: no syntax error");
2316     }
2317 
2318     /**
2319      * Make sure sets on output are disallowed.
2320      */
2321     @Test
TestOutputSet()2322     public void TestOutputSet() {
2323         String rule = "$set = [a-cm-n]; b > $set;";
2324         Transliterator t = null;
2325         try {
2326             t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2327             if(t!=null){
2328                 errln("FAIL: Did not get the expected exception");
2329             }
2330         } catch (IllegalArgumentException e) {
2331             logln("Ok: " + e.getMessage());
2332             return;
2333         }
2334         errln("FAIL: No syntax error");
2335     }
2336 
2337     /**
2338      * Test the use variable range pragma, making sure that use of
2339      * variable range characters is detected and flagged as an error.
2340      */
2341     @Test
TestVariableRange()2342     public void TestVariableRange() {
2343         String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2344         try {
2345             Transliterator t =
2346                 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2347             if(t!=null){
2348                 errln("FAIL: Did not get the expected exception");
2349             }
2350         } catch (IllegalArgumentException e) {
2351             logln("Ok: " + e.getMessage());
2352             return;
2353         }
2354         errln("FAIL: No syntax error");
2355     }
2356 
2357     /**
2358      * Test invalid post context error handling
2359      */
2360     @Test
TestInvalidPostContext()2361     public void TestInvalidPostContext() {
2362         try {
2363             Transliterator t =
2364                 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2365             if(t!=null){
2366                 errln("FAIL: Did not get the expected exception");
2367             }
2368         } catch (IllegalArgumentException e) {
2369             String msg = e.getMessage();
2370             if (msg.indexOf("a}b{c") >= 0) {
2371                 logln("Ok: " + msg);
2372             } else {
2373                 errln("FAIL: " + msg);
2374             }
2375             return;
2376         }
2377         errln("FAIL: No syntax error");
2378     }
2379 
2380     /**
2381      * Test ID form variants
2382      */
2383     @Test
TestIDForms()2384     public void TestIDForms() {
2385         String DATA[] = {
2386                 "NFC", null, "NFD",
2387                 "nfd", null, "NFC", // make sure case is ignored
2388                 "Any-NFKD", null, "Any-NFKC",
2389                 "Null", null, "Null",
2390                 "-nfkc", "nfkc", "NFKD",
2391                 "-nfkc/", "nfkc", "NFKD",
2392                 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2393                 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2394                 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2395                 "Source-", null, null,
2396                 "Source/Variant-", null, null,
2397                 "Source-/Variant", null, null,
2398                 "/Variant", null, null,
2399                 "/Variant-", null, null,
2400                 "-/Variant", null, null,
2401                 "-/", null, null,
2402                 "-", null, null,
2403                 "/", null, null,
2404         };
2405 
2406         for (int i=0; i<DATA.length; i+=3) {
2407             String ID = DATA[i];
2408             String expID = DATA[i+1];
2409             String expInvID = DATA[i+2];
2410             boolean expValid = (expInvID != null);
2411             if (expID == null) {
2412                 expID = ID;
2413             }
2414             try {
2415                 Transliterator t =
2416                     Transliterator.getInstance(ID);
2417                 Transliterator u = t.getInverse();
2418                 if (t.getID().equals(expID) &&
2419                         u.getID().equals(expInvID)) {
2420                     logln("Ok: " + ID + ".getInverse() => " + expInvID);
2421                 } else {
2422                     errln("FAIL: getInstance(" + ID + ") => " +
2423                             t.getID() + " x getInverse() => " + u.getID() +
2424                             ", expected " + expInvID);
2425                 }
2426             } catch (IllegalArgumentException e) {
2427                 if (!expValid) {
2428                     logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2429                 } else {
2430                     errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
2431                 }
2432             }
2433         }
2434     }
2435 
checkRules(String label, Transliterator t2, String testRulesForward)2436     void checkRules(String label, Transliterator t2, String testRulesForward) {
2437         String rules2 = t2.toRules(true);
2438         //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2439         rules2 = TestUtility.replace(rules2, " ", "");
2440         rules2 = TestUtility.replace(rules2, "\n", "");
2441         rules2 = TestUtility.replace(rules2, "\r", "");
2442         testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2443 
2444         if (!rules2.equals(testRulesForward)) {
2445             errln(label);
2446             logln("GENERATED RULES: " + rules2);
2447             logln("SHOULD BE:       " + testRulesForward);
2448         }
2449     }
2450 
2451     /**
2452      * Mark's toRules test.
2453      */
2454     @Test
TestToRulesMark()2455     public void TestToRulesMark() {
2456 
2457         String testRules =
2458             "::[[:Latin:][:Mark:]];"
2459             + "::NFKD (NFC);"
2460             + "::Lower (Lower);"
2461             + "a <> \\u03B1;" // alpha
2462             + "::NFKC (NFD);"
2463             + "::Upper (Lower);"
2464             + "::Lower ();"
2465             + "::([[:Greek:][:Mark:]]);"
2466             ;
2467         String testRulesForward =
2468             "::[[:Latin:][:Mark:]];"
2469             + "::NFKD(NFC);"
2470             + "::Lower(Lower);"
2471             + "a > \\u03B1;"
2472             + "::NFKC(NFD);"
2473             + "::Upper (Lower);"
2474             + "::Lower ();"
2475             ;
2476         String testRulesBackward =
2477             "::[[:Greek:][:Mark:]];"
2478             + "::Lower (Upper);"
2479             + "::NFD(NFKC);"
2480             + "\\u03B1 > a;"
2481             + "::Lower(Lower);"
2482             + "::NFC(NFKD);"
2483             ;
2484         String source = "\u00E1"; // a-acute
2485         String target = "\u03AC"; // alpha-acute
2486 
2487         Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2488         Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
2489 
2490         expect(t2, source, target);
2491         expect(t3, target, source);
2492 
2493         checkRules("Failed toRules FORWARD", t2, testRulesForward);
2494         checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2495     }
2496 
2497     /**
2498      * Test Escape and Unescape transliterators.
2499      */
2500     @Test
TestEscape()2501     public void TestEscape() {
2502         expect(Transliterator.getInstance("Hex-Any"),
2503                 "\\x{40}\\U00000031&#x32;&#81;",
2504         "@12Q");
2505         expect(Transliterator.getInstance("Any-Hex/C"),
2506                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2507         "\\u0041\\U0010BEEF\\uFEED");
2508         expect(Transliterator.getInstance("Any-Hex/Java"),
2509                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2510         "\\u0041\\uDBEF\\uDEEF\\uFEED");
2511         expect(Transliterator.getInstance("Any-Hex/Perl"),
2512                 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2513         "\\x{41}\\x{10BEEF}\\x{FEED}");
2514     }
2515 
2516     /**
2517      * Make sure display names of variants look reasonable.
2518      */
2519     @Test
TestDisplayName()2520     public void TestDisplayName() {
2521         String DATA[] = {
2522                 // ID, forward name, reverse name
2523                 // Update the text as necessary -- the important thing is
2524                 // not the text itself, but how various cases are handled.
2525 
2526                 // Basic test
2527                 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2528 
2529                 // Variants
2530                 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2531 
2532                 // Target-only IDs
2533                 "NFC", "Any to NFC", "Any to NFD",
2534         };
2535 
2536         Locale US = Locale.US;
2537 
2538         for (int i=0; i<DATA.length; i+=3) {
2539             String name = Transliterator.getDisplayName(DATA[i], US);
2540             if (!name.equals(DATA[i+1])) {
2541                 errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2542                         name + ", expected " + DATA[i+1]);
2543             } else {
2544                 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2545             }
2546             Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2547             name = Transliterator.getDisplayName(t.getID(), US);
2548             if (!name.equals(DATA[i+2])) {
2549                 errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2550                         name + ", expected " + DATA[i+2]);
2551             } else {
2552                 logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2553             }
2554 
2555             // Cover getDisplayName(String)
2556             ULocale save = ULocale.getDefault();
2557             ULocale.setDefault(ULocale.US);
2558             String name2 = Transliterator.getDisplayName(t.getID());
2559             if (!name.equals(name2))
2560                 errln("FAIL: getDisplayName with default locale failed");
2561             ULocale.setDefault(save);
2562         }
2563     }
2564 
2565     /**
2566      * Test anchor masking
2567      */
2568     @Test
TestAnchorMasking()2569     public void TestAnchorMasking() {
2570         String rule = "^a > Q; a > q;";
2571         try {
2572             Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2573             if(t==null){
2574                 errln("FAIL: Did not get the expected exception");
2575             }
2576         } catch (IllegalArgumentException e) {
2577             errln("FAIL: " + rule + " => " + e);
2578         }
2579     }
2580 
2581     /**
2582      * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2583      * during ICU4J modularization to remove dependency of tests on Transliterator.
2584      */
2585     @Test
TestScriptAllCodepoints()2586     public void TestScriptAllCodepoints(){
2587         int code;
2588         HashSet  scriptIdsChecked   = new HashSet();
2589         HashSet  scriptAbbrsChecked = new HashSet();
2590         for( int i =0; i <= 0x10ffff; i++){
2591             code = UScript.getScript(i);
2592             if(code==UScript.INVALID_CODE){
2593                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2594             }
2595             String id =UScript.getName(code);
2596             String abbr = UScript.getShortName(code);
2597             if (!scriptIdsChecked.contains(id)) {
2598                 scriptIdsChecked.add(id);
2599                 String newId ="[:"+id+":];NFD";
2600                 try{
2601                     Transliterator t = Transliterator.getInstance(newId);
2602                     if(t==null){
2603                         errln("Failed to create transliterator for "+hex(i)+
2604                                 " script code: " +id);
2605                     }
2606                 }catch(Exception e){
2607                     errln("Failed to create transliterator for "+hex(i)
2608                             +" script code: " +id
2609                             + " Exception: "+e.getMessage());
2610                 }
2611             }
2612             if (!scriptAbbrsChecked.contains(abbr)) {
2613                 scriptAbbrsChecked.add(abbr);
2614                 String newAbbrId ="[:"+abbr+":];NFD";
2615                 try{
2616                     Transliterator t = Transliterator.getInstance(newAbbrId);
2617                     if(t==null){
2618                         errln("Failed to create transliterator for "+hex(i)+
2619                                 " script code: " +abbr);
2620                     }
2621                 }catch(Exception e){
2622                     errln("Failed to create transliterator for "+hex(i)
2623                             +" script code: " +abbr
2624                             + " Exception: "+e.getMessage());
2625                 }
2626             }
2627         }
2628     }
2629 
2630 
2631     static final String[][] registerRules = {
2632         {"Any-Dev1", "x > X; y > Y;"},
2633         {"Any-Dev2", "XY > Z"},
2634         {"Greek-Latin/FAKE",
2635             "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
2636             "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
2637             "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
2638             "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
2639         },
2640     };
2641 
2642     static final String DESERET_DEE = UTF16.valueOf(0x10414);
2643     static final String DESERET_dee = UTF16.valueOf(0x1043C);
2644 
2645     static final String[][] testCases = {
2646 
2647         // NORMALIZATION
2648         // should add more test cases
2649         {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2650         {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2651         {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2652         {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2653 
2654         // mp -> b BUG
2655         {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
2656         {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},
2657 
2658         // check for devanagari bug
2659         {"nfd;Dev1;Dev2;nfc", "xy", "Z"},
2660 
2661         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
2662         {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2663             "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2664             //TODO: enable this test once Titlecase works right
2665             //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2666             //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2667 
2668             {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2669                 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
2670                 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2671                     "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},
2672 
2673                     {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2674                     {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2675 
2676                     // FORMS OF S
2677                     {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2678                     {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2679                     {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2680                     {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2681 
2682                     // Tatiana bug
2683                     // Upper: TAT\u02B9\u00C2NA
2684                     // Lower: tat\u02B9\u00E2na
2685                     // Title: Tat\u02B9\u00E2na
2686                     {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
2687                     {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
2688                     {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
2689     };
2690 
2691     @Test
TestSpecialCases()2692     public void TestSpecialCases() {
2693 
2694         for (int i = 0; i < registerRules.length; ++i) {
2695             Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2696                     registerRules[i][1], Transliterator.FORWARD);
2697             DummyFactory.add(registerRules[i][0], t);
2698         }
2699         for (int i = 0; i < testCases.length; ++i) {
2700             String name = testCases[i][0];
2701             Transliterator t = Transliterator.getInstance(name);
2702             String id = t.getID();
2703             String source = testCases[i][1];
2704             String target = null;
2705 
2706             // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2707 
2708             if (testCases[i].length > 2)    target = testCases[i][2];
2709             else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
2710             else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
2711             else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
2712             else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
2713             else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
2714             else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);
2715 
2716             expect(t, source, target);
2717         }
2718         for (int i = 0; i < registerRules.length; ++i) {
2719             Transliterator.unregister(registerRules[i][0]);
2720         }
2721     }
2722 
2723     // seems like there should be an easier way to just register an instance of a transliterator
2724 
2725     static class DummyFactory implements Transliterator.Factory {
2726         static DummyFactory singleton = new DummyFactory();
2727         static HashMap m = new HashMap();
2728 
2729         // Since Transliterators are immutable, we don't have to clone on set & get
add(String ID, Transliterator t)2730         static void add(String ID, Transliterator t) {
2731             m.put(ID, t);
2732             //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2733             Transliterator.registerFactory(ID, singleton);
2734         }
getInstance(String ID)2735         public Transliterator getInstance(String ID) {
2736             return (Transliterator) m.get(ID);
2737         }
2738     }
2739 
2740     @Test
TestCasing()2741     public void TestCasing() {
2742         Transliterator toLower = Transliterator.getInstance("lower");
2743         Transliterator toCasefold = Transliterator.getInstance("casefold");
2744         Transliterator toUpper = Transliterator.getInstance("upper");
2745         Transliterator toTitle = Transliterator.getInstance("title");
2746         for (int i = 0; i < 0x600; ++i) {
2747             String s = UTF16.valueOf(i);
2748 
2749             String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2750             assertEquals("Lowercase", lower, toLower.transform(s));
2751 
2752             String casefold = UCharacter.foldCase(s, true);
2753             assertEquals("Casefold", casefold, toCasefold.transform(s));
2754 
2755             String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2756             assertEquals("Title", title, toTitle.transform(s));
2757 
2758             String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2759             assertEquals("Upper", upper, toUpper.transform(s));
2760         }
2761     }
2762 
2763     @Test
TestSurrogateCasing()2764     public void TestSurrogateCasing () {
2765         // check that casing handles surrogates
2766         // titlecase is currently defective
2767         int dee = UTF16.charAt(DESERET_dee,0);
2768         int DEE = UCharacter.toTitleCase(dee);
2769         if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2770             errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2771         }
2772 
2773         if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2774             errln("Fails uppercase of surrogates");
2775         }
2776 
2777         if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2778             errln("Fails lowercase of surrogates");
2779         }
2780     }
2781 
2782     // Check to see that incremental gets at least part way through a reasonable string.
2783     // TODO(junit): should be working - also should be converted to parameterized test
2784     @Ignore
2785     @Test
TestIncrementalProgress()2786     public void TestIncrementalProgress() {
2787         String latinTest = "The Quick Brown Fox.";
2788         String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest);
2789         String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest);
2790         String[][] tests = {
2791                 {"Any", latinTest},
2792                 {"Latin", latinTest},
2793                 {"Halfwidth", latinTest},
2794                 {"Devanagari", devaTest},
2795                 {"Katakana", kataTest},
2796         };
2797 
2798         Enumeration sources = Transliterator.getAvailableSources();
2799         while(sources.hasMoreElements()) {
2800             String source = (String) sources.nextElement();
2801             String test = findMatch(source, tests);
2802             if (test == null) {
2803                 logln("Skipping " + source + "-X");
2804                 continue;
2805             }
2806             Enumeration targets = Transliterator.getAvailableTargets(source);
2807             while(targets.hasMoreElements()) {
2808                 String target = (String) targets.nextElement();
2809                 Enumeration variants = Transliterator.getAvailableVariants(source, target);
2810                 while(variants.hasMoreElements()) {
2811                     String variant = (String) variants.nextElement();
2812                     String id = source + "-" + target + "/" + variant;
2813                     logln("id: " + id);
2814 
2815                     Transliterator t = Transliterator.getInstance(id);
2816                     CheckIncrementalAux(t, test);
2817 
2818                     String rev = t.transliterate(test);
2819                     Transliterator inv = t.getInverse();
2820                     CheckIncrementalAux(inv, rev);
2821                 }
2822             }
2823         }
2824     }
2825 
findMatch(String source, String[][] pairs)2826     public String findMatch (String source, String[][] pairs) {
2827         for (int i = 0; i < pairs.length; ++i) {
2828             if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1];
2829         }
2830         return null;
2831     }
2832 
CheckIncrementalAux(Transliterator t, String input)2833     public void CheckIncrementalAux(Transliterator t, String input) {
2834 
2835         Replaceable test = new ReplaceableString(input);
2836         Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length());
2837         t.transliterate(test, pos);
2838         boolean gotError = false;
2839 
2840         // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
2841 
2842         if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) {
2843             errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2844             gotError = true;
2845         } else {
2846             logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2847         }
2848         t.finishTransliteration(test, pos);
2849         if (pos.start != pos.limit) {
2850             errln("Incomplete, " + t.getID() + ":  " + UtilityExtensions.formatInput(test, pos));
2851             gotError = true;
2852         }
2853         if(!gotError){
2854             //errln("FAIL: Did not get expected error");
2855         }
2856     }
2857 
2858     @Test
TestFunction()2859     public void TestFunction() {
2860         // Careful with spacing and ';' here:  Phrase this exactly
2861         // as toRules() is going to return it.  If toRules() changes
2862         // with regard to spacing or ';', then adjust this string.
2863         String rule =
2864             "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2865 
2866         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2867         if (t == null) {
2868             errln("FAIL: createFromRules failed");
2869             return;
2870         }
2871 
2872         String r = t.toRules(true);
2873         if (r.equals(rule)) {
2874             logln("OK: toRules() => " + r);
2875         } else {
2876             errln("FAIL: toRules() => " + r +
2877                     ", expected " + rule);
2878         }
2879 
2880         expect(t, "The Quick Brown Fox",
2881         "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2882         rule =
2883             "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2884 
2885         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2886         if (t == null) {
2887             errln("FAIL: createFromRules failed");
2888             return;
2889         }
2890 
2891         r = t.toRules(true);
2892         if (r.equals(rule)) {
2893             logln("OK: toRules() => " + r);
2894         } else {
2895             errln("FAIL: toRules() => " + r +
2896                     ", expected " + rule);
2897         }
2898 
2899         expect(t, "\u0301",
2900         "U+0301 \\N{COMBINING ACUTE ACCENT}");
2901     }
2902 
2903     @Test
TestInvalidBackRef()2904     public void TestInvalidBackRef() {
2905         String rule =  ". > $1;";
2906         String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2907         try {
2908             Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2909             if (t != null) {
2910                 errln("FAIL: createFromRules should have returned NULL");
2911             }
2912             errln("FAIL: Ok: . > $1; => no error");
2913             Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2914             if (t2 != null) {
2915                 errln("FAIL: createFromRules should have returned NULL");
2916             }
2917             errln("FAIL: Ok: . > $1; => no error");
2918         } catch (IllegalArgumentException e) {
2919             logln("Ok: . > $1; => " + e.getMessage());
2920         }
2921     }
2922 
2923     @Test
TestMulticharStringSet()2924     public void TestMulticharStringSet() {
2925         // Basic testing
2926         String rule =
2927             "       [{aa}]       > x;" +
2928             "         a          > y;" +
2929             "       [b{bc}]      > z;" +
2930             "[{gd}] { e          > q;" +
2931             "         e } [{fg}] > r;" ;
2932 
2933         Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2934         if (t == null) {
2935             errln("FAIL: createFromRules failed");
2936             return;
2937         }
2938 
2939         expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2940         "y x yz z d gd de gdq gdqfg ddrfg");
2941 
2942         // Overlapped string test.  Make sure that when multiple
2943         // strings can match that the longest one is matched.
2944         rule =
2945             "    [a {ab} {abc}]    > x;" +
2946             "           b          > y;" +
2947             "           c          > z;" +
2948             " q [t {st} {rst}] { e > p;" ;
2949 
2950         t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2951         if (t == null) {
2952             errln("FAIL: createFromRules failed");
2953             return;
2954         }
2955 
2956         expect(t, "a ab abc qte qste qrste",
2957         "x x x qtp qstp qrstp");
2958     }
2959 
2960     /**
2961      * Test that user-registered transliterators can be used under function
2962      * syntax.
2963      */
2964     @Test
TestUserFunction()2965     public void TestUserFunction() {
2966         Transliterator t;
2967 
2968         // There's no need to register inverses if we don't use them
2969         TestUserFunctionFactory.add("Any-gif",
2970                 Transliterator.createFromRules("gif",
2971                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2972                         Transliterator.FORWARD));
2973         //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2974 
2975         TestUserFunctionFactory.add("Any-RemoveCurly",
2976                 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2977         //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2978 
2979         logln("Trying &hex");
2980         t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2981         logln("Registering");
2982         TestUserFunctionFactory.add("Any-hex2", t);
2983         t = Transliterator.getInstance("Any-hex2");
2984         expect(t, "abc", "\\u0061\\u0062\\u0063");
2985 
2986         logln("Trying &gif");
2987         t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2988         logln("Registering");
2989         TestUserFunctionFactory.add("Any-gif2", t);
2990         t = Transliterator.getInstance("Any-gif2");
2991         expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2992         "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2993 
2994         // Test that filters are allowed after &
2995         t = Transliterator.createFromRules("test",
2996                 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2997         expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2998 
2999         // Unregister our test stuff
3000         TestUserFunctionFactory.unregister();
3001     }
3002 
3003     static class TestUserFunctionFactory implements Transliterator.Factory {
3004         static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
3005         static HashMap m = new HashMap();
3006 
add(String ID, Transliterator t)3007         static void add(String ID, Transliterator t) {
3008             m.put(new CaseInsensitiveString(ID), t);
3009             Transliterator.registerFactory(ID, singleton);
3010         }
3011 
getInstance(String ID)3012         public Transliterator getInstance(String ID) {
3013             return (Transliterator) m.get(new CaseInsensitiveString(ID));
3014         }
3015 
unregister()3016         static void unregister() {
3017             Iterator ids = m.keySet().iterator();
3018             while (ids.hasNext()) {
3019                 CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
3020                 Transliterator.unregister(id.getString());
3021                 ids.remove(); // removes pair from m
3022             }
3023         }
3024     }
3025 
3026     /**
3027      * Test the Any-X transliterators.
3028      */
3029     @Test
TestAnyX()3030     public void TestAnyX() {
3031         Transliterator anyLatin =
3032             Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3033 
3034         expect(anyLatin,
3035                 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
3036         "greek:abkABK hiragana:abuku cyrillic:abc");
3037     }
3038 
3039     /**
3040      * Test Any-X transliterators with sample letters from all scripts.
3041      */
3042     @Test
TestAny()3043     public void TestAny() {
3044         UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
3045         StringBuffer testString = new StringBuffer();
3046         for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
3047             UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
3048             int count = 5;
3049             for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
3050                 testString.append(it.getString());
3051                 if (--count < 0) break;
3052             }
3053         }
3054         logln("Sample set for Any-Latin: " + testString);
3055         Transliterator anyLatin = Transliterator.getInstance("any-Latn");
3056         String result = anyLatin.transliterate(testString.toString());
3057         logln("Sample result for Any-Latin: " + result);
3058     }
3059 
3060 
3061     /**
3062      * Test the source and target set API.  These are only implemented
3063      * for RBT and CompoundTransliterator at this time.
3064      */
3065     @Test
TestSourceTargetSet()3066     public void TestSourceTargetSet() {
3067         // Rules
3068         String r =
3069             "a > b; " +
3070             "r [x{lu}] > q;";
3071 
3072         // Expected source
3073         UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
3074 
3075         // Expected target
3076         UnicodeSet expTrg = new UnicodeSet("[bq]");
3077 
3078         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3079         UnicodeSet src = t.getSourceSet();
3080         UnicodeSet trg = t.getTargetSet();
3081 
3082         if (src.equals(expSrc) && trg.equals(expTrg)) {
3083             logln("Ok: " + r + " => source = " + src.toPattern(true) +
3084                     ", target = " + trg.toPattern(true));
3085         } else {
3086             errln("FAIL: " + r + " => source = " + src.toPattern(true) +
3087                     ", expected " + expSrc.toPattern(true) +
3088                     "; target = " + trg.toPattern(true) +
3089                     ", expected " + expTrg.toPattern(true));
3090         }
3091     }
3092 
3093     @Test
TestSourceTargetSet2()3094     public void TestSourceTargetSet2() {
3095 
3096 
3097         Normalizer2 nfc = Normalizer2.getNFCInstance();
3098         Normalizer2 nfd = Normalizer2.getNFDInstance();
3099 
3100         //        Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
3101         //        UnicodeSet nfkdSource = new UnicodeSet();
3102         //        UnicodeSet nfkdTarget = new UnicodeSet();
3103         //        for (int i = 0; i <= 0x10FFFF; ++i) {
3104         //            if (nfkd.isInert(i)) {
3105         //                continue;
3106         //            }
3107         //            nfkdSource.add(i);
3108         //            String t = nfkd.getDecomposition(i);
3109         //            if (t != null) {
3110         //                nfkdTarget.addAll(t);
3111         //            } else {
3112         //                nfkdTarget.add(i);
3113         //            }
3114         //        }
3115         //        nfkdSource.freeze();
3116         //        nfkdTarget.freeze();
3117         //        logln("NFKD Source: " + nfkdSource.toPattern(false));
3118         //        logln("NFKD Target: " + nfkdTarget.toPattern(false));
3119 
3120         UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
3121         UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
3122         UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
3123         CanonicalIterator can = new CanonicalIterator("");
3124 
3125         UnicodeSet disorderedMarks = new UnicodeSet();
3126 
3127         for (int i = 0; i <= 0x10FFFF; ++i) {
3128             String s = nfd.getDecomposition(i);
3129             if (s == null) {
3130                 continue;
3131             }
3132 
3133             can.setSource(s);
3134             for (String t = can.next(); t != null; t = can.next()) {
3135                 disorderedMarks.add(t);
3136             }
3137 
3138             // if s has two code points, (or more), add the lead/trail information
3139             int first = s.codePointAt(0);
3140             int firstCount = Character.charCount(first);
3141             if (s.length() == firstCount) continue;
3142             String trailString = s.substring(firstCount);
3143 
3144             // add all the trail characters
3145             if (!nonStarters.containsSome(trailString)) {
3146                 continue;
3147             }
3148             UnicodeSet trailSet = leadToTrail.get(first);
3149             if (trailSet == null) {
3150                 leadToTrail.put(first, trailSet = new UnicodeSet());
3151             }
3152             trailSet.addAll(trailString); // add remaining trails
3153 
3154             // add the sources
3155             UnicodeSet sourcesSet = leadToSources.get(first);
3156             if (sourcesSet == null) {
3157                 leadToSources.put(first, sourcesSet = new UnicodeSet());
3158             }
3159             sourcesSet.add(i);
3160         }
3161 
3162 
3163         for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
3164             String lead = x.getKey();
3165             UnicodeSet sources = x.getValue();
3166             UnicodeSet trailSet = leadToTrail.get(lead);
3167             for (String source : sources) {
3168                 for (String trail : trailSet) {
3169                     can.setSource(source + trail);
3170                     for (String t = can.next(); t != null; t = can.next()) {
3171                         if (t.endsWith(trail)) continue;
3172                         disorderedMarks.add(t);
3173                     }
3174                 }
3175             }
3176         }
3177 
3178 
3179         for (String s : nonStarters) {
3180             disorderedMarks.add("\u0345" + s);
3181             disorderedMarks.add(s+"\u0323");
3182             String xx = nfc.normalize("\u01EC" + s);
3183             if (!xx.startsWith("\u01EC")) {
3184                 logln("??");
3185             }
3186         }
3187 
3188         //        for (int i = 0; i <= 0x10FFFF; ++i) {
3189         //            String s = nfkd.getDecomposition(i);
3190         //            if (s != null) {
3191         //                disorderedMarks.add(s);
3192         //                disorderedMarks.add(nfc.normalize(s));
3193         //                addDerivedStrings(nfc, disorderedMarks, s);
3194         //            }
3195         //            s = nfd.getDecomposition(i);
3196         //            if (s != null) {
3197         //                disorderedMarks.add(s);
3198         //            }
3199         //            if (!nfc.isInert(i)) {
3200         //                if (i == 0x00C0) {
3201         //                    logln("\u00C0");
3202         //                }
3203         //                can.setSource(s+"\u0334");
3204         //                for (String t = can.next(); t != null; t = can.next()) {
3205         //                    addDerivedStrings(nfc, disorderedMarks, t);
3206         //                }
3207         //                can.setSource(s+"\u0345");
3208         //                for (String t = can.next(); t != null; t = can.next()) {
3209         //                    addDerivedStrings(nfc, disorderedMarks, t);
3210         //                }
3211         //                can.setSource(s+"\u0323");
3212         //                for (String t = can.next(); t != null; t = can.next()) {
3213         //                    addDerivedStrings(nfc, disorderedMarks, t);
3214         //                }
3215         //            }
3216         //        }
3217         logln("Test cases: " + disorderedMarks.size());
3218         disorderedMarks.addAll(0,0x10FFFF).freeze();
3219         logln("isInert \u0104 " + nfc.isInert('\u0104'));
3220 
3221         Object[][] rules = {
3222                 {":: [:sc=COMMON:] any-name;", null},
3223 
3224                 {":: [:Greek:] hex-any/C;", null},
3225                 {":: [:Greek:] any-hex/C;", null},
3226 
3227                 {":: [[:Mn:][:Me:]] remove;", null},
3228                 {":: [[:Mn:][:Me:]] null;", null},
3229 
3230 
3231                 {":: lower;", null},
3232                 {":: upper;", null},
3233                 {":: title;", null},
3234                 {":: CaseFold;", null},
3235 
3236                 {":: NFD;", null},
3237                 {":: NFC;", null},
3238                 {":: NFKD;", null},
3239                 {":: NFKC;", null},
3240 
3241                 {":: [[:Mn:][:Me:]] NFKD;", null},
3242                 {":: Latin-Greek;", null},
3243                 {":: [:Latin:] NFKD;", null},
3244                 {":: NFKD;", null},
3245                 {":: NFKD;\n" +
3246                     ":: [[:Mn:][:Me:]] remove;\n" +
3247                     ":: NFC;", null},
3248         };
3249         for (Object[] rulex : rules) {
3250             String rule = (String) rulex[0];
3251             Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
3252             UnicodeSet actualSource = trans.getSourceSet();
3253             UnicodeSet actualTarget = trans.getTargetSet();
3254             UnicodeSet empiricalSource = new UnicodeSet();
3255             UnicodeSet empiricalTarget = new UnicodeSet();
3256             String ruleDisplay = rule.replace("\n", "\t\t");
3257             UnicodeSet toTest = disorderedMarks;
3258             //            if (rulex[1] != null) {
3259             //                toTest = new UnicodeSet(disorderedMarks);
3260             //                toTest.addAll((UnicodeSet) rulex[1]);
3261             //            }
3262 
3263             String test = nfd.normalize("\u0104");
3264             boolean DEBUG = true;
3265             @SuppressWarnings("unused")
3266             int count = 0; // for debugging
3267             for (String s : toTest) {
3268                 if (s.equals(test)) {
3269                     logln(test);
3270                 }
3271                 String t = trans.transform(s);
3272                 if (!s.equals(t)) {
3273                     if (!isAtomic(s, t, trans)) {
3274                         isAtomic(s, t, trans);
3275                         continue;
3276                     }
3277 
3278                     // only keep the part that changed; so skip the front and end.
3279                     //                    int start = findSharedStartLength(s,t);
3280                     //                    int end = findSharedEndLength(s,t);
3281                     //                    if (start != 0 || end != 0) {
3282                     //                        s = s.substring(start, s.length() - end);
3283                     //                        t = t.substring(start, t.length() - end);
3284                     //                    }
3285                     if (DEBUG) {
3286                         if (!actualSource.containsAll(s)) {
3287                             count++;
3288                         }
3289                         if (!actualTarget.containsAll(t)) {
3290                             count++;
3291                         }
3292                     }
3293                     addSourceTarget(s, empiricalSource, t, empiricalTarget);
3294                 }
3295             }
3296             assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
3297             assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
3298         }
3299     }
3300 
3301     @Test
TestSourceTargetSetFilter()3302     public void TestSourceTargetSetFilter() {
3303         String[][] tests = {
3304                 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
3305                 {"[] Latin-Greek", null, "[\']"},
3306                 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
3307                 {"[] Any-Latin"},
3308                 {"[] casefold"},
3309                 {"[] NFKD;"},
3310                 {"[] NFKC;"},
3311                 {"[] hex"},
3312                 {"[] lower"},
3313                 {"[] null"},
3314                 {"[] remove"},
3315                 {"[] title"},
3316                 {"[] upper"},
3317         };
3318         UnicodeSet expectedSource = UnicodeSet.EMPTY;
3319         for (String[] testPair : tests) {
3320             String test = testPair[0];
3321             Transliterator t0;
3322             try {
3323                 t0 = Transliterator.getInstance(test);
3324             } catch (Exception e) {
3325                 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
3326             }
3327             Transliterator t1;
3328             try {
3329                 t1 = t0.getInverse();
3330             } catch (Exception e) {
3331                 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
3332             }
3333             int targetIndex = 0;
3334             for (Transliterator t : new Transliterator[]{t0, t1}) {
3335                 boolean ok;
3336                 UnicodeSet source = t.getSourceSet();
3337                 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3338                 targetIndex++;
3339                 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3340                         : testPair[targetIndex] == null ? expectedSource
3341                                 : testPair[targetIndex].length() == 0 ? expectedSource
3342                                         : new UnicodeSet(testPair[targetIndex]);
3343                 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
3344                 if (!ok) { // for debugging
3345                     source = t.getSourceSet();
3346                 }
3347                 UnicodeSet target = t.getTargetSet();
3348                 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3349                 if (!ok) { // for debugging
3350                     target = t.getTargetSet();
3351                 }
3352             }
3353         }
3354     }
3355 
isAtomic(String s, String t, Transliterator trans)3356     private boolean isAtomic(String s, String t, Transliterator trans) {
3357         for (int i = 1; i < s.length(); ++i) {
3358             if (!CharSequences.onCharacterBoundary(s, i)) {
3359                 continue;
3360             }
3361             String q = trans.transform(s.substring(0,i));
3362             if (t.startsWith(q)) {
3363                 String r = trans.transform(s.substring(i));
3364                 if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3365                     return false;
3366                 }
3367             }
3368         }
3369         return true;
3370         //        // make sure that every part is different
3371         //        if (s.codePointCount(0, s.length()) > 1) {
3372         //            int[] codePoints = It.codePoints(s);
3373         //            for (int k = 0; k < codePoints.length; ++k) {
3374         //                int pos = indexOf(t,codePoints[k]);
3375         //                if (pos >= 0) {
3376         //                    int x;
3377         //                }
3378         //            }
3379         //            if (s.contains("\u00C0")) {
3380         //                logln("\u00C0");
3381         //            }
3382         //        }
3383     }
3384 
addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget)3385     private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3386         expectedSource.addAll(s);
3387         if (t.length() > 0) {
3388             expectedTarget.addAll(t);
3389         }
3390     }
3391 
3392 //    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3393 //        disorderedMarks.add(s);
3394 //        for (int j = 1; j < s.length(); ++j) {
3395 //            if (CharSequences.onCharacterBoundary(s, j)) {
3396 //                String shorter = s.substring(0,j);
3397 //                disorderedMarks.add(shorter);
3398 //                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
3399 //            }
3400 //        }
3401 //    }
3402 
3403     @Test
TestCharUtils()3404     public void TestCharUtils() {
3405         String[][] startTests = {
3406                 {"1", "a", "ab"},
3407                 {"0", "a", "xb"},
3408                 {"0", "\uD800", "\uD800\uDC01"},
3409                 {"1", "\uD800a", "\uD800b"},
3410                 {"0", "\uD800\uDC00", "\uD800\uDC01"},
3411         };
3412         for (String[] row : startTests) {
3413             int actual = findSharedStartLength(row[1], row[2]);
3414             assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3415                     Integer.parseInt(row[0]),
3416                     actual);
3417         }
3418         String[][] endTests = {
3419                 {"0", "\uDC00", "\uD801\uDC00"},
3420                 {"1", "a", "ba"},
3421                 {"0", "a", "bx"},
3422                 {"1", "a\uDC00", "b\uDC00"},
3423                 {"0", "\uD800\uDC00", "\uD801\uDC00"},
3424         };
3425         for (String[] row : endTests) {
3426             int actual = findSharedEndLength(row[1], row[2]);
3427             assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3428                     Integer.parseInt(row[0]),
3429                     actual);
3430         }
3431     }
3432 
3433     /**
3434      * @param s
3435      * @param t
3436      * @return
3437      */
3438     // TODO make generally available
findSharedStartLength(CharSequence s, CharSequence t)3439     private static int findSharedStartLength(CharSequence s, CharSequence t) {
3440         int min = Math.min(s.length(), t.length());
3441         int i;
3442         char sch, tch;
3443         for (i = 0; i < min; ++i) {
3444             sch = s.charAt(i);
3445             tch = t.charAt(i);
3446             if (sch != tch) {
3447                 break;
3448             }
3449         }
3450         return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3451     }
3452 
3453     /**
3454      * @param s
3455      * @param t
3456      * @return
3457      */
3458     // TODO make generally available
findSharedEndLength(CharSequence s, CharSequence t)3459     private static int findSharedEndLength(CharSequence s, CharSequence t) {
3460         int slength = s.length();
3461         int tlength = t.length();
3462         int min = Math.min(slength, tlength);
3463         int i;
3464         char sch, tch;
3465         // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3466         for (i = 0; i < min; ++i) {
3467             sch = s.charAt(slength - i - 1);
3468             tch = t.charAt(tlength - i - 1);
3469             if (sch != tch) {
3470                 break;
3471             }
3472         }
3473         return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
3474     }
3475 
3476     enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK}
3477 
assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert)3478     void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3479         boolean haveError = false;
3480         if (!actual.containsAll(empirical)) {
3481             UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3482             errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3483             haveError = true;
3484         }
3485         if (!empirical.containsAll(actual)) {
3486             UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3487             logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3488             haveError = true;
3489         }
3490         if (!haveError) {
3491             logln("OK " + message + ' ' + toPattern(empirical));
3492         }
3493     }
3494 
toPattern(UnicodeSet missing)3495     private String toPattern(UnicodeSet missing) {
3496         String result = missing.toPattern(false);
3497         if (result.length() < 200) {
3498             return result;
3499         }
3500         return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3501     }
3502 
3503 
3504     /**
3505      * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3506      */
3507     @Test
TestPatternWhitespace()3508     public void TestPatternWhitespace() {
3509         // Rules
3510         String r = "a > \u200E b;";
3511 
3512         Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3513 
3514         expect(t, "a", "b");
3515 
3516         // UnicodeSet
3517         UnicodeSet set = new UnicodeSet("[a \u200E]");
3518 
3519         if (set.contains(0x200E)) {
3520             errln("FAIL: U+200E not being ignored by UnicodeSet");
3521         }
3522     }
3523 
3524     @Test
TestAlternateSyntax()3525     public void TestAlternateSyntax() {
3526         // U+2206 == &
3527         // U+2190 == <
3528         // U+2192 == >
3529         // U+2194 == <>
3530         expect("a \u2192 x; b \u2190 y; c \u2194 z",
3531                 "abc",
3532         "xbz");
3533         expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3534                 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3535         "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3536     }
3537 
3538     @Test
TestPositionAPI()3539     public void TestPositionAPI() {
3540         Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3541         Transliterator.Position b = new Transliterator.Position(a);
3542         Transliterator.Position c = new Transliterator.Position();
3543         c.set(a);
3544         // Call the toString() API:
3545         if (a.equals(b) && a.equals(c)) {
3546             logln("Ok: " + a + " == " + b + " == " + c);
3547         } else {
3548             errln("FAIL: " + a + " != " + b + " != " + c);
3549         }
3550     }
3551 
3552     //======================================================================
3553     // New tests for the ::BEGIN/::END syntax
3554     //======================================================================
3555 
    /**
     * Rule sets for the ::BEGIN/::END tests, one string per entry. The "[n]"
     * comment above each entry gives its array index.
     *
     * Entries whose rules use ::BEGIN/::END are commented out and replaced by
     * an empty string so that the indexes of the remaining entries do not
     * shift. Keep those placeholders in place when editing this table.
     */
    private static final String[] BEGIN_END_RULES = new String[] {
        // [0]
        "abc > xy;"
        + "aba > z;",

        // [1]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [2]
        /*
        "abc > xy;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [3]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "aba > z;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [4]
        "abc > xy;"
        + "::Null;"
        + "aba > z;",

        // [5]
        "::Upper;"
        + "ABC > xy;"
        + "AB > x;"
        + "C > z;"
        + "::Upper;"
        + "XYZ > p;"
        + "XY > q;"
        + "Z > r;"
        + "::Upper;",

        // [6]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [7]
        "::Null;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [8]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;",

        // [9]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [10]
        /*
        "::BEGIN;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::END;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [11]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [12]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;"
        + "::BEGIN;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::END;"
        + "::BEGIN;"
        + "'a-a' > a\\%|a;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [13]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::Null;"
        + "'a-a' > a\\%|a;",

        // [14]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > yz;"
        + "::END;"
        + "::Upper;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [15]
        "::[abc];"
        + "abc > xy;"
        + "::Null;"
        + "aba > yz;"
        + "::Upper;",

        // [16]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc <> xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba <> yz;"
        + "::END;"
        + "::Upper(Lower);"
        + "::([XYZ]);",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [17]
        "::[abc];"
        + "abc <> xy;"
        + "::Null;"
        + "aba <> yz;"
        + "::Upper(Lower);"
        + "::([XYZ]);"
    };
3729 
3730     /*
3731 (This entire test is commented out below and will need some heavy revision when we re-add
3732 the ::BEGIN/::END stuff)
3733     private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3734         // [7]
3735         "::BEGIN;"
3736         + "abc > xy;"
3737         + "::BEGIN;"
3738         + "aba > z;"
3739         + "::END;"
3740         + "::END;",
3741 
3742         // [8]
3743         "abc > xy;"
3744         + " aba > z;"
3745         + "::END;",
3746 
3747         // [9]
3748         "::BEGIN;"
3749         + "::Upper;"
3750         + "::END;"
3751     };
3752      */
3753 
3754     private static final String[] BEGIN_END_TEST_CASES = new String[] {
3755         BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
3756         //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
3757         //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
3758         //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
3759         BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
3760         BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
3761 
3762         BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
3763         BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
3764         BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
3765         BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
3766         //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
3767         //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
3768         //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
3769         //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
3770         //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
3771         BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
3772         BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
3773         BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
3774 
3775         //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3776         BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3777         //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3778         BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
3779     };
3780 
3781     @Test
TestBeginEnd()3782     public void TestBeginEnd() {
3783         // run through the list of test cases above
3784         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3785             expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3786         }
3787 
3788         // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3789         Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3790                 Transliterator.REVERSE);
3791         expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3792 
3793         // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3794         // that all of them cause errors
3795         /*
3796 (commented out until we have the real ::BEGIN/::END stuff in place
3797         for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3798             try {
3799                 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3800                         Transliterator.FORWARD);
3801                 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3802             }
3803             catch (IllegalArgumentException e) {
3804                 // this is supposed to happen; do nothing here
3805             }
3806         }
3807          */
3808     }
3809 
3810     @Test
TestBeginEndToRules()3811     public void TestBeginEndToRules() {
3812         // run through the same list of test cases we used above, but this time, instead of just
3813         // instantiating a Transliterator from the rules and running the test against it, we instantiate
3814         // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3815         // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3816         // to (i.e., does the same thing as) the original rule set
3817         for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3818             Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3819                     Transliterator.FORWARD);
3820             String rules = t.toRules(false);
3821             Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3822             expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3823         }
3824 
3825         // do the same thing for the reversible test case
3826         Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3827                 Transliterator.REVERSE);
3828         String rules = reversed.toRules(false);
3829         Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3830         expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3831     }
3832 
3833     @Test
TestRegisterAlias()3834     public void TestRegisterAlias() {
3835         String longID = "Lower;[aeiou]Upper";
3836         String shortID = "Any-CapVowels";
3837         String reallyShortID = "CapVowels";
3838 
3839         Transliterator.registerAlias(shortID, longID);
3840 
3841         Transliterator t1 = Transliterator.getInstance(longID);
3842         Transliterator t2 = Transliterator.getInstance(reallyShortID);
3843 
3844         if (!t1.getID().equals(longID))
3845             errln("Transliterator instantiated with long ID doesn't have long ID");
3846         if (!t2.getID().equals(reallyShortID))
3847             errln("Transliterator instantiated with short ID doesn't have short ID");
3848 
3849         if (!t1.toRules(true).equals(t2.toRules(true)))
3850             errln("Alias transliterators aren't the same");
3851 
3852         Transliterator.unregister(shortID);
3853 
3854         try {
3855             t1 = Transliterator.getInstance(shortID);
3856             errln("Instantiation with short ID succeeded after short ID was unregistered");
3857         }
3858         catch (IllegalArgumentException e) {
3859         }
3860 
3861         // try the same thing again, but this time with something other than
3862         // an instance of CompoundTransliterator
3863         String realID = "Latin-Greek";
3864         String fakeID = "Latin-dlgkjdflkjdl";
3865         Transliterator.registerAlias(fakeID, realID);
3866 
3867         t1 = Transliterator.getInstance(realID);
3868         t2 = Transliterator.getInstance(fakeID);
3869 
3870         if (!t1.toRules(true).equals(t2.toRules(true)))
3871             errln("Alias transliterators aren't the same");
3872 
3873         Transliterator.unregister(fakeID);
3874     }
3875 
3876     /**
3877      * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3878      */
3879     @Test
TestHalfwidthFullwidth()3880     public void TestHalfwidthFullwidth() {
3881         Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3882         Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3883 
3884         // Array of 3n items
3885         // Each item is
3886         //   "hf"|"fh"|"both",
3887         //   <Halfwidth>,
3888         //   <Fullwidth>
3889         String[] DATA = {
3890                 "both",
3891                 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3892                 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3893         };
3894 
3895         for (int i=0; i<DATA.length; i+=3) {
3896             switch (DATA[i].charAt(0)) {
3897             case 'h': // Halfwidth-Fullwidth only
3898                 expect(hf, DATA[i+1], DATA[i+2]);
3899                 break;
3900             case 'f': // Fullwidth-Halfwidth only
3901                 expect(fh, DATA[i+2], DATA[i+1]);
3902                 break;
3903             case 'b': // both directions
3904                 expect(hf, DATA[i+1], DATA[i+2]);
3905                 expect(fh, DATA[i+2], DATA[i+1]);
3906                 break;
3907             }
3908         }
3909 
3910     }
3911 
3912     /**
3913      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3914      *              TODO: confirm that the expected results are correct.
3915      *              For now, test just confirms that C++ and Java give identical results.
3916      */
3917     @Test
TestThai()3918     public void TestThai() {
3919         Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3920         String thaiText =
3921             "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3922             "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3923             "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3924             "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3925             "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3926             "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3927             "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3928             "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3929             "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3930             "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3931             "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3932             "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3933             "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3934             "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3935             "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3936             "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3937             "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3938             "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3939             "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3940             "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3941             "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3942             "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3943             "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3944             "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3945             " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3946             "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3947             "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3948             " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3949             "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3950             "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3951 
3952         String latinText =
3953             "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3954             "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3955             "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3956             "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3957             "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3958             " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3959             "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3960             "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3961             "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3962             "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3963             "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3964             "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3965             " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3966             "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3967             " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3968             "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3969             "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3970             "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3971 
3972         expect(tr, thaiText, latinText);
3973     }
3974 
3975 
3976     //======================================================================
3977     // These tests are not mirrored (yet) in icu4c at
3978     // source/test/intltest/transtst.cpp
3979     //======================================================================
3980 
3981     /**
3982      * Improve code coverage.
3983      */
3984     @Test
TestCoverage()3985     public void TestCoverage() {
3986         // NullTransliterator
3987         Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3988         expect(t, "a", "a");
3989 
3990         // Source, target set
3991         t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3992         t.setFilter(new UnicodeSet("[A-Z]"));
3993         logln("source = " + t.getSourceSet());
3994         logln("target = " + t.getTargetSet());
3995 
3996         t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3997         logln("source = " + t.getSourceSet());
3998         logln("target = " + t.getTargetSet());
3999     }
4000     /*
4001      * Test case for threading problem in NormalizationTransliterator
4002      * reported by ticket#5160
4003      */
4004     @Test
TestT5160()4005     public void TestT5160() {
4006         final String[] testData = {
4007                 "a",
4008                 "b",
4009                 "\u09BE",
4010                 "A\u0301",
4011         };
4012         final String[] expected = {
4013                 "a",
4014                 "b",
4015                 "\u09BE",
4016                 "\u00C1",
4017         };
4018         Transliterator translit = Transliterator.getInstance("NFC");
4019         NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
4020         for (int i = 0; i < tasks.length; i++) {
4021             tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
4022         }
4023         TestUtil.runUntilDone(tasks);
4024 
4025         for (int i = 0; i < tasks.length; i++) {
4026             if (tasks[i].getErrorMessage() != null) {
4027                 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
4028                 break;
4029             }
4030         }
4031     }
4032 
4033     static class NormTranslitTask implements Runnable {
4034         Transliterator translit;
4035         String testData;
4036         String expectedData;
4037         String errorMsg;
4038 
NormTranslitTask(Transliterator translit, String testData, String expectedData)4039         NormTranslitTask(Transliterator translit, String testData, String expectedData) {
4040             this.translit = translit;
4041             this.testData = testData;
4042             this.expectedData = expectedData;
4043         }
4044 
run()4045         public void run() {
4046             errorMsg = null;
4047             StringBuffer inBuf = new StringBuffer(testData);
4048             StringBuffer expectedBuf = new StringBuffer(expectedData);
4049 
4050             for(int i = 0; i < 1000; i++) {
4051                 String in = inBuf.toString();
4052                 String out = translit.transliterate(in);
4053                 String expected = expectedBuf.toString();
4054                 if (!out.equals(expected)) {
4055                     errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
4056                     break;
4057                 }
4058                 inBuf.append(testData);
4059                 expectedBuf.append(expectedData);
4060             }
4061         }
4062 
getErrorMessage()4063         public String getErrorMessage() {
4064             return errorMsg;
4065         }
4066     }
4067 
4068     //======================================================================
4069     // Support methods
4070     //======================================================================
expect(String rules, String source, String expectedResult, Transliterator.Position pos)4071     static void expect(String rules,
4072             String source,
4073             String expectedResult,
4074             Transliterator.Position pos) {
4075         Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
4076         expect(t, source, expectedResult, pos);
4077     }
4078 
expect(String rules, String source, String expectedResult)4079     static void expect(String rules, String source, String expectedResult) {
4080         expect(rules, source, expectedResult, null);
4081     }
4082 
expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator)4083     static void expect(Transliterator t, String source, String expectedResult,
4084             Transliterator reverseTransliterator) {
4085         expect(t, source, expectedResult);
4086         if (reverseTransliterator != null) {
4087             expect(reverseTransliterator, expectedResult, source);
4088         }
4089     }
4090 
expect(Transliterator t, String source, String expectedResult)4091     static void expect(Transliterator t, String source, String expectedResult) {
4092         expect(t, source, expectedResult, (Transliterator.Position) null);
4093     }
4094 
expect(Transliterator t, String source, String expectedResult, Transliterator.Position pos)4095     static void expect(Transliterator t, String source, String expectedResult,
4096             Transliterator.Position pos) {
4097         if (pos == null) {
4098             String result = t.transliterate(source);
4099             if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
4100         }
4101 
4102         Transliterator.Position index = null;
4103         if (pos == null) {
4104             index = new Transliterator.Position(0, source.length(), 0, source.length());
4105         } else {
4106             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4107                     pos.start, pos.limit);
4108         }
4109 
4110         ReplaceableString rsource = new ReplaceableString(source);
4111 
4112         t.finishTransliteration(rsource, index);
4113         // Do it all at once -- below we do it incrementally
4114 
4115         if (index.start != index.limit) {
4116             expectAux(t.getID() + ":UNFINISHED", source,
4117                     "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
4118             return;
4119         }
4120         String result = rsource.toString();
4121         if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
4122 
4123 
4124         if (pos == null) {
4125             index = new Transliterator.Position();
4126         } else {
4127             index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4128                     pos.start, pos.limit);
4129         }
4130 
4131         // Test incremental transliteration -- this result
4132         // must be the same after we finalize (see below).
4133         List<String> v = new ArrayList<String>();
4134         v.add(source);
4135         rsource.replace(0, rsource.length(), "");
4136         if (pos != null) {
4137             rsource.replace(0, 0, source);
4138             v.add(UtilityExtensions.formatInput(rsource, index));
4139             t.transliterate(rsource, index);
4140             v.add(UtilityExtensions.formatInput(rsource, index));
4141         } else {
4142             for (int i=0; i<source.length(); ++i) {
4143                 //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
4144                 //log.append(source.charAt(i)).append(" -> "));
4145                 t.transliterate(rsource, index, source.charAt(i));
4146                 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
4147                 v.add(UtilityExtensions.formatInput(rsource, index) +
4148                         ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
4149             }
4150         }
4151 
4152         // As a final step in keyboard transliteration, we must call
4153         // transliterate to finish off any pending partial matches that
4154         // were waiting for more input.
4155         t.finishTransliteration(rsource, index);
4156         result = rsource.toString();
4157         //log.append(" => ").append(rsource.toString());
4158         v.add(result);
4159 
4160         String[] results = new String[v.size()];
4161         v.toArray(results);
4162         expectAux(t.getID() + ":Incremental", results,
4163                 result.equals(expectedResult),
4164                 expectedResult);
4165     }
4166 
4167     static boolean expectAux(String tag, String source,
4168             String result, String expectedResult) {
4169         return expectAux(tag, new String[] {source, result},
4170                 result.equals(expectedResult),
4171                 expectedResult);
4172     }
4173 
4174     static boolean expectAux(String tag, String source,
4175             String result, boolean pass,
4176             String expectedResult) {
4177         return expectAux(tag, new String[] {source, result},
4178                 pass,
4179                 expectedResult);
4180     }
4181 
4182     static boolean expectAux(String tag, String source,
4183             boolean pass,
4184             String expectedResult) {
4185         return expectAux(tag, new String[] {source},
4186                 pass,
4187                 expectedResult);
4188     }
4189 
4190     static boolean expectAux(String tag, String[] results, boolean pass,
4191             String expectedResult) {
4192         msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
4193 
4194         for (int i = 0; i < results.length; ++i) {
4195             String label;
4196             if (i == 0) {
4197                 label = "source:   ";
4198             } else if (i == results.length - 1) {
4199                 label = "result:   ";
4200             } else {
4201                 if (!isVerbose() && pass) continue;
4202                 label = "interm" + i + ":  ";
4203             }
4204             msg("    " + label + results[i], pass ? LOG : ERR, false, true);
4205         }
4206 
4207         if (!pass) {
4208             msg(  "    expected: " + expectedResult, ERR, false, true);
4209         }
4210 
4211         return pass;
4212     }
4213 
4214     static private void assertTransform(String message, String expected, StringTransform t, String source) {
4215         assertEquals(message + " " + source, expected, t.transform(source));
4216     }
4217 
4218 
4219     static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
4220         assertEquals(message + " " +source, expected, t.transform(source));
4221         assertEquals(message + " " +source2, expected, t.transform(source2));
4222         assertEquals(message + " " + expected, source, back.transform(expected));
4223     }
4224 
4225     /*
4226      * Tests the method public Enumeration<String> getAvailableTargets(String source)
4227      */
4228     @Test
4229     public void TestGetAvailableTargets() {
4230         try {
4231             // Tests when if (targets == null) is true
4232             Transliterator.getAvailableTargets("");
4233         } catch (Exception e) {
4234             errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
4235         }
4236     }
4237 
4238     /*
4239      * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
4240      */
4241     @Test
4242     public void TestGetAvailableVariants() {
4243         try {
4244             // Tests when if (targets == null) is true
4245             Transliterator.getAvailableVariants("", "");
4246         } catch (Exception e) {
4247             errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
4248         }
4249     }
4250 
4251     /*
4252      * Tests the mehtod String nextLine() in RuleBody
4253      */
4254     @Test
4255     public void TestNextLine() {
4256         // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
4257         try{
4258             Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
4259         } catch(Exception e){
4260             errln("TransliteratorParser.nextLine() was not suppose to return an " +
4261             "exception for a rule of '\\'");
4262         }
4263     }
4264 }
4265