• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 1999-2007, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/10/99    aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_TRANSLITERATION
14 
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "rbt_pars.h"
30 #include "anytrans.h"
31 #include "esctrn.h"
32 #include "name2uni.h"
33 #include "nortrans.h"
34 #include "remtrans.h"
35 #include "titletrn.h"
36 #include "tolowtrn.h"
37 #include "toupptrn.h"
38 #include "unesctrn.h"
39 #include "uni2name.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include <stdio.h>
43 
44 /***********************************************************************
45 
46                      HOW TO USE THIS TEST FILE
47                                -or-
48                   How I developed on two platforms
49                 without losing (too much of) my mind
50 
51 
52 1. Add new tests by copying/pasting/changing existing tests.  On Java,
53    any public void method named Test...() taking no parameters becomes
54    a test.  On C++, you need to modify the header and add a line to
55    the runIndexedTest() dispatch method.
56 
57 2. Make liberal use of the expect() method; it is your friend.
58 
59 3. The tests in this file exactly match those in a sister file on the
60    other side.  The two files are:
61 
62    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
63    icu4c:  source/test/intltest/transtst.cpp
64 
65                   ==> THIS IS THE IMPORTANT PART <==
66 
67    When you add a test in this file, add it in TransliteratorTest.java
68    too.  Give it the same name and put it in the same relative place.
69    This makes maintenance a lot simpler for any poor soul who ends up
70    trying to synchronize the tests between icu4j and icu4c.
71 
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73    then add it in the special non-mirrored section.  These are
74    labeled
75 
76      "icu4j ONLY"
77 
78    or
79 
80      "icu4c ONLY"
81 
82    Make sure you document the reason the test is here and not there.
83 
84 
85 Thank you.
86 The Management
87 ***********************************************************************/
88 
89 // Define character constants thusly to be EBCDIC-friendly
90 enum {
91     LEFT_BRACE=((UChar)0x007B), /*{*/
92     PIPE      =((UChar)0x007C), /*|*/
93     ZERO      =((UChar)0x0030), /*0*/
94     UPPER_A   =((UChar)0x0041)  /*A*/
95 };
96 
TransliteratorTest()97 TransliteratorTest::TransliteratorTest()
98 :   DESERET_DEE((UChar32)0x10414),
99     DESERET_dee((UChar32)0x1043C)
100 {
101 }
102 
~TransliteratorTest()103 TransliteratorTest::~TransliteratorTest() {}
104 
105 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)106 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
107                                    const char* &name, char* /*par*/) {
108     switch (index) {
109         TESTCASE(0,TestInstantiation);
110         TESTCASE(1,TestSimpleRules);
111         TESTCASE(2,TestRuleBasedInverse);
112         TESTCASE(3,TestKeyboard);
113         TESTCASE(4,TestKeyboard2);
114         TESTCASE(5,TestKeyboard3);
115         TESTCASE(6,TestArabic);
116         TESTCASE(7,TestCompoundKana);
117         TESTCASE(8,TestCompoundHex);
118         TESTCASE(9,TestFiltering);
119         TESTCASE(10,TestInlineSet);
120         TESTCASE(11,TestPatternQuoting);
121         TESTCASE(12,TestJ277);
122         TESTCASE(13,TestJ243);
123         TESTCASE(14,TestJ329);
124         TESTCASE(15,TestSegments);
125         TESTCASE(16,TestCursorOffset);
126         TESTCASE(17,TestArbitraryVariableValues);
127         TESTCASE(18,TestPositionHandling);
128         TESTCASE(19,TestHiraganaKatakana);
129         TESTCASE(20,TestCopyJ476);
130         TESTCASE(21,TestAnchors);
131         TESTCASE(22,TestInterIndic);
132         TESTCASE(23,TestFilterIDs);
133         TESTCASE(24,TestCaseMap);
134         TESTCASE(25,TestNameMap);
135         TESTCASE(26,TestLiberalizedID);
136         TESTCASE(27,TestCreateInstance);
137         TESTCASE(28,TestNormalizationTransliterator);
138         TESTCASE(29,TestCompoundRBT);
139         TESTCASE(30,TestCompoundFilter);
140         TESTCASE(31,TestRemove);
141         TESTCASE(32,TestToRules);
142         TESTCASE(33,TestContext);
143         TESTCASE(34,TestSupplemental);
144         TESTCASE(35,TestQuantifier);
145         TESTCASE(36,TestSTV);
146         TESTCASE(37,TestCompoundInverse);
147         TESTCASE(38,TestNFDChainRBT);
148         TESTCASE(39,TestNullInverse);
149         TESTCASE(40,TestAliasInverseID);
150         TESTCASE(41,TestCompoundInverseID);
151         TESTCASE(42,TestUndefinedVariable);
152         TESTCASE(43,TestEmptyContext);
153         TESTCASE(44,TestCompoundFilterID);
154         TESTCASE(45,TestPropertySet);
155         TESTCASE(46,TestNewEngine);
156         TESTCASE(47,TestQuantifiedSegment);
157         TESTCASE(48,TestDevanagariLatinRT);
158         TESTCASE(49,TestTeluguLatinRT);
159         TESTCASE(50,TestCompoundLatinRT);
160         TESTCASE(51,TestSanskritLatinRT);
161         TESTCASE(52,TestLocaleInstantiation);
162         TESTCASE(53,TestTitleAccents);
163         TESTCASE(54,TestLocaleResource);
164         TESTCASE(55,TestParseError);
165         TESTCASE(56,TestOutputSet);
166         TESTCASE(57,TestVariableRange);
167         TESTCASE(58,TestInvalidPostContext);
168         TESTCASE(59,TestIDForms);
169         TESTCASE(60,TestToRulesMark);
170         TESTCASE(61,TestEscape);
171         TESTCASE(62,TestAnchorMasking);
172         TESTCASE(63,TestDisplayName);
173         TESTCASE(64,TestSpecialCases);
174         TESTCASE(65,TestIncrementalProgress);
175         TESTCASE(66,TestSurrogateCasing);
176         TESTCASE(67,TestFunction);
177         TESTCASE(68,TestInvalidBackRef);
178         TESTCASE(69,TestMulticharStringSet);
179         TESTCASE(70,TestUserFunction);
180         TESTCASE(71,TestAnyX);
181         TESTCASE(72,TestSourceTargetSet);
182         TESTCASE(73,TestGurmukhiDevanagari);
183         TESTCASE(74,TestRuleWhitespace);
184         TESTCASE(75,TestAllCodepoints);
185         TESTCASE(76,TestBoilerplate);
186         TESTCASE(77,TestAlternateSyntax);
187         TESTCASE(78,TestBeginEnd);
188         TESTCASE(79,TestBeginEndToRules);
189         TESTCASE(80,TestRegisterAlias);
190         TESTCASE(81,TestRuleStripping);
191         default: name = ""; break;
192     }
193 }
194 
195 static const UVersionInfo ICU_39 = {3,9,0,0};
196 /**
197  * Make sure every system transliterator can be instantiated.
198  *
199  * ALSO test that the result of toRules() for each rule is a valid
200  * rule.  Do this here so we don't have to have another test that
201  * instantiates everything as well.
202  */
TestInstantiation()203 void TransliteratorTest::TestInstantiation() {
204     UErrorCode ec = U_ZERO_ERROR;
205     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
206     assertSuccess("getAvailableIDs()", ec);
207     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
208     int32_t n = Transliterator::countAvailableIDs();
209     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
210                avail->count(ec) == n);
211     assertSuccess("count()", ec);
212     UnicodeString name;
213     for (int32_t i=0; i<n; ++i) {
214         const UnicodeString& id = *avail->snext(ec);
215         if (!assertSuccess("snext()", ec) ||
216             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
217             break;
218         }
219         UnicodeString id2 = Transliterator::getAvailableID(i);
220         if (id.length() < 1) {
221             errln(UnicodeString("FAIL: getAvailableID(") +
222                   i + ") returned empty string");
223             continue;
224         }
225         if (id != id2) {
226             errln(UnicodeString("FAIL: getAvailableID(") +
227                   i + ") != getAvailableIDs().snext()");
228             continue;
229         }
230         if(id2.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_39)){
231             /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
232             continue;
233         }
234         UParseError parseError;
235         UErrorCode status = U_ZERO_ERROR;
236         Transliterator* t = Transliterator::createInstance(id,
237                               UTRANS_FORWARD, parseError,status);
238         name.truncate(0);
239         Transliterator::getDisplayName(id, name);
240         if (t == 0) {
241             errln(UnicodeString("FAIL: Couldn't create ") + id +
242                   /*", parse error " + parseError.code +*/
243                   ", line " + parseError.line +
244                   ", offset " + parseError.offset +
245                   ", pre-context " + prettify(parseError.preContext, TRUE) +
246                   ", post-context " +prettify(parseError.postContext,TRUE) +
247                   ", Error: " + u_errorName(status));
248             // When createInstance fails, it deletes the failing
249             // entry from the available ID list.  We detect this
250             // here by looking for a change in countAvailableIDs.
251             int32_t nn = Transliterator::countAvailableIDs();
252             if (nn == (n - 1)) {
253                 n = nn;
254                 --i; // Compensate for deleted entry
255             }
256         } else {
257             logln(UnicodeString("OK: ") + name + " (" + id + ")");
258 
259             // Now test toRules
260             UnicodeString rules;
261             t->toRules(rules, TRUE);
262             Transliterator *u = Transliterator::createFromRules("x",
263                                     rules, UTRANS_FORWARD, parseError,status);
264             if (u == 0) {
265                 errln(UnicodeString("FAIL: ") + id +
266                       ".createFromRules() => bad rules" +
267                       /*", parse error " + parseError.code +*/
268                       ", line " + parseError.line +
269                       ", offset " + parseError.offset +
270                       ", context " + prettify(parseError.preContext, TRUE) +
271                       ", rules: " + prettify(rules, TRUE));
272             } else {
273                 delete u;
274             }
275             delete t;
276         }
277     }
278     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
279     assertSuccess("snext()", ec);
280     delete avail;
281 
282     // Now test the failure path
283     UParseError parseError;
284     UErrorCode status = U_ZERO_ERROR;
285     UnicodeString id("<Not a valid Transliterator ID>");
286     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
287     if (t != 0) {
288         errln("FAIL: " + id + " returned a transliterator");
289         delete t;
290     } else {
291         logln("OK: Bogus ID handled properly");
292     }
293 }
294 
TestSimpleRules(void)295 void TransliteratorTest::TestSimpleRules(void) {
296     /* Example: rules 1. ab>x|y
297      *                2. yc>z
298      *
299      * []|eabcd  start - no match, copy e to tranlated buffer
300      * [e]|abcd  match rule 1 - copy output & adjust cursor
301      * [ex|y]cd  match rule 2 - copy output & adjust cursor
302      * [exz]|d   no match, copy d to transliterated buffer
303      * [exzd]|   done
304      */
305     expect(UnicodeString("ab>x|y;", "") +
306            "yc>z",
307            "eabcd", "exzd");
308 
309     /* Another set of rules:
310      *    1. ab>x|yzacw
311      *    2. za>q
312      *    3. qc>r
313      *    4. cw>n
314      *
315      * []|ab       Rule 1
316      * [x|yzacw]   No match
317      * [xy|zacw]   Rule 2
318      * [xyq|cw]    Rule 4
319      * [xyqn]|     Done
320      */
321     expect(UnicodeString("ab>x|yzacw;") +
322            "za>q;" +
323            "qc>r;" +
324            "cw>n",
325            "ab", "xyqn");
326 
327     /* Test categories
328      */
329     UErrorCode status = U_ZERO_ERROR;
330     UParseError parseError;
331     Transliterator *t = Transliterator::createFromRules(
332         "<ID>",
333         UnicodeString("$dummy=").append((UChar)0xE100) +
334         UnicodeString(";"
335                       "$vowel=[aeiouAEIOU];"
336                       "$lu=[:Lu:];"
337                       "$vowel } $lu > '!';"
338                       "$vowel > '&';"
339                       "'!' { $lu > '^';"
340                       "$lu > '*';"
341                       "a > ERROR", ""),
342         UTRANS_FORWARD, parseError,
343         status);
344     if (U_FAILURE(status)) {
345         errln("FAIL: RBT constructor failed");
346         return;
347     }
348     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
349     delete t;
350 }
351 
352 /**
353  * Test inline set syntax and set variable syntax.
354  */
TestInlineSet(void)355 void TransliteratorTest::TestInlineSet(void) {
356     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
357     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
358 
359     expect(UnicodeString(
360            "$digit = [0-9];"
361            "$alpha = [a-zA-Z];"
362            "$alphanumeric = [$digit $alpha];" // ***
363            "$special = [^$alphanumeric];"     // ***
364            "$alphanumeric > '-';"
365            "$special > '*';", ""),
366 
367            "thx-1138", "---*----");
368 }
369 
370 /**
371  * Create some inverses and confirm that they work.  We have to be
372  * careful how we do this, since the inverses will not be true
373  * inverses -- we can't throw any random string at the composition
374  * of the transliterators and expect the identity function.  F x
375  * F' != I.  However, if we are careful about the input, we will
376  * get the expected results.
377  */
TestRuleBasedInverse(void)378 void TransliteratorTest::TestRuleBasedInverse(void) {
379     UnicodeString RULES =
380         UnicodeString("abc>zyx;") +
381         "ab>yz;" +
382         "bc>zx;" +
383         "ca>xy;" +
384         "a>x;" +
385         "b>y;" +
386         "c>z;" +
387 
388         "abc<zyx;" +
389         "ab<yz;" +
390         "bc<zx;" +
391         "ca<xy;" +
392         "a<x;" +
393         "b<y;" +
394         "c<z;" +
395 
396         "";
397 
398     const char* DATA[] = {
399         // Careful here -- random strings will not work.  If we keep
400         // the left side to the domain and the right side to the range
401         // we will be okay though (left, abc; right xyz).
402         "a", "x",
403         "abcacab", "zyxxxyy",
404         "caccb", "xyzzy",
405     };
406 
407     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
408 
409     UErrorCode status = U_ZERO_ERROR;
410     UParseError parseError;
411     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
412                                 UTRANS_FORWARD, parseError, status);
413     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
414                                 UTRANS_REVERSE, parseError, status);
415     if (U_FAILURE(status)) {
416         errln("FAIL: RBT constructor failed");
417         return;
418     }
419     for (int32_t i=0; i<DATA_length; i+=2) {
420         expect(*fwd, DATA[i], DATA[i+1]);
421         expect(*rev, DATA[i+1], DATA[i]);
422     }
423     delete fwd;
424     delete rev;
425 }
426 
427 /**
428  * Basic test of keyboard.
429  */
TestKeyboard(void)430 void TransliteratorTest::TestKeyboard(void) {
431     UParseError parseError;
432     UErrorCode status = U_ZERO_ERROR;
433     Transliterator *t = Transliterator::createFromRules("<ID>",
434                               UnicodeString("psch>Y;")
435                               +"ps>y;"
436                               +"ch>x;"
437                               +"a>A;",
438                               UTRANS_FORWARD, parseError,
439                               status);
440     if (U_FAILURE(status)) {
441         errln("FAIL: RBT constructor failed");
442         return;
443     }
444     const char* DATA[] = {
445         // insertion, buffer
446         "a", "A",
447         "p", "Ap",
448         "s", "Aps",
449         "c", "Apsc",
450         "a", "AycA",
451         "psch", "AycAY",
452         0, "AycAY", // null means finishKeyboardTransliteration
453     };
454 
455     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
456     delete t;
457 }
458 
459 /**
460  * Basic test of keyboard with cursor.
461  */
TestKeyboard2(void)462 void TransliteratorTest::TestKeyboard2(void) {
463     UParseError parseError;
464     UErrorCode status = U_ZERO_ERROR;
465     Transliterator *t = Transliterator::createFromRules("<ID>",
466                               UnicodeString("ych>Y;")
467                               +"ps>|y;"
468                               +"ch>x;"
469                               +"a>A;",
470                               UTRANS_FORWARD, parseError,
471                               status);
472     if (U_FAILURE(status)) {
473         errln("FAIL: RBT constructor failed");
474         return;
475     }
476     const char* DATA[] = {
477         // insertion, buffer
478         "a", "A",
479         "p", "Ap",
480         "s", "Aps", // modified for rollback - "Ay",
481         "c", "Apsc", // modified for rollback - "Ayc",
482         "a", "AycA",
483         "p", "AycAp",
484         "s", "AycAps", // modified for rollback - "AycAy",
485         "c", "AycApsc", // modified for rollback - "AycAyc",
486         "h", "AycAY",
487         0, "AycAY", // null means finishKeyboardTransliteration
488     };
489 
490     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
491     delete t;
492 }
493 
494 /**
495  * Test keyboard transliteration with back-replacement.
496  */
TestKeyboard3(void)497 void TransliteratorTest::TestKeyboard3(void) {
498     // We want th>z but t>y.  Furthermore, during keyboard
499     // transliteration we want t>y then yh>z if t, then h are
500     // typed.
501     UnicodeString RULES("t>|y;"
502                         "yh>z;");
503 
504     const char* DATA[] = {
505         // Column 1: characters to add to buffer (as if typed)
506         // Column 2: expected appearance of buffer after
507         //           keyboard xliteration.
508         "a", "a",
509         "b", "ab",
510         "t", "abt", // modified for rollback - "aby",
511         "c", "abyc",
512         "t", "abyct", // modified for rollback - "abycy",
513         "h", "abycz",
514         0, "abycz", // null means finishKeyboardTransliteration
515     };
516 
517     UParseError parseError;
518     UErrorCode status = U_ZERO_ERROR;
519     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
520     if (U_FAILURE(status)) {
521         errln("FAIL: RBT constructor failed");
522         return;
523     }
524     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
525     delete t;
526 }
527 
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)528 void TransliteratorTest::keyboardAux(const Transliterator& t,
529                                      const char* DATA[], int32_t DATA_length) {
530     UErrorCode status = U_ZERO_ERROR;
531     UTransPosition index={0, 0, 0, 0};
532     UnicodeString s;
533     for (int32_t i=0; i<DATA_length; i+=2) {
534         UnicodeString log;
535         if (DATA[i] != 0) {
536             log = s + " + "
537                 + DATA[i]
538                 + " -> ";
539             t.transliterate(s, index, DATA[i], status);
540         } else {
541             log = s + " => ";
542             t.finishTransliteration(s, index);
543         }
544         // Show the start index '{' and the cursor '|'
545         UnicodeString a, b, c;
546         s.extractBetween(0, index.contextStart, a);
547         s.extractBetween(index.contextStart, index.start, b);
548         s.extractBetween(index.start, s.length(), c);
549         log.append(a).
550             append((UChar)LEFT_BRACE).
551             append(b).
552             append((UChar)PIPE).
553             append(c);
554         if (s == DATA[i+1] && U_SUCCESS(status)) {
555             logln(log);
556         } else {
557             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
558         }
559     }
560 }
561 
TestArabic(void)562 void TransliteratorTest::TestArabic(void) {
563 // Test disabled for 2.0 until new Arabic transliterator can be written.
564 //    /*
565 //    const char* DATA[] = {
566 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
567 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
568 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
569 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
570 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
571 //                  "\u062c\u0645\u064a\u0644\u0629",
572 //    };
573 //    */
574 //
575 //    UChar ar_raw[] = {
576 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
577 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
578 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
579 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
580 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
581 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
582 //    };
583 //    UnicodeString ar(ar_raw);
584 //    UErrorCode status=U_ZERO_ERROR;
585 //    UParseError parseError;
586 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
587 //    if (t == 0) {
588 //        errln("FAIL: createInstance failed");
589 //        return;
590 //    }
591 //    expect(*t, "Arabic", ar);
592 //    delete t;
593 }
594 
595 /**
596  * Compose the Kana transliterator forward and reverse and try
597  * some strings that should come out unchanged.
598  */
TestCompoundKana(void)599 void TransliteratorTest::TestCompoundKana(void) {
600     UParseError parseError;
601     UErrorCode status = U_ZERO_ERROR;
602     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
603     if (t == 0) {
604         errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
605     } else {
606         expect(*t, "aaaaa", "aaaaa");
607         delete t;
608     }
609 }
610 
611 /**
612  * Compose the hex transliterators forward and reverse.
613  */
TestCompoundHex(void)614 void TransliteratorTest::TestCompoundHex(void) {
615     UParseError parseError;
616     UErrorCode status = U_ZERO_ERROR;
617     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
618     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
619     Transliterator* transab[] = { a, b };
620     Transliterator* transba[] = { b, a };
621     if (a == 0 || b == 0) {
622         errln("FAIL: construction failed");
623         delete a;
624         delete b;
625         return;
626     }
627     // Do some basic tests of a
628     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
629     // Do some basic tests of b
630     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
631 
632     Transliterator* ab = new CompoundTransliterator(transab, 2);
633     UnicodeString s("abcde", "");
634     expect(*ab, s, s);
635 
636     UnicodeString str(s);
637     a->transliterate(str);
638     Transliterator* ba = new CompoundTransliterator(transba, 2);
639     expect(*ba, str, str);
640 
641     delete ab;
642     delete ba;
643     delete a;
644     delete b;
645 }
646 
647 int gTestFilterClassID = 0;
648 /**
649  * Used by TestFiltering().
650  */
651 class TestFilter : public UnicodeFilter {
clone() const652     virtual UnicodeFunctor* clone() const {
653         return new TestFilter(*this);
654     }
contains(UChar32 c) const655     virtual UBool contains(UChar32 c) const {
656         return c != (UChar)0x0063 /*c*/;
657     }
658     // Stubs
toPattern(UnicodeString & result,UBool) const659     virtual UnicodeString& toPattern(UnicodeString& result,
660                                      UBool /*escapeUnprintable*/) const {
661         return result;
662     }
matchesIndexValue(uint8_t) const663     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
664         return FALSE;
665     }
addMatchSetTo(UnicodeSet &) const666     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
667 public:
getDynamicClassID() const668     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
669 };
670 
671 /**
672  * Do some basic tests of filtering.
673  */
TestFiltering(void)674 void TransliteratorTest::TestFiltering(void) {
675     UParseError parseError;
676     UErrorCode status = U_ZERO_ERROR;
677     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
678     if (hex == 0) {
679         errln("FAIL: createInstance(Any-Hex) failed");
680         return;
681     }
682     hex->adoptFilter(new TestFilter());
683     UnicodeString s("abcde");
684     hex->transliterate(s);
685     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
686     if (s == exp) {
687         logln(UnicodeString("Ok:   \"") + exp + "\"");
688     } else {
689         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
690     }
691 
692     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
693     UnicodeFilter *f = hex->orphanFilter();
694     if (f == NULL){
695         errln("FAIL: orphanFilter() should get a UnicodeFilter");
696     } else {
697         delete f;
698     }
699     delete hex;
700 }
701 
702 /**
703  * Test anchors
704  */
TestAnchors(void)705 void TransliteratorTest::TestAnchors(void) {
706     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
707            "aaa",
708            "012");
709     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
710            "aaa",
711            "012");
712     expect(UnicodeString("^ab  > 01 ;"
713            " ab  > |8 ;"
714            "  b  > k ;"
715            " 8x$ > 45 ;"
716            " 8x  > 77 ;", ""),
717 
718            "ababbabxabx",
719            "018k7745");
720     expect(UnicodeString("$s = [z$] ;"
721            "$s{ab    > 01 ;"
722            "   ab    > |8 ;"
723            "    b    > k ;"
724            "   8x}$s > 45 ;"
725            "   8x    > 77 ;", ""),
726 
727            "abzababbabxzabxabx",
728            "01z018k45z01x45");
729 }
730 
731 /**
732  * Test pattern quoting and escape mechanisms.
733  */
TestPatternQuoting(void)734 void TransliteratorTest::TestPatternQuoting(void) {
735     // Array of 3n items
736     // Each item is <rules>, <input>, <expected output>
737     const UnicodeString DATA[] = {
738         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
739         UnicodeString(UChar(0x4E01)),
740         "[male adult]"
741     };
742 
743     for (int32_t i=0; i<3; i+=3) {
744         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
745         UParseError parseError;
746         UErrorCode status = U_ZERO_ERROR;
747         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
748         if (U_FAILURE(status)) {
749             errln("RBT constructor failed");
750         } else {
751             expect(*t, DATA[i+1], DATA[i+2]);
752         }
753         delete t;
754     }
755 }
756 
757 /**
758  * Regression test for bugs found in Greek transliteration.
759  */
TestJ277(void)760 void TransliteratorTest::TestJ277(void) {
761     UErrorCode status = U_ZERO_ERROR;
762     UParseError parseError;
763     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
764     if (gl == NULL) {
765         errln("FAIL: createInstance(Greek-Latin) returned NULL");
766         return;
767     }
768 
769     UChar sigma = 0x3C3;
770     UChar upsilon = 0x3C5;
771     UChar nu = 0x3BD;
772 //    UChar PHI = 0x3A6;
773     UChar alpha = 0x3B1;
774 //    UChar omega = 0x3C9;
775 //    UChar omicron = 0x3BF;
776 //    UChar epsilon = 0x3B5;
777 
778     // sigma upsilon nu -> syn
779     UnicodeString syn;
780     syn.append(sigma).append(upsilon).append(nu);
781     expect(*gl, syn, "syn");
782 
783     // sigma alpha upsilon nu -> saun
784     UnicodeString sayn;
785     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
786     expect(*gl, sayn, "saun");
787 
788     // Again, using a smaller rule set
789     UnicodeString rules(
790                 "$alpha   = \\u03B1;"
791                 "$nu      = \\u03BD;"
792                 "$sigma   = \\u03C3;"
793                 "$ypsilon = \\u03C5;"
794                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
795                 "s <>           $sigma;"
796                 "a <>           $alpha;"
797                 "u <>  $vowel { $ypsilon;"
798                 "y <>           $ypsilon;"
799                 "n <>           $nu;",
800                 "");
801     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
802     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
803     expect(*mini, syn, "syn");
804     expect(*mini, sayn, "saun");
805     delete mini;
806     mini = NULL;
807 
808 #if !UCONFIG_NO_FORMATTING
809     // Transliterate the Greek locale data
810     Locale el("el");
811     DateFormatSymbols syms(el, status);
812     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
813     int32_t i, count;
814     const UnicodeString* data = syms.getMonths(count);
815     for (i=0; i<count; ++i) {
816         if (data[i].length() == 0) {
817             continue;
818         }
819         UnicodeString out(data[i]);
820         gl->transliterate(out);
821         UBool ok = TRUE;
822         if (data[i].length() >= 2 && out.length() >= 2 &&
823             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
824             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
825                 ok = FALSE;
826             }
827         }
828         if (ok) {
829             logln(prettify(data[i] + " -> " + out));
830         } else {
831             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
832         }
833     }
834 #endif
835 
836     delete gl;
837 }
838 
839 /**
840  * Prefix, suffix support in hex transliterators
841  */
TestJ243(void)842 void TransliteratorTest::TestJ243(void) {
843     UErrorCode ec = U_ZERO_ERROR;
844 
845     // Test default Hex-Any, which should handle
846     // \u, \U, u+, and U+
847     Transliterator *hex =
848         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
849     if (assertSuccess("getInstance", ec)) {
850         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
851     }
852     delete hex;
853 
854 //    // Try a custom Hex-Unicode
855 //    // \uXXXX and &#xXXXX;
856 //    ec = U_ZERO_ERROR;
857 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
858 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
859 //           "abcd5fx012&#x00033;");
860 //    // Try custom Any-Hex (default is tested elsewhere)
861 //    ec = U_ZERO_ERROR;
862 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
863 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
864 }
865 
866 /**
867  * Parsers need better syntax error messages.
868  */
TestJ329(void)869 void TransliteratorTest::TestJ329(void) {
870 
871     struct { UBool containsErrors; const char* rule; } DATA[] = {
872         { FALSE, "a > b; c > d" },
873         { TRUE,  "a > b; no operator; c > d" },
874     };
875     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
876 
877     for (int32_t i=0; i<DATA_length; ++i) {
878         UErrorCode status = U_ZERO_ERROR;
879         UParseError parseError;
880         Transliterator *rbt = Transliterator::createFromRules("<ID>",
881                                     DATA[i].rule,
882                                     UTRANS_FORWARD,
883                                     parseError,
884                                     status);
885         UBool gotError = U_FAILURE(status);
886         UnicodeString desc(DATA[i].rule);
887         desc.append(gotError ? " -> error" : " -> no error");
888         if (gotError) {
889             desc = desc + ", ParseError code=" + u_errorName(status) +
890                 " line=" + parseError.line +
891                 " offset=" + parseError.offset +
892                 " context=" + parseError.preContext;
893         }
894         if (gotError == DATA[i].containsErrors) {
895             logln(UnicodeString("Ok:   ") + desc);
896         } else {
897             errln(UnicodeString("FAIL: ") + desc);
898         }
899         delete rbt;
900     }
901 }
902 
903 /**
904  * Test segments and segment references.
905  */
TestSegments(void)906 void TransliteratorTest::TestSegments(void) {
907     // Array of 3n items
908     // Each item is <rules>, <input>, <expected output>
909     UnicodeString DATA[] = {
910         "([a-z]) '.' ([0-9]) > $2 '-' $1",
911         "abc.123.xyz.456",
912         "ab1-c23.xy4-z56",
913 
914         // nested
915         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
916         "a1 b2",
917         "a1.a.1 b2.b.2",
918     };
919     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
920 
921     for (int32_t i=0; i<DATA_length; i+=3) {
922         logln("Pattern: " + prettify(DATA[i]));
923         UParseError parseError;
924         UErrorCode status = U_ZERO_ERROR;
925         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
926         if (U_FAILURE(status)) {
927             errln("FAIL: RBT constructor");
928         } else {
929             expect(*t, DATA[i+1], DATA[i+2]);
930         }
931         delete t;
932     }
933 }
934 
935 /**
936  * Test cursor positioning outside of the key
937  */
TestCursorOffset(void)938 void TransliteratorTest::TestCursorOffset(void) {
939     // Array of 3n items
940     // Each item is <rules>, <input>, <expected output>
941     UnicodeString DATA[] = {
942         "pre {alpha} post > | @ ALPHA ;"
943         "eALPHA > beta ;"
944         "pre {beta} post > BETA @@ | ;"
945         "post > xyz",
946 
947         "prealphapost prebetapost",
948 
949         "prbetaxyz preBETApost",
950     };
951     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
952 
953     for (int32_t i=0; i<DATA_length; i+=3) {
954         logln("Pattern: " + prettify(DATA[i]));
955         UParseError parseError;
956         UErrorCode status = U_ZERO_ERROR;
957         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
958         if (U_FAILURE(status)) {
959             errln("FAIL: RBT constructor");
960         } else {
961             expect(*t, DATA[i+1], DATA[i+2]);
962         }
963         delete t;
964     }
965 }
966 
967 /**
968  * Test zero length and > 1 char length variable values.  Test
969  * use of variable refs in UnicodeSets.
970  */
TestArbitraryVariableValues(void)971 void TransliteratorTest::TestArbitraryVariableValues(void) {
972     // Array of 3n items
973     // Each item is <rules>, <input>, <expected output>
974     UnicodeString DATA[] = {
975         "$abe = ab;"
976         "$pat = x[yY]z;"
977         "$ll  = 'a-z';"
978         "$llZ = [$ll];"
979         "$llY = [$ll$pat];"
980         "$emp = ;"
981 
982         "$abe > ABE;"
983         "$pat > END;"
984         "$llZ > 1;"
985         "$llY > 2;"
986         "7$emp 8 > 9;"
987         "",
988 
989         "ab xYzxyz stY78",
990         "ABE ENDEND 1129",
991     };
992     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
993 
994     for (int32_t i=0; i<DATA_length; i+=3) {
995         logln("Pattern: " + prettify(DATA[i]));
996         UParseError parseError;
997         UErrorCode status = U_ZERO_ERROR;
998         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
999         if (U_FAILURE(status)) {
1000             errln("FAIL: RBT constructor");
1001         } else {
1002             expect(*t, DATA[i+1], DATA[i+2]);
1003         }
1004         delete t;
1005     }
1006 }
1007 
1008 /**
1009  * Confirm that the contextStart, contextLimit, start, and limit
1010  * behave correctly. J474.
1011  */
TestPositionHandling(void)1012 void TransliteratorTest::TestPositionHandling(void) {
1013     // Array of 3n items
1014     // Each item is <rules>, <input>, <expected output>
1015     const char* DATA[] = {
1016         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1017         "xtat txtb", // pos 0,9,0,9
1018         "xTTaSS TTxUUb",
1019 
1020         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1021         "xtat txtb", // pos 2,9,3,8
1022         "xtaSS TTxUUb",
1023 
1024         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1025         "xtat txtb", // pos 3,8,3,8
1026         "xtaTT TTxTTb",
1027     };
1028 
1029     // Array of 4n positions -- these go with the DATA array
1030     // They are: contextStart, contextLimit, start, limit
1031     int32_t POS[] = {
1032         0, 9, 0, 9,
1033         2, 9, 3, 8,
1034         3, 8, 3, 8,
1035     };
1036 
1037     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1038     for (int32_t i=0; i<n; i++) {
1039         UErrorCode status = U_ZERO_ERROR;
1040         UParseError parseError;
1041         Transliterator *t = Transliterator::createFromRules("<ID>",
1042                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1043         if (U_FAILURE(status)) {
1044             delete t;
1045             errln("FAIL: RBT constructor");
1046             return;
1047         }
1048         UTransPosition pos;
1049         pos.contextStart= POS[4*i];
1050         pos.contextLimit = POS[4*i+1];
1051         pos.start = POS[4*i+2];
1052         pos.limit = POS[4*i+3];
1053         UnicodeString rsource(DATA[3*i+1]);
1054         t->transliterate(rsource, pos, status);
1055         if (U_FAILURE(status)) {
1056             delete t;
1057             errln("FAIL: transliterate");
1058             return;
1059         }
1060         t->finishTransliteration(rsource, pos);
1061         expectAux(DATA[3*i],
1062                   DATA[3*i+1],
1063                   rsource,
1064                   DATA[3*i+2]);
1065         delete t;
1066     }
1067 }
1068 
1069 /**
1070  * Test the Hiragana-Katakana transliterator.
1071  */
TestHiraganaKatakana(void)1072 void TransliteratorTest::TestHiraganaKatakana(void) {
1073     UParseError parseError;
1074     UErrorCode status = U_ZERO_ERROR;
1075     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1076     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1077     if (hk == 0 || kh == 0) {
1078         errln("FAIL: createInstance failed");
1079         delete hk;
1080         delete kh;
1081         return;
1082     }
1083 
1084     // Array of 3n items
1085     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1086     const char* DATA[] = {
1087         "both",
1088         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1089         "\\u30A2\\u30F8\\u30F2\\u30B0",
1090 
1091         "kh",
1092         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1093         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1094     };
1095     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1096 
1097     for (int32_t i=0; i<DATA_length; i+=3) {
1098         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1099         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1100         switch (*DATA[i]) {
1101         case 0x68: //'h': // Hiragana-Katakana
1102             expect(*hk, h, k);
1103             break;
1104         case 0x6B: //'k': // Katakana-Hiragana
1105             expect(*kh, k, h);
1106             break;
1107         case 0x62: //'b': // both
1108             expect(*hk, h, k);
1109             expect(*kh, k, h);
1110             break;
1111         }
1112     }
1113     delete hk;
1114     delete kh;
1115 }
1116 
1117 /**
1118  * Test cloning / copy constructor of RBT.
1119  */
TestCopyJ476(void)1120 void TransliteratorTest::TestCopyJ476(void) {
1121     // The real test here is what happens when the destructors are
1122     // called.  So we let one object get destructed, and check to
1123     // see that its copy still works.
1124     Transliterator *t2 = 0;
1125     {
1126         UParseError parseError;
1127         UErrorCode status = U_ZERO_ERROR;
1128         Transliterator *t1 = Transliterator::createFromRules("t1",
1129             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1130         if (U_FAILURE(status)) {
1131             errln("FAIL: RBT constructor");
1132             return;
1133         }
1134         t2 = t1->clone(); // Call copy constructor under the covers.
1135         expect(*t1, "abcfoofoo", "ABcbar");
1136         delete t1;
1137     }
1138     expect(*t2, "abcfoofoo", "ABcbar");
1139     delete t2;
1140 }
1141 
1142 /**
1143  * Test inter-Indic transliterators.  These are composed.
1144  * ICU4C Jitterbug 483.
1145  */
TestInterIndic(void)1146 void TransliteratorTest::TestInterIndic(void) {
1147     UnicodeString ID("Devanagari-Gujarati", "");
1148     UErrorCode status = U_ZERO_ERROR;
1149     UParseError parseError;
1150     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1151     if (dg == 0) {
1152         errln("FAIL: createInstance(" + ID + ") returned NULL");
1153         return;
1154     }
1155     UnicodeString id = dg->getID();
1156     if (id != ID) {
1157         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1158     }
1159     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1160     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1161     expect(*dg, dev, guj);
1162     delete dg;
1163 }
1164 
1165 /**
1166  * Test filter syntax in IDs. (J918)
1167  */
TestFilterIDs(void)1168 void TransliteratorTest::TestFilterIDs(void) {
1169     // Array of 3n strings:
1170     // <id>, <inverse id>, <input>, <expected output>
1171     const char* DATA[] = {
1172         "[aeiou]Any-Hex", // ID
1173         "[aeiou]Hex-Any", // expected inverse ID
1174         "quizzical",      // src
1175         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1176 
1177         "[aeiou]Any-Hex;[^5]Hex-Any",
1178         "[^5]Any-Hex;[aeiou]Hex-Any",
1179         "quizzical",
1180         "q\\u0075izzical",
1181 
1182         "[abc]Null",
1183         "[abc]Null",
1184         "xyz",
1185         "xyz",
1186     };
1187     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1188 
1189     for (int i=0; i<DATA_length; i+=4) {
1190         UnicodeString ID(DATA[i], "");
1191         UnicodeString uID(DATA[i+1], "");
1192         UnicodeString data2(DATA[i+2], "");
1193         UnicodeString data3(DATA[i+3], "");
1194         UParseError parseError;
1195         UErrorCode status = U_ZERO_ERROR;
1196         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1197         if (t == 0) {
1198             errln("FAIL: createInstance(" + ID + ") returned NULL");
1199             return;
1200         }
1201         expect(*t, data2, data3);
1202 
1203         // Check the ID
1204         if (ID != t->getID()) {
1205             errln("FAIL: createInstance(" + ID + ").getID() => " +
1206                   t->getID());
1207         }
1208 
1209         // Check the inverse
1210         Transliterator *u = t->createInverse(status);
1211         if (u == 0) {
1212             errln("FAIL: " + ID + ".createInverse() returned NULL");
1213         } else if (u->getID() != uID) {
1214             errln("FAIL: " + ID + ".createInverse().getID() => " +
1215                   u->getID() + ", expected " + uID);
1216         }
1217 
1218         delete t;
1219         delete u;
1220     }
1221 }
1222 
1223 /**
1224  * Test the case mapping transliterators.
1225  */
TestCaseMap(void)1226 void TransliteratorTest::TestCaseMap(void) {
1227     UParseError parseError;
1228     UErrorCode status = U_ZERO_ERROR;
1229     Transliterator* toUpper =
1230         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1231     Transliterator* toLower =
1232         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1233     Transliterator* toTitle =
1234         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1235     if (toUpper==0 || toLower==0 || toTitle==0) {
1236         errln("FAIL: createInstance returned NULL");
1237         delete toUpper;
1238         delete toLower;
1239         delete toTitle;
1240         return;
1241     }
1242 
1243     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1244            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1245     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1246            "the quick brown foX jumped over the lazY dogs.");
1247     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1248            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1249 
1250     delete toUpper;
1251     delete toLower;
1252     delete toTitle;
1253 }
1254 
1255 /**
1256  * Test the name mapping transliterators.
1257  */
TestNameMap(void)1258 void TransliteratorTest::TestNameMap(void) {
1259     UParseError parseError;
1260     UErrorCode status = U_ZERO_ERROR;
1261     Transliterator* uni2name =
1262         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1263     Transliterator* name2uni =
1264         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1265     if (uni2name==0 || name2uni==0) {
1266         errln("FAIL: createInstance returned NULL");
1267         delete uni2name;
1268         delete name2uni;
1269         return;
1270     }
1271 
1272     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1273     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1274            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1275     expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
1276            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1277 
1278     delete uni2name;
1279     delete name2uni;
1280 
1281     // round trip
1282     Transliterator* t =
1283         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1284     if (t==0) {
1285         errln("FAIL: createInstance returned NULL");
1286         delete t;
1287         return;
1288     }
1289 
1290     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1291     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1292     expect(*t, s, s);
1293     delete t;
1294 }
1295 
1296 /**
1297  * Test liberalized ID syntax.  1006c
1298  */
TestLiberalizedID(void)1299 void TransliteratorTest::TestLiberalizedID(void) {
1300     // Some test cases have an expected getID() value of NULL.  This
1301     // means I have disabled the test case for now.  This stuff is
1302     // still under development, and I haven't decided whether to make
1303     // getID() return canonical case yet.  It will all get rewritten
1304     // with the move to Source-Target/Variant IDs anyway. [aliu]
1305     const char* DATA[] = {
1306         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1307         "  Null  ", "Null", "whitespace",
1308         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1309         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1310     };
1311     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1312     UParseError parseError;
1313     UErrorCode status= U_ZERO_ERROR;
1314     for (int32_t i=0; i<DATA_length; i+=3) {
1315         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1316         if (t == 0) {
1317             errln(UnicodeString("FAIL: ") + DATA[i+2] +
1318                   " cannot create ID \"" + DATA[i] + "\"");
1319         } else {
1320             UnicodeString exp;
1321             if (DATA[i+1]) {
1322                 exp = UnicodeString(DATA[i+1], "");
1323             }
1324             // Don't worry about getID() if the expected char*
1325             // is NULL -- see above.
1326             if (exp.length() == 0 || exp == t->getID()) {
1327                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1328                       " create ID \"" + DATA[i] + "\" => \"" +
1329                       exp + "\"");
1330             } else {
1331                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1332                       " create ID \"" + DATA[i] + "\" => \"" +
1333                       t->getID() + "\", exp \"" + exp + "\"");
1334             }
1335             delete t;
1336         }
1337     }
1338 }
1339 
1340 /* test for Jitterbug 912 */
TestCreateInstance()1341 void TransliteratorTest::TestCreateInstance(){
1342     const char* FORWARD = "F";
1343     const char* REVERSE = "R";
1344     const char* DATA[] = {
1345         // Column 1: id
1346         // Column 2: direction
1347         // Column 3: expected ID, or "" if expect failure
1348         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1349 
1350         // JB#2689: bad compound causes crash
1351         "InvalidSource-InvalidTarget", FORWARD, "",
1352         "InvalidSource-InvalidTarget", REVERSE, "",
1353         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1354         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1355         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1356         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1357 
1358         NULL
1359     };
1360 
1361     for (int32_t i=0; DATA[i]; i+=3) {
1362         UParseError err;
1363         UErrorCode ec = U_ZERO_ERROR;
1364         UnicodeString id(DATA[i]);
1365         UTransDirection dir = (DATA[i+1]==FORWARD)?
1366             UTRANS_FORWARD:UTRANS_REVERSE;
1367         UnicodeString expID(DATA[i+2]);
1368         Transliterator* t =
1369             Transliterator::createInstance(id,dir,err,ec);
1370         UnicodeString newID;
1371         if (t) {
1372             newID = t->getID();
1373         }
1374         UBool ok = (newID == expID);
1375         if (!t) {
1376             newID = u_errorName(ec);
1377         }
1378         if (ok) {
1379             logln((UnicodeString)"Ok: createInstance(" +
1380                   id + "," + DATA[i+1] + ") => " + newID);
1381         } else {
1382             errln((UnicodeString)"FAIL: createInstance(" +
1383                   id + "," + DATA[i+1] + ") => " + newID +
1384                   ", expected " + expID);
1385         }
1386         delete t;
1387     }
1388 }
1389 
1390 /**
1391  * Test the normalization transliterator.
1392  */
TestNormalizationTransliterator()1393 void TransliteratorTest::TestNormalizationTransliterator() {
1394     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1395     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1396     const char* CANON[] = {
1397         // Input               Decomposed            Composed
1398         "cat",                "cat",                "cat"               ,
1399         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1400 
1401         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1402         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1403 
1404         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1405         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1406         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1407 
1408         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1409         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1410 
1411         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1412         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1413         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1414 
1415         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1416         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1417 
1418         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1419         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1420 
1421         "Henry IV",           "Henry IV",           "Henry IV"          ,
1422         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1423 
1424         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1425         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1426         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1427         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1428         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1429 
1430         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1431         0 // end
1432     };
1433 
1434     const char* COMPAT[] = {
1435         // Input               Decomposed            Composed
1436         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1437 
1438         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1439         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1440 
1441         "Henry IV",           "Henry IV",           "Henry IV"          ,
1442         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1443 
1444         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1445         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1446 
1447         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1448         0 // end
1449     };
1450 
1451     int32_t i;
1452     UParseError parseError;
1453     UErrorCode status = U_ZERO_ERROR;
1454     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1455     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1456     if (!NFD || !NFC) {
1457         errln("FAIL: createInstance failed");
1458         delete NFD;
1459         delete NFC;
1460         return;
1461     }
1462     for (i=0; CANON[i]; i+=3) {
1463         UnicodeString in = CharsToUnicodeString(CANON[i]);
1464         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1465         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1466         expect(*NFD, in, expd);
1467         expect(*NFC, in, expc);
1468     }
1469     delete NFD;
1470     delete NFC;
1471 
1472     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1473     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1474     if (!NFKD || !NFKC) {
1475         errln("FAIL: createInstance failed");
1476         delete NFKD;
1477         delete NFKC;
1478         return;
1479     }
1480     for (i=0; COMPAT[i]; i+=3) {
1481         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1482         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1483         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1484         expect(*NFKD, in, expkd);
1485         expect(*NFKC, in, expkc);
1486     }
1487     delete NFKD;
1488     delete NFKC;
1489 
1490     UParseError pe;
1491     status = U_ZERO_ERROR;
1492     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1493                                                        UTRANS_FORWARD,
1494                                                        pe, status);
1495     if (t == 0) {
1496         errln("FAIL: createInstance failed");
1497     }
1498     expect(*t, CharsToUnicodeString("\\u010dx"),
1499            CharsToUnicodeString("c\\u030C"));
1500     delete t;
1501 }
1502 
1503 /**
1504  * Test compound RBT rules.
1505  */
TestCompoundRBT(void)1506 void TransliteratorTest::TestCompoundRBT(void) {
1507     // Careful with spacing and ';' here:  Phrase this exactly
1508     // as toRules() is going to return it.  If toRules() changes
1509     // with regard to spacing or ';', then adjust this string.
1510     UnicodeString rule("::Hex-Any;\n"
1511                        "::Any-Lower;\n"
1512                        "a > '.A.';\n"
1513                        "b > '.B.';\n"
1514                        "::[^t]Any-Upper;", "");
1515     UParseError parseError;
1516     UErrorCode status = U_ZERO_ERROR;
1517     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1518     if (t == 0) {
1519         errln("FAIL: createFromRules failed");
1520         return;
1521     }
1522     expect(*t, "\\u0043at in the hat, bat on the mat",
1523            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1524     UnicodeString r;
1525     t->toRules(r, TRUE);
1526     if (r == rule) {
1527         logln((UnicodeString)"OK: toRules() => " + r);
1528     } else {
1529         errln((UnicodeString)"FAIL: toRules() => " + r +
1530               ", expected " + rule);
1531     }
1532     delete t;
1533 
1534     // Now test toRules
1535     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1536     if (t == 0) {
1537         errln("FAIL: createInstance failed");
1538         return;
1539     }
1540     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1541     t->toRules(r, TRUE);
1542     if (r != exp) {
1543         errln((UnicodeString)"FAIL: toRules() => " + r +
1544               ", expected " + exp);
1545     } else {
1546         logln((UnicodeString)"OK: toRules() => " + r);
1547     }
1548     delete t;
1549 
1550     // Round trip the result of toRules
1551     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1552     if (t == 0) {
1553         errln("FAIL: createFromRules #2 failed");
1554         return;
1555     } else {
1556         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1557     }
1558 
1559     // Test toRules again
1560     t->toRules(r, TRUE);
1561     if (r != exp) {
1562         errln((UnicodeString)"FAIL: toRules() => " + r +
1563               ", expected " + exp);
1564     } else {
1565         logln((UnicodeString)"OK: toRules() => " + r);
1566     }
1567 
1568     delete t;
1569 
1570     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1571     // to what the regenerated ID will look like.
1572     UnicodeString id("Upper(Lower);(NFKC)", "");
1573     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1574     if (t == 0) {
1575         errln("FAIL: createInstance #2 failed");
1576         return;
1577     }
1578     if (t->getID() == id) {
1579         logln((UnicodeString)"OK: created " + id);
1580     } else {
1581         errln((UnicodeString)"FAIL: createInstance(" + id +
1582               ").getID() => " + t->getID());
1583     }
1584 
1585     Transliterator *u = t->createInverse(status);
1586     if (u == 0) {
1587         errln("FAIL: createInverse failed");
1588         delete t;
1589         return;
1590     }
1591     exp = "NFKC();Lower(Upper)";
1592     if (u->getID() == exp) {
1593         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1594               u->getID());
1595     } else {
1596         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1597               u->getID());
1598     }
1599     delete t;
1600     delete u;
1601 }
1602 
1603 /**
1604  * Compound filter semantics were orginially not implemented
1605  * correctly.  Originally, each component filter f(i) is replaced by
1606  * f'(i) = f(i) && g, where g is the filter for the compound
1607  * transliterator.
1608  *
1609  * From Mark:
1610  *
1611  * Suppose and I have a transliterator X. Internally X is
1612  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1613  *
1614  * The compound should convert all greek characters (through latin) to
1615  * cyrillic, then lowercase the result. The filter should say "don't
1616  * touch 'A' in the original". But because an intermediate result
1617  * happens to go through "A", the Greek Alpha gets hung up.
1618  */
TestCompoundFilter(void)1619 void TransliteratorTest::TestCompoundFilter(void) {
1620     UParseError parseError;
1621     UErrorCode status = U_ZERO_ERROR;
1622     Transliterator *t = Transliterator::createInstance
1623         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1624     if (t == 0) {
1625         errln("FAIL: createInstance failed");
1626         return;
1627     }
1628     t->adoptFilter(new UnicodeSet("[^A]", status));
1629     if (U_FAILURE(status)) {
1630         errln("FAIL: UnicodeSet ct failed");
1631         delete t;
1632         return;
1633     }
1634 
1635     // Only the 'A' at index 1 should remain unchanged
1636     expect(*t,
1637            CharsToUnicodeString("BA\\u039A\\u0391"),
1638            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1639     delete t;
1640 }
1641 
TestRemove(void)1642 void TransliteratorTest::TestRemove(void) {
1643     UParseError parseError;
1644     UErrorCode status = U_ZERO_ERROR;
1645     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1646     if (t == 0) {
1647         errln("FAIL: createInstance failed");
1648         return;
1649     }
1650 
1651     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1652 
1653     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1654     // duplicating the filter
1655     Transliterator* t2 = t->clone();
1656     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1657 
1658     delete t;
1659     delete t2;
1660 }
1661 
TestToRules(void)1662 void TransliteratorTest::TestToRules(void) {
1663     const char* RBT = "rbt";
1664     const char* SET = "set";
1665     static const char* DATA[] = {
1666         RBT,
1667         "$a=\\u4E61; [$a] > A;",
1668         "[\\u4E61] > A;",
1669 
1670         RBT,
1671         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1672         "[[:Zs:][:Zl:]]{a} > A;",
1673 
1674         SET,
1675         "[[:Zs:][:Zl:]]",
1676         "[[:Zs:][:Zl:]]",
1677 
1678         SET,
1679         "[:Ps:]",
1680         "[:Ps:]",
1681 
1682         SET,
1683         "[:L:]",
1684         "[:L:]",
1685 
1686         SET,
1687         "[[:L:]-[A]]",
1688         "[[:L:]-[A]]",
1689 
1690         SET,
1691         "[~[:Lu:][:Ll:]]",
1692         "[~[:Lu:][:Ll:]]",
1693 
1694         SET,
1695         "[~[a-z]]",
1696         "[~[a-z]]",
1697 
1698         RBT,
1699         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1700         "[^[:Zs:]]{a} > A;",
1701 
1702         RBT,
1703         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1704         "[[a-z]-[:Zs:]]{a} > A;",
1705 
1706         RBT,
1707         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1708         "[[:Zs:]&[a-z]]{a} > A;",
1709 
1710         RBT,
1711         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1712         "[x[:Zs:]]{a} > A;",
1713 
1714         RBT,
1715         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1716         "$macron = \\u0304 ;"
1717         "$evowel = [aeiouyAEIOUY] ;"
1718         "$iotasub = \\u0345 ;"
1719         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1720         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1721 
1722         RBT,
1723         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1724         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1725     };
1726     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1727 
1728     for (int32_t d=0; d < DATA_length; d+=3) {
1729         if (DATA[d] == RBT) {
1730             // Transliterator test
1731             UParseError parseError;
1732             UErrorCode status = U_ZERO_ERROR;
1733             Transliterator *t = Transliterator::createFromRules("ID",
1734                                                                 DATA[d+1], UTRANS_FORWARD, parseError, status);
1735             if (t == 0) {
1736                 errln("FAIL: createFromRules failed");
1737                 return;
1738             }
1739             UnicodeString rules, escapedRules;
1740             t->toRules(rules, FALSE);
1741             t->toRules(escapedRules, TRUE);
1742             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1743             UnicodeString expEscapedRules(DATA[d+2]);
1744             if (rules == expRules) {
1745                 logln((UnicodeString)"Ok: " + DATA[d+1] +
1746                       " => " + rules);
1747             } else {
1748                 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1749                       " => " + rules + ", exp " + expRules);
1750             }
1751             if (escapedRules == expEscapedRules) {
1752                 logln((UnicodeString)"Ok: " + DATA[d+1] +
1753                       " => " + escapedRules);
1754             } else {
1755                 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1756                       " => " + escapedRules + ", exp " + expEscapedRules);
1757             }
1758             delete t;
1759 
1760         } else {
1761             // UnicodeSet test
1762             UErrorCode status = U_ZERO_ERROR;
1763             UnicodeString pat(DATA[d+1]);
1764             UnicodeString expToPat(DATA[d+2]);
1765             UnicodeSet set(pat, status);
1766             if (U_FAILURE(status)) {
1767                 errln("FAIL: UnicodeSet ct failed");
1768                 return;
1769             }
1770             // Adjust spacing etc. as necessary.
1771             UnicodeString toPat;
1772             set.toPattern(toPat);
1773             if (expToPat == toPat) {
1774                 logln((UnicodeString)"Ok: " + pat +
1775                       " => " + toPat);
1776             } else {
1777                 errln((UnicodeString)"FAIL: " + pat +
1778                       " => " + prettify(toPat, TRUE) +
1779                       ", exp " + prettify(pat, TRUE));
1780             }
1781         }
1782     }
1783 }
1784 
TestContext()1785 void TransliteratorTest::TestContext() {
1786     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1787     expect("de > x; {d}e > y;",
1788            "de",
1789            "ye",
1790            &pos);
1791 
1792     expect("ab{c} > z;",
1793            "xadabdabcy",
1794            "xadabdabzy");
1795 }
1796 
TestSupplemental()1797 void TransliteratorTest::TestSupplemental() {
1798 
1799     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1800                                 "a > $a; $s > i;"),
1801            CharsToUnicodeString("ab\\U0001030Fx"),
1802            CharsToUnicodeString("\\U00010300bix"));
1803 
1804     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1805                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1806                                 "($a)($b) > $2 $1;"),
1807            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1808            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1809 
1810     // k|ax\\U00010300xm
1811 
1812     // k|a\\U00010400\\U00010300xm
1813     // ky|\\U00010400\\U00010300xm
1814     // ky\\U00010400|\\U00010300xm
1815 
1816     // ky\\U00010400|\\U00010300\\U00010400m
1817     // ky\\U00010400y|\\U00010400m
1818     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1819                                 "$a {x} > | @ \\U00010400;"
1820                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1821            CharsToUnicodeString("kax\\U00010300xm"),
1822            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1823 
1824     expectT("Any-Name",
1825            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1826            "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1827 
1828     expectT("Any-Hex/Unicode",
1829            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1830            "U+10330U+10FF00U+E0061U+00A0");
1831 
1832     expectT("Any-Hex/C",
1833            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1834            "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1835 
1836     expectT("Any-Hex/Perl",
1837            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838            "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1839 
1840     expectT("Any-Hex/Java",
1841            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842            "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1843 
1844     expectT("Any-Hex/XML",
1845            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1847 
1848     expectT("Any-Hex/XML10",
1849            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850            "&#66352;&#1113856;&#917601;&#160;");
1851 
1852     expectT("[\\U000E0000-\\U000E0FFF] Remove",
1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1855 }
1856 
TestQuantifier()1857 void TransliteratorTest::TestQuantifier() {
1858 
1859     // Make sure @ in a quantified anteContext works
1860     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1861            "AAAAAb",
1862            "aaa(aac)");
1863 
1864     // Make sure @ in a quantified postContext works
1865     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1866            "baaaaa",
1867            "caa(aaa)");
1868 
1869     // Make sure @ in a quantified postContext with seg ref works
1870     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1871            "baaaaa",
1872            "baa(aaa)");
1873 
1874     // Make sure @ past ante context doesn't enter ante context
1875     UTransPosition pos = {0, 5, 3, 5};
1876     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1877            "xxxab",
1878            "xxx(ac)",
1879            &pos);
1880 
1881     // Make sure @ past post context doesn't pass limit
1882     UTransPosition pos2 = {0, 4, 0, 2};
1883     expect("{b} a+ > c @@ |; x > y; a > A;",
1884            "baxx",
1885            "caxx",
1886            &pos2);
1887 
1888     // Make sure @ past post context doesn't enter post context
1889     expect("{b} a+ > c @@ |; x > y; a > A;",
1890            "baxx",
1891            "cayy");
1892 
1893     expect("(ab)? c > d;",
1894            "c abc ababc",
1895            "d d abd");
1896 
1897     // NOTE: The (ab)+ when referenced just yields a single "ab",
1898     // not the full sequence of them.  This accords with perl behavior.
1899     expect("(ab)+ {x} > '(' $1 ')';",
1900            "x abx ababxy",
1901            "x ab(ab) abab(ab)y");
1902 
1903     expect("b+ > x;",
1904            "ac abc abbc abbbc",
1905            "ac axc axc axc");
1906 
1907     expect("[abc]+ > x;",
1908            "qac abrc abbcs abtbbc",
1909            "qx xrx xs xtx");
1910 
1911     expect("q{(ab)+} > x;",
1912            "qa qab qaba qababc qaba",
1913            "qa qx qxa qxc qxa");
1914 
1915     expect("q(ab)* > x;",
1916            "qa qab qaba qababc",
1917            "xa x xa xc");
1918 
1919     // NOTE: The (ab)+ when referenced just yields a single "ab",
1920     // not the full sequence of them.  This accords with perl behavior.
1921     expect("q(ab)* > '(' $1 ')';",
1922            "qa qab qaba qababc",
1923            "()a (ab) (ab)a (ab)c");
1924 
1925     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1926     // quoted string
1927     expect("'ab'+ > x;",
1928            "bb ab ababb",
1929            "bb x xb");
1930 
1931     // $foo+ and $foo* -- the quantifier should apply to the entire
1932     // variable reference
1933     expect("$var = ab; $var+ > x;",
1934            "bb ab ababb",
1935            "bb x xb");
1936 }
1937 
1938 class TestTrans : public Transliterator {
1939 public:
TestTrans(const UnicodeString & id)1940     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1941     }
clone(void) const1942     virtual Transliterator* clone(void) const {
1943         return new TestTrans(getID());
1944     }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1945     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1946         UBool /*isIncremental*/) const
1947     {
1948         offsets.start = offsets.limit;
1949     }
1950     virtual UClassID getDynamicClassID() const;
1951     static UClassID U_EXPORT2 getStaticClassID();
1952 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1953 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1954 
1955 /**
1956  * Test Source-Target/Variant.
1957  */
1958 void TransliteratorTest::TestSTV(void) {
1959     int32_t ns = Transliterator::countAvailableSources();
1960     if (ns < 0 || ns > 255) {
1961         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1962         return;
1963     }
1964     int32_t i, j;
1965     for (i=0; i<ns; ++i) {
1966         UnicodeString source;
1967         Transliterator::getAvailableSource(i, source);
1968         logln((UnicodeString)"" + i + ": " + source);
1969         if (source.length() == 0) {
1970             errln("FAIL: empty source");
1971             continue;
1972         }
1973         int32_t nt = Transliterator::countAvailableTargets(source);
1974         if (nt < 0 || nt > 255) {
1975             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1976             continue;
1977         }
1978         for (int32_t j=0; j<nt; ++j) {
1979             UnicodeString target;
1980             Transliterator::getAvailableTarget(j, source, target);
1981             logln((UnicodeString)" " + j + ": " + target);
1982             if (target.length() == 0) {
1983                 errln("FAIL: empty target");
1984                 continue;
1985             }
1986             int32_t nv = Transliterator::countAvailableVariants(source, target);
1987             if (nv < 0 || nv > 255) {
1988                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1989                 continue;
1990             }
1991             for (int32_t k=0; k<nv; ++k) {
1992                 UnicodeString variant;
1993                 Transliterator::getAvailableVariant(k, source, target, variant);
1994                 if (variant.length() == 0) {
1995                     logln((UnicodeString)"  " + k + ": <empty>");
1996                 } else {
1997                     logln((UnicodeString)"  " + k + ": " + variant);
1998                 }
1999             }
2000         }
2001     }
2002 
2003     // Test registration
2004     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2005     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2006     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2007     for (i=0; i<3; ++i) {
2008         Transliterator *t = new TestTrans(IDS[i]);
2009         if (t == 0) {
2010             errln("FAIL: out of memory");
2011             return;
2012         }
2013         if (t->getID() != IDS[i]) {
2014             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2015             delete t;
2016             return;
2017         }
2018         Transliterator::registerInstance(t);
2019         UErrorCode status = U_ZERO_ERROR;
2020         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2021         if (t == NULL) {
2022             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2023                   IDS[i]);
2024         } else {
2025             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2026                   IDS[i]);
2027             delete t;
2028         }
2029         Transliterator::unregister(IDS[i]);
2030         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2031         if (t != NULL) {
2032             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2033                   IDS[i]);
2034             delete t;
2035         }
2036     }
2037 
2038     // Make sure getAvailable API reflects removal
2039     int32_t n = Transliterator::countAvailableIDs();
2040     for (i=0; i<n; ++i) {
2041         UnicodeString id = Transliterator::getAvailableID(i);
2042         for (j=0; j<3; ++j) {
2043             if (id.caseCompare(FULL_IDS[j],0)==0) {
2044                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2045             }
2046         }
2047     }
2048     n = Transliterator::countAvailableTargets("Any");
2049     for (i=0; i<n; ++i) {
2050         UnicodeString t;
2051         Transliterator::getAvailableTarget(i, "Any", t);
2052         if (t.caseCompare(IDS[0],0)==0) {
2053             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2054         }
2055     }
2056     n = Transliterator::countAvailableSources();
2057     for (i=0; i<n; ++i) {
2058         UnicodeString s;
2059         Transliterator::getAvailableSource(i, s);
2060         for (j=0; j<3; ++j) {
2061             if (SOURCES[j] == NULL) continue;
2062             if (s.caseCompare(SOURCES[j],0)==0) {
2063                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2064             }
2065         }
2066     }
2067 }
2068 
2069 /**
2070  * Test inverse of Greek-Latin; Title()
2071  */
TestCompoundInverse(void)2072 void TransliteratorTest::TestCompoundInverse(void) {
2073     UParseError parseError;
2074     UErrorCode status = U_ZERO_ERROR;
2075     Transliterator *t = Transliterator::createInstance
2076         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2077     if (t == 0) {
2078         errln("FAIL: createInstance");
2079         return;
2080     }
2081     UnicodeString exp("(Title);Latin-Greek");
2082     if (t->getID() == exp) {
2083         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2084               t->getID());
2085     } else {
2086         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2087               t->getID() + "\", expected \"" + exp + "\"");
2088     }
2089     delete t;
2090 }
2091 
2092 /**
2093  * Test NFD chaining with RBT
2094  */
TestNFDChainRBT()2095 void TransliteratorTest::TestNFDChainRBT() {
2096     UParseError pe;
2097     UErrorCode ec = U_ZERO_ERROR;
2098     Transliterator* t = Transliterator::createFromRules(
2099                                "TEST", "::NFD; aa > Q; a > q;",
2100                                UTRANS_FORWARD, pe, ec);
2101     if (t == NULL || U_FAILURE(ec)) {
2102         errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2103         return;
2104     }
2105     expect(*t, "aa", "Q");
2106     delete t;
2107 
2108     // TEMPORARY TESTS -- BEING DEBUGGED
2109 //=-    UnicodeString s, s2;
2110 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2111 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2112 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2113 //=-    expect(*t, s, s2);
2114 //=-    delete t;
2115 //=-
2116 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2117 //=-    expect(*t, s2, s);
2118 //=-    delete t;
2119 //=-
2120 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2121 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2122 //=-    expect(*t, s, s);
2123 //=-    delete t;
2124 
2125 //    const char* source[] = {
2126 //        /*
2127 //        "\\u015Br\\u012Bmad",
2128 //        "bhagavadg\\u012Bt\\u0101",
2129 //        "adhy\\u0101ya",
2130 //        "arjuna",
2131 //        "vi\\u1E63\\u0101da",
2132 //        "y\\u014Dga",
2133 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2134 //        "uv\\u0101cr\\u0325",
2135 //        */
2136 //        "rmk\\u1E63\\u0113t",
2137 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2138 //        /*
2139 //        "kuruk\\u1E63\\u0113tr\\u0113",
2140 //        "samav\\u0113t\\u0101",
2141 //        "yuyutsava-\\u1E25",
2142 //        "m\\u0101mak\\u0101-\\u1E25",
2143 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2144 //        "kimakurvata",
2145 //        "san\\u0304java",
2146 //        */
2147 //
2148 //        0
2149 //    };
2150 //    const char* expected[] = {
2151 //        /*
2152 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2153 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2154 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2155 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2156 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2157 //        "\\u092f\\u094b\\u0917",
2158 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2159 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2160 //        */
2161 //        "\\u0927",
2162 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2163 //        /*
2164 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2165 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2166 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2167 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2168 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2169 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2170 //        "\\u0938\\u0902\\u091c\\u0935",
2171 //        */
2172 //        0
2173 //    };
2174 //    UErrorCode status = U_ZERO_ERROR;
2175 //    UParseError parseError;
2176 //    UnicodeString message;
2177 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2178 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2179 //    if(U_FAILURE(status)){
2180 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2181 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2182 //        delete latinToDevToLatin;
2183 //        delete devToLatinToDev;
2184 //        return;
2185 //    }
2186 //    UnicodeString gotResult;
2187 //    for(int i= 0; source[i] != 0; i++){
2188 //        gotResult = source[i];
2189 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2190 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2191 //    }
2192 //    delete latinToDevToLatin;
2193 //    delete devToLatinToDev;
2194 }
2195 
2196 /**
2197  * Inverse of "Null" should be "Null". (J21)
2198  */
TestNullInverse()2199 void TransliteratorTest::TestNullInverse() {
2200     UParseError pe;
2201     UErrorCode ec = U_ZERO_ERROR;
2202     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2203     if (t == 0 || U_FAILURE(ec)) {
2204         errln("FAIL: createInstance");
2205         return;
2206     }
2207     Transliterator *u = t->createInverse(ec);
2208     if (u == 0 || U_FAILURE(ec)) {
2209         errln("FAIL: createInverse");
2210         delete t;
2211         return;
2212     }
2213     if (u->getID() != "Null") {
2214         errln("FAIL: Inverse of Null should be Null");
2215     }
2216     delete t;
2217     delete u;
2218 }
2219 
2220 /**
2221  * Check ID of inverse of alias. (J22)
2222  */
TestAliasInverseID()2223 void TransliteratorTest::TestAliasInverseID() {
2224     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2225     UParseError pe;
2226     UErrorCode ec = U_ZERO_ERROR;
2227     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2228     if (t == 0 || U_FAILURE(ec)) {
2229         errln("FAIL: createInstance");
2230         return;
2231     }
2232     Transliterator *u = t->createInverse(ec);
2233     if (u == 0 || U_FAILURE(ec)) {
2234         errln("FAIL: createInverse");
2235         delete t;
2236         return;
2237     }
2238     UnicodeString exp = "Hangul-Latin";
2239     UnicodeString got = u->getID();
2240     if (got != exp) {
2241         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2242               ", expected " + exp);
2243     }
2244     delete t;
2245     delete u;
2246 }
2247 
2248 /**
2249  * Test IDs of inverses of compound transliterators. (J20)
2250  */
TestCompoundInverseID()2251 void TransliteratorTest::TestCompoundInverseID() {
2252     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2253     UParseError pe;
2254     UErrorCode ec = U_ZERO_ERROR;
2255     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2256     if (t == 0 || U_FAILURE(ec)) {
2257         errln("FAIL: createInstance");
2258         return;
2259     }
2260     Transliterator *u = t->createInverse(ec);
2261     if (u == 0 || U_FAILURE(ec)) {
2262         errln("FAIL: createInverse");
2263         delete t;
2264         return;
2265     }
2266     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2267     UnicodeString got = u->getID();
2268     if (got != exp) {
2269         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2270               ", expected " + exp);
2271     }
2272     delete t;
2273     delete u;
2274 }
2275 
2276 /**
2277  * Test undefined variable.
2278 
2279  */
TestUndefinedVariable()2280 void TransliteratorTest::TestUndefinedVariable() {
2281     UnicodeString rule = "$initial } a <> \\u1161;";
2282     UParseError pe;
2283     UErrorCode ec = U_ZERO_ERROR;
2284     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2285     delete t;
2286     if (U_FAILURE(ec)) {
2287         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2288               u_errorName(ec));
2289         return;
2290     }
2291     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2292           u_errorName(ec));
2293 }
2294 
2295 /**
2296  * Test empty context.
2297  */
TestEmptyContext()2298 void TransliteratorTest::TestEmptyContext() {
2299     expect(" { a } > b;", "xay a ", "xby b ");
2300 }
2301 
2302 /**
2303 * Test compound filter ID syntax
2304 */
TestCompoundFilterID(void)2305 void TransliteratorTest::TestCompoundFilterID(void) {
2306     static const char* DATA[] = {
2307         // Col. 1 = ID or rule set (latter must start with #)
2308 
2309         // = columns > 1 are null if expect col. 1 to be illegal =
2310 
2311         // Col. 2 = direction, "F..." or "R..."
2312         // Col. 3 = source string
2313         // Col. 4 = exp result
2314 
2315         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2316         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2317         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2318         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2319         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2320         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2321         NULL,
2322     };
2323 
2324     for (int32_t i=0; DATA[i]; i+=4) {
2325         UnicodeString id = CharsToUnicodeString(DATA[i]);
2326         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2327             UTRANS_REVERSE : UTRANS_FORWARD;
2328         UnicodeString source;
2329         UnicodeString exp;
2330         if (DATA[i+2] != NULL) {
2331             source = CharsToUnicodeString(DATA[i+2]);
2332             exp = CharsToUnicodeString(DATA[i+3]);
2333         }
2334         UBool expOk = (DATA[i+1] != NULL);
2335         Transliterator* t = NULL;
2336         UParseError pe;
2337         UErrorCode ec = U_ZERO_ERROR;
2338         if (id.charAt(0) == 0x23/*#*/) {
2339             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2340         } else {
2341             t = Transliterator::createInstance(id, direction, pe, ec);
2342         }
2343         UBool ok = (t != NULL && U_SUCCESS(ec));
2344         UnicodeString transID;
2345         if (t!=0) {
2346             transID = t->getID();
2347         }
2348         else {
2349             transID = UnicodeString("NULL", "");
2350         }
2351         if (ok == expOk) {
2352             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2353                   u_errorName(ec));
2354             if (source.length() != 0) {
2355                 expect(*t, source, exp);
2356             }
2357             delete t;
2358         } else {
2359             errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2360                   u_errorName(ec));
2361         }
2362     }
2363 }
2364 
2365 /**
2366  * Test new property set syntax
2367  */
TestPropertySet()2368 void TransliteratorTest::TestPropertySet() {
2369     expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2370     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2371            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2372 }
2373 
2374 /**
2375  * Test various failure points of the new 2.0 engine.
2376  */
TestNewEngine()2377 void TransliteratorTest::TestNewEngine() {
2378     UParseError pe;
2379     UErrorCode ec = U_ZERO_ERROR;
2380     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2381     if (t == 0 || U_FAILURE(ec)) {
2382         errln("FAIL: createInstance Latin-Hiragana");
2383         return;
2384     }
2385     // Katakana should be untouched
2386     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2387            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2388 
2389     delete t;
2390 
2391 #if 1
2392     // This test will only work if Transliterator.ROLLBACK is
2393     // true.  Otherwise, this test will fail, revealing a
2394     // limitation of global filters in incremental mode.
2395     Transliterator *a =
2396         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2397     Transliterator *A =
2398         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2399     if (U_FAILURE(ec)) {
2400         delete a;
2401         delete A;
2402         return;
2403     }
2404 
2405     Transliterator* array[3];
2406     array[0] = a;
2407     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2408     array[2] = A;
2409     if (U_FAILURE(ec)) {
2410         errln("FAIL: createInstance NFD");
2411         delete a;
2412         delete A;
2413         delete array[1];
2414         return;
2415     }
2416 
2417     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2418     if (U_FAILURE(ec)) {
2419         errln("FAIL: UnicodeSet constructor");
2420         delete a;
2421         delete A;
2422         delete array[1];
2423         delete t;
2424         return;
2425     }
2426 
2427     expect(*t, "aAaA", "bAbA");
2428 
2429     assertTrue("countElements", t->countElements() == 3);
2430     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2431     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2432     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2433     assertSuccess("getElement", ec);
2434 
2435     delete a;
2436     delete A;
2437     delete array[1];
2438     delete t;
2439 #endif
2440 
2441     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2442            "a",
2443            "ax");
2444 
2445     UnicodeString gr = CharsToUnicodeString(
2446         "$ddot = \\u0308 ;"
2447         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2448         "$rough = \\u0314 ;"
2449         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2450         "\\u03b1 <> a ;"
2451         "$rough <> h ;");
2452 
2453     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2454 }
2455 
2456 /**
2457  * Test quantified segment behavior.  We want:
2458  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2459  */
TestQuantifiedSegment(void)2460 void TransliteratorTest::TestQuantifiedSegment(void) {
2461     // The normal case
2462     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2463 
2464     // The tricky case; the quantifier is around the segment
2465     expect("([abc])+ > x $1 x;", "cba", "xax");
2466 
2467     // Tricky case in reverse direction
2468     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2469 
2470     // Check post-context segment
2471     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2472 
2473     // Test toRule/toPattern for non-quantified segment.
2474     // Careful with spacing here.
2475     UnicodeString r("([a-c]){q} > x $1 x;");
2476     UParseError pe;
2477     UErrorCode ec = U_ZERO_ERROR;
2478     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2479     if (U_FAILURE(ec)) {
2480         errln("FAIL: createFromRules");
2481         delete t;
2482         return;
2483     }
2484     UnicodeString rr;
2485     t->toRules(rr, TRUE);
2486     if (r != rr) {
2487         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2488     } else {
2489         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2490     }
2491     delete t;
2492 
2493     // Test toRule/toPattern for quantified segment.
2494     // Careful with spacing here.
2495     r = "([a-c])+{q} > x $1 x;";
2496     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2497     if (U_FAILURE(ec)) {
2498         errln("FAIL: createFromRules");
2499         delete t;
2500         return;
2501     }
2502     t->toRules(rr, TRUE);
2503     if (r != rr) {
2504         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2505     } else {
2506         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2507     }
2508     delete t;
2509 }
2510 
2511 //======================================================================
2512 // Ram's tests
2513 //======================================================================
TestDevanagariLatinRT()2514 void TransliteratorTest::TestDevanagariLatinRT(){
2515     const int MAX_LEN= 52;
2516     const char* const source[MAX_LEN] = {
2517         "bh\\u0101rata",
2518         "kra",
2519         "k\\u1E63a",
2520         "khra",
2521         "gra",
2522         "\\u1E45ra",
2523         "cra",
2524         "chra",
2525         "j\\u00F1a",
2526         "jhra",
2527         "\\u00F1ra",
2528         "\\u1E6Dya",
2529         "\\u1E6Dhra",
2530         "\\u1E0Dya",
2531       //"r\\u0323ya", // \u095c is not valid in Devanagari
2532         "\\u1E0Dhya",
2533         "\\u1E5Bhra",
2534         "\\u1E47ra",
2535         "tta",
2536         "thra",
2537         "dda",
2538         "dhra",
2539         "nna",
2540         "pra",
2541         "phra",
2542         "bra",
2543         "bhra",
2544         "mra",
2545         "\\u1E49ra",
2546       //"l\\u0331ra",
2547         "yra",
2548         "\\u1E8Fra",
2549       //"l-",
2550         "vra",
2551         "\\u015Bra",
2552         "\\u1E63ra",
2553         "sra",
2554         "hma",
2555         "\\u1E6D\\u1E6Da",
2556         "\\u1E6D\\u1E6Dha",
2557         "\\u1E6Dh\\u1E6Dha",
2558         "\\u1E0D\\u1E0Da",
2559         "\\u1E0D\\u1E0Dha",
2560         "\\u1E6Dya",
2561         "\\u1E6Dhya",
2562         "\\u1E0Dya",
2563         "\\u1E0Dhya",
2564         // Not roundtrippable --
2565         // \\u0939\\u094d\\u094d\\u092E  - hma
2566         // \\u0939\\u094d\\u092E         - hma
2567         // CharsToUnicodeString("hma"),
2568         "hya",
2569         "\\u015Br\\u0325",
2570         "\\u015Bca",
2571         "\\u0115",
2572         "san\\u0304j\\u012Bb s\\u0113nagupta",
2573         "\\u0101nand vaddir\\u0101ju",
2574         "\\u0101",
2575         "a"
2576     };
2577     const char* const expected[MAX_LEN] = {
2578         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2579         "\\u0915\\u094D\\u0930",          /* kra         */
2580         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2581         "\\u0916\\u094D\\u0930",          /* khra        */
2582         "\\u0917\\u094D\\u0930",          /* gra         */
2583         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2584         "\\u091A\\u094D\\u0930",          /* cra         */
2585         "\\u091B\\u094D\\u0930",          /* chra        */
2586         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2587         "\\u091D\\u094D\\u0930",          /* jhra        */
2588         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2589         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2590         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2591         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2592       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2593         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2594         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2595         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2596         "\\u0924\\u094D\\u0924",          /* tta         */
2597         "\\u0925\\u094D\\u0930",          /* thra        */
2598         "\\u0926\\u094D\\u0926",          /* dda         */
2599         "\\u0927\\u094D\\u0930",          /* dhra        */
2600         "\\u0928\\u094D\\u0928",          /* nna         */
2601         "\\u092A\\u094D\\u0930",          /* pra         */
2602         "\\u092B\\u094D\\u0930",          /* phra        */
2603         "\\u092C\\u094D\\u0930",          /* bra         */
2604         "\\u092D\\u094D\\u0930",          /* bhra        */
2605         "\\u092E\\u094D\\u0930",          /* mra         */
2606         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2607       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2608         "\\u092F\\u094D\\u0930",          /* yra         */
2609         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2610       //"l-",
2611         "\\u0935\\u094D\\u0930",          /* vra         */
2612         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2613         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2614         "\\u0938\\u094D\\u0930",          /* sra         */
2615         "\\u0939\\u094d\\u092E",          /* hma         */
2616         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2617         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2618         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2619         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2620         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2621         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2622         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2623         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2624         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2625      // "hma",                         /* hma         */
2626         "\\u0939\\u094D\\u092F",          /* hya         */
2627         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2628         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2629         "\\u090d",                        /* e\\u0306    */
2630         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2631         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2632         "\\u0906",
2633         "\\u0905",
2634     };
2635     UErrorCode status = U_ZERO_ERROR;
2636     UParseError parseError;
2637     UnicodeString message;
2638     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2639     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2640     if(U_FAILURE(status)){
2641         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2642         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2643         return;
2644     }
2645     UnicodeString gotResult;
2646     for(int i= 0; i<MAX_LEN; i++){
2647         gotResult = source[i];
2648         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2649         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2650     }
2651     delete latinToDev;
2652     delete devToLatin;
2653 }
2654 
TestTeluguLatinRT()2655 void TransliteratorTest::TestTeluguLatinRT(){
2656     const int MAX_LEN=10;
2657     const char* const source[MAX_LEN] = {
2658         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2659         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2660         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2661         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2662         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2663         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2664         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2665         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2666         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2667         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2668     };
2669 
2670     const char* const expected[MAX_LEN] = {
2671         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2672         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2673         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2674         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2675         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2676         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2677         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2679         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2680         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2681     };
2682 
2683     UErrorCode status = U_ZERO_ERROR;
2684     UParseError parseError;
2685     UnicodeString message;
2686     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2687     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2688     if(U_FAILURE(status)){
2689         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2690         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2691         return;
2692     }
2693     UnicodeString gotResult;
2694     for(int i= 0; i<MAX_LEN; i++){
2695         gotResult = source[i];
2696         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2697         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2698     }
2699     delete latinToDev;
2700     delete devToLatin;
2701 }
2702 
TestSanskritLatinRT()2703 void TransliteratorTest::TestSanskritLatinRT(){
2704     const int MAX_LEN =16;
2705     const char* const source[MAX_LEN] = {
2706         "rmk\\u1E63\\u0113t",
2707         "\\u015Br\\u012Bmad",
2708         "bhagavadg\\u012Bt\\u0101",
2709         "adhy\\u0101ya",
2710         "arjuna",
2711         "vi\\u1E63\\u0101da",
2712         "y\\u014Dga",
2713         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2714         "uv\\u0101cr\\u0325",
2715         "dharmak\\u1E63\\u0113tr\\u0113",
2716         "kuruk\\u1E63\\u0113tr\\u0113",
2717         "samav\\u0113t\\u0101",
2718         "yuyutsava\\u1E25",
2719         "m\\u0101mak\\u0101\\u1E25",
2720     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2721         "kimakurvata",
2722         "san\\u0304java",
2723     };
2724     const char* const expected[MAX_LEN] = {
2725         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2726         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2727         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2728         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2729         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2730         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2731         "\\u092f\\u094b\\u0917",
2732         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2733         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2734         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2735         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2736         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2737         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2738         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2739     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2740         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2741         "\\u0938\\u0902\\u091c\\u0935",
2742     };
2743     UErrorCode status = U_ZERO_ERROR;
2744     UParseError parseError;
2745     UnicodeString message;
2746     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2747     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2748     if(U_FAILURE(status)){
2749         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2750         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2751         return;
2752     }
2753     UnicodeString gotResult;
2754     for(int i= 0; i<MAX_LEN; i++){
2755         gotResult = source[i];
2756         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2757         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2758     }
2759     delete latinToDev;
2760     delete devToLatin;
2761 }
2762 
2763 
TestCompoundLatinRT()2764 void TransliteratorTest::TestCompoundLatinRT(){
2765     const char* const source[] = {
2766         "rmk\\u1E63\\u0113t",
2767         "\\u015Br\\u012Bmad",
2768         "bhagavadg\\u012Bt\\u0101",
2769         "adhy\\u0101ya",
2770         "arjuna",
2771         "vi\\u1E63\\u0101da",
2772         "y\\u014Dga",
2773         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2774         "uv\\u0101cr\\u0325",
2775         "dharmak\\u1E63\\u0113tr\\u0113",
2776         "kuruk\\u1E63\\u0113tr\\u0113",
2777         "samav\\u0113t\\u0101",
2778         "yuyutsava\\u1E25",
2779         "m\\u0101mak\\u0101\\u1E25",
2780      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2781         "kimakurvata",
2782         "san\\u0304java"
2783     };
2784     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2785     const char* const expected[MAX_LEN] = {
2786         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2787         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2788         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2789         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2790         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2791         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2792         "\\u092f\\u094b\\u0917",
2793         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2794         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2795         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2796         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2797         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2798         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2799         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2800     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2801         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2802         "\\u0938\\u0902\\u091c\\u0935"
2803     };
2804     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2805         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2806         return;
2807     }
2808 
2809     UErrorCode status = U_ZERO_ERROR;
2810     UParseError parseError;
2811     UnicodeString message;
2812     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2813     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2814     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2815     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2816 
2817     if(U_FAILURE(status)){
2818         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2819         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2820         return;
2821     }
2822     UnicodeString gotResult;
2823     for(int i= 0; i<MAX_LEN; i++){
2824         gotResult = source[i];
2825         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2826         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2827         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2828 
2829     }
2830     delete(latinToDevToLatin);
2831     delete(devToLatinToDev);
2832     delete(devToTelToDev);
2833     delete(latinToTelToLatin);
2834 }
2835 
2836 /**
2837  * Test Gurmukhi-Devanagari Tippi and Bindi
2838  */
TestGurmukhiDevanagari()2839 void TransliteratorTest::TestGurmukhiDevanagari(){
2840     // the rule says:
2841     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2842     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2843     UErrorCode status = U_ZERO_ERROR;
2844     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
2845     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
2846     UParseError parseError;
2847 
2848     UnicodeSetIterator vIter(vowel);
2849     UnicodeSetIterator nvIter(non_vowel);
2850     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2851     if(U_FAILURE(status)) {
2852       errln("Error creating transliterator %s", u_errorName(status));
2853       delete trans;
2854       return;
2855     }
2856     UnicodeString src (" \\u0902");
2857     UnicodeString expected(" \\u0A02");
2858     src = src.unescape();
2859     expected= expected.unescape();
2860 
2861     while(vIter.next()){
2862         src.setCharAt(0,(UChar) vIter.getCodepoint());
2863         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2864         expect(*trans,src,expected);
2865     }
2866 
2867     expected.setCharAt(1,0x0A70);
2868     while(nvIter.next()){
2869         //src.setCharAt(0,(char) nvIter.codepoint);
2870         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2871         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2872         expect(*trans,src,expected);
2873     }
2874     delete trans;
2875 }
2876 /**
2877  * Test instantiation from a locale.
2878  */
TestLocaleInstantiation(void)2879 void TransliteratorTest::TestLocaleInstantiation(void) {
2880     UParseError pe;
2881     UErrorCode ec = U_ZERO_ERROR;
2882     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2883     if (U_FAILURE(ec)) {
2884         errln("FAIL: createInstance(ru_RU-Latin)");
2885         delete t;
2886         return;
2887     }
2888     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2889     delete t;
2890 
2891     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2892     if (U_FAILURE(ec)) {
2893         errln("FAIL: createInstance(en-el)");
2894         delete t;
2895         return;
2896     }
2897     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2898     delete t;
2899 }
2900 
2901 /**
2902  * Test title case handling of accent (should ignore accents)
2903  */
TestTitleAccents(void)2904 void TransliteratorTest::TestTitleAccents(void) {
2905     UParseError pe;
2906     UErrorCode ec = U_ZERO_ERROR;
2907     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2908     if (U_FAILURE(ec)) {
2909         errln("FAIL: createInstance(Title)");
2910         delete t;
2911         return;
2912     }
2913     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2914     delete t;
2915 }
2916 
2917 /**
2918  * Basic test of a locale resource based rule.
2919  */
TestLocaleResource()2920 void TransliteratorTest::TestLocaleResource() {
2921     const char* DATA[] = {
2922         // id                    from               to
2923         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2924         "Latin-el",              "b",               "\\u03bc\\u03c0",
2925         "Latin-Greek",           "b",               "\\u03B2",
2926         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2927         "el-Latin",              "\\u03B2",         "v",
2928         "Greek-Latin",           "\\u03B2",         "b",
2929     };
2930     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2931     for (int32_t i=0; i<DATA_length; i+=3) {
2932         UParseError pe;
2933         UErrorCode ec = U_ZERO_ERROR;
2934         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2935         if (U_FAILURE(ec)) {
2936             errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2937             delete t;
2938             continue;
2939         }
2940         expect(*t, CharsToUnicodeString(DATA[i+1]),
2941                CharsToUnicodeString(DATA[i+2]));
2942         delete t;
2943     }
2944 }
2945 
2946 /**
2947  * Make sure parse errors reference the right line.
2948  */
TestParseError()2949 void TransliteratorTest::TestParseError() {
2950     static const char* rule =
2951         "a > b;\n"
2952         "# more stuff\n"
2953         "d << b;";
2954     UErrorCode ec = U_ZERO_ERROR;
2955     UParseError pe;
2956     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2957     delete t;
2958     if (U_FAILURE(ec)) {
2959         UnicodeString err(pe.preContext);
2960         err.append((UChar)124/*|*/).append(pe.postContext);
2961         if (err.indexOf("d << b") >= 0) {
2962             logln("Ok: " + err);
2963         } else {
2964             errln("FAIL: " + err);
2965         }
2966     }
2967     else {
2968         errln("FAIL: no syntax error");
2969     }
2970     static const char* maskingRule =
2971         "a>x;\n"
2972         "# more stuff\n"
2973         "ab>y;";
2974     ec = U_ZERO_ERROR;
2975     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2976     if (ec != U_RULE_MASK_ERROR) {
2977         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2978     }
2979     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2980         errln("FAIL: did not get expected precontext");
2981     }
2982     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2983         errln("FAIL: did not get expected postcontext");
2984     }
2985 }
2986 
2987 /**
2988  * Make sure sets on output are disallowed.
2989  */
TestOutputSet()2990 void TransliteratorTest::TestOutputSet() {
2991     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2992     UErrorCode ec = U_ZERO_ERROR;
2993     UParseError pe;
2994     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2995     delete t;
2996     if (U_FAILURE(ec)) {
2997         UnicodeString err(pe.preContext);
2998         err.append((UChar)124/*|*/).append(pe.postContext);
2999         logln("Ok: " + err);
3000         return;
3001     }
3002     errln("FAIL: No syntax error");
3003 }
3004 
3005 /**
3006  * Test the use variable range pragma, making sure that use of
3007  * variable range characters is detected and flagged as an error.
3008  */
TestVariableRange()3009 void TransliteratorTest::TestVariableRange() {
3010     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3011     UErrorCode ec = U_ZERO_ERROR;
3012     UParseError pe;
3013     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3014     delete t;
3015     if (U_FAILURE(ec)) {
3016         UnicodeString err(pe.preContext);
3017         err.append((UChar)124/*|*/).append(pe.postContext);
3018         logln("Ok: " + err);
3019         return;
3020     }
3021     errln("FAIL: No syntax error");
3022 }
3023 
3024 /**
3025  * Test invalid post context error handling
3026  */
TestInvalidPostContext()3027 void TransliteratorTest::TestInvalidPostContext() {
3028     UnicodeString rule = "a}b{c>d;";
3029     UErrorCode ec = U_ZERO_ERROR;
3030     UParseError pe;
3031     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3032     delete t;
3033     if (U_FAILURE(ec)) {
3034         UnicodeString err(pe.preContext);
3035         err.append((UChar)124/*|*/).append(pe.postContext);
3036         if (err.indexOf("a}b{c") >= 0) {
3037             logln("Ok: " + err);
3038         } else {
3039             errln("FAIL: " + err);
3040         }
3041         return;
3042     }
3043     errln("FAIL: No syntax error");
3044 }
3045 
3046 /**
3047  * Test ID form variants
3048  */
TestIDForms()3049 void TransliteratorTest::TestIDForms() {
3050     const char* DATA[] = {
3051         "NFC", NULL, "NFD",
3052         "nfd", NULL, "NFC", // make sure case is ignored
3053         "Any-NFKD", NULL, "Any-NFKC",
3054         "Null", NULL, "Null",
3055         "-nfkc", "nfkc", "NFKD",
3056         "-nfkc/", "nfkc", "NFKD",
3057         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3058         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3059         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3060         "Source-", NULL, NULL,
3061         "Source/Variant-", NULL, NULL,
3062         "Source-/Variant", NULL, NULL,
3063         "/Variant", NULL, NULL,
3064         "/Variant-", NULL, NULL,
3065         "-/Variant", NULL, NULL,
3066         "-/", NULL, NULL,
3067         "-", NULL, NULL,
3068         "/", NULL, NULL,
3069     };
3070     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3071 
3072     for (int32_t i=0; i<DATA_length; i+=3) {
3073         const char* ID = DATA[i];
3074         const char* expID = DATA[i+1];
3075         const char* expInvID = DATA[i+2];
3076         UBool expValid = (expInvID != NULL);
3077         if (expID == NULL) {
3078             expID = ID;
3079         }
3080         UParseError pe;
3081         UErrorCode ec = U_ZERO_ERROR;
3082         Transliterator *t =
3083             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3084         if (U_FAILURE(ec)) {
3085             if (!expValid) {
3086                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3087             } else {
3088                 errln((UnicodeString)"FAIL: Couldn't create " + ID);
3089             }
3090             delete t;
3091             continue;
3092         }
3093         Transliterator *u = t->createInverse(ec);
3094         if (U_FAILURE(ec)) {
3095             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3096             delete t;
3097             delete u;
3098             continue;
3099         }
3100         if (t->getID() == expID &&
3101             u->getID() == expInvID) {
3102             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3103         } else {
3104             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3105                   t->getID() + " x getInverse() => " + u->getID() +
3106                   ", expected " + expInvID);
3107         }
3108         delete t;
3109         delete u;
3110     }
3111 }
3112 
3113 static const UChar SPACE[]   = {32,0};
3114 static const UChar NEWLINE[] = {10,0};
3115 static const UChar RETURN[]  = {13,0};
3116 static const UChar EMPTY[]   = {0};
3117 
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3118 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3119                                     const UnicodeString& testRulesForward) {
3120     UnicodeString rules2; t2.toRules(rules2, TRUE);
3121     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3122     rules2.findAndReplace(SPACE, EMPTY);
3123     rules2.findAndReplace(NEWLINE, EMPTY);
3124     rules2.findAndReplace(RETURN, EMPTY);
3125 
3126     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3127 
3128     if (rules2 != testRules) {
3129         errln(label);
3130         logln((UnicodeString)"GENERATED RULES: " + rules2);
3131         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3132     }
3133 }
3134 
3135 /**
3136  * Mark's toRules test.
3137  */
TestToRulesMark()3138 void TransliteratorTest::TestToRulesMark() {
3139     const char* testRules =
3140         "::[[:Latin:][:Mark:]];"
3141         "::NFKD (NFC);"
3142         "::Lower (Lower);"
3143         "a <> \\u03B1;" // alpha
3144         "::NFKC (NFD);"
3145         "::Upper (Lower);"
3146         "::Lower ();"
3147         "::([[:Greek:][:Mark:]]);"
3148         ;
3149     const char* testRulesForward =
3150         "::[[:Latin:][:Mark:]];"
3151         "::NFKD(NFC);"
3152         "::Lower(Lower);"
3153         "a > \\u03B1;"
3154         "::NFKC(NFD);"
3155         "::Upper (Lower);"
3156         "::Lower ();"
3157         ;
3158     const char* testRulesBackward =
3159         "::[[:Greek:][:Mark:]];"
3160         "::Lower (Upper);"
3161         "::NFD(NFKC);"
3162         "\\u03B1 > a;"
3163         "::Lower(Lower);"
3164         "::NFC(NFKD);"
3165         ;
3166     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3167     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3168 
3169     UParseError pe;
3170     UErrorCode ec = U_ZERO_ERROR;
3171     Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
3172     Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
3173 
3174     if (U_FAILURE(ec)) {
3175         delete t2;
3176         delete t3;
3177         errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3178         return;
3179     }
3180 
3181     expect(*t2, source, target);
3182     expect(*t3, target, source);
3183 
3184     checkRules("Failed toRules FORWARD", *t2, testRulesForward);
3185     checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
3186 
3187     delete t2;
3188     delete t3;
3189 }
3190 
3191 /**
3192  * Test Escape and Unescape transliterators.
3193  */
TestEscape()3194 void TransliteratorTest::TestEscape() {
3195     UParseError pe;
3196     UErrorCode ec;
3197     Transliterator *t;
3198 
3199     ec = U_ZERO_ERROR;
3200     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3201     if (U_FAILURE(ec)) {
3202         errln((UnicodeString)"FAIL: createInstance");
3203     } else {
3204         expect(*t,
3205                "\\x{40}\\U00000031&#x32;&#81;",
3206                "@12Q");
3207     }
3208     delete t;
3209 
3210     ec = U_ZERO_ERROR;
3211     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3212     if (U_FAILURE(ec)) {
3213         errln((UnicodeString)"FAIL: createInstance");
3214     } else {
3215         expect(*t,
3216                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3217                "\\u0041\\U0010BEEF\\uFEED");
3218     }
3219     delete t;
3220 
3221     ec = U_ZERO_ERROR;
3222     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3223     if (U_FAILURE(ec)) {
3224         errln((UnicodeString)"FAIL: createInstance");
3225     } else {
3226         expect(*t,
3227                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3228                "\\u0041\\uDBEF\\uDEEF\\uFEED");
3229     }
3230     delete t;
3231 
3232     ec = U_ZERO_ERROR;
3233     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3234     if (U_FAILURE(ec)) {
3235         errln((UnicodeString)"FAIL: createInstance");
3236     } else {
3237         expect(*t,
3238                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3239                "\\x{41}\\x{10BEEF}\\x{FEED}");
3240     }
3241     delete t;
3242 }
3243 
3244 
TestAnchorMasking()3245 void TransliteratorTest::TestAnchorMasking(){
3246     UnicodeString rule ("^a > Q; a > q;");
3247     UErrorCode status= U_ZERO_ERROR;
3248     UParseError parseError;
3249 
3250     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3251     if(U_FAILURE(status)){
3252         errln(UnicodeString("FAIL: ") + "ID" +
3253               ".createFromRules() => bad rules" +
3254               /*", parse error " + parseError.code +*/
3255               ", line " + parseError.line +
3256               ", offset " + parseError.offset +
3257               ", context " + prettify(parseError.preContext, TRUE) +
3258               ", rules: " + prettify(rule, TRUE));
3259     }
3260     delete t;
3261 }
3262 
3263 /**
3264  * Make sure display names of variants look reasonable.
3265  */
TestDisplayName()3266 void TransliteratorTest::TestDisplayName() {
3267 #if UCONFIG_NO_FORMATTING
3268     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3269     return;
3270 #else
3271     static const char* DATA[] = {
3272         // ID, forward name, reverse name
3273         // Update the text as necessary -- the important thing is
3274         // not the text itself, but how various cases are handled.
3275 
3276         // Basic test
3277         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3278 
3279         // Variants
3280         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3281 
3282         // Target-only IDs
3283         "NFC", "Any to NFC", "Any to NFD",
3284     };
3285 
3286     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3287 
3288     Locale US("en", "US");
3289 
3290     for (int32_t i=0; i<DATA_length; i+=3) {
3291         UnicodeString name;
3292         Transliterator::getDisplayName(DATA[i], US, name);
3293         if (name != DATA[i+1]) {
3294             errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3295                   name + ", expected " + DATA[i+1]);
3296         } else {
3297             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3298         }
3299         UErrorCode ec = U_ZERO_ERROR;
3300         UParseError pe;
3301         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3302         if (U_FAILURE(ec)) {
3303             delete t;
3304             errln("FAIL: createInstance failed");
3305             continue;
3306         }
3307         name = Transliterator::getDisplayName(t->getID(), US, name);
3308         if (name != DATA[i+2]) {
3309             errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3310                   name + ", expected " + DATA[i+2]);
3311         } else {
3312             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3313         }
3314         delete t;
3315     }
3316 #endif
3317 }
3318 
TestSpecialCases(void)3319 void TransliteratorTest::TestSpecialCases(void) {
3320     const UnicodeString registerRules[] = {
3321         "Any-Dev1", "x > X; y > Y;",
3322         "Any-Dev2", "XY > Z",
3323         "Greek-Latin/FAKE",
3324             CharsToUnicodeString
3325             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3326         "" // END MARKER
3327     };
3328 
3329     const UnicodeString testCases[] = {
3330         // NORMALIZATION
3331         // should add more test cases
3332         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3334         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3335         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3336 
3337         // mp -> b BUG
3338         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3339         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3340 
3341         // check for devanagari bug
3342         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3343 
3344         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3345         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3346                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3347 
3348         //TODO: enable this test once Titlecase works right
3349         /*
3350         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352                  */
3353         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3355         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3357 
3358         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3359         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3360 
3361          // FORMS OF S
3362         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3363                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3364         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3365                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3366         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3367                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3368         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3369                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3370         // Tatiana bug
3371         // Upper: TAT\\u02B9\\u00C2NA
3372         // Lower: tat\\u02B9\\u00E2na
3373         // Title: Tat\\u02B9\\u00E2na
3374         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3375                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3376         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3377                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3378         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3379                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3380 
3381         "" // END MARKER
3382     };
3383 
3384     UParseError pos;
3385     int32_t i;
3386     for (i = 0; registerRules[i].length()!=0; i+=2) {
3387         UErrorCode status = U_ZERO_ERROR;
3388 
3389         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3390             registerRules[i+1], UTRANS_FORWARD, pos, status);
3391         if (U_FAILURE(status)) {
3392             errln("Fails: Unable to create the transliterator from rules.");
3393         } else {
3394             Transliterator::registerInstance(t);
3395         }
3396     }
3397     for (i = 0; testCases[i].length()!=0; i+=3) {
3398         UErrorCode ec = U_ZERO_ERROR;
3399         UParseError pe;
3400         const UnicodeString& name = testCases[i];
3401         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3402         if (U_FAILURE(ec)) {
3403             errln((UnicodeString)"FAIL: Couldn't create " + name);
3404             delete t;
3405             continue;
3406         }
3407         const UnicodeString& id = t->getID();
3408         const UnicodeString& source = testCases[i+1];
3409         UnicodeString target;
3410 
3411         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3412 
3413         if (testCases[i+2].length() > 0) {
3414             target = testCases[i+2];
3415         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3416             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3417         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3418             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3419         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3420             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3421         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3422             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3423         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3424             target = source;
3425             target.toLower(Locale::getUS());
3426         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3427             target = source;
3428             target.toUpper(Locale::getUS());
3429         }
3430         if (U_FAILURE(ec)) {
3431             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3432             continue;
3433         }
3434 
3435         expect(*t, source, target);
3436         delete t;
3437     }
3438     for (i = 0; registerRules[i].length()!=0; i+=2) {
3439         Transliterator::unregister(registerRules[i]);
3440     }
3441 }
3442 
Char32ToEscapedChars(UChar32 ch,char * buffer)3443 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3444     if (ch <= 0xFFFF) {
3445         sprintf(buffer, "\\u%04x", (int)ch);
3446     } else {
3447         sprintf(buffer, "\\U%08x", (int)ch);
3448     }
3449     return buffer;
3450 }
3451 
TestSurrogateCasing(void)3452 void TransliteratorTest::TestSurrogateCasing (void) {
3453     // check that casing handles surrogates
3454     // titlecase is currently defective
3455     char buffer[20];
3456     UChar buffer2[20];
3457     UChar32 dee;
3458     UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3459     UnicodeString DEE(u_totitle(dee));
3460     if (DEE != DESERET_DEE) {
3461         err("Fails titlecase of surrogates");
3462         err(Char32ToEscapedChars(dee, buffer));
3463         err(", ");
3464         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3465     }
3466 
3467     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3468     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3469     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3470     UErrorCode status= U_ZERO_ERROR;
3471 
3472     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3473     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3474         errln("Fails: Can't uppercase surrogates.");
3475     }
3476 
3477     status= U_ZERO_ERROR;
3478     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3480         errln("Fails: Can't lowercase surrogates.");
3481     }
3482 }
3483 
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3484 static void _trans(Transliterator& t, const UnicodeString& src,
3485                    UnicodeString& result) {
3486     result = src;
3487     t.transliterate(result);
3488 }
3489 
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3490 static void _trans(const UnicodeString& id, const UnicodeString& src,
3491                    UnicodeString& result, UErrorCode ec) {
3492     UParseError pe;
3493     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3494     if (U_SUCCESS(ec)) {
3495         _trans(*t, src, result);
3496     }
3497     delete t;
3498 }
3499 
/**
 * Look up source in a flat { key, value, key, value, ... } array.
 *
 * Keys are compared with default case folding.  The array must end with an
 * empty string in a key position.
 *
 * @param source key to search for
 * @param pairs  key/value array terminated by an empty key
 * @return the value paired with the first matching key, or an empty string
 *         if no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    const UnicodeString* entry = pairs;
    while (entry->length() > 0) {
        if (source.caseCompare(*entry, U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
        entry += 2;     // step over the value to the next key
    }
    return UnicodeString();
}
3510 
3511 // Check to see that incremental gets at least part way through a reasonable string.
3512 
TestIncrementalProgress(void)3513 void TransliteratorTest::TestIncrementalProgress(void) {
3514     UErrorCode ec = U_ZERO_ERROR;
3515     UnicodeString latinTest = "The Quick Brown Fox.";
3516     UnicodeString devaTest;
3517     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3518     UnicodeString kataTest;
3519     _trans("Latin-Katakana", latinTest, kataTest, ec);
3520     if (U_FAILURE(ec)) {
3521         errln("FAIL: Internal error");
3522         return;
3523     }
3524     const UnicodeString tests[] = {
3525         "Any", latinTest,
3526         "Latin", latinTest,
3527         "Halfwidth", latinTest,
3528         "Devanagari", devaTest,
3529         "Katakana", kataTest,
3530         "" // END MARKER
3531     };
3532 
3533     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3534     int32_t i = 0, j=0, k=0;
3535     int32_t sources = Transliterator::countAvailableSources();
3536     for (i = 0; i < sources; i++) {
3537         UnicodeString source;
3538         Transliterator::getAvailableSource(i, source);
3539         UnicodeString test = _findMatch(source, tests);
3540         if (test.length() == 0) {
3541             logln((UnicodeString)"Skipping " + source + "-X");
3542             continue;
3543         }
3544         int32_t targets = Transliterator::countAvailableTargets(source);
3545         for (j = 0; j < targets; j++) {
3546             UnicodeString target;
3547             Transliterator::getAvailableTarget(j, source, target);
3548             int32_t variants = Transliterator::countAvailableVariants(source, target);
3549             for (k =0; k< variants; k++) {
3550                 UnicodeString variant;
3551                 UParseError err;
3552                 UErrorCode status = U_ZERO_ERROR;
3553 
3554                 Transliterator::getAvailableVariant(k, source, target, variant);
3555                 UnicodeString id = source + "-" + target + "/" + variant;
3556 
3557                 if(id.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_39)){
3558                     /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
3559                     continue;
3560                 }
3561                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3562                 if (U_FAILURE(status)) {
3563                     errln((UnicodeString)"FAIL: Could not create " + id);
3564                     delete t;
3565                     continue;
3566                 }
3567                 status = U_ZERO_ERROR;
3568                 CheckIncrementalAux(t, test);
3569 
3570                 UnicodeString rev;
3571                 _trans(*t, test, rev);
3572                 Transliterator *inv = t->createInverse(status);
3573                 if (U_FAILURE(status)) {
3574                     errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3575                     delete t;
3576                     delete inv;
3577                     continue;
3578                 }
3579                 CheckIncrementalAux(inv, rev);
3580                 delete t;
3581                 delete inv;
3582             }
3583         }
3584     }
3585 }
3586 
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3587 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3588                                                       const UnicodeString& input) {
3589     UErrorCode ec = U_ZERO_ERROR;
3590     UTransPosition pos;
3591     UnicodeString test = input;
3592 
3593     pos.contextStart = 0;
3594     pos.contextLimit = input.length();
3595     pos.start = 0;
3596     pos.limit = input.length();
3597 
3598     t->transliterate(test, pos, ec);
3599     if (U_FAILURE(ec)) {
3600         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3601         return;
3602     }
3603     UBool gotError = FALSE;
3604 
3605     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3606 
3607     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3608         errln((UnicodeString)"No Progress, " +
3609               t->getID() + ": " + formatInput(test, input, pos));
3610         gotError = TRUE;
3611     } else {
3612         logln((UnicodeString)"PASS Progress, " +
3613               t->getID() + ": " + formatInput(test, input, pos));
3614     }
3615     t->finishTransliteration(test, pos);
3616     if (pos.start != pos.limit) {
3617         errln((UnicodeString)"Incomplete, " +
3618               t->getID() + ": " + formatInput(test, input, pos));
3619         gotError = TRUE;
3620     }
3621 }
3622 
TestFunction()3623 void TransliteratorTest::TestFunction() {
3624     // Careful with spacing and ';' here:  Phrase this exactly
3625     // as toRules() is going to return it.  If toRules() changes
3626     // with regard to spacing or ';', then adjust this string.
3627     UnicodeString rule =
3628         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3629 
3630     UParseError pe;
3631     UErrorCode ec = U_ZERO_ERROR;
3632     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3633     if (t == NULL) {
3634         errln("FAIL: createFromRules failed");
3635         return;
3636     }
3637 
3638     UnicodeString r;
3639     t->toRules(r, TRUE);
3640     if (r == rule) {
3641         logln((UnicodeString)"OK: toRules() => " + r);
3642     } else {
3643         errln((UnicodeString)"FAIL: toRules() => " + r +
3644               ", expected " + rule);
3645     }
3646 
3647     expect(*t, "The Quick Brown Fox",
3648            "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3649 
3650     delete t;
3651 }
3652 
TestInvalidBackRef(void)3653 void TransliteratorTest::TestInvalidBackRef(void) {
3654     UnicodeString rule =  ". > $1;";
3655     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3656     UParseError pe;
3657     UErrorCode ec = U_ZERO_ERROR;
3658     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3659     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3660 
3661     if (t != NULL) {
3662         errln("FAIL: createFromRules should have returned NULL");
3663         delete t;
3664     }
3665 
3666     if (t2 != NULL) {
3667         errln("FAIL: createFromRules should have returned NULL");
3668         delete t2;
3669     }
3670 
3671     if (U_SUCCESS(ec)) {
3672         errln("FAIL: Ok: . > $1; => no error");
3673     } else {
3674         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3675     }
3676 }
3677 
TestMulticharStringSet()3678 void TransliteratorTest::TestMulticharStringSet() {
3679     // Basic testing
3680     const char* rule =
3681         "       [{aa}]       > x;"
3682         "         a          > y;"
3683         "       [b{bc}]      > z;"
3684         "[{gd}] { e          > q;"
3685         "         e } [{fg}] > r;" ;
3686 
3687     UParseError pe;
3688     UErrorCode ec = U_ZERO_ERROR;
3689     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3690     if (t == NULL || U_FAILURE(ec)) {
3691         delete t;
3692         errln("FAIL: createFromRules failed");
3693         return;
3694     }
3695 
3696     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3697            "y x yz z d gd de gdq gdqfg ddrfg");
3698     delete t;
3699 
3700     // Overlapped string test.  Make sure that when multiple
3701     // strings can match that the longest one is matched.
3702     rule =
3703         "    [a {ab} {abc}]    > x;"
3704         "           b          > y;"
3705         "           c          > z;"
3706         " q [t {st} {rst}] { e > p;" ;
3707 
3708     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3709     if (t == NULL || U_FAILURE(ec)) {
3710         delete t;
3711         errln("FAIL: createFromRules failed");
3712         return;
3713     }
3714 
3715     expect(*t, "a ab abc qte qste qrste",
3716            "x x x qtp qstp qrstp");
3717     delete t;
3718 }
3719 
3720 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3721 // BEGIN TestUserFunction support factory
3722 
3723 Transliterator* _TUFF[4];
3724 UnicodeString* _TUFID[4];
3725 
_TUFFactory(const UnicodeString &,Transliterator::Token context)3726 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3727                                    Transliterator::Token context) {
3728     return _TUFF[context.integer]->clone();
3729 }
3730 
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3731 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3732     _TUFF[n] = t;
3733     _TUFID[n] = new UnicodeString(ID);
3734     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3735 }
3736 
_TUFUnreg(int32_t n)3737 static void _TUFUnreg(int32_t n) {
3738     if (_TUFF[n] != NULL) {
3739         Transliterator::unregister(*_TUFID[n]);
3740         delete _TUFF[n];
3741         delete _TUFID[n];
3742     }
3743 }
3744 
3745 // END TestUserFunction support factory
3746 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3747 
3748 /**
3749  * Test that user-registered transliterators can be used under function
3750  * syntax.
3751  */
TestUserFunction()3752 void TransliteratorTest::TestUserFunction() {
3753 
3754     Transliterator* t;
3755     UParseError pe;
3756     UErrorCode ec = U_ZERO_ERROR;
3757 
3758     // Setup our factory
3759     int32_t i;
3760     for (i=0; i<4; ++i) {
3761         _TUFF[i] = NULL;
3762     }
3763 
3764     // There's no need to register inverses if we don't use them
3765     t = Transliterator::createFromRules("gif",
3766                                         "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
3767                                         UTRANS_FORWARD, pe, ec);
3768     if (t == NULL || U_FAILURE(ec)) {
3769         errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3770         return;
3771     }
3772     _TUFReg("Any-gif", t, 0);
3773 
3774     t = Transliterator::createFromRules("RemoveCurly",
3775                                         "[\\{\\}] > ; '\\N' > ;",
3776                                         UTRANS_FORWARD, pe, ec);
3777     if (t == NULL || U_FAILURE(ec)) {
3778         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3779         goto FAIL;
3780     }
3781     expect(*t, "\\N{name}", "name");
3782     _TUFReg("Any-RemoveCurly", t, 1);
3783 
3784     logln("Trying &hex");
3785     t = Transliterator::createFromRules("hex2",
3786                                         "(.) > &hex($1);",
3787                                         UTRANS_FORWARD, pe, ec);
3788     if (t == NULL || U_FAILURE(ec)) {
3789         errln("FAIL: createFromRules");
3790         goto FAIL;
3791     }
3792     logln("Registering");
3793     _TUFReg("Any-hex2", t, 2);
3794     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3795     if (t == NULL || U_FAILURE(ec)) {
3796         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3797         goto FAIL;
3798     }
3799     expect(*t, "abc", "\\u0061\\u0062\\u0063");
3800     delete t;
3801 
3802     logln("Trying &gif");
3803     t = Transliterator::createFromRules("gif2",
3804                                         "(.) > &Gif(&Hex2($1));",
3805                                         UTRANS_FORWARD, pe, ec);
3806     if (t == NULL || U_FAILURE(ec)) {
3807         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3808         goto FAIL;
3809     }
3810     logln("Registering");
3811     _TUFReg("Any-gif2", t, 3);
3812     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3813     if (t == NULL || U_FAILURE(ec)) {
3814         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3815         goto FAIL;
3816     }
3817     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3818            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3819     delete t;
3820 
3821     // Test that filters are allowed after &
3822     t = Transliterator::createFromRules("test",
3823                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3824                                         UTRANS_FORWARD, pe, ec);
3825     if (t == NULL || U_FAILURE(ec)) {
3826         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3827         goto FAIL;
3828     }
3829     expect(*t, "abc",
3830            "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
3831     delete t;
3832 
3833  FAIL:
3834     for (i=0; i<4; ++i) {
3835         _TUFUnreg(i);
3836     }
3837 }
3838 
3839 /**
3840  * Test the Any-X transliterators.
3841  */
TestAnyX(void)3842 void TransliteratorTest::TestAnyX(void) {
3843     UParseError parseError;
3844     UErrorCode status = U_ZERO_ERROR;
3845     Transliterator* anyLatin =
3846         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3847     if (anyLatin==0) {
3848         errln("FAIL: createInstance returned NULL");
3849         delete anyLatin;
3850         return;
3851     }
3852 
3853     expect(*anyLatin,
3854            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3855            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3856 
3857     delete anyLatin;
3858 }
3859 
3860 /**
3861  * Test the source and target set API.  These are only implemented
3862  * for RBT and CompoundTransliterator at this time.
3863  */
TestSourceTargetSet()3864 void TransliteratorTest::TestSourceTargetSet() {
3865     UErrorCode ec = U_ZERO_ERROR;
3866 
3867     // Rules
3868     const char* r =
3869         "a > b; "
3870         "r [x{lu}] > q;";
3871 
3872     // Expected source
3873     UnicodeSet expSrc("[arx{lu}]", ec);
3874 
3875     // Expected target
3876     UnicodeSet expTrg("[bq]", ec);
3877 
3878     UParseError pe;
3879     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3880 
3881     if (U_FAILURE(ec)) {
3882         delete t;
3883         errln("FAIL: Couldn't set up test");
3884         return;
3885     }
3886 
3887     UnicodeSet src; t->getSourceSet(src);
3888     UnicodeSet trg; t->getTargetSet(trg);
3889 
3890     if (src == expSrc && trg == expTrg) {
3891         UnicodeString a, b;
3892         logln((UnicodeString)"Ok: " +
3893               r + " => source = " + src.toPattern(a, TRUE) +
3894               ", target = " + trg.toPattern(b, TRUE));
3895     } else {
3896         UnicodeString a, b, c, d;
3897         errln((UnicodeString)"FAIL: " +
3898               r + " => source = " + src.toPattern(a, TRUE) +
3899               ", expected " + expSrc.toPattern(b, TRUE) +
3900               "; target = " + trg.toPattern(c, TRUE) +
3901               ", expected " + expTrg.toPattern(d, TRUE));
3902     }
3903 
3904     delete t;
3905 }
3906 
3907 /**
3908  * Test handling of rule whitespace, for both RBT and UnicodeSet.
3909  */
TestRuleWhitespace()3910 void TransliteratorTest::TestRuleWhitespace() {
3911     // Rules
3912     const char* r = "a > \\u200E b;";
3913 
3914     UErrorCode ec = U_ZERO_ERROR;
3915     UParseError pe;
3916     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3917 
3918     if (U_FAILURE(ec)) {
3919         errln("FAIL: Couldn't set up test");
3920     } else {
3921         expect(*t, "a", "b");
3922     }
3923     delete t;
3924 
3925     // UnicodeSet
3926     ec = U_ZERO_ERROR;
3927     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3928 
3929     if (U_FAILURE(ec)) {
3930         errln("FAIL: Couldn't set up test");
3931     } else {
3932         if (set.contains(0x200E)) {
3933             errln("FAIL: U+200E not being ignored by UnicodeSet");
3934         }
3935     }
3936 }
3937 //======================================================================
3938 // this method is in TestUScript.java
3939 //======================================================================
TestAllCodepoints()3940 void TransliteratorTest::TestAllCodepoints(){
3941     UScriptCode code= USCRIPT_INVALID_CODE;
3942     char id[256]={'\0'};
3943     char abbr[256]={'\0'};
3944     char newId[256]={'\0'};
3945     char newAbbrId[256]={'\0'};
3946     char oldId[256]={'\0'};
3947     char oldAbbrId[256]={'\0'};
3948 
3949     UErrorCode status =U_ZERO_ERROR;
3950     UParseError pe;
3951 
3952     for(uint32_t i = 0; i<=0x10ffff; i++){
3953         code =  uscript_getScript(i,&status);
3954         if(code == USCRIPT_INVALID_CODE){
3955             errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
3956         }
3957         const char* myId = uscript_getName(code);
3958         if(!myId) {
3959           errln("Valid script code returned NULL name. Check your data!");
3960           return;
3961         }
3962         uprv_strcpy(id,myId);
3963         uprv_strcpy(abbr,uscript_getShortName(code));
3964 
3965         uprv_strcpy(newId,"[:");
3966         uprv_strcat(newId,id);
3967         uprv_strcat(newId,":];NFD");
3968 
3969         uprv_strcpy(newAbbrId,"[:");
3970         uprv_strcat(newAbbrId,abbr);
3971         uprv_strcat(newAbbrId,":];NFD");
3972 
3973         if(uprv_strcmp(newId,oldId)!=0){
3974             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
3975             if(t==NULL || U_FAILURE(status)){
3976                 errln((UnicodeString)"FAIL: Could not create " + id);
3977             }
3978             delete t;
3979         }
3980         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
3981             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
3982             if(t==NULL || U_FAILURE(status)){
3983                 errln((UnicodeString)"FAIL: Could not create " + id);
3984             }
3985             delete t;
3986         }
3987         uprv_strcpy(oldId,newId);
3988         uprv_strcpy(oldAbbrId, newAbbrId);
3989 
3990     }
3991 
3992 }
3993 
/*
 * Creates a forward transliterator from the string literal `id` and reports
 * an error if it cannot be created, or if its dynamic class ID differs from
 * the static class ID of `cls`.  `id` must be a string literal because it is
 * pasted onto the failure message.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode status = U_ZERO_ERROR; \
  Transliterator* translit = Transliterator::createInstance(id, UTRANS_FORWARD, status); \
  if (U_SUCCESS(status)) { \
    if (translit->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *translit = *translit; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    errln("FAIL: Couldn't create " id); \
  } \
  delete translit; \
}
4007 
/*
 * Builds a forward transliterator named "_" from the string literal `rule`
 * and reports an error if it cannot be created, or if its dynamic class ID
 * differs from the static class ID of `cls`.  `rule` must be a string
 * literal because it is pasted onto the failure message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UParseError parseError; \
  UErrorCode status = U_ZERO_ERROR; \
  Transliterator* translit = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseError, status); \
  if (U_SUCCESS(status)) { \
    if (translit->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *translit = *translit; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete translit; \
}
4022 
TestBoilerplate()4023 void TransliteratorTest::TestBoilerplate() {
4024     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4025     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4026     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4027     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4028     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4029     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4030     TEST_TRANSLIT_ID("Null", NullTransliterator);
4031     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4032     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4033     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4034     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4035     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4036     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4037 }
4038 
TestAlternateSyntax()4039 void TransliteratorTest::TestAlternateSyntax() {
4040     // U+2206 == &
4041     // U+2190 == <
4042     // U+2192 == >
4043     // U+2194 == <>
4044     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4045            "abc",
4046            "xbz");
4047     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4048            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4049            "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
4050 }
4051 
// Rule sets shared by TestBeginEnd() and TestBeginEndToRules().
//
// Entries whose rules use ::BEGIN/::END are disabled: the intended rules are
// kept as comments and an empty string holds the slot, so that the indexes
// used by BEGIN_END_TEST_CASES stay valid.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1] (disabled)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "", // index placeholder

    // [2] (disabled)
    //    "abc > xy;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "", // index placeholder

    // [3] (disabled)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "aba > z;",
    "", // index placeholder

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10] (disabled)
    //    "::BEGIN;"
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::END;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';",
    "", // index placeholder

    // [11] (disabled)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;",
    "", // index placeholder

    // [12] (disabled)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "$ab = [ab];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;"
    //    "::BEGIN;"
    //    "$ab { ' ' } $ab > '-';"
    //    "c { ' ' > ;"
    //    "::END;"
    //    "::BEGIN;"
    //    "'a-a' > a\\%|a;"
    //    "::END;",
    "", // index placeholder

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14] (disabled)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > yz;"
    //    "::END;"
    //    "::Upper;",
    "", // index placeholder

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16] (disabled)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc <> xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba <> yz;"
    //    "::END;"
    //    "::Upper(Lower);"
    //    "::([XYZ]);"
    "", // index placeholder

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
// Number of entries in BEGIN_END_RULES, placeholders included.
static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(*BEGIN_END_RULES));
4226 
4227 /*
4228 (This entire test is commented out below and will need some heavy revision when we re-add
4229 the ::BEGIN/::END stuff)
4230 static const char* BOGUS_BEGIN_END_RULES[] = {
4231     // [7]
4232     "::BEGIN;"
4233     "abc > xy;"
4234     "::BEGIN;"
4235     "aba > z;"
4236     "::END;"
4237     "::END;",
4238 
4239     // [8]
4240     "abc > xy;"
4241     " aba > z;"
4242     "::END;",
4243 
4244     // [9]
4245     "::BEGIN;"
4246     "::Upper;"
4247     "::END;"
4248 };
4249 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4250 */
4251 
4252 static const char* BEGIN_END_TEST_CASES[] = {
4253     // rules             input                   expected output
4254     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4255 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4256 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4257 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4258     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4259     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4260 
4261     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4262     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4263     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4264     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4265 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4266 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4267 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4268 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4269 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4270     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4271     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4272     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4273 
4274 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4275     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4276 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4277     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4278 };
4279 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4280 
TestBeginEnd()4281 void TransliteratorTest::TestBeginEnd() {
4282     // run through the list of test cases above
4283     int32_t i = 0;
4284     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4285         expect((UnicodeString)"Test case #" + (i / 3),
4286                UnicodeString(BEGIN_END_TEST_CASES[i]),
4287                UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
4288                UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
4289     }
4290 
4291     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4292     UParseError parseError;
4293     UErrorCode status = U_ZERO_ERROR;
4294     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4295             UTRANS_REVERSE, parseError, status);
4296     if (reversed == 0 || U_FAILURE(status)) {
4297         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4298     } else {
4299         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4300     }
4301     delete reversed;
4302 
4303     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4304     // that all of them cause errors
4305 /*
4306 (commented out until we have the real ::BEGIN/::END stuff in place
4307     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4308         UParseError parseError;
4309         UErrorCode status = U_ZERO_ERROR;
4310         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4311                 UTRANS_FORWARD, parseError, status);
4312         if (!U_FAILURE(status)) {
4313             delete t;
4314             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4315         }
4316     }
4317 */
4318 }
4319 
TestBeginEndToRules()4320 void TransliteratorTest::TestBeginEndToRules() {
4321     // run through the same list of test cases we used above, but this time, instead of just
4322     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4323     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4324     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4325     // to (i.e., does the same thing as) the original rule set
4326     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4327         UParseError parseError;
4328         UErrorCode status = U_ZERO_ERROR;
4329         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]),
4330                 UTRANS_FORWARD, parseError, status);
4331         if (U_FAILURE(status)) {
4332             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4333         } else {
4334             UnicodeString rules;
4335             t->toRules(rules, TRUE);
4336             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4337                     UTRANS_FORWARD, parseError, status);
4338             if (U_FAILURE(status)) {
4339                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4340                         parseError, status);
4341                 delete t;
4342             } else {
4343                 expect(*t2,
4344                        UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
4345                        UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
4346                 delete t;
4347                 delete t2;
4348             }
4349         }
4350     }
4351 
4352     // do the same thing for the reversible test case
4353     UParseError parseError;
4354     UErrorCode status = U_ZERO_ERROR;
4355     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4356             UTRANS_REVERSE, parseError, status);
4357     if (U_FAILURE(status)) {
4358         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4359     } else {
4360         UnicodeString rules;
4361         reversed->toRules(rules, FALSE);
4362         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4363                 parseError, status);
4364         if (U_FAILURE(status)) {
4365             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4366                     parseError, status);
4367             delete reversed;
4368         } else {
4369             expect(*reversed2,
4370                    UnicodeString("xy XY XYZ yz YZ"),
4371                    UnicodeString("xy abc xaba yz aba"));
4372             delete reversed;
4373             delete reversed2;
4374         }
4375     }
4376 }
4377 
TestRegisterAlias()4378 void TransliteratorTest::TestRegisterAlias() {
4379     UnicodeString longID("Lower;[aeiou]Upper");
4380     UnicodeString shortID("Any-CapVowels");
4381     UnicodeString reallyShortID("CapVowels");
4382 
4383     Transliterator::registerAlias(shortID, longID);
4384 
4385     UErrorCode err = U_ZERO_ERROR;
4386     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4387     if (U_FAILURE(err)) {
4388         errln("Failed to instantiate transliterator with long ID");
4389         Transliterator::unregister(shortID);
4390         return;
4391     }
4392     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4393     if (U_FAILURE(err)) {
4394         errln("Failed to instantiate transliterator with short ID");
4395         delete t1;
4396         Transliterator::unregister(shortID);
4397         return;
4398     }
4399 
4400     if (t1->getID() != longID)
4401         errln("Transliterator instantiated with long ID doesn't have long ID");
4402     if (t2->getID() != reallyShortID)
4403         errln("Transliterator instantiated with short ID doesn't have short ID");
4404 
4405     UnicodeString rules1;
4406     UnicodeString rules2;
4407 
4408     t1->toRules(rules1, TRUE);
4409     t2->toRules(rules2, TRUE);
4410     if (rules1 != rules2)
4411         errln("Alias transliterators aren't the same");
4412 
4413     delete t1;
4414     delete t2;
4415     Transliterator::unregister(shortID);
4416 
4417     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4418     if (U_SUCCESS(err)) {
4419         errln("Instantiation with short ID succeeded after short ID was unregistered");
4420         delete t1;
4421     }
4422 
4423     // try the same thing again, but this time with something other than
4424     // an instance of CompoundTransliterator
4425     UnicodeString realID("Latin-Greek");
4426     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4427     Transliterator::registerAlias(fakeID, realID);
4428 
4429     err = U_ZERO_ERROR;
4430     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4431     if (U_FAILURE(err)) {
4432         errln("Failed to instantiate transliterator with real ID");
4433         Transliterator::unregister(realID);
4434         return;
4435     }
4436     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4437     if (U_FAILURE(err)) {
4438         errln("Failed to instantiate transliterator with fake ID");
4439         delete t1;
4440         Transliterator::unregister(realID);
4441         return;
4442     }
4443 
4444     t1->toRules(rules1, TRUE);
4445     t2->toRules(rules2, TRUE);
4446     if (rules1 != rules2)
4447         errln("Alias transliterators aren't the same");
4448 
4449     delete t1;
4450     delete t2;
4451     Transliterator::unregister(fakeID);
4452 }
4453 
TestRuleStripping()4454 void TransliteratorTest::TestRuleStripping() {
4455     /*
4456 #
4457 \uE001>\u0C01; # SIGN
4458     */
4459     static const UChar rule[] = {
4460         0x0023,0x0020,0x000D,0x000A,
4461         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4462     };
4463     static const UChar expectedRule[] = {
4464         0xE001,0x003E,0x0C01,0x003B,0
4465     };
4466     UChar result[sizeof(rule)/sizeof(rule[0])];
4467     UErrorCode status = U_ZERO_ERROR;
4468     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4469     if (len != u_strlen(expectedRule)) {
4470         errln("utrans_stripRules return len = %d", len);
4471     }
4472     if (u_strncmp(expectedRule, result, len) != 0) {
4473         errln("utrans_stripRules did not return expected string");
4474     }
4475 }
4476 
4477 //======================================================================
4478 // Support methods
4479 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4480 void TransliteratorTest::expectT(const UnicodeString& id,
4481                                  const UnicodeString& source,
4482                                  const UnicodeString& expectedResult) {
4483     UErrorCode ec = U_ZERO_ERROR;
4484     UParseError pe;
4485     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4486     if (U_FAILURE(ec)) {
4487         errln((UnicodeString)"FAIL: Could not create " + id);
4488         delete t;
4489         return;
4490     }
4491     expect(*t, source, expectedResult);
4492     delete t;
4493 }
4494 
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4495 void TransliteratorTest::reportParseError(const UnicodeString& message,
4496                                           const UParseError& parseError,
4497                                           const UErrorCode& status) {
4498     errln(message +
4499           /*", parse error " + parseError.code +*/
4500           ", line " + parseError.line +
4501           ", offset " + parseError.offset +
4502           ", pre-context " + prettify(parseError.preContext, TRUE) +
4503           ", post-context " + prettify(parseError.postContext,TRUE) +
4504           ", Error: " + u_errorName(status));
4505 }
4506 
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4507 void TransliteratorTest::expect(const UnicodeString& rules,
4508                                 const UnicodeString& source,
4509                                 const UnicodeString& expectedResult,
4510                                 UTransPosition *pos) {
4511     expect("<ID>", rules, source, expectedResult, pos);
4512 }
4513 
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4514 void TransliteratorTest::expect(const UnicodeString& id,
4515                                 const UnicodeString& rules,
4516                                 const UnicodeString& source,
4517                                 const UnicodeString& expectedResult,
4518                                 UTransPosition *pos) {
4519     UErrorCode status = U_ZERO_ERROR;
4520     UParseError parseError;
4521     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4522     if (U_FAILURE(status)) {
4523         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4524     } else {
4525         expect(*t, source, expectedResult, pos);
4526     }
4527     delete t;
4528 }
4529 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4530 void TransliteratorTest::expect(const Transliterator& t,
4531                                 const UnicodeString& source,
4532                                 const UnicodeString& expectedResult,
4533                                 const Transliterator& reverseTransliterator) {
4534     expect(t, source, expectedResult);
4535     expect(reverseTransliterator, expectedResult, source);
4536 }
4537 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4538 void TransliteratorTest::expect(const Transliterator& t,
4539                                 const UnicodeString& source,
4540                                 const UnicodeString& expectedResult,
4541                                 UTransPosition *pos) {
4542     if (pos == 0) {
4543         UnicodeString result(source);
4544         t.transliterate(result);
4545         expectAux(t.getID() + ":String", source, result, expectedResult);
4546     }
4547     UTransPosition index={0, 0, 0, 0};
4548     if (pos != 0) {
4549         index = *pos;
4550     }
4551 
4552     UnicodeString rsource(source);
4553     if (pos == 0) {
4554         t.transliterate(rsource);
4555     } else {
4556         // Do it all at once -- below we do it incrementally
4557         t.finishTransliteration(rsource, *pos);
4558     }
4559     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4560 
4561     // Test keyboard (incremental) transliteration -- this result
4562     // must be the same after we finalize (see below).
4563     UnicodeString log;
4564     rsource.remove();
4565     if (pos != 0) {
4566         rsource = source;
4567         formatInput(log, rsource, index);
4568         log.append(" -> ");
4569         UErrorCode status = U_ZERO_ERROR;
4570         t.transliterate(rsource, index, status);
4571         formatInput(log, rsource, index);
4572     } else {
4573         for (int32_t i=0; i<source.length(); ++i) {
4574             if (i != 0) {
4575                 log.append(" + ");
4576             }
4577             log.append(source.charAt(i)).append(" -> ");
4578             UErrorCode status = U_ZERO_ERROR;
4579             t.transliterate(rsource, index, source.charAt(i), status);
4580             formatInput(log, rsource, index);
4581         }
4582     }
4583 
4584     // As a final step in keyboard transliteration, we must call
4585     // transliterate to finish off any pending partial matches that
4586     // were waiting for more input.
4587     t.finishTransliteration(rsource, index);
4588     log.append(" => ").append(rsource);
4589 
4590     expectAux(t.getID() + ":Keyboard", log,
4591               rsource == expectedResult,
4592               expectedResult);
4593 }
4594 
4595 
4596 /**
4597  * @param appendTo result is appended to this param.
4598  * @param input the string being transliterated
4599  * @param pos the index struct
4600  */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4601 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4602                                                const UnicodeString& input,
4603                                                const UTransPosition& pos) {
4604     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4605     // the {} indicate the context start and limit, and the ||
4606     // indicate the start and limit.
4607     if (0 <= pos.contextStart &&
4608         pos.contextStart <= pos.start &&
4609         pos.start <= pos.limit &&
4610         pos.limit <= pos.contextLimit &&
4611         pos.contextLimit <= input.length()) {
4612 
4613         UnicodeString a, b, c, d, e;
4614         input.extractBetween(0, pos.contextStart, a);
4615         input.extractBetween(pos.contextStart, pos.start, b);
4616         input.extractBetween(pos.start, pos.limit, c);
4617         input.extractBetween(pos.limit, pos.contextLimit, d);
4618         input.extractBetween(pos.contextLimit, input.length(), e);
4619         appendTo.append(a).append((UChar)123/*{*/).append(b).
4620             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4621             append((UChar)125/*}*/).append(e);
4622     } else {
4623         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4624                         pos.contextStart + ", s=" + pos.start + ", l=" +
4625                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4626                         input);
4627     }
4628     return appendTo;
4629 }
4630 
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4631 void TransliteratorTest::expectAux(const UnicodeString& tag,
4632                                    const UnicodeString& source,
4633                                    const UnicodeString& result,
4634                                    const UnicodeString& expectedResult) {
4635     expectAux(tag, source + " -> " + result,
4636               result == expectedResult,
4637               expectedResult);
4638 }
4639 
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4640 void TransliteratorTest::expectAux(const UnicodeString& tag,
4641                                    const UnicodeString& summary, UBool pass,
4642                                    const UnicodeString& expectedResult) {
4643     if (pass) {
4644         logln(UnicodeString("(")+tag+") " + prettify(summary));
4645     } else {
4646         errln(UnicodeString("FAIL: (")+tag+") "
4647               + prettify(summary)
4648               + ", expected " + prettify(expectedResult));
4649     }
4650 }
4651 
4652 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4653