• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 1999-2009, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/10/99    aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_TRANSLITERATION
14 
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "rbt_pars.h"
30 #include "anytrans.h"
31 #include "esctrn.h"
32 #include "name2uni.h"
33 #include "nortrans.h"
34 #include "remtrans.h"
35 #include "titletrn.h"
36 #include "tolowtrn.h"
37 #include "toupptrn.h"
38 #include "unesctrn.h"
39 #include "uni2name.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include <stdio.h>
43 
44 /***********************************************************************
45 
46                      HOW TO USE THIS TEST FILE
47                                -or-
48                   How I developed on two platforms
49                 without losing (too much of) my mind
50 
51 
52 1. Add new tests by copying/pasting/changing existing tests.  On Java,
53    any public void method named Test...() taking no parameters becomes
54    a test.  On C++, you need to modify the header and add a line to
55    the runIndexedTest() dispatch method.
56 
57 2. Make liberal use of the expect() method; it is your friend.
58 
59 3. The tests in this file exactly match those in a sister file on the
60    other side.  The two files are:
61 
62    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
63    icu4c:  source/test/intltest/transtst.cpp
64 
65                   ==> THIS IS THE IMPORTANT PART <==
66 
67    When you add a test in this file, add it in TransliteratorTest.java
68    too.  Give it the same name and put it in the same relative place.
69    This makes maintenance a lot simpler for any poor soul who ends up
70    trying to synchronize the tests between icu4j and icu4c.
71 
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73    then add it in the special non-mirrored section.  These are
74    labeled
75 
76      "icu4j ONLY"
77 
78    or
79 
80      "icu4c ONLY"
81 
82    Make sure you document the reason the test is here and not there.
83 
84 
85 Thank you.
86 The Management
87 ***********************************************************************/
88 
89 // Define character constants thusly to be EBCDIC-friendly
90 enum {
91     LEFT_BRACE=((UChar)0x007B), /*{*/
92     PIPE      =((UChar)0x007C), /*|*/
93     ZERO      =((UChar)0x0030), /*0*/
94     UPPER_A   =((UChar)0x0041)  /*A*/
95 };
96 
TransliteratorTest()97 TransliteratorTest::TransliteratorTest()
98 :   DESERET_DEE((UChar32)0x10414),
99     DESERET_dee((UChar32)0x1043C)
100 {
101 }
102 
~TransliteratorTest()103 TransliteratorTest::~TransliteratorTest() {}
104 
105 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)106 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
107                                    const char* &name, char* /*par*/) {
108     switch (index) {
109         TESTCASE(0,TestInstantiation);
110         TESTCASE(1,TestSimpleRules);
111         TESTCASE(2,TestRuleBasedInverse);
112         TESTCASE(3,TestKeyboard);
113         TESTCASE(4,TestKeyboard2);
114         TESTCASE(5,TestKeyboard3);
115         TESTCASE(6,TestArabic);
116         TESTCASE(7,TestCompoundKana);
117         TESTCASE(8,TestCompoundHex);
118         TESTCASE(9,TestFiltering);
119         TESTCASE(10,TestInlineSet);
120         TESTCASE(11,TestPatternQuoting);
121         TESTCASE(12,TestJ277);
122         TESTCASE(13,TestJ243);
123         TESTCASE(14,TestJ329);
124         TESTCASE(15,TestSegments);
125         TESTCASE(16,TestCursorOffset);
126         TESTCASE(17,TestArbitraryVariableValues);
127         TESTCASE(18,TestPositionHandling);
128         TESTCASE(19,TestHiraganaKatakana);
129         TESTCASE(20,TestCopyJ476);
130         TESTCASE(21,TestAnchors);
131         TESTCASE(22,TestInterIndic);
132         TESTCASE(23,TestFilterIDs);
133         TESTCASE(24,TestCaseMap);
134         TESTCASE(25,TestNameMap);
135         TESTCASE(26,TestLiberalizedID);
136         TESTCASE(27,TestCreateInstance);
137         TESTCASE(28,TestNormalizationTransliterator);
138         TESTCASE(29,TestCompoundRBT);
139         TESTCASE(30,TestCompoundFilter);
140         TESTCASE(31,TestRemove);
141         TESTCASE(32,TestToRules);
142         TESTCASE(33,TestContext);
143         TESTCASE(34,TestSupplemental);
144         TESTCASE(35,TestQuantifier);
145         TESTCASE(36,TestSTV);
146         TESTCASE(37,TestCompoundInverse);
147         TESTCASE(38,TestNFDChainRBT);
148         TESTCASE(39,TestNullInverse);
149         TESTCASE(40,TestAliasInverseID);
150         TESTCASE(41,TestCompoundInverseID);
151         TESTCASE(42,TestUndefinedVariable);
152         TESTCASE(43,TestEmptyContext);
153         TESTCASE(44,TestCompoundFilterID);
154         TESTCASE(45,TestPropertySet);
155         TESTCASE(46,TestNewEngine);
156         TESTCASE(47,TestQuantifiedSegment);
157         TESTCASE(48,TestDevanagariLatinRT);
158         TESTCASE(49,TestTeluguLatinRT);
159         TESTCASE(50,TestCompoundLatinRT);
160         TESTCASE(51,TestSanskritLatinRT);
161         TESTCASE(52,TestLocaleInstantiation);
162         TESTCASE(53,TestTitleAccents);
163         TESTCASE(54,TestLocaleResource);
164         TESTCASE(55,TestParseError);
165         TESTCASE(56,TestOutputSet);
166         TESTCASE(57,TestVariableRange);
167         TESTCASE(58,TestInvalidPostContext);
168         TESTCASE(59,TestIDForms);
169         TESTCASE(60,TestToRulesMark);
170         TESTCASE(61,TestEscape);
171         TESTCASE(62,TestAnchorMasking);
172         TESTCASE(63,TestDisplayName);
173         TESTCASE(64,TestSpecialCases);
174         TESTCASE(65,TestIncrementalProgress);
175         TESTCASE(66,TestSurrogateCasing);
176         TESTCASE(67,TestFunction);
177         TESTCASE(68,TestInvalidBackRef);
178         TESTCASE(69,TestMulticharStringSet);
179         TESTCASE(70,TestUserFunction);
180         TESTCASE(71,TestAnyX);
181         TESTCASE(72,TestSourceTargetSet);
182         TESTCASE(73,TestGurmukhiDevanagari);
183         TESTCASE(74,TestRuleWhitespace);
184         TESTCASE(75,TestAllCodepoints);
185         TESTCASE(76,TestBoilerplate);
186         TESTCASE(77,TestAlternateSyntax);
187         TESTCASE(78,TestBeginEnd);
188         TESTCASE(79,TestBeginEndToRules);
189         TESTCASE(80,TestRegisterAlias);
190         TESTCASE(81,TestRuleStripping);
191         TESTCASE(82,TestHalfwidthFullwidth);
192         TESTCASE(83,TestThai);
193         TESTCASE(84,TestAny);
194         default: name = ""; break;
195     }
196 }
197 
198 static const UVersionInfo ICU_39 = {3,9,4,0};
199 /**
200  * Make sure every system transliterator can be instantiated.
201  *
202  * ALSO test that the result of toRules() for each rule is a valid
203  * rule.  Do this here so we don't have to have another test that
204  * instantiates everything as well.
205  */
TestInstantiation()206 void TransliteratorTest::TestInstantiation() {
207     UErrorCode ec = U_ZERO_ERROR;
208     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
209     assertSuccess("getAvailableIDs()", ec);
210     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
211     int32_t n = Transliterator::countAvailableIDs();
212     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
213                avail->count(ec) == n);
214     assertSuccess("count()", ec);
215     UnicodeString name;
216     for (int32_t i=0; i<n; ++i) {
217         const UnicodeString& id = *avail->snext(ec);
218         if (!assertSuccess("snext()", ec) ||
219             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
220             break;
221         }
222         UnicodeString id2 = Transliterator::getAvailableID(i);
223         if (id.length() < 1) {
224             errln(UnicodeString("FAIL: getAvailableID(") +
225                   i + ") returned empty string");
226             continue;
227         }
228         if (id != id2) {
229             errln(UnicodeString("FAIL: getAvailableID(") +
230                   i + ") != getAvailableIDs().snext()");
231             continue;
232         }
233         UParseError parseError;
234         UErrorCode status = U_ZERO_ERROR;
235         Transliterator* t = Transliterator::createInstance(id,
236                               UTRANS_FORWARD, parseError,status);
237         name.truncate(0);
238         Transliterator::getDisplayName(id, name);
239         if (t == 0) {
240             errln(UnicodeString("FAIL: Couldn't create ") + id +
241                   /*", parse error " + parseError.code +*/
242                   ", line " + parseError.line +
243                   ", offset " + parseError.offset +
244                   ", pre-context " + prettify(parseError.preContext, TRUE) +
245                   ", post-context " +prettify(parseError.postContext,TRUE) +
246                   ", Error: " + u_errorName(status));
247             // When createInstance fails, it deletes the failing
248             // entry from the available ID list.  We detect this
249             // here by looking for a change in countAvailableIDs.
250             int32_t nn = Transliterator::countAvailableIDs();
251             if (nn == (n - 1)) {
252                 n = nn;
253                 --i; // Compensate for deleted entry
254             }
255         } else {
256             logln(UnicodeString("OK: ") + name + " (" + id + ")");
257 
258             // Now test toRules
259             UnicodeString rules;
260             t->toRules(rules, TRUE);
261             Transliterator *u = Transliterator::createFromRules("x",
262                                     rules, UTRANS_FORWARD, parseError,status);
263             if (u == 0) {
264                 errln(UnicodeString("FAIL: ") + id +
265                       ".createFromRules() => bad rules" +
266                       /*", parse error " + parseError.code +*/
267                       ", line " + parseError.line +
268                       ", offset " + parseError.offset +
269                       ", context " + prettify(parseError.preContext, TRUE) +
270                       ", rules: " + prettify(rules, TRUE));
271             } else {
272                 delete u;
273             }
274             delete t;
275         }
276     }
277     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
278     assertSuccess("snext()", ec);
279     delete avail;
280 
281     // Now test the failure path
282     UParseError parseError;
283     UErrorCode status = U_ZERO_ERROR;
284     UnicodeString id("<Not a valid Transliterator ID>");
285     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
286     if (t != 0) {
287         errln("FAIL: " + id + " returned a transliterator");
288         delete t;
289     } else {
290         logln("OK: Bogus ID handled properly");
291     }
292 }
293 
TestSimpleRules(void)294 void TransliteratorTest::TestSimpleRules(void) {
295     /* Example: rules 1. ab>x|y
296      *                2. yc>z
297      *
298      * []|eabcd  start - no match, copy e to tranlated buffer
299      * [e]|abcd  match rule 1 - copy output & adjust cursor
300      * [ex|y]cd  match rule 2 - copy output & adjust cursor
301      * [exz]|d   no match, copy d to transliterated buffer
302      * [exzd]|   done
303      */
304     expect(UnicodeString("ab>x|y;", "") +
305            "yc>z",
306            "eabcd", "exzd");
307 
308     /* Another set of rules:
309      *    1. ab>x|yzacw
310      *    2. za>q
311      *    3. qc>r
312      *    4. cw>n
313      *
314      * []|ab       Rule 1
315      * [x|yzacw]   No match
316      * [xy|zacw]   Rule 2
317      * [xyq|cw]    Rule 4
318      * [xyqn]|     Done
319      */
320     expect(UnicodeString("ab>x|yzacw;") +
321            "za>q;" +
322            "qc>r;" +
323            "cw>n",
324            "ab", "xyqn");
325 
326     /* Test categories
327      */
328     UErrorCode status = U_ZERO_ERROR;
329     UParseError parseError;
330     Transliterator *t = Transliterator::createFromRules(
331         "<ID>",
332         UnicodeString("$dummy=").append((UChar)0xE100) +
333         UnicodeString(";"
334                       "$vowel=[aeiouAEIOU];"
335                       "$lu=[:Lu:];"
336                       "$vowel } $lu > '!';"
337                       "$vowel > '&';"
338                       "'!' { $lu > '^';"
339                       "$lu > '*';"
340                       "a > ERROR", ""),
341         UTRANS_FORWARD, parseError,
342         status);
343     if (U_FAILURE(status)) {
344         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
345         return;
346     }
347     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
348     delete t;
349 }
350 
351 /**
352  * Test inline set syntax and set variable syntax.
353  */
TestInlineSet(void)354 void TransliteratorTest::TestInlineSet(void) {
355     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
356     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
357 
358     expect(UnicodeString(
359            "$digit = [0-9];"
360            "$alpha = [a-zA-Z];"
361            "$alphanumeric = [$digit $alpha];" // ***
362            "$special = [^$alphanumeric];"     // ***
363            "$alphanumeric > '-';"
364            "$special > '*';", ""),
365 
366            "thx-1138", "---*----");
367 }
368 
369 /**
370  * Create some inverses and confirm that they work.  We have to be
371  * careful how we do this, since the inverses will not be true
372  * inverses -- we can't throw any random string at the composition
373  * of the transliterators and expect the identity function.  F x
374  * F' != I.  However, if we are careful about the input, we will
375  * get the expected results.
376  */
TestRuleBasedInverse(void)377 void TransliteratorTest::TestRuleBasedInverse(void) {
378     UnicodeString RULES =
379         UnicodeString("abc>zyx;") +
380         "ab>yz;" +
381         "bc>zx;" +
382         "ca>xy;" +
383         "a>x;" +
384         "b>y;" +
385         "c>z;" +
386 
387         "abc<zyx;" +
388         "ab<yz;" +
389         "bc<zx;" +
390         "ca<xy;" +
391         "a<x;" +
392         "b<y;" +
393         "c<z;" +
394 
395         "";
396 
397     const char* DATA[] = {
398         // Careful here -- random strings will not work.  If we keep
399         // the left side to the domain and the right side to the range
400         // we will be okay though (left, abc; right xyz).
401         "a", "x",
402         "abcacab", "zyxxxyy",
403         "caccb", "xyzzy",
404     };
405 
406     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
407 
408     UErrorCode status = U_ZERO_ERROR;
409     UParseError parseError;
410     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
411                                 UTRANS_FORWARD, parseError, status);
412     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
413                                 UTRANS_REVERSE, parseError, status);
414     if (U_FAILURE(status)) {
415         errln("FAIL: RBT constructor failed");
416         return;
417     }
418     for (int32_t i=0; i<DATA_length; i+=2) {
419         expect(*fwd, DATA[i], DATA[i+1]);
420         expect(*rev, DATA[i+1], DATA[i]);
421     }
422     delete fwd;
423     delete rev;
424 }
425 
426 /**
427  * Basic test of keyboard.
428  */
TestKeyboard(void)429 void TransliteratorTest::TestKeyboard(void) {
430     UParseError parseError;
431     UErrorCode status = U_ZERO_ERROR;
432     Transliterator *t = Transliterator::createFromRules("<ID>",
433                               UnicodeString("psch>Y;")
434                               +"ps>y;"
435                               +"ch>x;"
436                               +"a>A;",
437                               UTRANS_FORWARD, parseError,
438                               status);
439     if (U_FAILURE(status)) {
440         errln("FAIL: RBT constructor failed");
441         return;
442     }
443     const char* DATA[] = {
444         // insertion, buffer
445         "a", "A",
446         "p", "Ap",
447         "s", "Aps",
448         "c", "Apsc",
449         "a", "AycA",
450         "psch", "AycAY",
451         0, "AycAY", // null means finishKeyboardTransliteration
452     };
453 
454     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
455     delete t;
456 }
457 
458 /**
459  * Basic test of keyboard with cursor.
460  */
TestKeyboard2(void)461 void TransliteratorTest::TestKeyboard2(void) {
462     UParseError parseError;
463     UErrorCode status = U_ZERO_ERROR;
464     Transliterator *t = Transliterator::createFromRules("<ID>",
465                               UnicodeString("ych>Y;")
466                               +"ps>|y;"
467                               +"ch>x;"
468                               +"a>A;",
469                               UTRANS_FORWARD, parseError,
470                               status);
471     if (U_FAILURE(status)) {
472         errln("FAIL: RBT constructor failed");
473         return;
474     }
475     const char* DATA[] = {
476         // insertion, buffer
477         "a", "A",
478         "p", "Ap",
479         "s", "Aps", // modified for rollback - "Ay",
480         "c", "Apsc", // modified for rollback - "Ayc",
481         "a", "AycA",
482         "p", "AycAp",
483         "s", "AycAps", // modified for rollback - "AycAy",
484         "c", "AycApsc", // modified for rollback - "AycAyc",
485         "h", "AycAY",
486         0, "AycAY", // null means finishKeyboardTransliteration
487     };
488 
489     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
490     delete t;
491 }
492 
493 /**
494  * Test keyboard transliteration with back-replacement.
495  */
TestKeyboard3(void)496 void TransliteratorTest::TestKeyboard3(void) {
497     // We want th>z but t>y.  Furthermore, during keyboard
498     // transliteration we want t>y then yh>z if t, then h are
499     // typed.
500     UnicodeString RULES("t>|y;"
501                         "yh>z;");
502 
503     const char* DATA[] = {
504         // Column 1: characters to add to buffer (as if typed)
505         // Column 2: expected appearance of buffer after
506         //           keyboard xliteration.
507         "a", "a",
508         "b", "ab",
509         "t", "abt", // modified for rollback - "aby",
510         "c", "abyc",
511         "t", "abyct", // modified for rollback - "abycy",
512         "h", "abycz",
513         0, "abycz", // null means finishKeyboardTransliteration
514     };
515 
516     UParseError parseError;
517     UErrorCode status = U_ZERO_ERROR;
518     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
519     if (U_FAILURE(status)) {
520         errln("FAIL: RBT constructor failed");
521         return;
522     }
523     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
524     delete t;
525 }
526 
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)527 void TransliteratorTest::keyboardAux(const Transliterator& t,
528                                      const char* DATA[], int32_t DATA_length) {
529     UErrorCode status = U_ZERO_ERROR;
530     UTransPosition index={0, 0, 0, 0};
531     UnicodeString s;
532     for (int32_t i=0; i<DATA_length; i+=2) {
533         UnicodeString log;
534         if (DATA[i] != 0) {
535             log = s + " + "
536                 + DATA[i]
537                 + " -> ";
538             t.transliterate(s, index, DATA[i], status);
539         } else {
540             log = s + " => ";
541             t.finishTransliteration(s, index);
542         }
543         // Show the start index '{' and the cursor '|'
544         UnicodeString a, b, c;
545         s.extractBetween(0, index.contextStart, a);
546         s.extractBetween(index.contextStart, index.start, b);
547         s.extractBetween(index.start, s.length(), c);
548         log.append(a).
549             append((UChar)LEFT_BRACE).
550             append(b).
551             append((UChar)PIPE).
552             append(c);
553         if (s == DATA[i+1] && U_SUCCESS(status)) {
554             logln(log);
555         } else {
556             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
557         }
558     }
559 }
560 
TestArabic(void)561 void TransliteratorTest::TestArabic(void) {
562 // Test disabled for 2.0 until new Arabic transliterator can be written.
563 //    /*
564 //    const char* DATA[] = {
565 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
566 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
567 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
568 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
569 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
570 //                  "\u062c\u0645\u064a\u0644\u0629",
571 //    };
572 //    */
573 //
574 //    UChar ar_raw[] = {
575 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
576 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
577 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
578 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
579 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
580 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
581 //    };
582 //    UnicodeString ar(ar_raw);
583 //    UErrorCode status=U_ZERO_ERROR;
584 //    UParseError parseError;
585 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
586 //    if (t == 0) {
587 //        errln("FAIL: createInstance failed");
588 //        return;
589 //    }
590 //    expect(*t, "Arabic", ar);
591 //    delete t;
592 }
593 
594 /**
595  * Compose the Kana transliterator forward and reverse and try
596  * some strings that should come out unchanged.
597  */
TestCompoundKana(void)598 void TransliteratorTest::TestCompoundKana(void) {
599     UParseError parseError;
600     UErrorCode status = U_ZERO_ERROR;
601     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
602     if (t == 0) {
603         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
604     } else {
605         expect(*t, "aaaaa", "aaaaa");
606         delete t;
607     }
608 }
609 
610 /**
611  * Compose the hex transliterators forward and reverse.
612  */
TestCompoundHex(void)613 void TransliteratorTest::TestCompoundHex(void) {
614     UParseError parseError;
615     UErrorCode status = U_ZERO_ERROR;
616     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
617     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
618     Transliterator* transab[] = { a, b };
619     Transliterator* transba[] = { b, a };
620     if (a == 0 || b == 0) {
621         errln("FAIL: construction failed");
622         delete a;
623         delete b;
624         return;
625     }
626     // Do some basic tests of a
627     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
628     // Do some basic tests of b
629     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
630 
631     Transliterator* ab = new CompoundTransliterator(transab, 2);
632     UnicodeString s("abcde", "");
633     expect(*ab, s, s);
634 
635     UnicodeString str(s);
636     a->transliterate(str);
637     Transliterator* ba = new CompoundTransliterator(transba, 2);
638     expect(*ba, str, str);
639 
640     delete ab;
641     delete ba;
642     delete a;
643     delete b;
644 }
645 
646 int gTestFilterClassID = 0;
647 /**
648  * Used by TestFiltering().
649  */
650 class TestFilter : public UnicodeFilter {
clone() const651     virtual UnicodeFunctor* clone() const {
652         return new TestFilter(*this);
653     }
contains(UChar32 c) const654     virtual UBool contains(UChar32 c) const {
655         return c != (UChar)0x0063 /*c*/;
656     }
657     // Stubs
toPattern(UnicodeString & result,UBool) const658     virtual UnicodeString& toPattern(UnicodeString& result,
659                                      UBool /*escapeUnprintable*/) const {
660         return result;
661     }
matchesIndexValue(uint8_t) const662     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
663         return FALSE;
664     }
addMatchSetTo(UnicodeSet &) const665     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
666 public:
getDynamicClassID() const667     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
668 };
669 
670 /**
671  * Do some basic tests of filtering.
672  */
TestFiltering(void)673 void TransliteratorTest::TestFiltering(void) {
674     UParseError parseError;
675     UErrorCode status = U_ZERO_ERROR;
676     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
677     if (hex == 0) {
678         errln("FAIL: createInstance(Any-Hex) failed");
679         return;
680     }
681     hex->adoptFilter(new TestFilter());
682     UnicodeString s("abcde");
683     hex->transliterate(s);
684     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
685     if (s == exp) {
686         logln(UnicodeString("Ok:   \"") + exp + "\"");
687     } else {
688         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
689     }
690 
691     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
692     UnicodeFilter *f = hex->orphanFilter();
693     if (f == NULL){
694         errln("FAIL: orphanFilter() should get a UnicodeFilter");
695     } else {
696         delete f;
697     }
698     delete hex;
699 }
700 
701 /**
702  * Test anchors
703  */
TestAnchors(void)704 void TransliteratorTest::TestAnchors(void) {
705     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
706            "aaa",
707            "012");
708     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
709            "aaa",
710            "012");
711     expect(UnicodeString("^ab  > 01 ;"
712            " ab  > |8 ;"
713            "  b  > k ;"
714            " 8x$ > 45 ;"
715            " 8x  > 77 ;", ""),
716 
717            "ababbabxabx",
718            "018k7745");
719     expect(UnicodeString("$s = [z$] ;"
720            "$s{ab    > 01 ;"
721            "   ab    > |8 ;"
722            "    b    > k ;"
723            "   8x}$s > 45 ;"
724            "   8x    > 77 ;", ""),
725 
726            "abzababbabxzabxabx",
727            "01z018k45z01x45");
728 }
729 
730 /**
731  * Test pattern quoting and escape mechanisms.
732  */
TestPatternQuoting(void)733 void TransliteratorTest::TestPatternQuoting(void) {
734     // Array of 3n items
735     // Each item is <rules>, <input>, <expected output>
736     const UnicodeString DATA[] = {
737         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
738         UnicodeString(UChar(0x4E01)),
739         "[male adult]"
740     };
741 
742     for (int32_t i=0; i<3; i+=3) {
743         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
744         UParseError parseError;
745         UErrorCode status = U_ZERO_ERROR;
746         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
747         if (U_FAILURE(status)) {
748             errln("RBT constructor failed");
749         } else {
750             expect(*t, DATA[i+1], DATA[i+2]);
751         }
752         delete t;
753     }
754 }
755 
756 /**
757  * Regression test for bugs found in Greek transliteration.
758  */
TestJ277(void)759 void TransliteratorTest::TestJ277(void) {
760     UErrorCode status = U_ZERO_ERROR;
761     UParseError parseError;
762     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
763     if (gl == NULL) {
764         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
765         return;
766     }
767 
768     UChar sigma = 0x3C3;
769     UChar upsilon = 0x3C5;
770     UChar nu = 0x3BD;
771 //    UChar PHI = 0x3A6;
772     UChar alpha = 0x3B1;
773 //    UChar omega = 0x3C9;
774 //    UChar omicron = 0x3BF;
775 //    UChar epsilon = 0x3B5;
776 
777     // sigma upsilon nu -> syn
778     UnicodeString syn;
779     syn.append(sigma).append(upsilon).append(nu);
780     expect(*gl, syn, "syn");
781 
782     // sigma alpha upsilon nu -> saun
783     UnicodeString sayn;
784     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
785     expect(*gl, sayn, "saun");
786 
787     // Again, using a smaller rule set
788     UnicodeString rules(
789                 "$alpha   = \\u03B1;"
790                 "$nu      = \\u03BD;"
791                 "$sigma   = \\u03C3;"
792                 "$ypsilon = \\u03C5;"
793                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
794                 "s <>           $sigma;"
795                 "a <>           $alpha;"
796                 "u <>  $vowel { $ypsilon;"
797                 "y <>           $ypsilon;"
798                 "n <>           $nu;",
799                 "");
800     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
801     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
802     expect(*mini, syn, "syn");
803     expect(*mini, sayn, "saun");
804     delete mini;
805     mini = NULL;
806 
807 #if !UCONFIG_NO_FORMATTING
808     // Transliterate the Greek locale data
809     Locale el("el");
810     DateFormatSymbols syms(el, status);
811     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
812     int32_t i, count;
813     const UnicodeString* data = syms.getMonths(count);
814     for (i=0; i<count; ++i) {
815         if (data[i].length() == 0) {
816             continue;
817         }
818         UnicodeString out(data[i]);
819         gl->transliterate(out);
820         UBool ok = TRUE;
821         if (data[i].length() >= 2 && out.length() >= 2 &&
822             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
823             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
824                 ok = FALSE;
825             }
826         }
827         if (ok) {
828             logln(prettify(data[i] + " -> " + out));
829         } else {
830             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
831         }
832     }
833 #endif
834 
835     delete gl;
836 }
837 
838 /**
839  * Prefix, suffix support in hex transliterators
840  */
TestJ243(void)841 void TransliteratorTest::TestJ243(void) {
842     UErrorCode ec = U_ZERO_ERROR;
843 
844     // Test default Hex-Any, which should handle
845     // \u, \U, u+, and U+
846     Transliterator *hex =
847         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
848     if (assertSuccess("getInstance", ec)) {
849         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
850     }
851     delete hex;
852 
853 //    // Try a custom Hex-Unicode
854 //    // \uXXXX and &#xXXXX;
855 //    ec = U_ZERO_ERROR;
856 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
857 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
858 //           "abcd5fx012&#x00033;");
859 //    // Try custom Any-Hex (default is tested elsewhere)
860 //    ec = U_ZERO_ERROR;
861 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
862 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
863 }
864 
865 /**
866  * Parsers need better syntax error messages.
867  */
TestJ329(void)868 void TransliteratorTest::TestJ329(void) {
869 
870     struct { UBool containsErrors; const char* rule; } DATA[] = {
871         { FALSE, "a > b; c > d" },
872         { TRUE,  "a > b; no operator; c > d" },
873     };
874     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
875 
876     for (int32_t i=0; i<DATA_length; ++i) {
877         UErrorCode status = U_ZERO_ERROR;
878         UParseError parseError;
879         Transliterator *rbt = Transliterator::createFromRules("<ID>",
880                                     DATA[i].rule,
881                                     UTRANS_FORWARD,
882                                     parseError,
883                                     status);
884         UBool gotError = U_FAILURE(status);
885         UnicodeString desc(DATA[i].rule);
886         desc.append(gotError ? " -> error" : " -> no error");
887         if (gotError) {
888             desc = desc + ", ParseError code=" + u_errorName(status) +
889                 " line=" + parseError.line +
890                 " offset=" + parseError.offset +
891                 " context=" + parseError.preContext;
892         }
893         if (gotError == DATA[i].containsErrors) {
894             logln(UnicodeString("Ok:   ") + desc);
895         } else {
896             errln(UnicodeString("FAIL: ") + desc);
897         }
898         delete rbt;
899     }
900 }
901 
902 /**
903  * Test segments and segment references.
904  */
TestSegments(void)905 void TransliteratorTest::TestSegments(void) {
906     // Array of 3n items
907     // Each item is <rules>, <input>, <expected output>
908     UnicodeString DATA[] = {
909         "([a-z]) '.' ([0-9]) > $2 '-' $1",
910         "abc.123.xyz.456",
911         "ab1-c23.xy4-z56",
912 
913         // nested
914         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
915         "a1 b2",
916         "a1.a.1 b2.b.2",
917     };
918     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
919 
920     for (int32_t i=0; i<DATA_length; i+=3) {
921         logln("Pattern: " + prettify(DATA[i]));
922         UParseError parseError;
923         UErrorCode status = U_ZERO_ERROR;
924         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
925         if (U_FAILURE(status)) {
926             errln("FAIL: RBT constructor");
927         } else {
928             expect(*t, DATA[i+1], DATA[i+2]);
929         }
930         delete t;
931     }
932 }
933 
934 /**
935  * Test cursor positioning outside of the key
936  */
TestCursorOffset(void)937 void TransliteratorTest::TestCursorOffset(void) {
938     // Array of 3n items
939     // Each item is <rules>, <input>, <expected output>
940     UnicodeString DATA[] = {
941         "pre {alpha} post > | @ ALPHA ;"
942         "eALPHA > beta ;"
943         "pre {beta} post > BETA @@ | ;"
944         "post > xyz",
945 
946         "prealphapost prebetapost",
947 
948         "prbetaxyz preBETApost",
949     };
950     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
951 
952     for (int32_t i=0; i<DATA_length; i+=3) {
953         logln("Pattern: " + prettify(DATA[i]));
954         UParseError parseError;
955         UErrorCode status = U_ZERO_ERROR;
956         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
957         if (U_FAILURE(status)) {
958             errln("FAIL: RBT constructor");
959         } else {
960             expect(*t, DATA[i+1], DATA[i+2]);
961         }
962         delete t;
963     }
964 }
965 
966 /**
967  * Test zero length and > 1 char length variable values.  Test
968  * use of variable refs in UnicodeSets.
969  */
TestArbitraryVariableValues(void)970 void TransliteratorTest::TestArbitraryVariableValues(void) {
971     // Array of 3n items
972     // Each item is <rules>, <input>, <expected output>
973     UnicodeString DATA[] = {
974         "$abe = ab;"
975         "$pat = x[yY]z;"
976         "$ll  = 'a-z';"
977         "$llZ = [$ll];"
978         "$llY = [$ll$pat];"
979         "$emp = ;"
980 
981         "$abe > ABE;"
982         "$pat > END;"
983         "$llZ > 1;"
984         "$llY > 2;"
985         "7$emp 8 > 9;"
986         "",
987 
988         "ab xYzxyz stY78",
989         "ABE ENDEND 1129",
990     };
991     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
992 
993     for (int32_t i=0; i<DATA_length; i+=3) {
994         logln("Pattern: " + prettify(DATA[i]));
995         UParseError parseError;
996         UErrorCode status = U_ZERO_ERROR;
997         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
998         if (U_FAILURE(status)) {
999             errln("FAIL: RBT constructor");
1000         } else {
1001             expect(*t, DATA[i+1], DATA[i+2]);
1002         }
1003         delete t;
1004     }
1005 }
1006 
1007 /**
1008  * Confirm that the contextStart, contextLimit, start, and limit
1009  * behave correctly. J474.
1010  */
TestPositionHandling(void)1011 void TransliteratorTest::TestPositionHandling(void) {
1012     // Array of 3n items
1013     // Each item is <rules>, <input>, <expected output>
1014     const char* DATA[] = {
1015         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1016         "xtat txtb", // pos 0,9,0,9
1017         "xTTaSS TTxUUb",
1018 
1019         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1020         "xtat txtb", // pos 2,9,3,8
1021         "xtaSS TTxUUb",
1022 
1023         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1024         "xtat txtb", // pos 3,8,3,8
1025         "xtaTT TTxTTb",
1026     };
1027 
1028     // Array of 4n positions -- these go with the DATA array
1029     // They are: contextStart, contextLimit, start, limit
1030     int32_t POS[] = {
1031         0, 9, 0, 9,
1032         2, 9, 3, 8,
1033         3, 8, 3, 8,
1034     };
1035 
1036     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1037     for (int32_t i=0; i<n; i++) {
1038         UErrorCode status = U_ZERO_ERROR;
1039         UParseError parseError;
1040         Transliterator *t = Transliterator::createFromRules("<ID>",
1041                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1042         if (U_FAILURE(status)) {
1043             delete t;
1044             errln("FAIL: RBT constructor");
1045             return;
1046         }
1047         UTransPosition pos;
1048         pos.contextStart= POS[4*i];
1049         pos.contextLimit = POS[4*i+1];
1050         pos.start = POS[4*i+2];
1051         pos.limit = POS[4*i+3];
1052         UnicodeString rsource(DATA[3*i+1]);
1053         t->transliterate(rsource, pos, status);
1054         if (U_FAILURE(status)) {
1055             delete t;
1056             errln("FAIL: transliterate");
1057             return;
1058         }
1059         t->finishTransliteration(rsource, pos);
1060         expectAux(DATA[3*i],
1061                   DATA[3*i+1],
1062                   rsource,
1063                   DATA[3*i+2]);
1064         delete t;
1065     }
1066 }
1067 
1068 /**
1069  * Test the Hiragana-Katakana transliterator.
1070  */
TestHiraganaKatakana(void)1071 void TransliteratorTest::TestHiraganaKatakana(void) {
1072     UParseError parseError;
1073     UErrorCode status = U_ZERO_ERROR;
1074     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1075     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1076     if (hk == 0 || kh == 0) {
1077         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1078         delete hk;
1079         delete kh;
1080         return;
1081     }
1082 
1083     // Array of 3n items
1084     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1085     const char* DATA[] = {
1086         "both",
1087         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1088         "\\u30A2\\u30F8\\u30F2\\u30B0",
1089 
1090         "kh",
1091         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1092         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1093     };
1094     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1095 
1096     for (int32_t i=0; i<DATA_length; i+=3) {
1097         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1098         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1099         switch (*DATA[i]) {
1100         case 0x68: //'h': // Hiragana-Katakana
1101             expect(*hk, h, k);
1102             break;
1103         case 0x6B: //'k': // Katakana-Hiragana
1104             expect(*kh, k, h);
1105             break;
1106         case 0x62: //'b': // both
1107             expect(*hk, h, k);
1108             expect(*kh, k, h);
1109             break;
1110         }
1111     }
1112     delete hk;
1113     delete kh;
1114 }
1115 
1116 /**
1117  * Test cloning / copy constructor of RBT.
1118  */
TestCopyJ476(void)1119 void TransliteratorTest::TestCopyJ476(void) {
1120     // The real test here is what happens when the destructors are
1121     // called.  So we let one object get destructed, and check to
1122     // see that its copy still works.
1123     Transliterator *t2 = 0;
1124     {
1125         UParseError parseError;
1126         UErrorCode status = U_ZERO_ERROR;
1127         Transliterator *t1 = Transliterator::createFromRules("t1",
1128             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1129         if (U_FAILURE(status)) {
1130             errln("FAIL: RBT constructor");
1131             return;
1132         }
1133         t2 = t1->clone(); // Call copy constructor under the covers.
1134         expect(*t1, "abcfoofoo", "ABcbar");
1135         delete t1;
1136     }
1137     expect(*t2, "abcfoofoo", "ABcbar");
1138     delete t2;
1139 }
1140 
1141 /**
1142  * Test inter-Indic transliterators.  These are composed.
1143  * ICU4C Jitterbug 483.
1144  */
TestInterIndic(void)1145 void TransliteratorTest::TestInterIndic(void) {
1146     UnicodeString ID("Devanagari-Gujarati", "");
1147     UErrorCode status = U_ZERO_ERROR;
1148     UParseError parseError;
1149     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1150     if (dg == 0) {
1151         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1152         return;
1153     }
1154     UnicodeString id = dg->getID();
1155     if (id != ID) {
1156         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1157     }
1158     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1159     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1160     expect(*dg, dev, guj);
1161     delete dg;
1162 }
1163 
1164 /**
1165  * Test filter syntax in IDs. (J918)
1166  */
TestFilterIDs(void)1167 void TransliteratorTest::TestFilterIDs(void) {
1168     // Array of 3n strings:
1169     // <id>, <inverse id>, <input>, <expected output>
1170     const char* DATA[] = {
1171         "[aeiou]Any-Hex", // ID
1172         "[aeiou]Hex-Any", // expected inverse ID
1173         "quizzical",      // src
1174         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1175 
1176         "[aeiou]Any-Hex;[^5]Hex-Any",
1177         "[^5]Any-Hex;[aeiou]Hex-Any",
1178         "quizzical",
1179         "q\\u0075izzical",
1180 
1181         "[abc]Null",
1182         "[abc]Null",
1183         "xyz",
1184         "xyz",
1185     };
1186     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1187 
1188     for (int i=0; i<DATA_length; i+=4) {
1189         UnicodeString ID(DATA[i], "");
1190         UnicodeString uID(DATA[i+1], "");
1191         UnicodeString data2(DATA[i+2], "");
1192         UnicodeString data3(DATA[i+3], "");
1193         UParseError parseError;
1194         UErrorCode status = U_ZERO_ERROR;
1195         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1196         if (t == 0) {
1197             errln("FAIL: createInstance(" + ID + ") returned NULL");
1198             return;
1199         }
1200         expect(*t, data2, data3);
1201 
1202         // Check the ID
1203         if (ID != t->getID()) {
1204             errln("FAIL: createInstance(" + ID + ").getID() => " +
1205                   t->getID());
1206         }
1207 
1208         // Check the inverse
1209         Transliterator *u = t->createInverse(status);
1210         if (u == 0) {
1211             errln("FAIL: " + ID + ".createInverse() returned NULL");
1212         } else if (u->getID() != uID) {
1213             errln("FAIL: " + ID + ".createInverse().getID() => " +
1214                   u->getID() + ", expected " + uID);
1215         }
1216 
1217         delete t;
1218         delete u;
1219     }
1220 }
1221 
1222 /**
1223  * Test the case mapping transliterators.
1224  */
TestCaseMap(void)1225 void TransliteratorTest::TestCaseMap(void) {
1226     UParseError parseError;
1227     UErrorCode status = U_ZERO_ERROR;
1228     Transliterator* toUpper =
1229         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1230     Transliterator* toLower =
1231         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1232     Transliterator* toTitle =
1233         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1234     if (toUpper==0 || toLower==0 || toTitle==0) {
1235         errln("FAIL: createInstance returned NULL");
1236         delete toUpper;
1237         delete toLower;
1238         delete toTitle;
1239         return;
1240     }
1241 
1242     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1243            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1244     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1245            "the quick brown foX jumped over the lazY dogs.");
1246     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1247            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1248 
1249     delete toUpper;
1250     delete toLower;
1251     delete toTitle;
1252 }
1253 
1254 /**
1255  * Test the name mapping transliterators.
1256  */
TestNameMap(void)1257 void TransliteratorTest::TestNameMap(void) {
1258     UParseError parseError;
1259     UErrorCode status = U_ZERO_ERROR;
1260     Transliterator* uni2name =
1261         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1262     Transliterator* name2uni =
1263         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1264     if (uni2name==0 || name2uni==0) {
1265         errln("FAIL: createInstance returned NULL");
1266         delete uni2name;
1267         delete name2uni;
1268         return;
1269     }
1270 
1271     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1272     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1273            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1274     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1275            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1276 
1277     delete uni2name;
1278     delete name2uni;
1279 
1280     // round trip
1281     Transliterator* t =
1282         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1283     if (t==0) {
1284         errln("FAIL: createInstance returned NULL");
1285         delete t;
1286         return;
1287     }
1288 
1289     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1290     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1291     expect(*t, s, s);
1292     delete t;
1293 }
1294 
1295 /**
1296  * Test liberalized ID syntax.  1006c
1297  */
TestLiberalizedID(void)1298 void TransliteratorTest::TestLiberalizedID(void) {
1299     // Some test cases have an expected getID() value of NULL.  This
1300     // means I have disabled the test case for now.  This stuff is
1301     // still under development, and I haven't decided whether to make
1302     // getID() return canonical case yet.  It will all get rewritten
1303     // with the move to Source-Target/Variant IDs anyway. [aliu]
1304     const char* DATA[] = {
1305         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1306         "  Null  ", "Null", "whitespace",
1307         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1308         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1309     };
1310     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1311     UParseError parseError;
1312     UErrorCode status= U_ZERO_ERROR;
1313     for (int32_t i=0; i<DATA_length; i+=3) {
1314         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1315         if (t == 0) {
1316             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1317                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1318         } else {
1319             UnicodeString exp;
1320             if (DATA[i+1]) {
1321                 exp = UnicodeString(DATA[i+1], "");
1322             }
1323             // Don't worry about getID() if the expected char*
1324             // is NULL -- see above.
1325             if (exp.length() == 0 || exp == t->getID()) {
1326                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1327                       " create ID \"" + DATA[i] + "\" => \"" +
1328                       exp + "\"");
1329             } else {
1330                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1331                       " create ID \"" + DATA[i] + "\" => \"" +
1332                       t->getID() + "\", exp \"" + exp + "\"");
1333             }
1334             delete t;
1335         }
1336     }
1337 }
1338 
1339 /* test for Jitterbug 912 */
TestCreateInstance()1340 void TransliteratorTest::TestCreateInstance(){
1341     const char* FORWARD = "F";
1342     const char* REVERSE = "R";
1343     const char* DATA[] = {
1344         // Column 1: id
1345         // Column 2: direction
1346         // Column 3: expected ID, or "" if expect failure
1347         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1348 
1349         // JB#2689: bad compound causes crash
1350         "InvalidSource-InvalidTarget", FORWARD, "",
1351         "InvalidSource-InvalidTarget", REVERSE, "",
1352         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1353         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1354         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1355         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1356 
1357         NULL
1358     };
1359 
1360     for (int32_t i=0; DATA[i]; i+=3) {
1361         UParseError err;
1362         UErrorCode ec = U_ZERO_ERROR;
1363         UnicodeString id(DATA[i]);
1364         UTransDirection dir = (DATA[i+1]==FORWARD)?
1365             UTRANS_FORWARD:UTRANS_REVERSE;
1366         UnicodeString expID(DATA[i+2]);
1367         Transliterator* t =
1368             Transliterator::createInstance(id,dir,err,ec);
1369         UnicodeString newID;
1370         if (t) {
1371             newID = t->getID();
1372         }
1373         UBool ok = (newID == expID);
1374         if (!t) {
1375             newID = u_errorName(ec);
1376         }
1377         if (ok) {
1378             logln((UnicodeString)"Ok: createInstance(" +
1379                   id + "," + DATA[i+1] + ") => " + newID);
1380         } else {
1381             dataerrln((UnicodeString)"FAIL: createInstance(" +
1382                   id + "," + DATA[i+1] + ") => " + newID +
1383                   ", expected " + expID);
1384         }
1385         delete t;
1386     }
1387 }
1388 
1389 /**
1390  * Test the normalization transliterator.
1391  */
TestNormalizationTransliterator()1392 void TransliteratorTest::TestNormalizationTransliterator() {
1393     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1394     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1395     const char* CANON[] = {
1396         // Input               Decomposed            Composed
1397         "cat",                "cat",                "cat"               ,
1398         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1399 
1400         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1401         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1402 
1403         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1404         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1405         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1406 
1407         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1408         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1409 
1410         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1411         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1412         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1413 
1414         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1415         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1416 
1417         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1418         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1419 
1420         "Henry IV",           "Henry IV",           "Henry IV"          ,
1421         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1422 
1423         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1424         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1425         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1426         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1427         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1428 
1429         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1430         0 // end
1431     };
1432 
1433     const char* COMPAT[] = {
1434         // Input               Decomposed            Composed
1435         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1436 
1437         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1438         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1439 
1440         "Henry IV",           "Henry IV",           "Henry IV"          ,
1441         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1442 
1443         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1444         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1445 
1446         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1447         0 // end
1448     };
1449 
1450     int32_t i;
1451     UParseError parseError;
1452     UErrorCode status = U_ZERO_ERROR;
1453     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1454     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1455     if (!NFD || !NFC) {
1456         errln("FAIL: createInstance failed");
1457         delete NFD;
1458         delete NFC;
1459         return;
1460     }
1461     for (i=0; CANON[i]; i+=3) {
1462         UnicodeString in = CharsToUnicodeString(CANON[i]);
1463         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1464         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1465         expect(*NFD, in, expd);
1466         expect(*NFC, in, expc);
1467     }
1468     delete NFD;
1469     delete NFC;
1470 
1471     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1472     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1473     if (!NFKD || !NFKC) {
1474         errln("FAIL: createInstance failed");
1475         delete NFKD;
1476         delete NFKC;
1477         return;
1478     }
1479     for (i=0; COMPAT[i]; i+=3) {
1480         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1481         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1482         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1483         expect(*NFKD, in, expkd);
1484         expect(*NFKC, in, expkc);
1485     }
1486     delete NFKD;
1487     delete NFKC;
1488 
1489     UParseError pe;
1490     status = U_ZERO_ERROR;
1491     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1492                                                        UTRANS_FORWARD,
1493                                                        pe, status);
1494     if (t == 0) {
1495         errln("FAIL: createInstance failed");
1496     }
1497     expect(*t, CharsToUnicodeString("\\u010dx"),
1498            CharsToUnicodeString("c\\u030C"));
1499     delete t;
1500 }
1501 
1502 /**
1503  * Test compound RBT rules.
1504  */
TestCompoundRBT(void)1505 void TransliteratorTest::TestCompoundRBT(void) {
1506     // Careful with spacing and ';' here:  Phrase this exactly
1507     // as toRules() is going to return it.  If toRules() changes
1508     // with regard to spacing or ';', then adjust this string.
1509     UnicodeString rule("::Hex-Any;\n"
1510                        "::Any-Lower;\n"
1511                        "a > '.A.';\n"
1512                        "b > '.B.';\n"
1513                        "::[^t]Any-Upper;", "");
1514     UParseError parseError;
1515     UErrorCode status = U_ZERO_ERROR;
1516     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1517     if (t == 0) {
1518         errln("FAIL: createFromRules failed");
1519         return;
1520     }
1521     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1522            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1523     UnicodeString r;
1524     t->toRules(r, TRUE);
1525     if (r == rule) {
1526         logln((UnicodeString)"OK: toRules() => " + r);
1527     } else {
1528         errln((UnicodeString)"FAIL: toRules() => " + r +
1529               ", expected " + rule);
1530     }
1531     delete t;
1532 
1533     // Now test toRules
1534     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1535     if (t == 0) {
1536         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1537         return;
1538     }
1539     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1540     t->toRules(r, TRUE);
1541     if (r != exp) {
1542         errln((UnicodeString)"FAIL: toRules() => " + r +
1543               ", expected " + exp);
1544     } else {
1545         logln((UnicodeString)"OK: toRules() => " + r);
1546     }
1547     delete t;
1548 
1549     // Round trip the result of toRules
1550     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1551     if (t == 0) {
1552         errln("FAIL: createFromRules #2 failed");
1553         return;
1554     } else {
1555         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1556     }
1557 
1558     // Test toRules again
1559     t->toRules(r, TRUE);
1560     if (r != exp) {
1561         errln((UnicodeString)"FAIL: toRules() => " + r +
1562               ", expected " + exp);
1563     } else {
1564         logln((UnicodeString)"OK: toRules() => " + r);
1565     }
1566 
1567     delete t;
1568 
1569     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1570     // to what the regenerated ID will look like.
1571     UnicodeString id("Upper(Lower);(NFKC)", "");
1572     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1573     if (t == 0) {
1574         errln("FAIL: createInstance #2 failed");
1575         return;
1576     }
1577     if (t->getID() == id) {
1578         logln((UnicodeString)"OK: created " + id);
1579     } else {
1580         errln((UnicodeString)"FAIL: createInstance(" + id +
1581               ").getID() => " + t->getID());
1582     }
1583 
1584     Transliterator *u = t->createInverse(status);
1585     if (u == 0) {
1586         errln("FAIL: createInverse failed");
1587         delete t;
1588         return;
1589     }
1590     exp = "NFKC();Lower(Upper)";
1591     if (u->getID() == exp) {
1592         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1593               u->getID());
1594     } else {
1595         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1596               u->getID());
1597     }
1598     delete t;
1599     delete u;
1600 }
1601 
1602 /**
1603  * Compound filter semantics were orginially not implemented
1604  * correctly.  Originally, each component filter f(i) is replaced by
1605  * f'(i) = f(i) && g, where g is the filter for the compound
1606  * transliterator.
1607  *
1608  * From Mark:
1609  *
1610  * Suppose and I have a transliterator X. Internally X is
1611  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1612  *
1613  * The compound should convert all greek characters (through latin) to
1614  * cyrillic, then lowercase the result. The filter should say "don't
1615  * touch 'A' in the original". But because an intermediate result
1616  * happens to go through "A", the Greek Alpha gets hung up.
1617  */
TestCompoundFilter(void)1618 void TransliteratorTest::TestCompoundFilter(void) {
1619     UParseError parseError;
1620     UErrorCode status = U_ZERO_ERROR;
1621     Transliterator *t = Transliterator::createInstance
1622         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1623     if (t == 0) {
1624         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1625         return;
1626     }
1627     t->adoptFilter(new UnicodeSet("[^A]", status));
1628     if (U_FAILURE(status)) {
1629         errln("FAIL: UnicodeSet ct failed");
1630         delete t;
1631         return;
1632     }
1633 
1634     // Only the 'A' at index 1 should remain unchanged
1635     expect(*t,
1636            CharsToUnicodeString("BA\\u039A\\u0391"),
1637            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1638     delete t;
1639 }
1640 
TestRemove(void)1641 void TransliteratorTest::TestRemove(void) {
1642     UParseError parseError;
1643     UErrorCode status = U_ZERO_ERROR;
1644     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1645     if (t == 0) {
1646         errln("FAIL: createInstance failed");
1647         return;
1648     }
1649 
1650     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1651 
1652     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1653     // duplicating the filter
1654     Transliterator* t2 = t->clone();
1655     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1656 
1657     delete t;
1658     delete t2;
1659 }
1660 
TestToRules(void)1661 void TransliteratorTest::TestToRules(void) {
1662     const char* RBT = "rbt";
1663     const char* SET = "set";
1664     static const char* DATA[] = {
1665         RBT,
1666         "$a=\\u4E61; [$a] > A;",
1667         "[\\u4E61] > A;",
1668 
1669         RBT,
1670         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1671         "[[:Zs:][:Zl:]]{a} > A;",
1672 
1673         SET,
1674         "[[:Zs:][:Zl:]]",
1675         "[[:Zs:][:Zl:]]",
1676 
1677         SET,
1678         "[:Ps:]",
1679         "[:Ps:]",
1680 
1681         SET,
1682         "[:L:]",
1683         "[:L:]",
1684 
1685         SET,
1686         "[[:L:]-[A]]",
1687         "[[:L:]-[A]]",
1688 
1689         SET,
1690         "[~[:Lu:][:Ll:]]",
1691         "[~[:Lu:][:Ll:]]",
1692 
1693         SET,
1694         "[~[a-z]]",
1695         "[~[a-z]]",
1696 
1697         RBT,
1698         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1699         "[^[:Zs:]]{a} > A;",
1700 
1701         RBT,
1702         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1703         "[[a-z]-[:Zs:]]{a} > A;",
1704 
1705         RBT,
1706         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1707         "[[:Zs:]&[a-z]]{a} > A;",
1708 
1709         RBT,
1710         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1711         "[x[:Zs:]]{a} > A;",
1712 
1713         RBT,
1714         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1715         "$macron = \\u0304 ;"
1716         "$evowel = [aeiouyAEIOUY] ;"
1717         "$iotasub = \\u0345 ;"
1718         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1719         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1720 
1721         RBT,
1722         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1723         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1724     };
1725     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1726 
1727     for (int32_t d=0; d < DATA_length; d+=3) {
1728         if (DATA[d] == RBT) {
1729             // Transliterator test
1730             UParseError parseError;
1731             UErrorCode status = U_ZERO_ERROR;
1732             Transliterator *t = Transliterator::createFromRules("ID",
1733                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1734             if (t == 0) {
1735                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1736                 return;
1737             }
1738             UnicodeString rules, escapedRules;
1739             t->toRules(rules, FALSE);
1740             t->toRules(escapedRules, TRUE);
1741             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1742             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1743             if (rules == expRules) {
1744                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1745                       " => " + rules);
1746             } else {
1747                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1748                       " => " + rules + ", exp " + expRules);
1749             }
1750             if (escapedRules == expEscapedRules) {
1751                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752                       " => " + escapedRules);
1753             } else {
1754                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755                       " => " + escapedRules + ", exp " + expEscapedRules);
1756             }
1757             delete t;
1758 
1759         } else {
1760             // UnicodeSet test
1761             UErrorCode status = U_ZERO_ERROR;
1762             UnicodeString pat(DATA[d+1], -1, US_INV);
1763             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1764             UnicodeSet set(pat, status);
1765             if (U_FAILURE(status)) {
1766                 errln("FAIL: UnicodeSet ct failed");
1767                 return;
1768             }
1769             // Adjust spacing etc. as necessary.
1770             UnicodeString toPat;
1771             set.toPattern(toPat);
1772             if (expToPat == toPat) {
1773                 logln((UnicodeString)"Ok: " + pat +
1774                       " => " + toPat);
1775             } else {
1776                 errln((UnicodeString)"FAIL: " + pat +
1777                       " => " + prettify(toPat, TRUE) +
1778                       ", exp " + prettify(pat, TRUE));
1779             }
1780         }
1781     }
1782 }
1783 
TestContext()1784 void TransliteratorTest::TestContext() {
1785     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1786     expect("de > x; {d}e > y;",
1787            "de",
1788            "ye",
1789            &pos);
1790 
1791     expect("ab{c} > z;",
1792            "xadabdabcy",
1793            "xadabdabzy");
1794 }
1795 
TestSupplemental()1796 void TransliteratorTest::TestSupplemental() {
1797 
1798     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1799                                 "a > $a; $s > i;"),
1800            CharsToUnicodeString("ab\\U0001030Fx"),
1801            CharsToUnicodeString("\\U00010300bix"));
1802 
1803     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1804                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1805                                 "($a)($b) > $2 $1;"),
1806            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1807            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1808 
1809     // k|ax\\U00010300xm
1810 
1811     // k|a\\U00010400\\U00010300xm
1812     // ky|\\U00010400\\U00010300xm
1813     // ky\\U00010400|\\U00010300xm
1814 
1815     // ky\\U00010400|\\U00010300\\U00010400m
1816     // ky\\U00010400y|\\U00010400m
1817     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1818                                 "$a {x} > | @ \\U00010400;"
1819                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1820            CharsToUnicodeString("kax\\U00010300xm"),
1821            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1822 
1823     expectT("Any-Name",
1824            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1825            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1826 
1827     expectT("Any-Hex/Unicode",
1828            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1829            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1830 
1831     expectT("Any-Hex/C",
1832            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1833            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1834 
1835     expectT("Any-Hex/Perl",
1836            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1837            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1838 
1839     expectT("Any-Hex/Java",
1840            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1841            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1842 
1843     expectT("Any-Hex/XML",
1844            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1845            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1846 
1847     expectT("Any-Hex/XML10",
1848            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1849            "&#66352;&#1113856;&#917601;&#160;");
1850 
1851     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1852            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1854 }
1855 
TestQuantifier()1856 void TransliteratorTest::TestQuantifier() {
1857 
1858     // Make sure @ in a quantified anteContext works
1859     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1860            "AAAAAb",
1861            "aaa(aac)");
1862 
1863     // Make sure @ in a quantified postContext works
1864     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1865            "baaaaa",
1866            "caa(aaa)");
1867 
1868     // Make sure @ in a quantified postContext with seg ref works
1869     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1870            "baaaaa",
1871            "baa(aaa)");
1872 
1873     // Make sure @ past ante context doesn't enter ante context
1874     UTransPosition pos = {0, 5, 3, 5};
1875     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1876            "xxxab",
1877            "xxx(ac)",
1878            &pos);
1879 
1880     // Make sure @ past post context doesn't pass limit
1881     UTransPosition pos2 = {0, 4, 0, 2};
1882     expect("{b} a+ > c @@ |; x > y; a > A;",
1883            "baxx",
1884            "caxx",
1885            &pos2);
1886 
1887     // Make sure @ past post context doesn't enter post context
1888     expect("{b} a+ > c @@ |; x > y; a > A;",
1889            "baxx",
1890            "cayy");
1891 
1892     expect("(ab)? c > d;",
1893            "c abc ababc",
1894            "d d abd");
1895 
1896     // NOTE: The (ab)+ when referenced just yields a single "ab",
1897     // not the full sequence of them.  This accords with perl behavior.
1898     expect("(ab)+ {x} > '(' $1 ')';",
1899            "x abx ababxy",
1900            "x ab(ab) abab(ab)y");
1901 
1902     expect("b+ > x;",
1903            "ac abc abbc abbbc",
1904            "ac axc axc axc");
1905 
1906     expect("[abc]+ > x;",
1907            "qac abrc abbcs abtbbc",
1908            "qx xrx xs xtx");
1909 
1910     expect("q{(ab)+} > x;",
1911            "qa qab qaba qababc qaba",
1912            "qa qx qxa qxc qxa");
1913 
1914     expect("q(ab)* > x;",
1915            "qa qab qaba qababc",
1916            "xa x xa xc");
1917 
1918     // NOTE: The (ab)+ when referenced just yields a single "ab",
1919     // not the full sequence of them.  This accords with perl behavior.
1920     expect("q(ab)* > '(' $1 ')';",
1921            "qa qab qaba qababc",
1922            "()a (ab) (ab)a (ab)c");
1923 
1924     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1925     // quoted string
1926     expect("'ab'+ > x;",
1927            "bb ab ababb",
1928            "bb x xb");
1929 
1930     // $foo+ and $foo* -- the quantifier should apply to the entire
1931     // variable reference
1932     expect("$var = ab; $var+ > x;",
1933            "bb ab ababb",
1934            "bb x xb");
1935 }
1936 
1937 class TestTrans : public Transliterator {
1938 public:
TestTrans(const UnicodeString & id)1939     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1940     }
clone(void) const1941     virtual Transliterator* clone(void) const {
1942         return new TestTrans(getID());
1943     }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1944     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1945         UBool /*isIncremental*/) const
1946     {
1947         offsets.start = offsets.limit;
1948     }
1949     virtual UClassID getDynamicClassID() const;
1950     static UClassID U_EXPORT2 getStaticClassID();
1951 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1952 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1953 
1954 /**
1955  * Test Source-Target/Variant.
1956  */
1957 void TransliteratorTest::TestSTV(void) {
1958     int32_t ns = Transliterator::countAvailableSources();
1959     if (ns < 0 || ns > 255) {
1960         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1961         return;
1962     }
1963     int32_t i, j;
1964     for (i=0; i<ns; ++i) {
1965         UnicodeString source;
1966         Transliterator::getAvailableSource(i, source);
1967         logln((UnicodeString)"" + i + ": " + source);
1968         if (source.length() == 0) {
1969             errln("FAIL: empty source");
1970             continue;
1971         }
1972         int32_t nt = Transliterator::countAvailableTargets(source);
1973         if (nt < 0 || nt > 255) {
1974             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1975             continue;
1976         }
1977         for (int32_t j=0; j<nt; ++j) {
1978             UnicodeString target;
1979             Transliterator::getAvailableTarget(j, source, target);
1980             logln((UnicodeString)" " + j + ": " + target);
1981             if (target.length() == 0) {
1982                 errln("FAIL: empty target");
1983                 continue;
1984             }
1985             int32_t nv = Transliterator::countAvailableVariants(source, target);
1986             if (nv < 0 || nv > 255) {
1987                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1988                 continue;
1989             }
1990             for (int32_t k=0; k<nv; ++k) {
1991                 UnicodeString variant;
1992                 Transliterator::getAvailableVariant(k, source, target, variant);
1993                 if (variant.length() == 0) {
1994                     logln((UnicodeString)"  " + k + ": <empty>");
1995                 } else {
1996                     logln((UnicodeString)"  " + k + ": " + variant);
1997                 }
1998             }
1999         }
2000     }
2001 
2002     // Test registration
2003     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2004     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2005     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2006     for (i=0; i<3; ++i) {
2007         Transliterator *t = new TestTrans(IDS[i]);
2008         if (t == 0) {
2009             errln("FAIL: out of memory");
2010             return;
2011         }
2012         if (t->getID() != IDS[i]) {
2013             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2014             delete t;
2015             return;
2016         }
2017         Transliterator::registerInstance(t);
2018         UErrorCode status = U_ZERO_ERROR;
2019         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2020         if (t == NULL) {
2021             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2022                   IDS[i]);
2023         } else {
2024             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2025                   IDS[i]);
2026             delete t;
2027         }
2028         Transliterator::unregister(IDS[i]);
2029         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2030         if (t != NULL) {
2031             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2032                   IDS[i]);
2033             delete t;
2034         }
2035     }
2036 
2037     // Make sure getAvailable API reflects removal
2038     int32_t n = Transliterator::countAvailableIDs();
2039     for (i=0; i<n; ++i) {
2040         UnicodeString id = Transliterator::getAvailableID(i);
2041         for (j=0; j<3; ++j) {
2042             if (id.caseCompare(FULL_IDS[j],0)==0) {
2043                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2044             }
2045         }
2046     }
2047     n = Transliterator::countAvailableTargets("Any");
2048     for (i=0; i<n; ++i) {
2049         UnicodeString t;
2050         Transliterator::getAvailableTarget(i, "Any", t);
2051         if (t.caseCompare(IDS[0],0)==0) {
2052             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2053         }
2054     }
2055     n = Transliterator::countAvailableSources();
2056     for (i=0; i<n; ++i) {
2057         UnicodeString s;
2058         Transliterator::getAvailableSource(i, s);
2059         for (j=0; j<3; ++j) {
2060             if (SOURCES[j] == NULL) continue;
2061             if (s.caseCompare(SOURCES[j],0)==0) {
2062                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2063             }
2064         }
2065     }
2066 }
2067 
2068 /**
2069  * Test inverse of Greek-Latin; Title()
2070  */
TestCompoundInverse(void)2071 void TransliteratorTest::TestCompoundInverse(void) {
2072     UParseError parseError;
2073     UErrorCode status = U_ZERO_ERROR;
2074     Transliterator *t = Transliterator::createInstance
2075         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2076     if (t == 0) {
2077         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2078         return;
2079     }
2080     UnicodeString exp("(Title);Latin-Greek");
2081     if (t->getID() == exp) {
2082         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2083               t->getID());
2084     } else {
2085         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2086               t->getID() + "\", expected \"" + exp + "\"");
2087     }
2088     delete t;
2089 }
2090 
2091 /**
2092  * Test NFD chaining with RBT
2093  */
TestNFDChainRBT()2094 void TransliteratorTest::TestNFDChainRBT() {
2095     UParseError pe;
2096     UErrorCode ec = U_ZERO_ERROR;
2097     Transliterator* t = Transliterator::createFromRules(
2098                                "TEST", "::NFD; aa > Q; a > q;",
2099                                UTRANS_FORWARD, pe, ec);
2100     if (t == NULL || U_FAILURE(ec)) {
2101         errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2102         return;
2103     }
2104     expect(*t, "aa", "Q");
2105     delete t;
2106 
2107     // TEMPORARY TESTS -- BEING DEBUGGED
2108 //=-    UnicodeString s, s2;
2109 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2110 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2111 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2112 //=-    expect(*t, s, s2);
2113 //=-    delete t;
2114 //=-
2115 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2116 //=-    expect(*t, s2, s);
2117 //=-    delete t;
2118 //=-
2119 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2120 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2121 //=-    expect(*t, s, s);
2122 //=-    delete t;
2123 
2124 //    const char* source[] = {
2125 //        /*
2126 //        "\\u015Br\\u012Bmad",
2127 //        "bhagavadg\\u012Bt\\u0101",
2128 //        "adhy\\u0101ya",
2129 //        "arjuna",
2130 //        "vi\\u1E63\\u0101da",
2131 //        "y\\u014Dga",
2132 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2133 //        "uv\\u0101cr\\u0325",
2134 //        */
2135 //        "rmk\\u1E63\\u0113t",
2136 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2137 //        /*
2138 //        "kuruk\\u1E63\\u0113tr\\u0113",
2139 //        "samav\\u0113t\\u0101",
2140 //        "yuyutsava-\\u1E25",
2141 //        "m\\u0101mak\\u0101-\\u1E25",
2142 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2143 //        "kimakurvata",
2144 //        "san\\u0304java",
2145 //        */
2146 //
2147 //        0
2148 //    };
2149 //    const char* expected[] = {
2150 //        /*
2151 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2152 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2153 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2154 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2155 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2156 //        "\\u092f\\u094b\\u0917",
2157 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2158 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2159 //        */
2160 //        "\\u0927",
2161 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2162 //        /*
2163 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2164 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2165 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2166 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2167 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2168 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2169 //        "\\u0938\\u0902\\u091c\\u0935",
2170 //        */
2171 //        0
2172 //    };
2173 //    UErrorCode status = U_ZERO_ERROR;
2174 //    UParseError parseError;
2175 //    UnicodeString message;
2176 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2177 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2178 //    if(U_FAILURE(status)){
2179 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2180 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2181 //        delete latinToDevToLatin;
2182 //        delete devToLatinToDev;
2183 //        return;
2184 //    }
2185 //    UnicodeString gotResult;
2186 //    for(int i= 0; source[i] != 0; i++){
2187 //        gotResult = source[i];
2188 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2189 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2190 //    }
2191 //    delete latinToDevToLatin;
2192 //    delete devToLatinToDev;
2193 }
2194 
2195 /**
2196  * Inverse of "Null" should be "Null". (J21)
2197  */
TestNullInverse()2198 void TransliteratorTest::TestNullInverse() {
2199     UParseError pe;
2200     UErrorCode ec = U_ZERO_ERROR;
2201     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2202     if (t == 0 || U_FAILURE(ec)) {
2203         errln("FAIL: createInstance");
2204         return;
2205     }
2206     Transliterator *u = t->createInverse(ec);
2207     if (u == 0 || U_FAILURE(ec)) {
2208         errln("FAIL: createInverse");
2209         delete t;
2210         return;
2211     }
2212     if (u->getID() != "Null") {
2213         errln("FAIL: Inverse of Null should be Null");
2214     }
2215     delete t;
2216     delete u;
2217 }
2218 
2219 /**
2220  * Check ID of inverse of alias. (J22)
2221  */
TestAliasInverseID()2222 void TransliteratorTest::TestAliasInverseID() {
2223     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2224     UParseError pe;
2225     UErrorCode ec = U_ZERO_ERROR;
2226     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2227     if (t == 0 || U_FAILURE(ec)) {
2228         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2229         return;
2230     }
2231     Transliterator *u = t->createInverse(ec);
2232     if (u == 0 || U_FAILURE(ec)) {
2233         errln("FAIL: createInverse");
2234         delete t;
2235         return;
2236     }
2237     UnicodeString exp = "Hangul-Latin";
2238     UnicodeString got = u->getID();
2239     if (got != exp) {
2240         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2241               ", expected " + exp);
2242     }
2243     delete t;
2244     delete u;
2245 }
2246 
2247 /**
2248  * Test IDs of inverses of compound transliterators. (J20)
2249  */
TestCompoundInverseID()2250 void TransliteratorTest::TestCompoundInverseID() {
2251     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2252     UParseError pe;
2253     UErrorCode ec = U_ZERO_ERROR;
2254     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2255     if (t == 0 || U_FAILURE(ec)) {
2256         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2257         return;
2258     }
2259     Transliterator *u = t->createInverse(ec);
2260     if (u == 0 || U_FAILURE(ec)) {
2261         errln("FAIL: createInverse");
2262         delete t;
2263         return;
2264     }
2265     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2266     UnicodeString got = u->getID();
2267     if (got != exp) {
2268         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2269               ", expected " + exp);
2270     }
2271     delete t;
2272     delete u;
2273 }
2274 
2275 /**
2276  * Test undefined variable.
2277 
2278  */
TestUndefinedVariable()2279 void TransliteratorTest::TestUndefinedVariable() {
2280     UnicodeString rule = "$initial } a <> \\u1161;";
2281     UParseError pe;
2282     UErrorCode ec = U_ZERO_ERROR;
2283     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2284     delete t;
2285     if (U_FAILURE(ec)) {
2286         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2287               u_errorName(ec));
2288         return;
2289     }
2290     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2291           u_errorName(ec));
2292 }
2293 
2294 /**
2295  * Test empty context.
2296  */
TestEmptyContext()2297 void TransliteratorTest::TestEmptyContext() {
2298     expect(" { a } > b;", "xay a ", "xby b ");
2299 }
2300 
2301 /**
2302 * Test compound filter ID syntax
2303 */
TestCompoundFilterID(void)2304 void TransliteratorTest::TestCompoundFilterID(void) {
2305     static const char* DATA[] = {
2306         // Col. 1 = ID or rule set (latter must start with #)
2307 
2308         // = columns > 1 are null if expect col. 1 to be illegal =
2309 
2310         // Col. 2 = direction, "F..." or "R..."
2311         // Col. 3 = source string
2312         // Col. 4 = exp result
2313 
2314         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2315         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2316         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2317         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2318         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2319         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2320         NULL,
2321     };
2322 
2323     for (int32_t i=0; DATA[i]; i+=4) {
2324         UnicodeString id = CharsToUnicodeString(DATA[i]);
2325         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2326             UTRANS_REVERSE : UTRANS_FORWARD;
2327         UnicodeString source;
2328         UnicodeString exp;
2329         if (DATA[i+2] != NULL) {
2330             source = CharsToUnicodeString(DATA[i+2]);
2331             exp = CharsToUnicodeString(DATA[i+3]);
2332         }
2333         UBool expOk = (DATA[i+1] != NULL);
2334         Transliterator* t = NULL;
2335         UParseError pe;
2336         UErrorCode ec = U_ZERO_ERROR;
2337         if (id.charAt(0) == 0x23/*#*/) {
2338             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2339         } else {
2340             t = Transliterator::createInstance(id, direction, pe, ec);
2341         }
2342         UBool ok = (t != NULL && U_SUCCESS(ec));
2343         UnicodeString transID;
2344         if (t!=0) {
2345             transID = t->getID();
2346         }
2347         else {
2348             transID = UnicodeString("NULL", "");
2349         }
2350         if (ok == expOk) {
2351             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2352                   u_errorName(ec));
2353             if (source.length() != 0) {
2354                 expect(*t, source, exp);
2355             }
2356             delete t;
2357         } else {
2358             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2359                   u_errorName(ec));
2360         }
2361     }
2362 }
2363 
2364 /**
2365  * Test new property set syntax
2366  */
TestPropertySet()2367 void TransliteratorTest::TestPropertySet() {
2368     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2369     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2370            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2371 }
2372 
2373 /**
2374  * Test various failure points of the new 2.0 engine.
2375  */
TestNewEngine()2376 void TransliteratorTest::TestNewEngine() {
2377     UParseError pe;
2378     UErrorCode ec = U_ZERO_ERROR;
2379     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2380     if (t == 0 || U_FAILURE(ec)) {
2381         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2382         return;
2383     }
2384     // Katakana should be untouched
2385     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2386            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2387 
2388     delete t;
2389 
2390 #if 1
2391     // This test will only work if Transliterator.ROLLBACK is
2392     // true.  Otherwise, this test will fail, revealing a
2393     // limitation of global filters in incremental mode.
2394     Transliterator *a =
2395         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2396     Transliterator *A =
2397         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2398     if (U_FAILURE(ec)) {
2399         delete a;
2400         delete A;
2401         return;
2402     }
2403 
2404     Transliterator* array[3];
2405     array[0] = a;
2406     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2407     array[2] = A;
2408     if (U_FAILURE(ec)) {
2409         errln("FAIL: createInstance NFD");
2410         delete a;
2411         delete A;
2412         delete array[1];
2413         return;
2414     }
2415 
2416     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2417     if (U_FAILURE(ec)) {
2418         errln("FAIL: UnicodeSet constructor");
2419         delete a;
2420         delete A;
2421         delete array[1];
2422         delete t;
2423         return;
2424     }
2425 
2426     expect(*t, "aAaA", "bAbA");
2427 
2428     assertTrue("countElements", t->countElements() == 3);
2429     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2430     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2431     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2432     assertSuccess("getElement", ec);
2433 
2434     delete a;
2435     delete A;
2436     delete array[1];
2437     delete t;
2438 #endif
2439 
2440     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2441            "a",
2442            "ax");
2443 
2444     UnicodeString gr = CharsToUnicodeString(
2445         "$ddot = \\u0308 ;"
2446         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2447         "$rough = \\u0314 ;"
2448         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2449         "\\u03b1 <> a ;"
2450         "$rough <> h ;");
2451 
2452     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2453 }
2454 
2455 /**
2456  * Test quantified segment behavior.  We want:
2457  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2458  */
TestQuantifiedSegment(void)2459 void TransliteratorTest::TestQuantifiedSegment(void) {
2460     // The normal case
2461     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2462 
2463     // The tricky case; the quantifier is around the segment
2464     expect("([abc])+ > x $1 x;", "cba", "xax");
2465 
2466     // Tricky case in reverse direction
2467     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2468 
2469     // Check post-context segment
2470     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2471 
2472     // Test toRule/toPattern for non-quantified segment.
2473     // Careful with spacing here.
2474     UnicodeString r("([a-c]){q} > x $1 x;");
2475     UParseError pe;
2476     UErrorCode ec = U_ZERO_ERROR;
2477     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2478     if (U_FAILURE(ec)) {
2479         errln("FAIL: createFromRules");
2480         delete t;
2481         return;
2482     }
2483     UnicodeString rr;
2484     t->toRules(rr, TRUE);
2485     if (r != rr) {
2486         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2487     } else {
2488         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2489     }
2490     delete t;
2491 
2492     // Test toRule/toPattern for quantified segment.
2493     // Careful with spacing here.
2494     r = "([a-c])+{q} > x $1 x;";
2495     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2496     if (U_FAILURE(ec)) {
2497         errln("FAIL: createFromRules");
2498         delete t;
2499         return;
2500     }
2501     t->toRules(rr, TRUE);
2502     if (r != rr) {
2503         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2504     } else {
2505         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2506     }
2507     delete t;
2508 }
2509 
2510 //======================================================================
2511 // Ram's tests
2512 //======================================================================
TestDevanagariLatinRT()2513 void TransliteratorTest::TestDevanagariLatinRT(){
2514     const int MAX_LEN= 52;
2515     const char* const source[MAX_LEN] = {
2516         "bh\\u0101rata",
2517         "kra",
2518         "k\\u1E63a",
2519         "khra",
2520         "gra",
2521         "\\u1E45ra",
2522         "cra",
2523         "chra",
2524         "j\\u00F1a",
2525         "jhra",
2526         "\\u00F1ra",
2527         "\\u1E6Dya",
2528         "\\u1E6Dhra",
2529         "\\u1E0Dya",
2530       //"r\\u0323ya", // \u095c is not valid in Devanagari
2531         "\\u1E0Dhya",
2532         "\\u1E5Bhra",
2533         "\\u1E47ra",
2534         "tta",
2535         "thra",
2536         "dda",
2537         "dhra",
2538         "nna",
2539         "pra",
2540         "phra",
2541         "bra",
2542         "bhra",
2543         "mra",
2544         "\\u1E49ra",
2545       //"l\\u0331ra",
2546         "yra",
2547         "\\u1E8Fra",
2548       //"l-",
2549         "vra",
2550         "\\u015Bra",
2551         "\\u1E63ra",
2552         "sra",
2553         "hma",
2554         "\\u1E6D\\u1E6Da",
2555         "\\u1E6D\\u1E6Dha",
2556         "\\u1E6Dh\\u1E6Dha",
2557         "\\u1E0D\\u1E0Da",
2558         "\\u1E0D\\u1E0Dha",
2559         "\\u1E6Dya",
2560         "\\u1E6Dhya",
2561         "\\u1E0Dya",
2562         "\\u1E0Dhya",
2563         // Not roundtrippable --
2564         // \\u0939\\u094d\\u094d\\u092E  - hma
2565         // \\u0939\\u094d\\u092E         - hma
2566         // CharsToUnicodeString("hma"),
2567         "hya",
2568         "\\u015Br\\u0325",
2569         "\\u015Bca",
2570         "\\u0115",
2571         "san\\u0304j\\u012Bb s\\u0113nagupta",
2572         "\\u0101nand vaddir\\u0101ju",
2573         "\\u0101",
2574         "a"
2575     };
2576     const char* const expected[MAX_LEN] = {
2577         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2578         "\\u0915\\u094D\\u0930",          /* kra         */
2579         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2580         "\\u0916\\u094D\\u0930",          /* khra        */
2581         "\\u0917\\u094D\\u0930",          /* gra         */
2582         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2583         "\\u091A\\u094D\\u0930",          /* cra         */
2584         "\\u091B\\u094D\\u0930",          /* chra        */
2585         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2586         "\\u091D\\u094D\\u0930",          /* jhra        */
2587         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2588         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2589         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2590         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2591       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2592         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2593         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2594         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2595         "\\u0924\\u094D\\u0924",          /* tta         */
2596         "\\u0925\\u094D\\u0930",          /* thra        */
2597         "\\u0926\\u094D\\u0926",          /* dda         */
2598         "\\u0927\\u094D\\u0930",          /* dhra        */
2599         "\\u0928\\u094D\\u0928",          /* nna         */
2600         "\\u092A\\u094D\\u0930",          /* pra         */
2601         "\\u092B\\u094D\\u0930",          /* phra        */
2602         "\\u092C\\u094D\\u0930",          /* bra         */
2603         "\\u092D\\u094D\\u0930",          /* bhra        */
2604         "\\u092E\\u094D\\u0930",          /* mra         */
2605         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2606       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2607         "\\u092F\\u094D\\u0930",          /* yra         */
2608         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2609       //"l-",
2610         "\\u0935\\u094D\\u0930",          /* vra         */
2611         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2612         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2613         "\\u0938\\u094D\\u0930",          /* sra         */
2614         "\\u0939\\u094d\\u092E",          /* hma         */
2615         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2616         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2617         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2618         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2619         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2620         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2621         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2622         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2623         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2624      // "hma",                         /* hma         */
2625         "\\u0939\\u094D\\u092F",          /* hya         */
2626         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2627         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2628         "\\u090d",                        /* e\\u0306    */
2629         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2630         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2631         "\\u0906",
2632         "\\u0905",
2633     };
2634     UErrorCode status = U_ZERO_ERROR;
2635     UParseError parseError;
2636     UnicodeString message;
2637     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2638     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2639     if(U_FAILURE(status)){
2640         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2641         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2642         return;
2643     }
2644     UnicodeString gotResult;
2645     for(int i= 0; i<MAX_LEN; i++){
2646         gotResult = source[i];
2647         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2648         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2649     }
2650     delete latinToDev;
2651     delete devToLatin;
2652 }
2653 
TestTeluguLatinRT()2654 void TransliteratorTest::TestTeluguLatinRT(){
2655     const int MAX_LEN=10;
2656     const char* const source[MAX_LEN] = {
2657         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2658         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2659         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2660         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2661         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2662         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2663         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2664         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2665         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2666         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2667     };
2668 
2669     const char* const expected[MAX_LEN] = {
2670         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2671         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2672         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2673         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2674         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2675         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2676         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2677         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2678         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2679         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2680     };
2681 
2682     UErrorCode status = U_ZERO_ERROR;
2683     UParseError parseError;
2684     UnicodeString message;
2685     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2686     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2687     if(U_FAILURE(status)){
2688         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2689         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2690         return;
2691     }
2692     UnicodeString gotResult;
2693     for(int i= 0; i<MAX_LEN; i++){
2694         gotResult = source[i];
2695         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2696         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2697     }
2698     delete latinToDev;
2699     delete devToLatin;
2700 }
2701 
TestSanskritLatinRT()2702 void TransliteratorTest::TestSanskritLatinRT(){
2703     const int MAX_LEN =16;
2704     const char* const source[MAX_LEN] = {
2705         "rmk\\u1E63\\u0113t",
2706         "\\u015Br\\u012Bmad",
2707         "bhagavadg\\u012Bt\\u0101",
2708         "adhy\\u0101ya",
2709         "arjuna",
2710         "vi\\u1E63\\u0101da",
2711         "y\\u014Dga",
2712         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2713         "uv\\u0101cr\\u0325",
2714         "dharmak\\u1E63\\u0113tr\\u0113",
2715         "kuruk\\u1E63\\u0113tr\\u0113",
2716         "samav\\u0113t\\u0101",
2717         "yuyutsava\\u1E25",
2718         "m\\u0101mak\\u0101\\u1E25",
2719     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2720         "kimakurvata",
2721         "san\\u0304java",
2722     };
2723     const char* const expected[MAX_LEN] = {
2724         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2725         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2726         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2727         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2728         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2729         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2730         "\\u092f\\u094b\\u0917",
2731         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2732         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2733         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2734         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2735         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2736         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2737         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2738     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2739         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2740         "\\u0938\\u0902\\u091c\\u0935",
2741     };
2742     UErrorCode status = U_ZERO_ERROR;
2743     UParseError parseError;
2744     UnicodeString message;
2745     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2746     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2747     if(U_FAILURE(status)){
2748         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2749         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2750         return;
2751     }
2752     UnicodeString gotResult;
2753     for(int i= 0; i<MAX_LEN; i++){
2754         gotResult = source[i];
2755         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2756         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2757     }
2758     delete latinToDev;
2759     delete devToLatin;
2760 }
2761 
2762 
TestCompoundLatinRT()2763 void TransliteratorTest::TestCompoundLatinRT(){
2764     const char* const source[] = {
2765         "rmk\\u1E63\\u0113t",
2766         "\\u015Br\\u012Bmad",
2767         "bhagavadg\\u012Bt\\u0101",
2768         "adhy\\u0101ya",
2769         "arjuna",
2770         "vi\\u1E63\\u0101da",
2771         "y\\u014Dga",
2772         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2773         "uv\\u0101cr\\u0325",
2774         "dharmak\\u1E63\\u0113tr\\u0113",
2775         "kuruk\\u1E63\\u0113tr\\u0113",
2776         "samav\\u0113t\\u0101",
2777         "yuyutsava\\u1E25",
2778         "m\\u0101mak\\u0101\\u1E25",
2779      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2780         "kimakurvata",
2781         "san\\u0304java"
2782     };
2783     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2784     const char* const expected[MAX_LEN] = {
2785         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2786         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2787         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2788         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2789         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2790         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2791         "\\u092f\\u094b\\u0917",
2792         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2793         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2794         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2795         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2796         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2797         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2798         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2799     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2800         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2801         "\\u0938\\u0902\\u091c\\u0935"
2802     };
2803     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2804         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2805         return;
2806     }
2807 
2808     UErrorCode status = U_ZERO_ERROR;
2809     UParseError parseError;
2810     UnicodeString message;
2811     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2812     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2813     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2814     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2815 
2816     if(U_FAILURE(status)){
2817         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2818         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2819         return;
2820     }
2821     UnicodeString gotResult;
2822     for(int i= 0; i<MAX_LEN; i++){
2823         gotResult = source[i];
2824         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2825         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2826         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2827 
2828     }
2829     delete(latinToDevToLatin);
2830     delete(devToLatinToDev);
2831     delete(devToTelToDev);
2832     delete(latinToTelToLatin);
2833 }
2834 
2835 /**
2836  * Test Gurmukhi-Devanagari Tippi and Bindi
2837  */
TestGurmukhiDevanagari()2838 void TransliteratorTest::TestGurmukhiDevanagari(){
2839     // the rule says:
2840     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2841     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2842     UErrorCode status = U_ZERO_ERROR;
2843     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2844     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2845     UParseError parseError;
2846 
2847     UnicodeSetIterator vIter(vowel);
2848     UnicodeSetIterator nvIter(non_vowel);
2849     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2850     if(U_FAILURE(status)) {
2851       dataerrln("Error creating transliterator %s", u_errorName(status));
2852       delete trans;
2853       return;
2854     }
2855     UnicodeString src (" \\u0902", -1, US_INV);
2856     UnicodeString expected(" \\u0A02", -1, US_INV);
2857     src = src.unescape();
2858     expected= expected.unescape();
2859 
2860     while(vIter.next()){
2861         src.setCharAt(0,(UChar) vIter.getCodepoint());
2862         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2863         expect(*trans,src,expected);
2864     }
2865 
2866     expected.setCharAt(1,0x0A70);
2867     while(nvIter.next()){
2868         //src.setCharAt(0,(char) nvIter.codepoint);
2869         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2870         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2871         expect(*trans,src,expected);
2872     }
2873     delete trans;
2874 }
2875 /**
2876  * Test instantiation from a locale.
2877  */
TestLocaleInstantiation(void)2878 void TransliteratorTest::TestLocaleInstantiation(void) {
2879     UParseError pe;
2880     UErrorCode ec = U_ZERO_ERROR;
2881     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2882     if (U_FAILURE(ec)) {
2883         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2884         delete t;
2885         return;
2886     }
2887     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2888     delete t;
2889 
2890     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2891     if (U_FAILURE(ec)) {
2892         errln("FAIL: createInstance(en-el)");
2893         delete t;
2894         return;
2895     }
2896     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2897     delete t;
2898 }
2899 
2900 /**
2901  * Test title case handling of accent (should ignore accents)
2902  */
TestTitleAccents(void)2903 void TransliteratorTest::TestTitleAccents(void) {
2904     UParseError pe;
2905     UErrorCode ec = U_ZERO_ERROR;
2906     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2907     if (U_FAILURE(ec)) {
2908         errln("FAIL: createInstance(Title)");
2909         delete t;
2910         return;
2911     }
2912     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2913     delete t;
2914 }
2915 
2916 /**
2917  * Basic test of a locale resource based rule.
2918  */
TestLocaleResource()2919 void TransliteratorTest::TestLocaleResource() {
2920     const char* DATA[] = {
2921         // id                    from               to
2922         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2923         "Latin-el",              "b",               "\\u03bc\\u03c0",
2924         "Latin-Greek",           "b",               "\\u03B2",
2925         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2926         "el-Latin",              "\\u03B2",         "v",
2927         "Greek-Latin",           "\\u03B2",         "b",
2928     };
2929     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2930     for (int32_t i=0; i<DATA_length; i+=3) {
2931         UParseError pe;
2932         UErrorCode ec = U_ZERO_ERROR;
2933         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2934         if (U_FAILURE(ec)) {
2935             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2936             delete t;
2937             continue;
2938         }
2939         expect(*t, CharsToUnicodeString(DATA[i+1]),
2940                CharsToUnicodeString(DATA[i+2]));
2941         delete t;
2942     }
2943 }
2944 
2945 /**
2946  * Make sure parse errors reference the right line.
2947  */
TestParseError()2948 void TransliteratorTest::TestParseError() {
2949     static const char* rule =
2950         "a > b;\n"
2951         "# more stuff\n"
2952         "d << b;";
2953     UErrorCode ec = U_ZERO_ERROR;
2954     UParseError pe;
2955     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2956     delete t;
2957     if (U_FAILURE(ec)) {
2958         UnicodeString err(pe.preContext);
2959         err.append((UChar)124/*|*/).append(pe.postContext);
2960         if (err.indexOf("d << b") >= 0) {
2961             logln("Ok: " + err);
2962         } else {
2963             errln("FAIL: " + err);
2964         }
2965     }
2966     else {
2967         errln("FAIL: no syntax error");
2968     }
2969     static const char* maskingRule =
2970         "a>x;\n"
2971         "# more stuff\n"
2972         "ab>y;";
2973     ec = U_ZERO_ERROR;
2974     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2975     if (ec != U_RULE_MASK_ERROR) {
2976         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2977     }
2978     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2979         errln("FAIL: did not get expected precontext");
2980     }
2981     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2982         errln("FAIL: did not get expected postcontext");
2983     }
2984 }
2985 
2986 /**
2987  * Make sure sets on output are disallowed.
2988  */
TestOutputSet()2989 void TransliteratorTest::TestOutputSet() {
2990     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2991     UErrorCode ec = U_ZERO_ERROR;
2992     UParseError pe;
2993     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2994     delete t;
2995     if (U_FAILURE(ec)) {
2996         UnicodeString err(pe.preContext);
2997         err.append((UChar)124/*|*/).append(pe.postContext);
2998         logln("Ok: " + err);
2999         return;
3000     }
3001     errln("FAIL: No syntax error");
3002 }
3003 
3004 /**
3005  * Test the use variable range pragma, making sure that use of
3006  * variable range characters is detected and flagged as an error.
3007  */
TestVariableRange()3008 void TransliteratorTest::TestVariableRange() {
3009     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3010     UErrorCode ec = U_ZERO_ERROR;
3011     UParseError pe;
3012     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3013     delete t;
3014     if (U_FAILURE(ec)) {
3015         UnicodeString err(pe.preContext);
3016         err.append((UChar)124/*|*/).append(pe.postContext);
3017         logln("Ok: " + err);
3018         return;
3019     }
3020     errln("FAIL: No syntax error");
3021 }
3022 
3023 /**
3024  * Test invalid post context error handling
3025  */
TestInvalidPostContext()3026 void TransliteratorTest::TestInvalidPostContext() {
3027     UnicodeString rule = "a}b{c>d;";
3028     UErrorCode ec = U_ZERO_ERROR;
3029     UParseError pe;
3030     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3031     delete t;
3032     if (U_FAILURE(ec)) {
3033         UnicodeString err(pe.preContext);
3034         err.append((UChar)124/*|*/).append(pe.postContext);
3035         if (err.indexOf("a}b{c") >= 0) {
3036             logln("Ok: " + err);
3037         } else {
3038             errln("FAIL: " + err);
3039         }
3040         return;
3041     }
3042     errln("FAIL: No syntax error");
3043 }
3044 
3045 /**
3046  * Test ID form variants
3047  */
TestIDForms()3048 void TransliteratorTest::TestIDForms() {
3049     const char* DATA[] = {
3050         "NFC", NULL, "NFD",
3051         "nfd", NULL, "NFC", // make sure case is ignored
3052         "Any-NFKD", NULL, "Any-NFKC",
3053         "Null", NULL, "Null",
3054         "-nfkc", "nfkc", "NFKD",
3055         "-nfkc/", "nfkc", "NFKD",
3056         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3057         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3058         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3059         "Source-", NULL, NULL,
3060         "Source/Variant-", NULL, NULL,
3061         "Source-/Variant", NULL, NULL,
3062         "/Variant", NULL, NULL,
3063         "/Variant-", NULL, NULL,
3064         "-/Variant", NULL, NULL,
3065         "-/", NULL, NULL,
3066         "-", NULL, NULL,
3067         "/", NULL, NULL,
3068     };
3069     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3070 
3071     for (int32_t i=0; i<DATA_length; i+=3) {
3072         const char* ID = DATA[i];
3073         const char* expID = DATA[i+1];
3074         const char* expInvID = DATA[i+2];
3075         UBool expValid = (expInvID != NULL);
3076         if (expID == NULL) {
3077             expID = ID;
3078         }
3079         UParseError pe;
3080         UErrorCode ec = U_ZERO_ERROR;
3081         Transliterator *t =
3082             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3083         if (U_FAILURE(ec)) {
3084             if (!expValid) {
3085                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3086             } else {
3087                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3088             }
3089             delete t;
3090             continue;
3091         }
3092         Transliterator *u = t->createInverse(ec);
3093         if (U_FAILURE(ec)) {
3094             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3095             delete t;
3096             delete u;
3097             continue;
3098         }
3099         if (t->getID() == expID &&
3100             u->getID() == expInvID) {
3101             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3102         } else {
3103             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3104                   t->getID() + " x getInverse() => " + u->getID() +
3105                   ", expected " + expInvID);
3106         }
3107         delete t;
3108         delete u;
3109     }
3110 }
3111 
3112 static const UChar SPACE[]   = {32,0};
3113 static const UChar NEWLINE[] = {10,0};
3114 static const UChar RETURN[]  = {13,0};
3115 static const UChar EMPTY[]   = {0};
3116 
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3117 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3118                                     const UnicodeString& testRulesForward) {
3119     UnicodeString rules2; t2.toRules(rules2, TRUE);
3120     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3121     rules2.findAndReplace(SPACE, EMPTY);
3122     rules2.findAndReplace(NEWLINE, EMPTY);
3123     rules2.findAndReplace(RETURN, EMPTY);
3124 
3125     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3126 
3127     if (rules2 != testRules) {
3128         errln(label);
3129         logln((UnicodeString)"GENERATED RULES: " + rules2);
3130         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3131     }
3132 }
3133 
3134 /**
3135  * Mark's toRules test.
3136  */
TestToRulesMark()3137 void TransliteratorTest::TestToRulesMark() {
3138     const char* testRules =
3139         "::[[:Latin:][:Mark:]];"
3140         "::NFKD (NFC);"
3141         "::Lower (Lower);"
3142         "a <> \\u03B1;" // alpha
3143         "::NFKC (NFD);"
3144         "::Upper (Lower);"
3145         "::Lower ();"
3146         "::([[:Greek:][:Mark:]]);"
3147         ;
3148     const char* testRulesForward =
3149         "::[[:Latin:][:Mark:]];"
3150         "::NFKD(NFC);"
3151         "::Lower(Lower);"
3152         "a > \\u03B1;"
3153         "::NFKC(NFD);"
3154         "::Upper (Lower);"
3155         "::Lower ();"
3156         ;
3157     const char* testRulesBackward =
3158         "::[[:Greek:][:Mark:]];"
3159         "::Lower (Upper);"
3160         "::NFD(NFKC);"
3161         "\\u03B1 > a;"
3162         "::Lower(Lower);"
3163         "::NFC(NFKD);"
3164         ;
3165     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3166     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3167 
3168     UParseError pe;
3169     UErrorCode ec = U_ZERO_ERROR;
3170     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3171     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3172 
3173     if (U_FAILURE(ec)) {
3174         delete t2;
3175         delete t3;
3176         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3177         return;
3178     }
3179 
3180     expect(*t2, source, target);
3181     expect(*t3, target, source);
3182 
3183     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3184     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3185 
3186     delete t2;
3187     delete t3;
3188 }
3189 
3190 /**
3191  * Test Escape and Unescape transliterators.
3192  */
TestEscape()3193 void TransliteratorTest::TestEscape() {
3194     UParseError pe;
3195     UErrorCode ec;
3196     Transliterator *t;
3197 
3198     ec = U_ZERO_ERROR;
3199     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3200     if (U_FAILURE(ec)) {
3201         errln((UnicodeString)"FAIL: createInstance");
3202     } else {
3203         expect(*t,
3204                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3205                "@12Q");
3206     }
3207     delete t;
3208 
3209     ec = U_ZERO_ERROR;
3210     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3211     if (U_FAILURE(ec)) {
3212         errln((UnicodeString)"FAIL: createInstance");
3213     } else {
3214         expect(*t,
3215                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3216                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3217     }
3218     delete t;
3219 
3220     ec = U_ZERO_ERROR;
3221     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3222     if (U_FAILURE(ec)) {
3223         errln((UnicodeString)"FAIL: createInstance");
3224     } else {
3225         expect(*t,
3226                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3227                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3228     }
3229     delete t;
3230 
3231     ec = U_ZERO_ERROR;
3232     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3233     if (U_FAILURE(ec)) {
3234         errln((UnicodeString)"FAIL: createInstance");
3235     } else {
3236         expect(*t,
3237                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3238                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3239     }
3240     delete t;
3241 }
3242 
3243 
TestAnchorMasking()3244 void TransliteratorTest::TestAnchorMasking(){
3245     UnicodeString rule ("^a > Q; a > q;");
3246     UErrorCode status= U_ZERO_ERROR;
3247     UParseError parseError;
3248 
3249     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3250     if(U_FAILURE(status)){
3251         errln(UnicodeString("FAIL: ") + "ID" +
3252               ".createFromRules() => bad rules" +
3253               /*", parse error " + parseError.code +*/
3254               ", line " + parseError.line +
3255               ", offset " + parseError.offset +
3256               ", context " + prettify(parseError.preContext, TRUE) +
3257               ", rules: " + prettify(rule, TRUE));
3258     }
3259     delete t;
3260 }
3261 
3262 /**
3263  * Make sure display names of variants look reasonable.
3264  */
TestDisplayName()3265 void TransliteratorTest::TestDisplayName() {
3266 #if UCONFIG_NO_FORMATTING
3267     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3268     return;
3269 #else
3270     static const char* DATA[] = {
3271         // ID, forward name, reverse name
3272         // Update the text as necessary -- the important thing is
3273         // not the text itself, but how various cases are handled.
3274 
3275         // Basic test
3276         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3277 
3278         // Variants
3279         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3280 
3281         // Target-only IDs
3282         "NFC", "Any to NFC", "Any to NFD",
3283     };
3284 
3285     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3286 
3287     Locale US("en", "US");
3288 
3289     for (int32_t i=0; i<DATA_length; i+=3) {
3290         UnicodeString name;
3291         Transliterator::getDisplayName(DATA[i], US, name);
3292         if (name != DATA[i+1]) {
3293             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3294                   name + ", expected " + DATA[i+1]);
3295         } else {
3296             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3297         }
3298         UErrorCode ec = U_ZERO_ERROR;
3299         UParseError pe;
3300         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3301         if (U_FAILURE(ec)) {
3302             delete t;
3303             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3304             continue;
3305         }
3306         name = Transliterator::getDisplayName(t->getID(), US, name);
3307         if (name != DATA[i+2]) {
3308             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3309                   name + ", expected " + DATA[i+2]);
3310         } else {
3311             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3312         }
3313         delete t;
3314     }
3315 #endif
3316 }
3317 
TestSpecialCases(void)3318 void TransliteratorTest::TestSpecialCases(void) {
3319     const UnicodeString registerRules[] = {
3320         "Any-Dev1", "x > X; y > Y;",
3321         "Any-Dev2", "XY > Z",
3322         "Greek-Latin/FAKE",
3323             CharsToUnicodeString
3324             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3325         "" // END MARKER
3326     };
3327 
3328     const UnicodeString testCases[] = {
3329         // NORMALIZATION
3330         // should add more test cases
3331         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3332         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3334         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3335 
3336         // mp -> b BUG
3337         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3338         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3339 
3340         // check for devanagari bug
3341         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3342 
3343         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3344         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3345                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3346 
3347         //TODO: enable this test once Titlecase works right
3348         /*
3349         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3350                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3351                  */
3352         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3353                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3354         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3355                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3356 
3357         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3358         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3359 
3360          // FORMS OF S
3361         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3362                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3363         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3364                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3365         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3366                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3367         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3368                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369         // Tatiana bug
3370         // Upper: TAT\\u02B9\\u00C2NA
3371         // Lower: tat\\u02B9\\u00E2na
3372         // Title: Tat\\u02B9\\u00E2na
3373         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3374                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3375         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3376                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3377         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3378                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3379 
3380         "" // END MARKER
3381     };
3382 
3383     UParseError pos;
3384     int32_t i;
3385     for (i = 0; registerRules[i].length()!=0; i+=2) {
3386         UErrorCode status = U_ZERO_ERROR;
3387 
3388         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3389             registerRules[i+1], UTRANS_FORWARD, pos, status);
3390         if (U_FAILURE(status)) {
3391             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3392         } else {
3393             Transliterator::registerInstance(t);
3394         }
3395     }
3396     for (i = 0; testCases[i].length()!=0; i+=3) {
3397         UErrorCode ec = U_ZERO_ERROR;
3398         UParseError pe;
3399         const UnicodeString& name = testCases[i];
3400         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3401         if (U_FAILURE(ec)) {
3402             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3403             delete t;
3404             continue;
3405         }
3406         const UnicodeString& id = t->getID();
3407         const UnicodeString& source = testCases[i+1];
3408         UnicodeString target;
3409 
3410         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3411 
3412         if (testCases[i+2].length() > 0) {
3413             target = testCases[i+2];
3414         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3415             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3416         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3417             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3418         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3419             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3420         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3421             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3422         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3423             target = source;
3424             target.toLower(Locale::getUS());
3425         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3426             target = source;
3427             target.toUpper(Locale::getUS());
3428         }
3429         if (U_FAILURE(ec)) {
3430             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3431             continue;
3432         }
3433 
3434         expect(*t, source, target);
3435         delete t;
3436     }
3437     for (i = 0; registerRules[i].length()!=0; i+=2) {
3438         Transliterator::unregister(registerRules[i]);
3439     }
3440 }
3441 
Char32ToEscapedChars(UChar32 ch,char * buffer)3442 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3443     if (ch <= 0xFFFF) {
3444         sprintf(buffer, "\\u%04x", (int)ch);
3445     } else {
3446         sprintf(buffer, "\\U%08x", (int)ch);
3447     }
3448     return buffer;
3449 }
3450 
TestSurrogateCasing(void)3451 void TransliteratorTest::TestSurrogateCasing (void) {
3452     // check that casing handles surrogates
3453     // titlecase is currently defective
3454     char buffer[20];
3455     UChar buffer2[20];
3456     UChar32 dee;
3457     UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3458     UnicodeString DEE(u_totitle(dee));
3459     if (DEE != DESERET_DEE) {
3460         err("Fails titlecase of surrogates");
3461         err(Char32ToEscapedChars(dee, buffer));
3462         err(", ");
3463         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3464     }
3465 
3466     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3467     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3468     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3469     UErrorCode status= U_ZERO_ERROR;
3470 
3471     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3472     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3473         errln("Fails: Can't uppercase surrogates.");
3474     }
3475 
3476     status= U_ZERO_ERROR;
3477     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3479         errln("Fails: Can't lowercase surrogates.");
3480     }
3481 }
3482 
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3483 static void _trans(Transliterator& t, const UnicodeString& src,
3484                    UnicodeString& result) {
3485     result = src;
3486     t.transliterate(result);
3487 }
3488 
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3489 static void _trans(const UnicodeString& id, const UnicodeString& src,
3490                    UnicodeString& result, UErrorCode ec) {
3491     UParseError pe;
3492     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3493     if (U_SUCCESS(ec)) {
3494         _trans(*t, src, result);
3495     }
3496     delete t;
3497 }
3498 
/**
 * Look up source in a flat list of (key, value) pairs.
 *
 * @param source key to search for; compared with caseCompare() using
 *               U_FOLD_CASE_DEFAULT
 * @param pairs  array laid out as key, value, key, value, ... and terminated
 *               by an empty string in a key position
 * @return the value paired with the first matching key, or an empty string
 *         when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    for (const UnicodeString* key = pairs; key->length() > 0; key += 2) {
        if (source.caseCompare(*key, U_FOLD_CASE_DEFAULT) == 0) {
            return key[1];
        }
    }
    return UnicodeString();
}
3509 
3510 // Check to see that incremental gets at least part way through a reasonable string.
3511 
TestIncrementalProgress(void)3512 void TransliteratorTest::TestIncrementalProgress(void) {
3513     UErrorCode ec = U_ZERO_ERROR;
3514     UnicodeString latinTest = "The Quick Brown Fox.";
3515     UnicodeString devaTest;
3516     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3517     UnicodeString kataTest;
3518     _trans("Latin-Katakana", latinTest, kataTest, ec);
3519     if (U_FAILURE(ec)) {
3520         errln("FAIL: Internal error");
3521         return;
3522     }
3523     const UnicodeString tests[] = {
3524         "Any", latinTest,
3525         "Latin", latinTest,
3526         "Halfwidth", latinTest,
3527         "Devanagari", devaTest,
3528         "Katakana", kataTest,
3529         "" // END MARKER
3530     };
3531 
3532     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3533     int32_t i = 0, j=0, k=0;
3534     int32_t sources = Transliterator::countAvailableSources();
3535     for (i = 0; i < sources; i++) {
3536         UnicodeString source;
3537         Transliterator::getAvailableSource(i, source);
3538         UnicodeString test = _findMatch(source, tests);
3539         if (test.length() == 0) {
3540             logln((UnicodeString)"Skipping " + source + "-X");
3541             continue;
3542         }
3543         int32_t targets = Transliterator::countAvailableTargets(source);
3544         for (j = 0; j < targets; j++) {
3545             UnicodeString target;
3546             Transliterator::getAvailableTarget(j, source, target);
3547             int32_t variants = Transliterator::countAvailableVariants(source, target);
3548             for (k =0; k< variants; k++) {
3549                 UnicodeString variant;
3550                 UParseError err;
3551                 UErrorCode status = U_ZERO_ERROR;
3552 
3553                 Transliterator::getAvailableVariant(k, source, target, variant);
3554                 UnicodeString id = source + "-" + target + "/" + variant;
3555 
3556                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3557                 if (U_FAILURE(status)) {
3558                     errln((UnicodeString)"FAIL: Could not create " + id);
3559                     delete t;
3560                     continue;
3561                 }
3562                 status = U_ZERO_ERROR;
3563                 CheckIncrementalAux(t, test);
3564 
3565                 UnicodeString rev;
3566                 _trans(*t, test, rev);
3567                 Transliterator *inv = t->createInverse(status);
3568                 if (U_FAILURE(status)) {
3569                     errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3570                     delete t;
3571                     delete inv;
3572                     continue;
3573                 }
3574                 CheckIncrementalAux(inv, rev);
3575                 delete t;
3576                 delete inv;
3577             }
3578         }
3579     }
3580 }
3581 
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3582 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3583                                                       const UnicodeString& input) {
3584     UErrorCode ec = U_ZERO_ERROR;
3585     UTransPosition pos;
3586     UnicodeString test = input;
3587 
3588     pos.contextStart = 0;
3589     pos.contextLimit = input.length();
3590     pos.start = 0;
3591     pos.limit = input.length();
3592 
3593     t->transliterate(test, pos, ec);
3594     if (U_FAILURE(ec)) {
3595         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3596         return;
3597     }
3598     UBool gotError = FALSE;
3599 
3600     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3601 
3602     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3603         errln((UnicodeString)"No Progress, " +
3604               t->getID() + ": " + formatInput(test, input, pos));
3605         gotError = TRUE;
3606     } else {
3607         logln((UnicodeString)"PASS Progress, " +
3608               t->getID() + ": " + formatInput(test, input, pos));
3609     }
3610     t->finishTransliteration(test, pos);
3611     if (pos.start != pos.limit) {
3612         errln((UnicodeString)"Incomplete, " +
3613               t->getID() + ": " + formatInput(test, input, pos));
3614         gotError = TRUE;
3615     }
3616 }
3617 
TestFunction()3618 void TransliteratorTest::TestFunction() {
3619     // Careful with spacing and ';' here:  Phrase this exactly
3620     // as toRules() is going to return it.  If toRules() changes
3621     // with regard to spacing or ';', then adjust this string.
3622     UnicodeString rule =
3623         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3624 
3625     UParseError pe;
3626     UErrorCode ec = U_ZERO_ERROR;
3627     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3628     if (t == NULL) {
3629         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3630         return;
3631     }
3632 
3633     UnicodeString r;
3634     t->toRules(r, TRUE);
3635     if (r == rule) {
3636         logln((UnicodeString)"OK: toRules() => " + r);
3637     } else {
3638         errln((UnicodeString)"FAIL: toRules() => " + r +
3639               ", expected " + rule);
3640     }
3641 
3642     expect(*t, "The Quick Brown Fox",
3643            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3644 
3645     delete t;
3646 }
3647 
TestInvalidBackRef(void)3648 void TransliteratorTest::TestInvalidBackRef(void) {
3649     UnicodeString rule =  ". > $1;";
3650     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3651     UParseError pe;
3652     UErrorCode ec = U_ZERO_ERROR;
3653     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3654     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3655 
3656     if (t != NULL) {
3657         errln("FAIL: createFromRules should have returned NULL");
3658         delete t;
3659     }
3660 
3661     if (t2 != NULL) {
3662         errln("FAIL: createFromRules should have returned NULL");
3663         delete t2;
3664     }
3665 
3666     if (U_SUCCESS(ec)) {
3667         errln("FAIL: Ok: . > $1; => no error");
3668     } else {
3669         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3670     }
3671 }
3672 
TestMulticharStringSet()3673 void TransliteratorTest::TestMulticharStringSet() {
3674     // Basic testing
3675     const char* rule =
3676         "       [{aa}]       > x;"
3677         "         a          > y;"
3678         "       [b{bc}]      > z;"
3679         "[{gd}] { e          > q;"
3680         "         e } [{fg}] > r;" ;
3681 
3682     UParseError pe;
3683     UErrorCode ec = U_ZERO_ERROR;
3684     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3685     if (t == NULL || U_FAILURE(ec)) {
3686         delete t;
3687         errln("FAIL: createFromRules failed");
3688         return;
3689     }
3690 
3691     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3692            "y x yz z d gd de gdq gdqfg ddrfg");
3693     delete t;
3694 
3695     // Overlapped string test.  Make sure that when multiple
3696     // strings can match that the longest one is matched.
3697     rule =
3698         "    [a {ab} {abc}]    > x;"
3699         "           b          > y;"
3700         "           c          > z;"
3701         " q [t {st} {rst}] { e > p;" ;
3702 
3703     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3704     if (t == NULL || U_FAILURE(ec)) {
3705         delete t;
3706         errln("FAIL: createFromRules failed");
3707         return;
3708     }
3709 
3710     expect(*t, "a ab abc qte qste qrste",
3711            "x x x qtp qstp qrstp");
3712     delete t;
3713 }
3714 
3715 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3716 // BEGIN TestUserFunction support factory
3717 
3718 Transliterator* _TUFF[4];
3719 UnicodeString* _TUFID[4];
3720 
_TUFFactory(const UnicodeString &,Transliterator::Token context)3721 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3722                                    Transliterator::Token context) {
3723     return _TUFF[context.integer]->clone();
3724 }
3725 
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3726 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3727     _TUFF[n] = t;
3728     _TUFID[n] = new UnicodeString(ID);
3729     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3730 }
3731 
_TUFUnreg(int32_t n)3732 static void _TUFUnreg(int32_t n) {
3733     if (_TUFF[n] != NULL) {
3734         Transliterator::unregister(*_TUFID[n]);
3735         delete _TUFF[n];
3736         delete _TUFID[n];
3737     }
3738 }
3739 
3740 // END TestUserFunction support factory
3741 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3742 
3743 /**
3744  * Test that user-registered transliterators can be used under function
3745  * syntax.
3746  */
TestUserFunction()3747 void TransliteratorTest::TestUserFunction() {
3748 
3749     Transliterator* t;
3750     UParseError pe;
3751     UErrorCode ec = U_ZERO_ERROR;
3752 
3753     // Setup our factory
3754     int32_t i;
3755     for (i=0; i<4; ++i) {
3756         _TUFF[i] = NULL;
3757     }
3758 
3759     // There's no need to register inverses if we don't use them
3760     t = Transliterator::createFromRules("gif",
3761                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3762                                         UTRANS_FORWARD, pe, ec);
3763     if (t == NULL || U_FAILURE(ec)) {
3764         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3765         return;
3766     }
3767     _TUFReg("Any-gif", t, 0);
3768 
3769     t = Transliterator::createFromRules("RemoveCurly",
3770                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3771                                         UTRANS_FORWARD, pe, ec);
3772     if (t == NULL || U_FAILURE(ec)) {
3773         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3774         goto FAIL;
3775     }
3776     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3777     _TUFReg("Any-RemoveCurly", t, 1);
3778 
3779     logln("Trying &hex");
3780     t = Transliterator::createFromRules("hex2",
3781                                         "(.) > &hex($1);",
3782                                         UTRANS_FORWARD, pe, ec);
3783     if (t == NULL || U_FAILURE(ec)) {
3784         errln("FAIL: createFromRules");
3785         goto FAIL;
3786     }
3787     logln("Registering");
3788     _TUFReg("Any-hex2", t, 2);
3789     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3790     if (t == NULL || U_FAILURE(ec)) {
3791         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3792         goto FAIL;
3793     }
3794     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3795     delete t;
3796 
3797     logln("Trying &gif");
3798     t = Transliterator::createFromRules("gif2",
3799                                         "(.) > &Gif(&Hex2($1));",
3800                                         UTRANS_FORWARD, pe, ec);
3801     if (t == NULL || U_FAILURE(ec)) {
3802         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3803         goto FAIL;
3804     }
3805     logln("Registering");
3806     _TUFReg("Any-gif2", t, 3);
3807     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3808     if (t == NULL || U_FAILURE(ec)) {
3809         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3810         goto FAIL;
3811     }
3812     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3813            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3814     delete t;
3815 
3816     // Test that filters are allowed after &
3817     t = Transliterator::createFromRules("test",
3818                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3819                                         UTRANS_FORWARD, pe, ec);
3820     if (t == NULL || U_FAILURE(ec)) {
3821         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3822         goto FAIL;
3823     }
3824     expect(*t, "abc",
3825            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3826     delete t;
3827 
3828  FAIL:
3829     for (i=0; i<4; ++i) {
3830         _TUFUnreg(i);
3831     }
3832 }
3833 
3834 /**
3835  * Test the Any-X transliterators.
3836  */
TestAnyX(void)3837 void TransliteratorTest::TestAnyX(void) {
3838     UParseError parseError;
3839     UErrorCode status = U_ZERO_ERROR;
3840     Transliterator* anyLatin =
3841         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3842     if (anyLatin==0) {
3843         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3844         delete anyLatin;
3845         return;
3846     }
3847 
3848     expect(*anyLatin,
3849            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3850            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3851 
3852     delete anyLatin;
3853 }
3854 
3855 /**
3856  * Test Any-X transliterators with sample letters from all scripts.
3857  */
TestAny(void)3858 void TransliteratorTest::TestAny(void) {
3859     UErrorCode status = U_ZERO_ERROR;
3860     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3861     //       function call parameters going on in this test.
3862     UnicodeSet alphabetic("[:alphabetic:]", status);
3863     if (U_FAILURE(status)) {
3864         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3865         return;
3866     }
3867     alphabetic.freeze();
3868 
3869     UnicodeString testString;
3870     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3871         const char *scriptName = uscript_getShortName((UScriptCode)i);
3872         if (scriptName == NULL) {
3873             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3874             return;
3875         }
3876 
3877         UnicodeSet sample;
3878         sample.applyPropertyAlias("script", scriptName, status);
3879         if (U_FAILURE(status)) {
3880             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3881             return;
3882         }
3883         sample.retainAll(alphabetic);
3884         for (int32_t count=0; count<5; count++) {
3885             UChar32 c = sample.charAt(count);
3886             if (c == -1) {
3887                 break;
3888             }
3889             testString.append(c);
3890         }
3891     }
3892 
3893     UParseError parseError;
3894     Transliterator* anyLatin =
3895         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3896     if (U_FAILURE(status)) {
3897         errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3898         return;
3899     }
3900 
3901     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3902     anyLatin->transliterate(testString);
3903     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3904     delete anyLatin;
3905 }
3906 
3907 
3908 /**
3909  * Test the source and target set API.  These are only implemented
3910  * for RBT and CompoundTransliterator at this time.
3911  */
TestSourceTargetSet()3912 void TransliteratorTest::TestSourceTargetSet() {
3913     UErrorCode ec = U_ZERO_ERROR;
3914 
3915     // Rules
3916     const char* r =
3917         "a > b; "
3918         "r [x{lu}] > q;";
3919 
3920     // Expected source
3921     UnicodeSet expSrc("[arx{lu}]", ec);
3922 
3923     // Expected target
3924     UnicodeSet expTrg("[bq]", ec);
3925 
3926     UParseError pe;
3927     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3928 
3929     if (U_FAILURE(ec)) {
3930         delete t;
3931         errln("FAIL: Couldn't set up test");
3932         return;
3933     }
3934 
3935     UnicodeSet src; t->getSourceSet(src);
3936     UnicodeSet trg; t->getTargetSet(trg);
3937 
3938     if (src == expSrc && trg == expTrg) {
3939         UnicodeString a, b;
3940         logln((UnicodeString)"Ok: " +
3941               r + " => source = " + src.toPattern(a, TRUE) +
3942               ", target = " + trg.toPattern(b, TRUE));
3943     } else {
3944         UnicodeString a, b, c, d;
3945         errln((UnicodeString)"FAIL: " +
3946               r + " => source = " + src.toPattern(a, TRUE) +
3947               ", expected " + expSrc.toPattern(b, TRUE) +
3948               "; target = " + trg.toPattern(c, TRUE) +
3949               ", expected " + expTrg.toPattern(d, TRUE));
3950     }
3951 
3952     delete t;
3953 }
3954 
3955 /**
3956  * Test handling of rule whitespace, for both RBT and UnicodeSet.
3957  */
TestRuleWhitespace()3958 void TransliteratorTest::TestRuleWhitespace() {
3959     // Rules
3960     const char* r = "a > \\u200E b;";
3961 
3962     UErrorCode ec = U_ZERO_ERROR;
3963     UParseError pe;
3964     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3965 
3966     if (U_FAILURE(ec)) {
3967         errln("FAIL: Couldn't set up test");
3968     } else {
3969         expect(*t, "a", "b");
3970     }
3971     delete t;
3972 
3973     // UnicodeSet
3974     ec = U_ZERO_ERROR;
3975     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3976 
3977     if (U_FAILURE(ec)) {
3978         errln("FAIL: Couldn't set up test");
3979     } else {
3980         if (set.contains(0x200E)) {
3981             errln("FAIL: U+200E not being ignored by UnicodeSet");
3982         }
3983     }
3984 }
3985 //======================================================================
3986 // this method is in TestUScript.java
3987 //======================================================================
TestAllCodepoints()3988 void TransliteratorTest::TestAllCodepoints(){
3989     UScriptCode code= USCRIPT_INVALID_CODE;
3990     char id[256]={'\0'};
3991     char abbr[256]={'\0'};
3992     char newId[256]={'\0'};
3993     char newAbbrId[256]={'\0'};
3994     char oldId[256]={'\0'};
3995     char oldAbbrId[256]={'\0'};
3996 
3997     UErrorCode status =U_ZERO_ERROR;
3998     UParseError pe;
3999 
4000     for(uint32_t i = 0; i<=0x10ffff; i++){
4001         code =  uscript_getScript(i,&status);
4002         if(code == USCRIPT_INVALID_CODE){
4003             errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
4004         }
4005         const char* myId = uscript_getName(code);
4006         if(!myId) {
4007           dataerrln("Valid script code returned NULL name. Check your data!");
4008           return;
4009         }
4010         uprv_strcpy(id,myId);
4011         uprv_strcpy(abbr,uscript_getShortName(code));
4012 
4013         uprv_strcpy(newId,"[:");
4014         uprv_strcat(newId,id);
4015         uprv_strcat(newId,":];NFD");
4016 
4017         uprv_strcpy(newAbbrId,"[:");
4018         uprv_strcat(newAbbrId,abbr);
4019         uprv_strcat(newAbbrId,":];NFD");
4020 
4021         if(uprv_strcmp(newId,oldId)!=0){
4022             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4023             if(t==NULL || U_FAILURE(status)){
4024                 errln((UnicodeString)"FAIL: Could not create " + id);
4025             }
4026             delete t;
4027         }
4028         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4029             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4030             if(t==NULL || U_FAILURE(status)){
4031                 errln((UnicodeString)"FAIL: Could not create " + id);
4032             }
4033             delete t;
4034         }
4035         uprv_strcpy(oldId,newId);
4036         uprv_strcpy(oldAbbrId, newAbbrId);
4037 
4038     }
4039 
4040 }
4041 
4042 #define TEST_TRANSLIT_ID(id, cls) { \
4043   UErrorCode ec = U_ZERO_ERROR; \
4044   Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
4045   if (U_FAILURE(ec)) { \
4046     dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
4047   } else { \
4048     if (t->getDynamicClassID() != cls::getStaticClassID()) { \
4049       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4050     } \
4051     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4052   } \
4053   delete t; \
4054 }
4055 
4056 #define TEST_TRANSLIT_RULE(rule, cls) { \
4057   UErrorCode ec = U_ZERO_ERROR; \
4058   UParseError pe; \
4059   Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4060   if (U_FAILURE(ec)) { \
4061     errln("FAIL: Couldn't create " rule); \
4062   } else { \
4063     if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4064       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4065     } \
4066     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4067   } \
4068   delete t; \
4069 }
4070 
TestBoilerplate()4071 void TransliteratorTest::TestBoilerplate() {
4072     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4073     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4074     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4075     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4076     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4077     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4078     TEST_TRANSLIT_ID("Null", NullTransliterator);
4079     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4080     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4081     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4082     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4083     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4084     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4085 }
4086 
TestAlternateSyntax()4087 void TransliteratorTest::TestAlternateSyntax() {
4088     // U+2206 == &
4089     // U+2190 == <
4090     // U+2192 == >
4091     // U+2194 == <>
4092     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4093            "abc",
4094            "xbz");
4095     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4096            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4097            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4098 }
4099 
/*
 * Rule sets shared by TestBeginEnd() and TestBeginEndToRules(), referenced by
 * index from BEGIN_END_TEST_CASES.  Each entry is a single rule string
 * assembled from adjacent string literals.  Entries whose ::BEGIN/::END form
 * is commented out are kept as "" placeholders so that the indexes of the
 * remaining entries do not change.
 */
4100 static const char* BEGIN_END_RULES[] = {
4101     // [0]
4102     "abc > xy;"
4103     "aba > z;",
4104 
4105     // [1]
4106 /*
4107     "::BEGIN;"
4108     "abc > xy;"
4109     "::END;"
4110     "::BEGIN;"
4111     "aba > z;"
4112     "::END;",
4113 */
4114     "", // test case commented out below, this is here to keep from messing up the indexes
4115 
4116     // [2]
4117 /*
4118     "abc > xy;"
4119     "::BEGIN;"
4120     "aba > z;"
4121     "::END;",
4122 */
4123     "", // test case commented out below, this is here to keep from messing up the indexes
4124 
4125     // [3]
4126 /*
4127     "::BEGIN;"
4128     "abc > xy;"
4129     "::END;"
4130     "aba > z;",
4131 */
4132     "", // test case commented out below, this is here to keep from messing up the indexes
4133 
4134     // [4]
4135     "abc > xy;"
4136     "::Null;"
4137     "aba > z;",
4138 
4139     // [5]
4140     "::Upper;"
4141     "ABC > xy;"
4142     "AB > x;"
4143     "C > z;"
4144     "::Upper;"
4145     "XYZ > p;"
4146     "XY > q;"
4147     "Z > r;"
4148     "::Upper;",
4149 
4150     // [6]
4151     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4152     "$delim = [\\-$ws];"
4153     "$ws $delim* > ' ';"
4154     "'-' $delim* > '-';",
4155 
4156     // [7]
4157     "::Null;"
4158     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4159     "$delim = [\\-$ws];"
4160     "$ws $delim* > ' ';"
4161     "'-' $delim* > '-';",
4162 
4163     // [8]
4164     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4165     "$delim = [\\-$ws];"
4166     "$ws $delim* > ' ';"
4167     "'-' $delim* > '-';"
4168     "::Null;",
4169 
4170     // [9]
4171     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4172     "$delim = [\\-$ws];"
4173     "::Null;"
4174     "$ws $delim* > ' ';"
4175     "'-' $delim* > '-';",
4176 
4177     // [10]
4178 /*
4179     "::BEGIN;"
4180     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4181     "$delim = [\\-$ws];"
4182     "::END;"
4183     "$ws $delim* > ' ';"
4184     "'-' $delim* > '-';",
4185 */
4186     "", // test case commented out below, this is here to keep from messing up the indexes
4187 
4188     // [11]
4189 /*
4190     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4191     "$delim = [\\-$ws];"
4192     "::BEGIN;"
4193     "$ws $delim* > ' ';"
4194     "'-' $delim* > '-';"
4195     "::END;",
4196 */
4197     "", // test case commented out below, this is here to keep from messing up the indexes
4198 
4199     // [12]
4200 /*
4201     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4202     "$delim = [\\-$ws];"
4203     "$ab = [ab];"
4204     "::BEGIN;"
4205     "$ws $delim* > ' ';"
4206     "'-' $delim* > '-';"
4207     "::END;"
4208     "::BEGIN;"
4209     "$ab { ' ' } $ab > '-';"
4210     "c { ' ' > ;"
4211     "::END;"
4212     "::BEGIN;"
4213     "'a-a' > a\\%|a;"
4214     "::END;",
4215 */
4216     "", // test case commented out below, this is here to keep from messing up the indexes
4217 
4218     // [13]
4219     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4220     "$delim = [\\-$ws];"
4221     "$ab = [ab];"
4222     "::Null;"
4223     "$ws $delim* > ' ';"
4224     "'-' $delim* > '-';"
4225     "::Null;"
4226     "$ab { ' ' } $ab > '-';"
4227     "c { ' ' > ;"
4228     "::Null;"
4229     "'a-a' > a\\%|a;",
4230 
4231     // [14]
4232 /*
4233     "::[abc];"
4234     "::BEGIN;"
4235     "abc > xy;"
4236     "::END;"
4237     "::BEGIN;"
4238     "aba > yz;"
4239     "::END;"
4240     "::Upper;",
4241 */
4242     "", // test case commented out below, this is here to keep from messing up the indexes
4243 
4244     // [15]
4245     "::[abc];"
4246     "abc > xy;"
4247     "::Null;"
4248     "aba > yz;"
4249     "::Upper;",
4250 
4251     // [16]
4252 /*
4253     "::[abc];"
4254     "::BEGIN;"
4255     "abc <> xy;"
4256     "::END;"
4257     "::BEGIN;"
4258     "aba <> yz;"
4259     "::END;"
4260     "::Upper(Lower);"
4261     "::([XYZ]);"
4262 */
4263     "", // test case commented out below, this is here to keep from messing up the indexes
4264 
4265     // [17]
4266     "::[abc];"
4267     "abc <> xy;"
4268     "::Null;"
4269     "aba <> yz;"
4270     "::Upper(Lower);"
4271     "::([XYZ]);"
4272 };
4273 static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4274 
4275 /*
4276 (This entire test is commented out below and will need some heavy revision when we re-add
4277 the ::BEGIN/::END stuff)
4278 static const char* BOGUS_BEGIN_END_RULES[] = {
4279     // [7]
4280     "::BEGIN;"
4281     "abc > xy;"
4282     "::BEGIN;"
4283     "aba > z;"
4284     "::END;"
4285     "::END;",
4286 
4287     // [8]
4288     "abc > xy;"
4289     " aba > z;"
4290     "::END;",
4291 
4292     // [9]
4293     "::BEGIN;"
4294     "::Upper;"
4295     "::END;"
4296 };
4297 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4298 */
4299 
/*
 * Flat table of test cases for TestBeginEnd() and TestBeginEndToRules(),
 * consumed three entries at a time: rule string, input text, expected output.
 * Rows that refer to a placeholder ("") entry of BEGIN_END_RULES are
 * commented out.
 */
4300 static const char* BEGIN_END_TEST_CASES[] = {
4301     // rules             input                   expected output
4302     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4303 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4304 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4305 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4306     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4307     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4308 
4309     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4310     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4311     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4312     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4313 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4314 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4315 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4316 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4317 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4318     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4319     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4320     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4321 
4322 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4323     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4324 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4325     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4326 };
4327 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4328 
TestBeginEnd()4329 void TransliteratorTest::TestBeginEnd() {
4330     // run through the list of test cases above
4331     int32_t i = 0;
4332     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4333         expect((UnicodeString)"Test case #" + (i / 3),
4334                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4335                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4336                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4337     }
4338 
4339     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4340     UParseError parseError;
4341     UErrorCode status = U_ZERO_ERROR;
4342     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4343             UTRANS_REVERSE, parseError, status);
4344     if (reversed == 0 || U_FAILURE(status)) {
4345         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4346     } else {
4347         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4348     }
4349     delete reversed;
4350 
4351     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4352     // that all of them cause errors
4353 /*
4354 (commented out until we have the real ::BEGIN/::END stuff in place
4355     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4356         UParseError parseError;
4357         UErrorCode status = U_ZERO_ERROR;
4358         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4359                 UTRANS_FORWARD, parseError, status);
4360         if (!U_FAILURE(status)) {
4361             delete t;
4362             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4363         }
4364     }
4365 */
4366 }
4367 
TestBeginEndToRules()4368 void TransliteratorTest::TestBeginEndToRules() {
4369     // run through the same list of test cases we used above, but this time, instead of just
4370     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4371     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4372     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4373     // to (i.e., does the same thing as) the original rule set
4374     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4375         UParseError parseError;
4376         UErrorCode status = U_ZERO_ERROR;
4377         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4378                 UTRANS_FORWARD, parseError, status);
4379         if (U_FAILURE(status)) {
4380             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4381         } else {
4382             UnicodeString rules;
4383             t->toRules(rules, TRUE);
4384             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4385                     UTRANS_FORWARD, parseError, status);
4386             if (U_FAILURE(status)) {
4387                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4388                         parseError, status);
4389                 delete t;
4390             } else {
4391                 expect(*t2,
4392                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4393                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4394                 delete t;
4395                 delete t2;
4396             }
4397         }
4398     }
4399 
4400     // do the same thing for the reversible test case
4401     UParseError parseError;
4402     UErrorCode status = U_ZERO_ERROR;
4403     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4404             UTRANS_REVERSE, parseError, status);
4405     if (U_FAILURE(status)) {
4406         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4407     } else {
4408         UnicodeString rules;
4409         reversed->toRules(rules, FALSE);
4410         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4411                 parseError, status);
4412         if (U_FAILURE(status)) {
4413             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4414                     parseError, status);
4415             delete reversed;
4416         } else {
4417             expect(*reversed2,
4418                    UnicodeString("xy XY XYZ yz YZ"),
4419                    UnicodeString("xy abc xaba yz aba"));
4420             delete reversed;
4421             delete reversed2;
4422         }
4423     }
4424 }
4425 
TestRegisterAlias()4426 void TransliteratorTest::TestRegisterAlias() {
4427     UnicodeString longID("Lower;[aeiou]Upper");
4428     UnicodeString shortID("Any-CapVowels");
4429     UnicodeString reallyShortID("CapVowels");
4430 
4431     Transliterator::registerAlias(shortID, longID);
4432 
4433     UErrorCode err = U_ZERO_ERROR;
4434     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4435     if (U_FAILURE(err)) {
4436         errln("Failed to instantiate transliterator with long ID");
4437         Transliterator::unregister(shortID);
4438         return;
4439     }
4440     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4441     if (U_FAILURE(err)) {
4442         errln("Failed to instantiate transliterator with short ID");
4443         delete t1;
4444         Transliterator::unregister(shortID);
4445         return;
4446     }
4447 
4448     if (t1->getID() != longID)
4449         errln("Transliterator instantiated with long ID doesn't have long ID");
4450     if (t2->getID() != reallyShortID)
4451         errln("Transliterator instantiated with short ID doesn't have short ID");
4452 
4453     UnicodeString rules1;
4454     UnicodeString rules2;
4455 
4456     t1->toRules(rules1, TRUE);
4457     t2->toRules(rules2, TRUE);
4458     if (rules1 != rules2)
4459         errln("Alias transliterators aren't the same");
4460 
4461     delete t1;
4462     delete t2;
4463     Transliterator::unregister(shortID);
4464 
4465     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4466     if (U_SUCCESS(err)) {
4467         errln("Instantiation with short ID succeeded after short ID was unregistered");
4468         delete t1;
4469     }
4470 
4471     // try the same thing again, but this time with something other than
4472     // an instance of CompoundTransliterator
4473     UnicodeString realID("Latin-Greek");
4474     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4475     Transliterator::registerAlias(fakeID, realID);
4476 
4477     err = U_ZERO_ERROR;
4478     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4479     if (U_FAILURE(err)) {
4480         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4481         Transliterator::unregister(realID);
4482         return;
4483     }
4484     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4485     if (U_FAILURE(err)) {
4486         errln("Failed to instantiate transliterator with fake ID");
4487         delete t1;
4488         Transliterator::unregister(realID);
4489         return;
4490     }
4491 
4492     t1->toRules(rules1, TRUE);
4493     t2->toRules(rules2, TRUE);
4494     if (rules1 != rules2)
4495         errln("Alias transliterators aren't the same");
4496 
4497     delete t1;
4498     delete t2;
4499     Transliterator::unregister(fakeID);
4500 }
4501 
TestRuleStripping()4502 void TransliteratorTest::TestRuleStripping() {
4503     /*
4504 #
4505 \uE001>\u0C01; # SIGN
4506     */
4507     static const UChar rule[] = {
4508         0x0023,0x0020,0x000D,0x000A,
4509         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4510     };
4511     static const UChar expectedRule[] = {
4512         0xE001,0x003E,0x0C01,0x003B,0
4513     };
4514     UChar result[sizeof(rule)/sizeof(rule[0])];
4515     UErrorCode status = U_ZERO_ERROR;
4516     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4517     if (len != u_strlen(expectedRule)) {
4518         errln("utrans_stripRules return len = %d", len);
4519     }
4520     if (u_strncmp(expectedRule, result, len) != 0) {
4521         errln("utrans_stripRules did not return expected string");
4522     }
4523 }
4524 
4525 /**
4526  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4527  */
TestHalfwidthFullwidth(void)4528 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4529     UParseError parseError;
4530     UErrorCode status = U_ZERO_ERROR;
4531     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4532     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4533     if (hf == 0 || fh == 0) {
4534         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4535         delete hf;
4536         delete fh;
4537         return;
4538     }
4539 
4540     // Array of 2n items
4541     // Each item is
4542     //   "hf"|"fh"|"both",
4543     //   <Halfwidth>,
4544     //   <Fullwidth>
4545     const char* DATA[] = {
4546         "both",
4547         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4548         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4549     };
4550     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4551 
4552     for (int32_t i=0; i<DATA_length; i+=3) {
4553         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4554         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4555         switch (*DATA[i]) {
4556         case 0x68: //'h': // Halfwidth-Fullwidth only
4557             expect(*hf, h, f);
4558             break;
4559         case 0x66: //'f': // Fullwidth-Halfwidth only
4560             expect(*fh, f, h);
4561             break;
4562         case 0x62: //'b': // both directions
4563             expect(*hf, h, f);
4564             expect(*fh, f, h);
4565             break;
4566         }
4567     }
4568     delete hf;
4569     delete fh;
4570 }
4571 
4572 
4573     /**
4574      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4575      *              TODO: confirm that the expected results are correct.
4576      *              For now, test just confirms that C++ and Java give identical results.
4577      */
TestThai(void)4578 void TransliteratorTest::TestThai(void) {
4579     UParseError parseError;
4580     UErrorCode status = U_ZERO_ERROR;
4581     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4582     if (tr == 0) {
4583         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4584         return;
4585     }
4586     if (U_FAILURE(status)) {
4587         errln("FAIL: createInstance failed with %s", u_errorName(status));
4588         return;
4589     }
4590     const char *thaiText =
4591         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4592         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4593         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4594         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4595         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4596         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4597         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4598         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4599         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4600         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4601         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4602         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4603         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4604         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4605         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4606         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4607         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4608         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4609         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4610         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4611         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4612         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4613         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4614         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4615         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4616         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4617         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4618         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4619         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4620         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4621 
4622     const char *latinText =
4623         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4624         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4625         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4626         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4627         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4628         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4629         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4630         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4631         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4632         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4633         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4634         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4635         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4636         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4637         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4638         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4639         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4640         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4641 
4642 
4643     UnicodeString  xlitText(thaiText);
4644     xlitText = xlitText.unescape();
4645     tr->transliterate(xlitText);
4646 
4647     UnicodeString expectedText(latinText);
4648     expectedText = expectedText.unescape();
4649     expect(*tr, xlitText, expectedText);
4650 
4651     delete tr;
4652 }
4653 
4654 
4655 //======================================================================
4656 // Support methods
4657 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4658 void TransliteratorTest::expectT(const UnicodeString& id,
4659                                  const UnicodeString& source,
4660                                  const UnicodeString& expectedResult) {
4661     UErrorCode ec = U_ZERO_ERROR;
4662     UParseError pe;
4663     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4664     if (U_FAILURE(ec)) {
4665         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4666         delete t;
4667         return;
4668     }
4669     expect(*t, source, expectedResult);
4670     delete t;
4671 }
4672 
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4673 void TransliteratorTest::reportParseError(const UnicodeString& message,
4674                                           const UParseError& parseError,
4675                                           const UErrorCode& status) {
4676     dataerrln(message +
4677           /*", parse error " + parseError.code +*/
4678           ", line " + parseError.line +
4679           ", offset " + parseError.offset +
4680           ", pre-context " + prettify(parseError.preContext, TRUE) +
4681           ", post-context " + prettify(parseError.postContext,TRUE) +
4682           ", Error: " + u_errorName(status));
4683 }
4684 
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4685 void TransliteratorTest::expect(const UnicodeString& rules,
4686                                 const UnicodeString& source,
4687                                 const UnicodeString& expectedResult,
4688                                 UTransPosition *pos) {
4689     expect("<ID>", rules, source, expectedResult, pos);
4690 }
4691 
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4692 void TransliteratorTest::expect(const UnicodeString& id,
4693                                 const UnicodeString& rules,
4694                                 const UnicodeString& source,
4695                                 const UnicodeString& expectedResult,
4696                                 UTransPosition *pos) {
4697     UErrorCode status = U_ZERO_ERROR;
4698     UParseError parseError;
4699     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4700     if (U_FAILURE(status)) {
4701         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4702     } else {
4703         expect(*t, source, expectedResult, pos);
4704     }
4705     delete t;
4706 }
4707 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4708 void TransliteratorTest::expect(const Transliterator& t,
4709                                 const UnicodeString& source,
4710                                 const UnicodeString& expectedResult,
4711                                 const Transliterator& reverseTransliterator) {
4712     expect(t, source, expectedResult);
4713     expect(reverseTransliterator, expectedResult, source);
4714 }
4715 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4716 void TransliteratorTest::expect(const Transliterator& t,
4717                                 const UnicodeString& source,
4718                                 const UnicodeString& expectedResult,
4719                                 UTransPosition *pos) {
4720     if (pos == 0) {
4721         UnicodeString result(source);
4722         t.transliterate(result);
4723         expectAux(t.getID() + ":String", source, result, expectedResult);
4724     }
4725     UTransPosition index={0, 0, 0, 0};
4726     if (pos != 0) {
4727         index = *pos;
4728     }
4729 
4730     UnicodeString rsource(source);
4731     if (pos == 0) {
4732         t.transliterate(rsource);
4733     } else {
4734         // Do it all at once -- below we do it incrementally
4735         t.finishTransliteration(rsource, *pos);
4736     }
4737     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4738 
4739     // Test keyboard (incremental) transliteration -- this result
4740     // must be the same after we finalize (see below).
4741     UnicodeString log;
4742     rsource.remove();
4743     if (pos != 0) {
4744         rsource = source;
4745         formatInput(log, rsource, index);
4746         log.append(" -> ");
4747         UErrorCode status = U_ZERO_ERROR;
4748         t.transliterate(rsource, index, status);
4749         formatInput(log, rsource, index);
4750     } else {
4751         for (int32_t i=0; i<source.length(); ++i) {
4752             if (i != 0) {
4753                 log.append(" + ");
4754             }
4755             log.append(source.charAt(i)).append(" -> ");
4756             UErrorCode status = U_ZERO_ERROR;
4757             t.transliterate(rsource, index, source.charAt(i), status);
4758             formatInput(log, rsource, index);
4759         }
4760     }
4761 
4762     // As a final step in keyboard transliteration, we must call
4763     // transliterate to finish off any pending partial matches that
4764     // were waiting for more input.
4765     t.finishTransliteration(rsource, index);
4766     log.append(" => ").append(rsource);
4767 
4768     expectAux(t.getID() + ":Keyboard", log,
4769               rsource == expectedResult,
4770               expectedResult);
4771 }
4772 
4773 
4774 /**
4775  * @param appendTo result is appended to this param.
4776  * @param input the string being transliterated
4777  * @param pos the index struct
4778  */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4779 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4780                                                const UnicodeString& input,
4781                                                const UTransPosition& pos) {
4782     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4783     // the {} indicate the context start and limit, and the ||
4784     // indicate the start and limit.
4785     if (0 <= pos.contextStart &&
4786         pos.contextStart <= pos.start &&
4787         pos.start <= pos.limit &&
4788         pos.limit <= pos.contextLimit &&
4789         pos.contextLimit <= input.length()) {
4790 
4791         UnicodeString a, b, c, d, e;
4792         input.extractBetween(0, pos.contextStart, a);
4793         input.extractBetween(pos.contextStart, pos.start, b);
4794         input.extractBetween(pos.start, pos.limit, c);
4795         input.extractBetween(pos.limit, pos.contextLimit, d);
4796         input.extractBetween(pos.contextLimit, input.length(), e);
4797         appendTo.append(a).append((UChar)123/*{*/).append(b).
4798             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4799             append((UChar)125/*}*/).append(e);
4800     } else {
4801         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4802                         pos.contextStart + ", s=" + pos.start + ", l=" +
4803                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4804                         input);
4805     }
4806     return appendTo;
4807 }
4808 
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4809 void TransliteratorTest::expectAux(const UnicodeString& tag,
4810                                    const UnicodeString& source,
4811                                    const UnicodeString& result,
4812                                    const UnicodeString& expectedResult) {
4813     expectAux(tag, source + " -> " + result,
4814               result == expectedResult,
4815               expectedResult);
4816 }
4817 
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4818 void TransliteratorTest::expectAux(const UnicodeString& tag,
4819                                    const UnicodeString& summary, UBool pass,
4820                                    const UnicodeString& expectedResult) {
4821     if (pass) {
4822         logln(UnicodeString("(")+tag+") " + prettify(summary));
4823     } else {
4824         dataerrln(UnicodeString("FAIL: (")+tag+") "
4825               + prettify(summary)
4826               + ", expected " + prettify(expectedResult));
4827     }
4828 }
4829 
4830 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4831