• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 1999-2011, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/10/99    aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_TRANSLITERATION
14 
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "unicode/utf16.h"
27 #include "cpdtrans.h"
28 #include "nultrans.h"
29 #include "rbt.h"
30 #include "rbt_pars.h"
31 #include "anytrans.h"
32 #include "esctrn.h"
33 #include "name2uni.h"
34 #include "nortrans.h"
35 #include "remtrans.h"
36 #include "titletrn.h"
37 #include "tolowtrn.h"
38 #include "toupptrn.h"
39 #include "unesctrn.h"
40 #include "uni2name.h"
41 #include "cstring.h"
42 #include "cmemory.h"
43 #include <stdio.h>
44 
45 /***********************************************************************
46 
47                      HOW TO USE THIS TEST FILE
48                                -or-
49                   How I developed on two platforms
50                 without losing (too much of) my mind
51 
52 
53 1. Add new tests by copying/pasting/changing existing tests.  On Java,
54    any public void method named Test...() taking no parameters becomes
55    a test.  On C++, you need to modify the header and add a line to
56    the runIndexedTest() dispatch method.
57 
58 2. Make liberal use of the expect() method; it is your friend.
59 
60 3. The tests in this file exactly match those in a sister file on the
61    other side.  The two files are:
62 
63    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
64    icu4c:  source/test/intltest/transtst.cpp
65 
66                   ==> THIS IS THE IMPORTANT PART <==
67 
68    When you add a test in this file, add it in TransliteratorTest.java
69    too.  Give it the same name and put it in the same relative place.
70    This makes maintenance a lot simpler for any poor soul who ends up
71    trying to synchronize the tests between icu4j and icu4c.
72 
73 4. If you MUST enter a test that is NOT paralleled in the sister file,
74    then add it in the special non-mirrored section.  These are
75    labeled
76 
77      "icu4j ONLY"
78 
79    or
80 
81      "icu4c ONLY"
82 
83    Make sure you document the reason the test is here and not there.
84 
85 
86 Thank you.
87 The Management
88 ***********************************************************************/
89 
90 // Define character constants thusly to be EBCDIC-friendly
91 enum {
92     LEFT_BRACE=((UChar)0x007B), /*{*/
93     PIPE      =((UChar)0x007C), /*|*/
94     ZERO      =((UChar)0x0030), /*0*/
95     UPPER_A   =((UChar)0x0041)  /*A*/
96 };
97 
TransliteratorTest()98 TransliteratorTest::TransliteratorTest()
99 :   DESERET_DEE((UChar32)0x10414),
100     DESERET_dee((UChar32)0x1043C)
101 {
102 }
103 
~TransliteratorTest()104 TransliteratorTest::~TransliteratorTest() {}
105 
/**
 * Test dispatcher: maps a zero-based test index onto a test method.
 *
 * Each TESTCASE(n, Method) line presumably expands to a `case n:` that
 * stores the method's name in `name` and invokes it when `exec` is set
 * (the macro is defined outside this file -- confirm against its
 * definition).  An index with no matching case reaches the default
 * label, which sets `name` to the empty string.
 *
 * To add a test, append a TESTCASE line using the next unused index and
 * declare the method in the header.
 *
 * @param index  zero-based index of the requested test
 * @param exec   passed through to TESTCASE; presumably "run the test"
 *               as opposed to "only report its name" -- TODO confirm
 * @param name   receives the test name, or "" for an unknown index
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        // NOTE(review): when UCONFIG_NO_FILE_IO is defined there is no
        // case 65, so that index reaches the default label and yields an
        // empty name.  If the caller treats an empty name as "no more
        // tests", cases 66-84 would never run in that configuration --
        // confirm against the test driver.
#if !UCONFIG_NO_FILE_IO
        TESTCASE(65,TestIncrementalProgress);
#endif
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestPatternWhiteSpace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        TESTCASE(84,TestAny);
        // Unknown index: report an empty test name.
        default: name = ""; break;
    }
}
200 
// Version tuple {3,9,4,0}.  NOTE(review): not referenced in the part of
// the file visible here -- confirm it is still used further down.
static const UVersionInfo ICU_39 = {3,9,4,0};
202 /**
203  * Make sure every system transliterator can be instantiated.
204  *
205  * ALSO test that the result of toRules() for each rule is a valid
206  * rule.  Do this here so we don't have to have another test that
207  * instantiates everything as well.
208  */
TestInstantiation()209 void TransliteratorTest::TestInstantiation() {
210     UErrorCode ec = U_ZERO_ERROR;
211     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
212     assertSuccess("getAvailableIDs()", ec);
213     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
214     int32_t n = Transliterator::countAvailableIDs();
215     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
216                avail->count(ec) == n);
217     assertSuccess("count()", ec);
218     UnicodeString name;
219     for (int32_t i=0; i<n; ++i) {
220         const UnicodeString& id = *avail->snext(ec);
221         if (!assertSuccess("snext()", ec) ||
222             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
223             break;
224         }
225         UnicodeString id2 = Transliterator::getAvailableID(i);
226         if (id.length() < 1) {
227             errln(UnicodeString("FAIL: getAvailableID(") +
228                   i + ") returned empty string");
229             continue;
230         }
231         if (id != id2) {
232             errln(UnicodeString("FAIL: getAvailableID(") +
233                   i + ") != getAvailableIDs().snext()");
234             continue;
235         }
236         UParseError parseError;
237         UErrorCode status = U_ZERO_ERROR;
238         Transliterator* t = Transliterator::createInstance(id,
239                               UTRANS_FORWARD, parseError,status);
240         name.truncate(0);
241         Transliterator::getDisplayName(id, name);
242         if (t == 0) {
243 #if UCONFIG_NO_BREAK_ITERATION
244             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
245             if (id.compare((UnicodeString)"Thai-Latin") != 0)
246 #endif
247                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
248                       /*", parse error " + parseError.code +*/
249                       ", line " + parseError.line +
250                       ", offset " + parseError.offset +
251                       ", pre-context " + prettify(parseError.preContext, TRUE) +
252                       ", post-context " +prettify(parseError.postContext,TRUE) +
253                       ", Error: " + u_errorName(status));
254                 // When createInstance fails, it deletes the failing
255                 // entry from the available ID list.  We detect this
256                 // here by looking for a change in countAvailableIDs.
257             int32_t nn = Transliterator::countAvailableIDs();
258             if (nn == (n - 1)) {
259                 n = nn;
260                 --i; // Compensate for deleted entry
261             }
262         } else {
263             logln(UnicodeString("OK: ") + name + " (" + id + ")");
264 
265             // Now test toRules
266             UnicodeString rules;
267             t->toRules(rules, TRUE);
268             Transliterator *u = Transliterator::createFromRules("x",
269                                     rules, UTRANS_FORWARD, parseError,status);
270             if (u == 0) {
271                 errln(UnicodeString("FAIL: ") + id +
272                       ".createFromRules() => bad rules" +
273                       /*", parse error " + parseError.code +*/
274                       ", line " + parseError.line +
275                       ", offset " + parseError.offset +
276                       ", context " + prettify(parseError.preContext, TRUE) +
277                       ", rules: " + prettify(rules, TRUE));
278             } else {
279                 delete u;
280             }
281             delete t;
282         }
283     }
284     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
285     assertSuccess("snext()", ec);
286     delete avail;
287 
288     // Now test the failure path
289     UParseError parseError;
290     UErrorCode status = U_ZERO_ERROR;
291     UnicodeString id("<Not a valid Transliterator ID>");
292     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
293     if (t != 0) {
294         errln("FAIL: " + id + " returned a transliterator");
295         delete t;
296     } else {
297         logln("OK: Bogus ID handled properly");
298     }
299 }
300 
TestSimpleRules(void)301 void TransliteratorTest::TestSimpleRules(void) {
302     /* Example: rules 1. ab>x|y
303      *                2. yc>z
304      *
305      * []|eabcd  start - no match, copy e to tranlated buffer
306      * [e]|abcd  match rule 1 - copy output & adjust cursor
307      * [ex|y]cd  match rule 2 - copy output & adjust cursor
308      * [exz]|d   no match, copy d to transliterated buffer
309      * [exzd]|   done
310      */
311     expect(UnicodeString("ab>x|y;", "") +
312            "yc>z",
313            "eabcd", "exzd");
314 
315     /* Another set of rules:
316      *    1. ab>x|yzacw
317      *    2. za>q
318      *    3. qc>r
319      *    4. cw>n
320      *
321      * []|ab       Rule 1
322      * [x|yzacw]   No match
323      * [xy|zacw]   Rule 2
324      * [xyq|cw]    Rule 4
325      * [xyqn]|     Done
326      */
327     expect(UnicodeString("ab>x|yzacw;") +
328            "za>q;" +
329            "qc>r;" +
330            "cw>n",
331            "ab", "xyqn");
332 
333     /* Test categories
334      */
335     UErrorCode status = U_ZERO_ERROR;
336     UParseError parseError;
337     Transliterator *t = Transliterator::createFromRules(
338         "<ID>",
339         UnicodeString("$dummy=").append((UChar)0xE100) +
340         UnicodeString(";"
341                       "$vowel=[aeiouAEIOU];"
342                       "$lu=[:Lu:];"
343                       "$vowel } $lu > '!';"
344                       "$vowel > '&';"
345                       "'!' { $lu > '^';"
346                       "$lu > '*';"
347                       "a > ERROR", ""),
348         UTRANS_FORWARD, parseError,
349         status);
350     if (U_FAILURE(status)) {
351         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
352         return;
353     }
354     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
355     delete t;
356 }
357 
358 /**
359  * Test inline set syntax and set variable syntax.
360  */
TestInlineSet(void)361 void TransliteratorTest::TestInlineSet(void) {
362     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
363     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
364 
365     expect(UnicodeString(
366            "$digit = [0-9];"
367            "$alpha = [a-zA-Z];"
368            "$alphanumeric = [$digit $alpha];" // ***
369            "$special = [^$alphanumeric];"     // ***
370            "$alphanumeric > '-';"
371            "$special > '*';", ""),
372 
373            "thx-1138", "---*----");
374 }
375 
376 /**
377  * Create some inverses and confirm that they work.  We have to be
378  * careful how we do this, since the inverses will not be true
379  * inverses -- we can't throw any random string at the composition
380  * of the transliterators and expect the identity function.  F x
381  * F' != I.  However, if we are careful about the input, we will
382  * get the expected results.
383  */
TestRuleBasedInverse(void)384 void TransliteratorTest::TestRuleBasedInverse(void) {
385     UnicodeString RULES =
386         UnicodeString("abc>zyx;") +
387         "ab>yz;" +
388         "bc>zx;" +
389         "ca>xy;" +
390         "a>x;" +
391         "b>y;" +
392         "c>z;" +
393 
394         "abc<zyx;" +
395         "ab<yz;" +
396         "bc<zx;" +
397         "ca<xy;" +
398         "a<x;" +
399         "b<y;" +
400         "c<z;" +
401 
402         "";
403 
404     const char* DATA[] = {
405         // Careful here -- random strings will not work.  If we keep
406         // the left side to the domain and the right side to the range
407         // we will be okay though (left, abc; right xyz).
408         "a", "x",
409         "abcacab", "zyxxxyy",
410         "caccb", "xyzzy",
411     };
412 
413     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
414 
415     UErrorCode status = U_ZERO_ERROR;
416     UParseError parseError;
417     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
418                                 UTRANS_FORWARD, parseError, status);
419     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
420                                 UTRANS_REVERSE, parseError, status);
421     if (U_FAILURE(status)) {
422         errln("FAIL: RBT constructor failed");
423         return;
424     }
425     for (int32_t i=0; i<DATA_length; i+=2) {
426         expect(*fwd, DATA[i], DATA[i+1]);
427         expect(*rev, DATA[i+1], DATA[i]);
428     }
429     delete fwd;
430     delete rev;
431 }
432 
433 /**
434  * Basic test of keyboard.
435  */
TestKeyboard(void)436 void TransliteratorTest::TestKeyboard(void) {
437     UParseError parseError;
438     UErrorCode status = U_ZERO_ERROR;
439     Transliterator *t = Transliterator::createFromRules("<ID>",
440                               UnicodeString("psch>Y;")
441                               +"ps>y;"
442                               +"ch>x;"
443                               +"a>A;",
444                               UTRANS_FORWARD, parseError,
445                               status);
446     if (U_FAILURE(status)) {
447         errln("FAIL: RBT constructor failed");
448         return;
449     }
450     const char* DATA[] = {
451         // insertion, buffer
452         "a", "A",
453         "p", "Ap",
454         "s", "Aps",
455         "c", "Apsc",
456         "a", "AycA",
457         "psch", "AycAY",
458         0, "AycAY", // null means finishKeyboardTransliteration
459     };
460 
461     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
462     delete t;
463 }
464 
465 /**
466  * Basic test of keyboard with cursor.
467  */
TestKeyboard2(void)468 void TransliteratorTest::TestKeyboard2(void) {
469     UParseError parseError;
470     UErrorCode status = U_ZERO_ERROR;
471     Transliterator *t = Transliterator::createFromRules("<ID>",
472                               UnicodeString("ych>Y;")
473                               +"ps>|y;"
474                               +"ch>x;"
475                               +"a>A;",
476                               UTRANS_FORWARD, parseError,
477                               status);
478     if (U_FAILURE(status)) {
479         errln("FAIL: RBT constructor failed");
480         return;
481     }
482     const char* DATA[] = {
483         // insertion, buffer
484         "a", "A",
485         "p", "Ap",
486         "s", "Aps", // modified for rollback - "Ay",
487         "c", "Apsc", // modified for rollback - "Ayc",
488         "a", "AycA",
489         "p", "AycAp",
490         "s", "AycAps", // modified for rollback - "AycAy",
491         "c", "AycApsc", // modified for rollback - "AycAyc",
492         "h", "AycAY",
493         0, "AycAY", // null means finishKeyboardTransliteration
494     };
495 
496     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
497     delete t;
498 }
499 
500 /**
501  * Test keyboard transliteration with back-replacement.
502  */
TestKeyboard3(void)503 void TransliteratorTest::TestKeyboard3(void) {
504     // We want th>z but t>y.  Furthermore, during keyboard
505     // transliteration we want t>y then yh>z if t, then h are
506     // typed.
507     UnicodeString RULES("t>|y;"
508                         "yh>z;");
509 
510     const char* DATA[] = {
511         // Column 1: characters to add to buffer (as if typed)
512         // Column 2: expected appearance of buffer after
513         //           keyboard xliteration.
514         "a", "a",
515         "b", "ab",
516         "t", "abt", // modified for rollback - "aby",
517         "c", "abyc",
518         "t", "abyct", // modified for rollback - "abycy",
519         "h", "abycz",
520         0, "abycz", // null means finishKeyboardTransliteration
521     };
522 
523     UParseError parseError;
524     UErrorCode status = U_ZERO_ERROR;
525     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
526     if (U_FAILURE(status)) {
527         errln("FAIL: RBT constructor failed");
528         return;
529     }
530     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
531     delete t;
532 }
533 
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)534 void TransliteratorTest::keyboardAux(const Transliterator& t,
535                                      const char* DATA[], int32_t DATA_length) {
536     UErrorCode status = U_ZERO_ERROR;
537     UTransPosition index={0, 0, 0, 0};
538     UnicodeString s;
539     for (int32_t i=0; i<DATA_length; i+=2) {
540         UnicodeString log;
541         if (DATA[i] != 0) {
542             log = s + " + "
543                 + DATA[i]
544                 + " -> ";
545             t.transliterate(s, index, DATA[i], status);
546         } else {
547             log = s + " => ";
548             t.finishTransliteration(s, index);
549         }
550         // Show the start index '{' and the cursor '|'
551         UnicodeString a, b, c;
552         s.extractBetween(0, index.contextStart, a);
553         s.extractBetween(index.contextStart, index.start, b);
554         s.extractBetween(index.start, s.length(), c);
555         log.append(a).
556             append((UChar)LEFT_BRACE).
557             append(b).
558             append((UChar)PIPE).
559             append(c);
560         if (s == DATA[i+1] && U_SUCCESS(status)) {
561             logln(log);
562         } else {
563             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
564         }
565     }
566 }
567 
/**
 * Placeholder for the Latin-Arabic test.  The body is currently empty:
 * the original test code is kept below, commented out, until a new
 * Arabic transliterator exists.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
600 
601 /**
602  * Compose the Kana transliterator forward and reverse and try
603  * some strings that should come out unchanged.
604  */
TestCompoundKana(void)605 void TransliteratorTest::TestCompoundKana(void) {
606     UParseError parseError;
607     UErrorCode status = U_ZERO_ERROR;
608     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
609     if (t == 0) {
610         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
611     } else {
612         expect(*t, "aaaaa", "aaaaa");
613         delete t;
614     }
615 }
616 
617 /**
618  * Compose the hex transliterators forward and reverse.
619  */
TestCompoundHex(void)620 void TransliteratorTest::TestCompoundHex(void) {
621     UParseError parseError;
622     UErrorCode status = U_ZERO_ERROR;
623     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
624     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
625     Transliterator* transab[] = { a, b };
626     Transliterator* transba[] = { b, a };
627     if (a == 0 || b == 0) {
628         errln("FAIL: construction failed");
629         delete a;
630         delete b;
631         return;
632     }
633     // Do some basic tests of a
634     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
635     // Do some basic tests of b
636     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
637 
638     Transliterator* ab = new CompoundTransliterator(transab, 2);
639     UnicodeString s("abcde", "");
640     expect(*ab, s, s);
641 
642     UnicodeString str(s);
643     a->transliterate(str);
644     Transliterator* ba = new CompoundTransliterator(transba, 2);
645     expect(*ba, str, str);
646 
647     delete ab;
648     delete ba;
649     delete a;
650     delete b;
651 }
652 
653 int gTestFilterClassID = 0;
654 /**
655  * Used by TestFiltering().
656  */
657 class TestFilter : public UnicodeFilter {
clone() const658     virtual UnicodeFunctor* clone() const {
659         return new TestFilter(*this);
660     }
contains(UChar32 c) const661     virtual UBool contains(UChar32 c) const {
662         return c != (UChar)0x0063 /*c*/;
663     }
664     // Stubs
toPattern(UnicodeString & result,UBool) const665     virtual UnicodeString& toPattern(UnicodeString& result,
666                                      UBool /*escapeUnprintable*/) const {
667         return result;
668     }
matchesIndexValue(uint8_t) const669     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
670         return FALSE;
671     }
addMatchSetTo(UnicodeSet &) const672     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
673 public:
getDynamicClassID() const674     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
675 };
676 
677 /**
678  * Do some basic tests of filtering.
679  */
TestFiltering(void)680 void TransliteratorTest::TestFiltering(void) {
681     UParseError parseError;
682     UErrorCode status = U_ZERO_ERROR;
683     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
684     if (hex == 0) {
685         errln("FAIL: createInstance(Any-Hex) failed");
686         return;
687     }
688     hex->adoptFilter(new TestFilter());
689     UnicodeString s("abcde");
690     hex->transliterate(s);
691     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
692     if (s == exp) {
693         logln(UnicodeString("Ok:   \"") + exp + "\"");
694     } else {
695         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
696     }
697 
698     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
699     UnicodeFilter *f = hex->orphanFilter();
700     if (f == NULL){
701         errln("FAIL: orphanFilter() should get a UnicodeFilter");
702     } else {
703         delete f;
704     }
705     delete hex;
706 }
707 
708 /**
709  * Test anchors
710  */
TestAnchors(void)711 void TransliteratorTest::TestAnchors(void) {
712     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
713            "aaa",
714            "012");
715     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
716            "aaa",
717            "012");
718     expect(UnicodeString("^ab  > 01 ;"
719            " ab  > |8 ;"
720            "  b  > k ;"
721            " 8x$ > 45 ;"
722            " 8x  > 77 ;", ""),
723 
724            "ababbabxabx",
725            "018k7745");
726     expect(UnicodeString("$s = [z$] ;"
727            "$s{ab    > 01 ;"
728            "   ab    > |8 ;"
729            "    b    > k ;"
730            "   8x}$s > 45 ;"
731            "   8x    > 77 ;", ""),
732 
733            "abzababbabxzabxabx",
734            "01z018k45z01x45");
735 }
736 
737 /**
738  * Test pattern quoting and escape mechanisms.
739  */
TestPatternQuoting(void)740 void TransliteratorTest::TestPatternQuoting(void) {
741     // Array of 3n items
742     // Each item is <rules>, <input>, <expected output>
743     const UnicodeString DATA[] = {
744         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
745         UnicodeString(UChar(0x4E01)),
746         "[male adult]"
747     };
748 
749     for (int32_t i=0; i<3; i+=3) {
750         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
751         UParseError parseError;
752         UErrorCode status = U_ZERO_ERROR;
753         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
754         if (U_FAILURE(status)) {
755             errln("RBT constructor failed");
756         } else {
757             expect(*t, DATA[i+1], DATA[i+2]);
758         }
759         delete t;
760     }
761 }
762 
763 /**
764  * Regression test for bugs found in Greek transliteration.
765  */
TestJ277(void)766 void TransliteratorTest::TestJ277(void) {
767     UErrorCode status = U_ZERO_ERROR;
768     UParseError parseError;
769     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
770     if (gl == NULL) {
771         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
772         return;
773     }
774 
775     UChar sigma = 0x3C3;
776     UChar upsilon = 0x3C5;
777     UChar nu = 0x3BD;
778 //    UChar PHI = 0x3A6;
779     UChar alpha = 0x3B1;
780 //    UChar omega = 0x3C9;
781 //    UChar omicron = 0x3BF;
782 //    UChar epsilon = 0x3B5;
783 
784     // sigma upsilon nu -> syn
785     UnicodeString syn;
786     syn.append(sigma).append(upsilon).append(nu);
787     expect(*gl, syn, "syn");
788 
789     // sigma alpha upsilon nu -> saun
790     UnicodeString sayn;
791     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
792     expect(*gl, sayn, "saun");
793 
794     // Again, using a smaller rule set
795     UnicodeString rules(
796                 "$alpha   = \\u03B1;"
797                 "$nu      = \\u03BD;"
798                 "$sigma   = \\u03C3;"
799                 "$ypsilon = \\u03C5;"
800                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
801                 "s <>           $sigma;"
802                 "a <>           $alpha;"
803                 "u <>  $vowel { $ypsilon;"
804                 "y <>           $ypsilon;"
805                 "n <>           $nu;",
806                 "");
807     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
808     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
809     expect(*mini, syn, "syn");
810     expect(*mini, sayn, "saun");
811     delete mini;
812     mini = NULL;
813 
814 #if !UCONFIG_NO_FORMATTING
815     // Transliterate the Greek locale data
816     Locale el("el");
817     DateFormatSymbols syms(el, status);
818     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
819     int32_t i, count;
820     const UnicodeString* data = syms.getMonths(count);
821     for (i=0; i<count; ++i) {
822         if (data[i].length() == 0) {
823             continue;
824         }
825         UnicodeString out(data[i]);
826         gl->transliterate(out);
827         UBool ok = TRUE;
828         if (data[i].length() >= 2 && out.length() >= 2 &&
829             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
830             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
831                 ok = FALSE;
832             }
833         }
834         if (ok) {
835             logln(prettify(data[i] + " -> " + out));
836         } else {
837             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
838         }
839     }
840 #endif
841 
842     delete gl;
843 }
844 
845 /**
846  * Prefix, suffix support in hex transliterators
847  */
TestJ243(void)848 void TransliteratorTest::TestJ243(void) {
849     UErrorCode ec = U_ZERO_ERROR;
850 
851     // Test default Hex-Any, which should handle
852     // \u, \U, u+, and U+
853     Transliterator *hex =
854         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
855     if (assertSuccess("getInstance", ec)) {
856         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
857     }
858     delete hex;
859 
860 //    // Try a custom Hex-Unicode
861 //    // \uXXXX and &#xXXXX;
862 //    ec = U_ZERO_ERROR;
863 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
864 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
865 //           "abcd5fx012&#x00033;");
866 //    // Try custom Any-Hex (default is tested elsewhere)
867 //    ec = U_ZERO_ERROR;
868 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
869 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
870 }
871 
872 /**
873  * Parsers need better syntax error messages.
874  */
TestJ329(void)875 void TransliteratorTest::TestJ329(void) {
876 
877     struct { UBool containsErrors; const char* rule; } DATA[] = {
878         { FALSE, "a > b; c > d" },
879         { TRUE,  "a > b; no operator; c > d" },
880     };
881     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
882 
883     for (int32_t i=0; i<DATA_length; ++i) {
884         UErrorCode status = U_ZERO_ERROR;
885         UParseError parseError;
886         Transliterator *rbt = Transliterator::createFromRules("<ID>",
887                                     DATA[i].rule,
888                                     UTRANS_FORWARD,
889                                     parseError,
890                                     status);
891         UBool gotError = U_FAILURE(status);
892         UnicodeString desc(DATA[i].rule);
893         desc.append(gotError ? " -> error" : " -> no error");
894         if (gotError) {
895             desc = desc + ", ParseError code=" + u_errorName(status) +
896                 " line=" + parseError.line +
897                 " offset=" + parseError.offset +
898                 " context=" + parseError.preContext;
899         }
900         if (gotError == DATA[i].containsErrors) {
901             logln(UnicodeString("Ok:   ") + desc);
902         } else {
903             errln(UnicodeString("FAIL: ") + desc);
904         }
905         delete rbt;
906     }
907 }
908 
909 /**
910  * Test segments and segment references.
911  */
TestSegments(void)912 void TransliteratorTest::TestSegments(void) {
913     // Array of 3n items
914     // Each item is <rules>, <input>, <expected output>
915     UnicodeString DATA[] = {
916         "([a-z]) '.' ([0-9]) > $2 '-' $1",
917         "abc.123.xyz.456",
918         "ab1-c23.xy4-z56",
919 
920         // nested
921         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
922         "a1 b2",
923         "a1.a.1 b2.b.2",
924     };
925     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
926 
927     for (int32_t i=0; i<DATA_length; i+=3) {
928         logln("Pattern: " + prettify(DATA[i]));
929         UParseError parseError;
930         UErrorCode status = U_ZERO_ERROR;
931         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
932         if (U_FAILURE(status)) {
933             errln("FAIL: RBT constructor");
934         } else {
935             expect(*t, DATA[i+1], DATA[i+2]);
936         }
937         delete t;
938     }
939 }
940 
941 /**
942  * Test cursor positioning outside of the key
943  */
TestCursorOffset(void)944 void TransliteratorTest::TestCursorOffset(void) {
945     // Array of 3n items
946     // Each item is <rules>, <input>, <expected output>
947     UnicodeString DATA[] = {
948         "pre {alpha} post > | @ ALPHA ;"
949         "eALPHA > beta ;"
950         "pre {beta} post > BETA @@ | ;"
951         "post > xyz",
952 
953         "prealphapost prebetapost",
954 
955         "prbetaxyz preBETApost",
956     };
957     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
958 
959     for (int32_t i=0; i<DATA_length; i+=3) {
960         logln("Pattern: " + prettify(DATA[i]));
961         UParseError parseError;
962         UErrorCode status = U_ZERO_ERROR;
963         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
964         if (U_FAILURE(status)) {
965             errln("FAIL: RBT constructor");
966         } else {
967             expect(*t, DATA[i+1], DATA[i+2]);
968         }
969         delete t;
970     }
971 }
972 
973 /**
974  * Test zero length and > 1 char length variable values.  Test
975  * use of variable refs in UnicodeSets.
976  */
TestArbitraryVariableValues(void)977 void TransliteratorTest::TestArbitraryVariableValues(void) {
978     // Array of 3n items
979     // Each item is <rules>, <input>, <expected output>
980     UnicodeString DATA[] = {
981         "$abe = ab;"
982         "$pat = x[yY]z;"
983         "$ll  = 'a-z';"
984         "$llZ = [$ll];"
985         "$llY = [$ll$pat];"
986         "$emp = ;"
987 
988         "$abe > ABE;"
989         "$pat > END;"
990         "$llZ > 1;"
991         "$llY > 2;"
992         "7$emp 8 > 9;"
993         "",
994 
995         "ab xYzxyz stY78",
996         "ABE ENDEND 1129",
997     };
998     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
999 
1000     for (int32_t i=0; i<DATA_length; i+=3) {
1001         logln("Pattern: " + prettify(DATA[i]));
1002         UParseError parseError;
1003         UErrorCode status = U_ZERO_ERROR;
1004         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1005         if (U_FAILURE(status)) {
1006             errln("FAIL: RBT constructor");
1007         } else {
1008             expect(*t, DATA[i+1], DATA[i+2]);
1009         }
1010         delete t;
1011     }
1012 }
1013 
1014 /**
1015  * Confirm that the contextStart, contextLimit, start, and limit
1016  * behave correctly. J474.
1017  */
TestPositionHandling(void)1018 void TransliteratorTest::TestPositionHandling(void) {
1019     // Array of 3n items
1020     // Each item is <rules>, <input>, <expected output>
1021     const char* DATA[] = {
1022         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1023         "xtat txtb", // pos 0,9,0,9
1024         "xTTaSS TTxUUb",
1025 
1026         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1027         "xtat txtb", // pos 2,9,3,8
1028         "xtaSS TTxUUb",
1029 
1030         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1031         "xtat txtb", // pos 3,8,3,8
1032         "xtaTT TTxTTb",
1033     };
1034 
1035     // Array of 4n positions -- these go with the DATA array
1036     // They are: contextStart, contextLimit, start, limit
1037     int32_t POS[] = {
1038         0, 9, 0, 9,
1039         2, 9, 3, 8,
1040         3, 8, 3, 8,
1041     };
1042 
1043     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1044     for (int32_t i=0; i<n; i++) {
1045         UErrorCode status = U_ZERO_ERROR;
1046         UParseError parseError;
1047         Transliterator *t = Transliterator::createFromRules("<ID>",
1048                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1049         if (U_FAILURE(status)) {
1050             delete t;
1051             errln("FAIL: RBT constructor");
1052             return;
1053         }
1054         UTransPosition pos;
1055         pos.contextStart= POS[4*i];
1056         pos.contextLimit = POS[4*i+1];
1057         pos.start = POS[4*i+2];
1058         pos.limit = POS[4*i+3];
1059         UnicodeString rsource(DATA[3*i+1]);
1060         t->transliterate(rsource, pos, status);
1061         if (U_FAILURE(status)) {
1062             delete t;
1063             errln("FAIL: transliterate");
1064             return;
1065         }
1066         t->finishTransliteration(rsource, pos);
1067         expectAux(DATA[3*i],
1068                   DATA[3*i+1],
1069                   rsource,
1070                   DATA[3*i+2]);
1071         delete t;
1072     }
1073 }
1074 
1075 /**
1076  * Test the Hiragana-Katakana transliterator.
1077  */
TestHiraganaKatakana(void)1078 void TransliteratorTest::TestHiraganaKatakana(void) {
1079     UParseError parseError;
1080     UErrorCode status = U_ZERO_ERROR;
1081     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1082     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1083     if (hk == 0 || kh == 0) {
1084         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1085         delete hk;
1086         delete kh;
1087         return;
1088     }
1089 
1090     // Array of 3n items
1091     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1092     const char* DATA[] = {
1093         "both",
1094         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1095         "\\u30A2\\u30F8\\u30F2\\u30B0",
1096 
1097         "kh",
1098         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1099         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1100     };
1101     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1102 
1103     for (int32_t i=0; i<DATA_length; i+=3) {
1104         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1105         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1106         switch (*DATA[i]) {
1107         case 0x68: //'h': // Hiragana-Katakana
1108             expect(*hk, h, k);
1109             break;
1110         case 0x6B: //'k': // Katakana-Hiragana
1111             expect(*kh, k, h);
1112             break;
1113         case 0x62: //'b': // both
1114             expect(*hk, h, k);
1115             expect(*kh, k, h);
1116             break;
1117         }
1118     }
1119     delete hk;
1120     delete kh;
1121 }
1122 
1123 /**
1124  * Test cloning / copy constructor of RBT.
1125  */
TestCopyJ476(void)1126 void TransliteratorTest::TestCopyJ476(void) {
1127     // The real test here is what happens when the destructors are
1128     // called.  So we let one object get destructed, and check to
1129     // see that its copy still works.
1130     Transliterator *t2 = 0;
1131     {
1132         UParseError parseError;
1133         UErrorCode status = U_ZERO_ERROR;
1134         Transliterator *t1 = Transliterator::createFromRules("t1",
1135             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1136         if (U_FAILURE(status)) {
1137             errln("FAIL: RBT constructor");
1138             return;
1139         }
1140         t2 = t1->clone(); // Call copy constructor under the covers.
1141         expect(*t1, "abcfoofoo", "ABcbar");
1142         delete t1;
1143     }
1144     expect(*t2, "abcfoofoo", "ABcbar");
1145     delete t2;
1146 }
1147 
1148 /**
1149  * Test inter-Indic transliterators.  These are composed.
1150  * ICU4C Jitterbug 483.
1151  */
TestInterIndic(void)1152 void TransliteratorTest::TestInterIndic(void) {
1153     UnicodeString ID("Devanagari-Gujarati", "");
1154     UErrorCode status = U_ZERO_ERROR;
1155     UParseError parseError;
1156     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1157     if (dg == 0) {
1158         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1159         return;
1160     }
1161     UnicodeString id = dg->getID();
1162     if (id != ID) {
1163         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1164     }
1165     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1166     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1167     expect(*dg, dev, guj);
1168     delete dg;
1169 }
1170 
1171 /**
1172  * Test filter syntax in IDs. (J918)
1173  */
TestFilterIDs(void)1174 void TransliteratorTest::TestFilterIDs(void) {
1175     // Array of 3n strings:
1176     // <id>, <inverse id>, <input>, <expected output>
1177     const char* DATA[] = {
1178         "[aeiou]Any-Hex", // ID
1179         "[aeiou]Hex-Any", // expected inverse ID
1180         "quizzical",      // src
1181         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1182 
1183         "[aeiou]Any-Hex;[^5]Hex-Any",
1184         "[^5]Any-Hex;[aeiou]Hex-Any",
1185         "quizzical",
1186         "q\\u0075izzical",
1187 
1188         "[abc]Null",
1189         "[abc]Null",
1190         "xyz",
1191         "xyz",
1192     };
1193     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1194 
1195     for (int i=0; i<DATA_length; i+=4) {
1196         UnicodeString ID(DATA[i], "");
1197         UnicodeString uID(DATA[i+1], "");
1198         UnicodeString data2(DATA[i+2], "");
1199         UnicodeString data3(DATA[i+3], "");
1200         UParseError parseError;
1201         UErrorCode status = U_ZERO_ERROR;
1202         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1203         if (t == 0) {
1204             errln("FAIL: createInstance(" + ID + ") returned NULL");
1205             return;
1206         }
1207         expect(*t, data2, data3);
1208 
1209         // Check the ID
1210         if (ID != t->getID()) {
1211             errln("FAIL: createInstance(" + ID + ").getID() => " +
1212                   t->getID());
1213         }
1214 
1215         // Check the inverse
1216         Transliterator *u = t->createInverse(status);
1217         if (u == 0) {
1218             errln("FAIL: " + ID + ".createInverse() returned NULL");
1219         } else if (u->getID() != uID) {
1220             errln("FAIL: " + ID + ".createInverse().getID() => " +
1221                   u->getID() + ", expected " + uID);
1222         }
1223 
1224         delete t;
1225         delete u;
1226     }
1227 }
1228 
1229 /**
1230  * Test the case mapping transliterators.
1231  */
TestCaseMap(void)1232 void TransliteratorTest::TestCaseMap(void) {
1233     UParseError parseError;
1234     UErrorCode status = U_ZERO_ERROR;
1235     Transliterator* toUpper =
1236         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1237     Transliterator* toLower =
1238         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239     Transliterator* toTitle =
1240         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241     if (toUpper==0 || toLower==0 || toTitle==0) {
1242         errln("FAIL: createInstance returned NULL");
1243         delete toUpper;
1244         delete toLower;
1245         delete toTitle;
1246         return;
1247     }
1248 
1249     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1250            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1251     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1252            "the quick brown foX jumped over the lazY dogs.");
1253     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1254            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1255 
1256     delete toUpper;
1257     delete toLower;
1258     delete toTitle;
1259 }
1260 
1261 /**
1262  * Test the name mapping transliterators.
1263  */
TestNameMap(void)1264 void TransliteratorTest::TestNameMap(void) {
1265     UParseError parseError;
1266     UErrorCode status = U_ZERO_ERROR;
1267     Transliterator* uni2name =
1268         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1269     Transliterator* name2uni =
1270         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1271     if (uni2name==0 || name2uni==0) {
1272         errln("FAIL: createInstance returned NULL");
1273         delete uni2name;
1274         delete name2uni;
1275         return;
1276     }
1277 
1278     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1279     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1280            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1281     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1282            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1283 
1284     delete uni2name;
1285     delete name2uni;
1286 
1287     // round trip
1288     Transliterator* t =
1289         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1290     if (t==0) {
1291         errln("FAIL: createInstance returned NULL");
1292         delete t;
1293         return;
1294     }
1295 
1296     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1297     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1298     expect(*t, s, s);
1299     delete t;
1300 }
1301 
1302 /**
1303  * Test liberalized ID syntax.  1006c
1304  */
TestLiberalizedID(void)1305 void TransliteratorTest::TestLiberalizedID(void) {
1306     // Some test cases have an expected getID() value of NULL.  This
1307     // means I have disabled the test case for now.  This stuff is
1308     // still under development, and I haven't decided whether to make
1309     // getID() return canonical case yet.  It will all get rewritten
1310     // with the move to Source-Target/Variant IDs anyway. [aliu]
1311     const char* DATA[] = {
1312         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1313         "  Null  ", "Null", "whitespace",
1314         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1315         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1316     };
1317     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1318     UParseError parseError;
1319     UErrorCode status= U_ZERO_ERROR;
1320     for (int32_t i=0; i<DATA_length; i+=3) {
1321         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1322         if (t == 0) {
1323             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1324                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1325         } else {
1326             UnicodeString exp;
1327             if (DATA[i+1]) {
1328                 exp = UnicodeString(DATA[i+1], "");
1329             }
1330             // Don't worry about getID() if the expected char*
1331             // is NULL -- see above.
1332             if (exp.length() == 0 || exp == t->getID()) {
1333                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1334                       " create ID \"" + DATA[i] + "\" => \"" +
1335                       exp + "\"");
1336             } else {
1337                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1338                       " create ID \"" + DATA[i] + "\" => \"" +
1339                       t->getID() + "\", exp \"" + exp + "\"");
1340             }
1341             delete t;
1342         }
1343     }
1344 }
1345 
1346 /* test for Jitterbug 912 */
TestCreateInstance()1347 void TransliteratorTest::TestCreateInstance(){
1348     const char* FORWARD = "F";
1349     const char* REVERSE = "R";
1350     const char* DATA[] = {
1351         // Column 1: id
1352         // Column 2: direction
1353         // Column 3: expected ID, or "" if expect failure
1354         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1355 
1356         // JB#2689: bad compound causes crash
1357         "InvalidSource-InvalidTarget", FORWARD, "",
1358         "InvalidSource-InvalidTarget", REVERSE, "",
1359         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1360         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1361         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1362         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1363 
1364         NULL
1365     };
1366 
1367     for (int32_t i=0; DATA[i]; i+=3) {
1368         UParseError err;
1369         UErrorCode ec = U_ZERO_ERROR;
1370         UnicodeString id(DATA[i]);
1371         UTransDirection dir = (DATA[i+1]==FORWARD)?
1372             UTRANS_FORWARD:UTRANS_REVERSE;
1373         UnicodeString expID(DATA[i+2]);
1374         Transliterator* t =
1375             Transliterator::createInstance(id,dir,err,ec);
1376         UnicodeString newID;
1377         if (t) {
1378             newID = t->getID();
1379         }
1380         UBool ok = (newID == expID);
1381         if (!t) {
1382             newID = u_errorName(ec);
1383         }
1384         if (ok) {
1385             logln((UnicodeString)"Ok: createInstance(" +
1386                   id + "," + DATA[i+1] + ") => " + newID);
1387         } else {
1388             dataerrln((UnicodeString)"FAIL: createInstance(" +
1389                   id + "," + DATA[i+1] + ") => " + newID +
1390                   ", expected " + expID);
1391         }
1392         delete t;
1393     }
1394 }
1395 
1396 /**
1397  * Test the normalization transliterator.
1398  */
TestNormalizationTransliterator()1399 void TransliteratorTest::TestNormalizationTransliterator() {
1400     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1401     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1402     const char* CANON[] = {
1403         // Input               Decomposed            Composed
1404         "cat",                "cat",                "cat"               ,
1405         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1406 
1407         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1408         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1409 
1410         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1411         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1412         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1413 
1414         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1415         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1416 
1417         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1418         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1419         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1420 
1421         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1422         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1423 
1424         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1425         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1426 
1427         "Henry IV",           "Henry IV",           "Henry IV"          ,
1428         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1429 
1430         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1431         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1432         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1433         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1434         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1435 
1436         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1437         0 // end
1438     };
1439 
1440     const char* COMPAT[] = {
1441         // Input               Decomposed            Composed
1442         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1443 
1444         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1445         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1446 
1447         "Henry IV",           "Henry IV",           "Henry IV"          ,
1448         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1449 
1450         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1451         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1452 
1453         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1454         0 // end
1455     };
1456 
1457     int32_t i;
1458     UParseError parseError;
1459     UErrorCode status = U_ZERO_ERROR;
1460     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1461     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1462     if (!NFD || !NFC) {
1463         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1464         delete NFD;
1465         delete NFC;
1466         return;
1467     }
1468     for (i=0; CANON[i]; i+=3) {
1469         UnicodeString in = CharsToUnicodeString(CANON[i]);
1470         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1471         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1472         expect(*NFD, in, expd);
1473         expect(*NFC, in, expc);
1474     }
1475     delete NFD;
1476     delete NFC;
1477 
1478     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1479     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1480     if (!NFKD || !NFKC) {
1481         errln("FAIL: createInstance failed");
1482         delete NFKD;
1483         delete NFKC;
1484         return;
1485     }
1486     for (i=0; COMPAT[i]; i+=3) {
1487         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1488         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1489         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1490         expect(*NFKD, in, expkd);
1491         expect(*NFKC, in, expkc);
1492     }
1493     delete NFKD;
1494     delete NFKC;
1495 
1496     UParseError pe;
1497     status = U_ZERO_ERROR;
1498     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1499                                                        UTRANS_FORWARD,
1500                                                        pe, status);
1501     if (t == 0) {
1502         errln("FAIL: createInstance failed");
1503     }
1504     expect(*t, CharsToUnicodeString("\\u010dx"),
1505            CharsToUnicodeString("c\\u030C"));
1506     delete t;
1507 }
1508 
1509 /**
1510  * Test compound RBT rules.
1511  */
TestCompoundRBT(void)1512 void TransliteratorTest::TestCompoundRBT(void) {
1513     // Careful with spacing and ';' here:  Phrase this exactly
1514     // as toRules() is going to return it.  If toRules() changes
1515     // with regard to spacing or ';', then adjust this string.
1516     UnicodeString rule("::Hex-Any;\n"
1517                        "::Any-Lower;\n"
1518                        "a > '.A.';\n"
1519                        "b > '.B.';\n"
1520                        "::[^t]Any-Upper;", "");
1521     UParseError parseError;
1522     UErrorCode status = U_ZERO_ERROR;
1523     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1524     if (t == 0) {
1525         errln("FAIL: createFromRules failed");
1526         return;
1527     }
1528     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1529            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1530     UnicodeString r;
1531     t->toRules(r, TRUE);
1532     if (r == rule) {
1533         logln((UnicodeString)"OK: toRules() => " + r);
1534     } else {
1535         errln((UnicodeString)"FAIL: toRules() => " + r +
1536               ", expected " + rule);
1537     }
1538     delete t;
1539 
1540     // Now test toRules
1541     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1542     if (t == 0) {
1543         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1544         return;
1545     }
1546     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1547     t->toRules(r, TRUE);
1548     if (r != exp) {
1549         errln((UnicodeString)"FAIL: toRules() => " + r +
1550               ", expected " + exp);
1551     } else {
1552         logln((UnicodeString)"OK: toRules() => " + r);
1553     }
1554     delete t;
1555 
1556     // Round trip the result of toRules
1557     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1558     if (t == 0) {
1559         errln("FAIL: createFromRules #2 failed");
1560         return;
1561     } else {
1562         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1563     }
1564 
1565     // Test toRules again
1566     t->toRules(r, TRUE);
1567     if (r != exp) {
1568         errln((UnicodeString)"FAIL: toRules() => " + r +
1569               ", expected " + exp);
1570     } else {
1571         logln((UnicodeString)"OK: toRules() => " + r);
1572     }
1573 
1574     delete t;
1575 
1576     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1577     // to what the regenerated ID will look like.
1578     UnicodeString id("Upper(Lower);(NFKC)", "");
1579     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1580     if (t == 0) {
1581         errln("FAIL: createInstance #2 failed");
1582         return;
1583     }
1584     if (t->getID() == id) {
1585         logln((UnicodeString)"OK: created " + id);
1586     } else {
1587         errln((UnicodeString)"FAIL: createInstance(" + id +
1588               ").getID() => " + t->getID());
1589     }
1590 
1591     Transliterator *u = t->createInverse(status);
1592     if (u == 0) {
1593         errln("FAIL: createInverse failed");
1594         delete t;
1595         return;
1596     }
1597     exp = "NFKC();Lower(Upper)";
1598     if (u->getID() == exp) {
1599         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1600               u->getID());
1601     } else {
1602         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1603               u->getID());
1604     }
1605     delete t;
1606     delete u;
1607 }
1608 
1609 /**
1610  * Compound filter semantics were orginially not implemented
1611  * correctly.  Originally, each component filter f(i) is replaced by
1612  * f'(i) = f(i) && g, where g is the filter for the compound
1613  * transliterator.
1614  *
1615  * From Mark:
1616  *
1617  * Suppose and I have a transliterator X. Internally X is
1618  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1619  *
1620  * The compound should convert all greek characters (through latin) to
1621  * cyrillic, then lowercase the result. The filter should say "don't
1622  * touch 'A' in the original". But because an intermediate result
1623  * happens to go through "A", the Greek Alpha gets hung up.
1624  */
TestCompoundFilter(void)1625 void TransliteratorTest::TestCompoundFilter(void) {
1626     UParseError parseError;
1627     UErrorCode status = U_ZERO_ERROR;
1628     Transliterator *t = Transliterator::createInstance
1629         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1630     if (t == 0) {
1631         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1632         return;
1633     }
1634     t->adoptFilter(new UnicodeSet("[^A]", status));
1635     if (U_FAILURE(status)) {
1636         errln("FAIL: UnicodeSet ct failed");
1637         delete t;
1638         return;
1639     }
1640 
1641     // Only the 'A' at index 1 should remain unchanged
1642     expect(*t,
1643            CharsToUnicodeString("BA\\u039A\\u0391"),
1644            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1645     delete t;
1646 }
1647 
TestRemove(void)1648 void TransliteratorTest::TestRemove(void) {
1649     UParseError parseError;
1650     UErrorCode status = U_ZERO_ERROR;
1651     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1652     if (t == 0) {
1653         errln("FAIL: createInstance failed");
1654         return;
1655     }
1656 
1657     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1658 
1659     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1660     // duplicating the filter
1661     Transliterator* t2 = t->clone();
1662     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1663 
1664     delete t;
1665     delete t2;
1666 }
1667 
TestToRules(void)1668 void TransliteratorTest::TestToRules(void) {
1669     const char* RBT = "rbt";
1670     const char* SET = "set";
1671     static const char* DATA[] = {
1672         RBT,
1673         "$a=\\u4E61; [$a] > A;",
1674         "[\\u4E61] > A;",
1675 
1676         RBT,
1677         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1678         "[[:Zs:][:Zl:]]{a} > A;",
1679 
1680         SET,
1681         "[[:Zs:][:Zl:]]",
1682         "[[:Zs:][:Zl:]]",
1683 
1684         SET,
1685         "[:Ps:]",
1686         "[:Ps:]",
1687 
1688         SET,
1689         "[:L:]",
1690         "[:L:]",
1691 
1692         SET,
1693         "[[:L:]-[A]]",
1694         "[[:L:]-[A]]",
1695 
1696         SET,
1697         "[~[:Lu:][:Ll:]]",
1698         "[~[:Lu:][:Ll:]]",
1699 
1700         SET,
1701         "[~[a-z]]",
1702         "[~[a-z]]",
1703 
1704         RBT,
1705         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1706         "[^[:Zs:]]{a} > A;",
1707 
1708         RBT,
1709         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1710         "[[a-z]-[:Zs:]]{a} > A;",
1711 
1712         RBT,
1713         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1714         "[[:Zs:]&[a-z]]{a} > A;",
1715 
1716         RBT,
1717         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1718         "[x[:Zs:]]{a} > A;",
1719 
1720         RBT,
1721         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1722         "$macron = \\u0304 ;"
1723         "$evowel = [aeiouyAEIOUY] ;"
1724         "$iotasub = \\u0345 ;"
1725         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1726         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1727 
1728         RBT,
1729         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731     };
1732     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1733 
1734     for (int32_t d=0; d < DATA_length; d+=3) {
1735         if (DATA[d] == RBT) {
1736             // Transliterator test
1737             UParseError parseError;
1738             UErrorCode status = U_ZERO_ERROR;
1739             Transliterator *t = Transliterator::createFromRules("ID",
1740                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1741             if (t == 0) {
1742                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1743                 return;
1744             }
1745             UnicodeString rules, escapedRules;
1746             t->toRules(rules, FALSE);
1747             t->toRules(escapedRules, TRUE);
1748             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1749             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1750             if (rules == expRules) {
1751                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752                       " => " + rules);
1753             } else {
1754                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755                       " => " + rules + ", exp " + expRules);
1756             }
1757             if (escapedRules == expEscapedRules) {
1758                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1759                       " => " + escapedRules);
1760             } else {
1761                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1762                       " => " + escapedRules + ", exp " + expEscapedRules);
1763             }
1764             delete t;
1765 
1766         } else {
1767             // UnicodeSet test
1768             UErrorCode status = U_ZERO_ERROR;
1769             UnicodeString pat(DATA[d+1], -1, US_INV);
1770             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1771             UnicodeSet set(pat, status);
1772             if (U_FAILURE(status)) {
1773                 errln("FAIL: UnicodeSet ct failed");
1774                 return;
1775             }
1776             // Adjust spacing etc. as necessary.
1777             UnicodeString toPat;
1778             set.toPattern(toPat);
1779             if (expToPat == toPat) {
1780                 logln((UnicodeString)"Ok: " + pat +
1781                       " => " + toPat);
1782             } else {
1783                 errln((UnicodeString)"FAIL: " + pat +
1784                       " => " + prettify(toPat, TRUE) +
1785                       ", exp " + prettify(pat, TRUE));
1786             }
1787         }
1788     }
1789 }
1790 
TestContext()1791 void TransliteratorTest::TestContext() {
1792     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1793     expect("de > x; {d}e > y;",
1794            "de",
1795            "ye",
1796            &pos);
1797 
1798     expect("ab{c} > z;",
1799            "xadabdabcy",
1800            "xadabdabzy");
1801 }
1802 
TestSupplemental()1803 void TransliteratorTest::TestSupplemental() {
1804 
1805     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1806                                 "a > $a; $s > i;"),
1807            CharsToUnicodeString("ab\\U0001030Fx"),
1808            CharsToUnicodeString("\\U00010300bix"));
1809 
1810     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1811                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1812                                 "($a)($b) > $2 $1;"),
1813            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1814            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1815 
1816     // k|ax\\U00010300xm
1817 
1818     // k|a\\U00010400\\U00010300xm
1819     // ky|\\U00010400\\U00010300xm
1820     // ky\\U00010400|\\U00010300xm
1821 
1822     // ky\\U00010400|\\U00010300\\U00010400m
1823     // ky\\U00010400y|\\U00010400m
1824     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1825                                 "$a {x} > | @ \\U00010400;"
1826                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1827            CharsToUnicodeString("kax\\U00010300xm"),
1828            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1829 
1830     expectT("Any-Name",
1831            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1832            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1833 
1834     expectT("Any-Hex/Unicode",
1835            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1837 
1838     expectT("Any-Hex/C",
1839            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1841 
1842     expectT("Any-Hex/Perl",
1843            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1845 
1846     expectT("Any-Hex/Java",
1847            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1849 
1850     expectT("Any-Hex/XML",
1851            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1853 
1854     expectT("Any-Hex/XML10",
1855            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1856            "&#66352;&#1113856;&#917601;&#160;");
1857 
1858     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1859            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1860            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1861 }
1862 
TestQuantifier()1863 void TransliteratorTest::TestQuantifier() {
1864 
1865     // Make sure @ in a quantified anteContext works
1866     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1867            "AAAAAb",
1868            "aaa(aac)");
1869 
1870     // Make sure @ in a quantified postContext works
1871     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1872            "baaaaa",
1873            "caa(aaa)");
1874 
1875     // Make sure @ in a quantified postContext with seg ref works
1876     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1877            "baaaaa",
1878            "baa(aaa)");
1879 
1880     // Make sure @ past ante context doesn't enter ante context
1881     UTransPosition pos = {0, 5, 3, 5};
1882     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1883            "xxxab",
1884            "xxx(ac)",
1885            &pos);
1886 
1887     // Make sure @ past post context doesn't pass limit
1888     UTransPosition pos2 = {0, 4, 0, 2};
1889     expect("{b} a+ > c @@ |; x > y; a > A;",
1890            "baxx",
1891            "caxx",
1892            &pos2);
1893 
1894     // Make sure @ past post context doesn't enter post context
1895     expect("{b} a+ > c @@ |; x > y; a > A;",
1896            "baxx",
1897            "cayy");
1898 
1899     expect("(ab)? c > d;",
1900            "c abc ababc",
1901            "d d abd");
1902 
1903     // NOTE: The (ab)+ when referenced just yields a single "ab",
1904     // not the full sequence of them.  This accords with perl behavior.
1905     expect("(ab)+ {x} > '(' $1 ')';",
1906            "x abx ababxy",
1907            "x ab(ab) abab(ab)y");
1908 
1909     expect("b+ > x;",
1910            "ac abc abbc abbbc",
1911            "ac axc axc axc");
1912 
1913     expect("[abc]+ > x;",
1914            "qac abrc abbcs abtbbc",
1915            "qx xrx xs xtx");
1916 
1917     expect("q{(ab)+} > x;",
1918            "qa qab qaba qababc qaba",
1919            "qa qx qxa qxc qxa");
1920 
1921     expect("q(ab)* > x;",
1922            "qa qab qaba qababc",
1923            "xa x xa xc");
1924 
1925     // NOTE: The (ab)+ when referenced just yields a single "ab",
1926     // not the full sequence of them.  This accords with perl behavior.
1927     expect("q(ab)* > '(' $1 ')';",
1928            "qa qab qaba qababc",
1929            "()a (ab) (ab)a (ab)c");
1930 
1931     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1932     // quoted string
1933     expect("'ab'+ > x;",
1934            "bb ab ababb",
1935            "bb x xb");
1936 
1937     // $foo+ and $foo* -- the quantifier should apply to the entire
1938     // variable reference
1939     expect("$var = ab; $var+ > x;",
1940            "bb ab ababb",
1941            "bb x xb");
1942 }
1943 
1944 class TestTrans : public Transliterator {
1945 public:
TestTrans(const UnicodeString & id)1946     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1947     }
clone(void) const1948     virtual Transliterator* clone(void) const {
1949         return new TestTrans(getID());
1950     }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1951     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1952         UBool /*isIncremental*/) const
1953     {
1954         offsets.start = offsets.limit;
1955     }
1956     virtual UClassID getDynamicClassID() const;
1957     static UClassID U_EXPORT2 getStaticClassID();
1958 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1959 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1960 
1961 /**
1962  * Test Source-Target/Variant.
1963  */
1964 void TransliteratorTest::TestSTV(void) {
1965     int32_t ns = Transliterator::countAvailableSources();
1966     if (ns < 0 || ns > 255) {
1967         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1968         return;
1969     }
1970     int32_t i, j;
1971     for (i=0; i<ns; ++i) {
1972         UnicodeString source;
1973         Transliterator::getAvailableSource(i, source);
1974         logln((UnicodeString)"" + i + ": " + source);
1975         if (source.length() == 0) {
1976             errln("FAIL: empty source");
1977             continue;
1978         }
1979         int32_t nt = Transliterator::countAvailableTargets(source);
1980         if (nt < 0 || nt > 255) {
1981             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1982             continue;
1983         }
1984         for (int32_t j=0; j<nt; ++j) {
1985             UnicodeString target;
1986             Transliterator::getAvailableTarget(j, source, target);
1987             logln((UnicodeString)" " + j + ": " + target);
1988             if (target.length() == 0) {
1989                 errln("FAIL: empty target");
1990                 continue;
1991             }
1992             int32_t nv = Transliterator::countAvailableVariants(source, target);
1993             if (nv < 0 || nv > 255) {
1994                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1995                 continue;
1996             }
1997             for (int32_t k=0; k<nv; ++k) {
1998                 UnicodeString variant;
1999                 Transliterator::getAvailableVariant(k, source, target, variant);
2000                 if (variant.length() == 0) {
2001                     logln((UnicodeString)"  " + k + ": <empty>");
2002                 } else {
2003                     logln((UnicodeString)"  " + k + ": " + variant);
2004                 }
2005             }
2006         }
2007     }
2008 
2009     // Test registration
2010     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2013     for (i=0; i<3; ++i) {
2014         Transliterator *t = new TestTrans(IDS[i]);
2015         if (t == 0) {
2016             errln("FAIL: out of memory");
2017             return;
2018         }
2019         if (t->getID() != IDS[i]) {
2020             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2021             delete t;
2022             return;
2023         }
2024         Transliterator::registerInstance(t);
2025         UErrorCode status = U_ZERO_ERROR;
2026         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2027         if (t == NULL) {
2028             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2029                   IDS[i]);
2030         } else {
2031             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2032                   IDS[i]);
2033             delete t;
2034         }
2035         Transliterator::unregister(IDS[i]);
2036         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2037         if (t != NULL) {
2038             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2039                   IDS[i]);
2040             delete t;
2041         }
2042     }
2043 
2044     // Make sure getAvailable API reflects removal
2045     int32_t n = Transliterator::countAvailableIDs();
2046     for (i=0; i<n; ++i) {
2047         UnicodeString id = Transliterator::getAvailableID(i);
2048         for (j=0; j<3; ++j) {
2049             if (id.caseCompare(FULL_IDS[j],0)==0) {
2050                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2051             }
2052         }
2053     }
2054     n = Transliterator::countAvailableTargets("Any");
2055     for (i=0; i<n; ++i) {
2056         UnicodeString t;
2057         Transliterator::getAvailableTarget(i, "Any", t);
2058         if (t.caseCompare(IDS[0],0)==0) {
2059             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2060         }
2061     }
2062     n = Transliterator::countAvailableSources();
2063     for (i=0; i<n; ++i) {
2064         UnicodeString s;
2065         Transliterator::getAvailableSource(i, s);
2066         for (j=0; j<3; ++j) {
2067             if (SOURCES[j] == NULL) continue;
2068             if (s.caseCompare(SOURCES[j],0)==0) {
2069                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2070             }
2071         }
2072     }
2073 }
2074 
2075 /**
2076  * Test inverse of Greek-Latin; Title()
2077  */
TestCompoundInverse(void)2078 void TransliteratorTest::TestCompoundInverse(void) {
2079     UParseError parseError;
2080     UErrorCode status = U_ZERO_ERROR;
2081     Transliterator *t = Transliterator::createInstance
2082         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2083     if (t == 0) {
2084         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2085         return;
2086     }
2087     UnicodeString exp("(Title);Latin-Greek");
2088     if (t->getID() == exp) {
2089         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2090               t->getID());
2091     } else {
2092         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2093               t->getID() + "\", expected \"" + exp + "\"");
2094     }
2095     delete t;
2096 }
2097 
2098 /**
2099  * Test NFD chaining with RBT
2100  */
TestNFDChainRBT()2101 void TransliteratorTest::TestNFDChainRBT() {
2102     UParseError pe;
2103     UErrorCode ec = U_ZERO_ERROR;
2104     Transliterator* t = Transliterator::createFromRules(
2105                                "TEST", "::NFD; aa > Q; a > q;",
2106                                UTRANS_FORWARD, pe, ec);
2107     if (t == NULL || U_FAILURE(ec)) {
2108         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2109         return;
2110     }
2111     expect(*t, "aa", "Q");
2112     delete t;
2113 
2114     // TEMPORARY TESTS -- BEING DEBUGGED
2115 //=-    UnicodeString s, s2;
2116 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2117 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2118 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2119 //=-    expect(*t, s, s2);
2120 //=-    delete t;
2121 //=-
2122 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2123 //=-    expect(*t, s2, s);
2124 //=-    delete t;
2125 //=-
2126 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2127 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2128 //=-    expect(*t, s, s);
2129 //=-    delete t;
2130 
2131 //    const char* source[] = {
2132 //        /*
2133 //        "\\u015Br\\u012Bmad",
2134 //        "bhagavadg\\u012Bt\\u0101",
2135 //        "adhy\\u0101ya",
2136 //        "arjuna",
2137 //        "vi\\u1E63\\u0101da",
2138 //        "y\\u014Dga",
2139 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2140 //        "uv\\u0101cr\\u0325",
2141 //        */
2142 //        "rmk\\u1E63\\u0113t",
2143 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2144 //        /*
2145 //        "kuruk\\u1E63\\u0113tr\\u0113",
2146 //        "samav\\u0113t\\u0101",
2147 //        "yuyutsava-\\u1E25",
2148 //        "m\\u0101mak\\u0101-\\u1E25",
2149 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2150 //        "kimakurvata",
2151 //        "san\\u0304java",
2152 //        */
2153 //
2154 //        0
2155 //    };
2156 //    const char* expected[] = {
2157 //        /*
2158 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2159 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2160 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2161 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2162 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2163 //        "\\u092f\\u094b\\u0917",
2164 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2165 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2166 //        */
2167 //        "\\u0927",
2168 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2169 //        /*
2170 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2172 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2173 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2174 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2175 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2176 //        "\\u0938\\u0902\\u091c\\u0935",
2177 //        */
2178 //        0
2179 //    };
2180 //    UErrorCode status = U_ZERO_ERROR;
2181 //    UParseError parseError;
2182 //    UnicodeString message;
2183 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2184 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2185 //    if(U_FAILURE(status)){
2186 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2187 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2188 //        delete latinToDevToLatin;
2189 //        delete devToLatinToDev;
2190 //        return;
2191 //    }
2192 //    UnicodeString gotResult;
2193 //    for(int i= 0; source[i] != 0; i++){
2194 //        gotResult = source[i];
2195 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2196 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2197 //    }
2198 //    delete latinToDevToLatin;
2199 //    delete devToLatinToDev;
2200 }
2201 
2202 /**
2203  * Inverse of "Null" should be "Null". (J21)
2204  */
TestNullInverse()2205 void TransliteratorTest::TestNullInverse() {
2206     UParseError pe;
2207     UErrorCode ec = U_ZERO_ERROR;
2208     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2209     if (t == 0 || U_FAILURE(ec)) {
2210         errln("FAIL: createInstance");
2211         return;
2212     }
2213     Transliterator *u = t->createInverse(ec);
2214     if (u == 0 || U_FAILURE(ec)) {
2215         errln("FAIL: createInverse");
2216         delete t;
2217         return;
2218     }
2219     if (u->getID() != "Null") {
2220         errln("FAIL: Inverse of Null should be Null");
2221     }
2222     delete t;
2223     delete u;
2224 }
2225 
2226 /**
2227  * Check ID of inverse of alias. (J22)
2228  */
TestAliasInverseID()2229 void TransliteratorTest::TestAliasInverseID() {
2230     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2231     UParseError pe;
2232     UErrorCode ec = U_ZERO_ERROR;
2233     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2234     if (t == 0 || U_FAILURE(ec)) {
2235         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2236         return;
2237     }
2238     Transliterator *u = t->createInverse(ec);
2239     if (u == 0 || U_FAILURE(ec)) {
2240         errln("FAIL: createInverse");
2241         delete t;
2242         return;
2243     }
2244     UnicodeString exp = "Hangul-Latin";
2245     UnicodeString got = u->getID();
2246     if (got != exp) {
2247         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2248               ", expected " + exp);
2249     }
2250     delete t;
2251     delete u;
2252 }
2253 
2254 /**
2255  * Test IDs of inverses of compound transliterators. (J20)
2256  */
TestCompoundInverseID()2257 void TransliteratorTest::TestCompoundInverseID() {
2258     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2259     UParseError pe;
2260     UErrorCode ec = U_ZERO_ERROR;
2261     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2262     if (t == 0 || U_FAILURE(ec)) {
2263         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2264         return;
2265     }
2266     Transliterator *u = t->createInverse(ec);
2267     if (u == 0 || U_FAILURE(ec)) {
2268         errln("FAIL: createInverse");
2269         delete t;
2270         return;
2271     }
2272     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2273     UnicodeString got = u->getID();
2274     if (got != exp) {
2275         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2276               ", expected " + exp);
2277     }
2278     delete t;
2279     delete u;
2280 }
2281 
2282 /**
2283  * Test undefined variable.
2284 
2285  */
TestUndefinedVariable()2286 void TransliteratorTest::TestUndefinedVariable() {
2287     UnicodeString rule = "$initial } a <> \\u1161;";
2288     UParseError pe;
2289     UErrorCode ec = U_ZERO_ERROR;
2290     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2291     delete t;
2292     if (U_FAILURE(ec)) {
2293         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2294               u_errorName(ec));
2295         return;
2296     }
2297     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2298           u_errorName(ec));
2299 }
2300 
2301 /**
2302  * Test empty context.
2303  */
TestEmptyContext()2304 void TransliteratorTest::TestEmptyContext() {
2305     expect(" { a } > b;", "xay a ", "xby b ");
2306 }
2307 
2308 /**
2309 * Test compound filter ID syntax
2310 */
TestCompoundFilterID(void)2311 void TransliteratorTest::TestCompoundFilterID(void) {
2312     static const char* DATA[] = {
2313         // Col. 1 = ID or rule set (latter must start with #)
2314 
2315         // = columns > 1 are null if expect col. 1 to be illegal =
2316 
2317         // Col. 2 = direction, "F..." or "R..."
2318         // Col. 3 = source string
2319         // Col. 4 = exp result
2320 
2321         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2322         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2323         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2324         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2325         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2326         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327         NULL,
2328     };
2329 
2330     for (int32_t i=0; DATA[i]; i+=4) {
2331         UnicodeString id = CharsToUnicodeString(DATA[i]);
2332         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2333             UTRANS_REVERSE : UTRANS_FORWARD;
2334         UnicodeString source;
2335         UnicodeString exp;
2336         if (DATA[i+2] != NULL) {
2337             source = CharsToUnicodeString(DATA[i+2]);
2338             exp = CharsToUnicodeString(DATA[i+3]);
2339         }
2340         UBool expOk = (DATA[i+1] != NULL);
2341         Transliterator* t = NULL;
2342         UParseError pe;
2343         UErrorCode ec = U_ZERO_ERROR;
2344         if (id.charAt(0) == 0x23/*#*/) {
2345             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2346         } else {
2347             t = Transliterator::createInstance(id, direction, pe, ec);
2348         }
2349         UBool ok = (t != NULL && U_SUCCESS(ec));
2350         UnicodeString transID;
2351         if (t!=0) {
2352             transID = t->getID();
2353         }
2354         else {
2355             transID = UnicodeString("NULL", "");
2356         }
2357         if (ok == expOk) {
2358             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2359                   u_errorName(ec));
2360             if (source.length() != 0) {
2361                 expect(*t, source, exp);
2362             }
2363             delete t;
2364         } else {
2365             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2366                   u_errorName(ec));
2367         }
2368     }
2369 }
2370 
2371 /**
2372  * Test new property set syntax
2373  */
TestPropertySet()2374 void TransliteratorTest::TestPropertySet() {
2375     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2376     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2377            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2378 }
2379 
2380 /**
2381  * Test various failure points of the new 2.0 engine.
2382  */
TestNewEngine()2383 void TransliteratorTest::TestNewEngine() {
2384     UParseError pe;
2385     UErrorCode ec = U_ZERO_ERROR;
2386     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2387     if (t == 0 || U_FAILURE(ec)) {
2388         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2389         return;
2390     }
2391     // Katakana should be untouched
2392     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2393            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2394 
2395     delete t;
2396 
2397 #if 1
2398     // This test will only work if Transliterator.ROLLBACK is
2399     // true.  Otherwise, this test will fail, revealing a
2400     // limitation of global filters in incremental mode.
2401     Transliterator *a =
2402         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2403     Transliterator *A =
2404         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2405     if (U_FAILURE(ec)) {
2406         delete a;
2407         delete A;
2408         return;
2409     }
2410 
2411     Transliterator* array[3];
2412     array[0] = a;
2413     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2414     array[2] = A;
2415     if (U_FAILURE(ec)) {
2416         errln("FAIL: createInstance NFD");
2417         delete a;
2418         delete A;
2419         delete array[1];
2420         return;
2421     }
2422 
2423     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2424     if (U_FAILURE(ec)) {
2425         errln("FAIL: UnicodeSet constructor");
2426         delete a;
2427         delete A;
2428         delete array[1];
2429         delete t;
2430         return;
2431     }
2432 
2433     expect(*t, "aAaA", "bAbA");
2434 
2435     assertTrue("countElements", t->countElements() == 3);
2436     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2437     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2438     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2439     assertSuccess("getElement", ec);
2440 
2441     delete a;
2442     delete A;
2443     delete array[1];
2444     delete t;
2445 #endif
2446 
2447     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2448            "a",
2449            "ax");
2450 
2451     UnicodeString gr = CharsToUnicodeString(
2452         "$ddot = \\u0308 ;"
2453         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2454         "$rough = \\u0314 ;"
2455         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2456         "\\u03b1 <> a ;"
2457         "$rough <> h ;");
2458 
2459     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2460 }
2461 
2462 /**
2463  * Test quantified segment behavior.  We want:
2464  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2465  */
TestQuantifiedSegment(void)2466 void TransliteratorTest::TestQuantifiedSegment(void) {
2467     // The normal case
2468     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2469 
2470     // The tricky case; the quantifier is around the segment
2471     expect("([abc])+ > x $1 x;", "cba", "xax");
2472 
2473     // Tricky case in reverse direction
2474     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2475 
2476     // Check post-context segment
2477     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2478 
2479     // Test toRule/toPattern for non-quantified segment.
2480     // Careful with spacing here.
2481     UnicodeString r("([a-c]){q} > x $1 x;");
2482     UParseError pe;
2483     UErrorCode ec = U_ZERO_ERROR;
2484     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2485     if (U_FAILURE(ec)) {
2486         errln("FAIL: createFromRules");
2487         delete t;
2488         return;
2489     }
2490     UnicodeString rr;
2491     t->toRules(rr, TRUE);
2492     if (r != rr) {
2493         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2494     } else {
2495         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496     }
2497     delete t;
2498 
2499     // Test toRule/toPattern for quantified segment.
2500     // Careful with spacing here.
2501     r = "([a-c])+{q} > x $1 x;";
2502     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2503     if (U_FAILURE(ec)) {
2504         errln("FAIL: createFromRules");
2505         delete t;
2506         return;
2507     }
2508     t->toRules(rr, TRUE);
2509     if (r != rr) {
2510         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2511     } else {
2512         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513     }
2514     delete t;
2515 }
2516 
2517 //======================================================================
2518 // Ram's tests
2519 //======================================================================
TestDevanagariLatinRT()2520 void TransliteratorTest::TestDevanagariLatinRT(){
2521     const int MAX_LEN= 52;
2522     const char* const source[MAX_LEN] = {
2523         "bh\\u0101rata",
2524         "kra",
2525         "k\\u1E63a",
2526         "khra",
2527         "gra",
2528         "\\u1E45ra",
2529         "cra",
2530         "chra",
2531         "j\\u00F1a",
2532         "jhra",
2533         "\\u00F1ra",
2534         "\\u1E6Dya",
2535         "\\u1E6Dhra",
2536         "\\u1E0Dya",
2537       //"r\\u0323ya", // \u095c is not valid in Devanagari
2538         "\\u1E0Dhya",
2539         "\\u1E5Bhra",
2540         "\\u1E47ra",
2541         "tta",
2542         "thra",
2543         "dda",
2544         "dhra",
2545         "nna",
2546         "pra",
2547         "phra",
2548         "bra",
2549         "bhra",
2550         "mra",
2551         "\\u1E49ra",
2552       //"l\\u0331ra",
2553         "yra",
2554         "\\u1E8Fra",
2555       //"l-",
2556         "vra",
2557         "\\u015Bra",
2558         "\\u1E63ra",
2559         "sra",
2560         "hma",
2561         "\\u1E6D\\u1E6Da",
2562         "\\u1E6D\\u1E6Dha",
2563         "\\u1E6Dh\\u1E6Dha",
2564         "\\u1E0D\\u1E0Da",
2565         "\\u1E0D\\u1E0Dha",
2566         "\\u1E6Dya",
2567         "\\u1E6Dhya",
2568         "\\u1E0Dya",
2569         "\\u1E0Dhya",
2570         // Not roundtrippable --
2571         // \\u0939\\u094d\\u094d\\u092E  - hma
2572         // \\u0939\\u094d\\u092E         - hma
2573         // CharsToUnicodeString("hma"),
2574         "hya",
2575         "\\u015Br\\u0325",
2576         "\\u015Bca",
2577         "\\u0115",
2578         "san\\u0304j\\u012Bb s\\u0113nagupta",
2579         "\\u0101nand vaddir\\u0101ju",
2580         "\\u0101",
2581         "a"
2582     };
2583     const char* const expected[MAX_LEN] = {
2584         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2585         "\\u0915\\u094D\\u0930",          /* kra         */
2586         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2587         "\\u0916\\u094D\\u0930",          /* khra        */
2588         "\\u0917\\u094D\\u0930",          /* gra         */
2589         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2590         "\\u091A\\u094D\\u0930",          /* cra         */
2591         "\\u091B\\u094D\\u0930",          /* chra        */
2592         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2593         "\\u091D\\u094D\\u0930",          /* jhra        */
2594         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2595         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2596         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2597         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2598       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2599         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2600         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2601         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2602         "\\u0924\\u094D\\u0924",          /* tta         */
2603         "\\u0925\\u094D\\u0930",          /* thra        */
2604         "\\u0926\\u094D\\u0926",          /* dda         */
2605         "\\u0927\\u094D\\u0930",          /* dhra        */
2606         "\\u0928\\u094D\\u0928",          /* nna         */
2607         "\\u092A\\u094D\\u0930",          /* pra         */
2608         "\\u092B\\u094D\\u0930",          /* phra        */
2609         "\\u092C\\u094D\\u0930",          /* bra         */
2610         "\\u092D\\u094D\\u0930",          /* bhra        */
2611         "\\u092E\\u094D\\u0930",          /* mra         */
2612         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2613       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2614         "\\u092F\\u094D\\u0930",          /* yra         */
2615         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2616       //"l-",
2617         "\\u0935\\u094D\\u0930",          /* vra         */
2618         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2619         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2620         "\\u0938\\u094D\\u0930",          /* sra         */
2621         "\\u0939\\u094d\\u092E",          /* hma         */
2622         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2623         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2624         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2625         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2626         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2627         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2628         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2629         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2630         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2631      // "hma",                         /* hma         */
2632         "\\u0939\\u094D\\u092F",          /* hya         */
2633         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2634         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2635         "\\u090d",                        /* e\\u0306    */
2636         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2638         "\\u0906",
2639         "\\u0905",
2640     };
2641     UErrorCode status = U_ZERO_ERROR;
2642     UParseError parseError;
2643     UnicodeString message;
2644     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2645     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2646     if(U_FAILURE(status)){
2647         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2648         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2649         return;
2650     }
2651     UnicodeString gotResult;
2652     for(int i= 0; i<MAX_LEN; i++){
2653         gotResult = source[i];
2654         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2655         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2656     }
2657     delete latinToDev;
2658     delete devToLatin;
2659 }
2660 
TestTeluguLatinRT()2661 void TransliteratorTest::TestTeluguLatinRT(){
2662     const int MAX_LEN=10;
2663     const char* const source[MAX_LEN] = {
2664         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2665         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2666         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2667         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2668         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2669         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2670         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2671         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2672         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2673         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2674     };
2675 
2676     const char* const expected[MAX_LEN] = {
2677         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687     };
2688 
2689     UErrorCode status = U_ZERO_ERROR;
2690     UParseError parseError;
2691     UnicodeString message;
2692     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2693     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2694     if(U_FAILURE(status)){
2695         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2696         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2697         return;
2698     }
2699     UnicodeString gotResult;
2700     for(int i= 0; i<MAX_LEN; i++){
2701         gotResult = source[i];
2702         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2703         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2704     }
2705     delete latinToDev;
2706     delete devToLatin;
2707 }
2708 
TestSanskritLatinRT()2709 void TransliteratorTest::TestSanskritLatinRT(){
2710     const int MAX_LEN =16;
2711     const char* const source[MAX_LEN] = {
2712         "rmk\\u1E63\\u0113t",
2713         "\\u015Br\\u012Bmad",
2714         "bhagavadg\\u012Bt\\u0101",
2715         "adhy\\u0101ya",
2716         "arjuna",
2717         "vi\\u1E63\\u0101da",
2718         "y\\u014Dga",
2719         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720         "uv\\u0101cr\\u0325",
2721         "dharmak\\u1E63\\u0113tr\\u0113",
2722         "kuruk\\u1E63\\u0113tr\\u0113",
2723         "samav\\u0113t\\u0101",
2724         "yuyutsava\\u1E25",
2725         "m\\u0101mak\\u0101\\u1E25",
2726     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2727         "kimakurvata",
2728         "san\\u0304java",
2729     };
2730     const char* const expected[MAX_LEN] = {
2731         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737         "\\u092f\\u094b\\u0917",
2738         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747         "\\u0938\\u0902\\u091c\\u0935",
2748     };
2749     UErrorCode status = U_ZERO_ERROR;
2750     UParseError parseError;
2751     UnicodeString message;
2752     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2753     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2754     if(U_FAILURE(status)){
2755         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2756         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2757         return;
2758     }
2759     UnicodeString gotResult;
2760     for(int i= 0; i<MAX_LEN; i++){
2761         gotResult = source[i];
2762         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2763         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2764     }
2765     delete latinToDev;
2766     delete devToLatin;
2767 }
2768 
2769 
TestCompoundLatinRT()2770 void TransliteratorTest::TestCompoundLatinRT(){
2771     const char* const source[] = {
2772         "rmk\\u1E63\\u0113t",
2773         "\\u015Br\\u012Bmad",
2774         "bhagavadg\\u012Bt\\u0101",
2775         "adhy\\u0101ya",
2776         "arjuna",
2777         "vi\\u1E63\\u0101da",
2778         "y\\u014Dga",
2779         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780         "uv\\u0101cr\\u0325",
2781         "dharmak\\u1E63\\u0113tr\\u0113",
2782         "kuruk\\u1E63\\u0113tr\\u0113",
2783         "samav\\u0113t\\u0101",
2784         "yuyutsava\\u1E25",
2785         "m\\u0101mak\\u0101\\u1E25",
2786      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2787         "kimakurvata",
2788         "san\\u0304java"
2789     };
2790     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2791     const char* const expected[MAX_LEN] = {
2792         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798         "\\u092f\\u094b\\u0917",
2799         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808         "\\u0938\\u0902\\u091c\\u0935"
2809     };
2810     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2811         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2812         return;
2813     }
2814 
2815     UErrorCode status = U_ZERO_ERROR;
2816     UParseError parseError;
2817     UnicodeString message;
2818     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2819     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2820     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2821     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2822 
2823     if(U_FAILURE(status)){
2824         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2825         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2826         return;
2827     }
2828     UnicodeString gotResult;
2829     for(int i= 0; i<MAX_LEN; i++){
2830         gotResult = source[i];
2831         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2832         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834 
2835     }
2836     delete(latinToDevToLatin);
2837     delete(devToLatinToDev);
2838     delete(devToTelToDev);
2839     delete(latinToTelToLatin);
2840 }
2841 
2842 /**
2843  * Test Gurmukhi-Devanagari Tippi and Bindi
2844  */
TestGurmukhiDevanagari()2845 void TransliteratorTest::TestGurmukhiDevanagari(){
2846     // the rule says:
2847     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2848     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2849     UErrorCode status = U_ZERO_ERROR;
2850     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2851     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2852     UParseError parseError;
2853 
2854     UnicodeSetIterator vIter(vowel);
2855     UnicodeSetIterator nvIter(non_vowel);
2856     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2857     if(U_FAILURE(status)) {
2858       dataerrln("Error creating transliterator %s", u_errorName(status));
2859       delete trans;
2860       return;
2861     }
2862     UnicodeString src (" \\u0902", -1, US_INV);
2863     UnicodeString expected(" \\u0A02", -1, US_INV);
2864     src = src.unescape();
2865     expected= expected.unescape();
2866 
2867     while(vIter.next()){
2868         src.setCharAt(0,(UChar) vIter.getCodepoint());
2869         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2870         expect(*trans,src,expected);
2871     }
2872 
2873     expected.setCharAt(1,0x0A70);
2874     while(nvIter.next()){
2875         //src.setCharAt(0,(char) nvIter.codepoint);
2876         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2877         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2878         expect(*trans,src,expected);
2879     }
2880     delete trans;
2881 }
2882 /**
2883  * Test instantiation from a locale.
2884  */
TestLocaleInstantiation(void)2885 void TransliteratorTest::TestLocaleInstantiation(void) {
2886     UParseError pe;
2887     UErrorCode ec = U_ZERO_ERROR;
2888     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2889     if (U_FAILURE(ec)) {
2890         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2891         delete t;
2892         return;
2893     }
2894     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2895     delete t;
2896 
2897     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2898     if (U_FAILURE(ec)) {
2899         errln("FAIL: createInstance(en-el)");
2900         delete t;
2901         return;
2902     }
2903     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2904     delete t;
2905 }
2906 
2907 /**
2908  * Test title case handling of accent (should ignore accents)
2909  */
TestTitleAccents(void)2910 void TransliteratorTest::TestTitleAccents(void) {
2911     UParseError pe;
2912     UErrorCode ec = U_ZERO_ERROR;
2913     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2914     if (U_FAILURE(ec)) {
2915         errln("FAIL: createInstance(Title)");
2916         delete t;
2917         return;
2918     }
2919     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2920     delete t;
2921 }
2922 
2923 /**
2924  * Basic test of a locale resource based rule.
2925  */
TestLocaleResource()2926 void TransliteratorTest::TestLocaleResource() {
2927     const char* DATA[] = {
2928         // id                    from               to
2929         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2930         "Latin-el",              "b",               "\\u03bc\\u03c0",
2931         "Latin-Greek",           "b",               "\\u03B2",
2932         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2933         "el-Latin",              "\\u03B2",         "v",
2934         "Greek-Latin",           "\\u03B2",         "b",
2935     };
2936     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2937     for (int32_t i=0; i<DATA_length; i+=3) {
2938         UParseError pe;
2939         UErrorCode ec = U_ZERO_ERROR;
2940         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2941         if (U_FAILURE(ec)) {
2942             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2943             delete t;
2944             continue;
2945         }
2946         expect(*t, CharsToUnicodeString(DATA[i+1]),
2947                CharsToUnicodeString(DATA[i+2]));
2948         delete t;
2949     }
2950 }
2951 
2952 /**
2953  * Make sure parse errors reference the right line.
2954  */
TestParseError()2955 void TransliteratorTest::TestParseError() {
2956     static const char* rule =
2957         "a > b;\n"
2958         "# more stuff\n"
2959         "d << b;";
2960     UErrorCode ec = U_ZERO_ERROR;
2961     UParseError pe;
2962     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2963     delete t;
2964     if (U_FAILURE(ec)) {
2965         UnicodeString err(pe.preContext);
2966         err.append((UChar)124/*|*/).append(pe.postContext);
2967         if (err.indexOf("d << b") >= 0) {
2968             logln("Ok: " + err);
2969         } else {
2970             errln("FAIL: " + err);
2971         }
2972     }
2973     else {
2974         errln("FAIL: no syntax error");
2975     }
2976     static const char* maskingRule =
2977         "a>x;\n"
2978         "# more stuff\n"
2979         "ab>y;";
2980     ec = U_ZERO_ERROR;
2981     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2982     if (ec != U_RULE_MASK_ERROR) {
2983         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2984     }
2985     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2986         errln("FAIL: did not get expected precontext");
2987     }
2988     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2989         errln("FAIL: did not get expected postcontext");
2990     }
2991 }
2992 
2993 /**
2994  * Make sure sets on output are disallowed.
2995  */
TestOutputSet()2996 void TransliteratorTest::TestOutputSet() {
2997     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2998     UErrorCode ec = U_ZERO_ERROR;
2999     UParseError pe;
3000     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3001     delete t;
3002     if (U_FAILURE(ec)) {
3003         UnicodeString err(pe.preContext);
3004         err.append((UChar)124/*|*/).append(pe.postContext);
3005         logln("Ok: " + err);
3006         return;
3007     }
3008     errln("FAIL: No syntax error");
3009 }
3010 
3011 /**
3012  * Test the use variable range pragma, making sure that use of
3013  * variable range characters is detected and flagged as an error.
3014  */
TestVariableRange()3015 void TransliteratorTest::TestVariableRange() {
3016     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3017     UErrorCode ec = U_ZERO_ERROR;
3018     UParseError pe;
3019     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3020     delete t;
3021     if (U_FAILURE(ec)) {
3022         UnicodeString err(pe.preContext);
3023         err.append((UChar)124/*|*/).append(pe.postContext);
3024         logln("Ok: " + err);
3025         return;
3026     }
3027     errln("FAIL: No syntax error");
3028 }
3029 
3030 /**
3031  * Test invalid post context error handling
3032  */
TestInvalidPostContext()3033 void TransliteratorTest::TestInvalidPostContext() {
3034     UnicodeString rule = "a}b{c>d;";
3035     UErrorCode ec = U_ZERO_ERROR;
3036     UParseError pe;
3037     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3038     delete t;
3039     if (U_FAILURE(ec)) {
3040         UnicodeString err(pe.preContext);
3041         err.append((UChar)124/*|*/).append(pe.postContext);
3042         if (err.indexOf("a}b{c") >= 0) {
3043             logln("Ok: " + err);
3044         } else {
3045             errln("FAIL: " + err);
3046         }
3047         return;
3048     }
3049     errln("FAIL: No syntax error");
3050 }
3051 
3052 /**
3053  * Test ID form variants
3054  */
TestIDForms()3055 void TransliteratorTest::TestIDForms() {
3056     const char* DATA[] = {
3057         "NFC", NULL, "NFD",
3058         "nfd", NULL, "NFC", // make sure case is ignored
3059         "Any-NFKD", NULL, "Any-NFKC",
3060         "Null", NULL, "Null",
3061         "-nfkc", "nfkc", "NFKD",
3062         "-nfkc/", "nfkc", "NFKD",
3063         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3064         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3065         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3066         "Source-", NULL, NULL,
3067         "Source/Variant-", NULL, NULL,
3068         "Source-/Variant", NULL, NULL,
3069         "/Variant", NULL, NULL,
3070         "/Variant-", NULL, NULL,
3071         "-/Variant", NULL, NULL,
3072         "-/", NULL, NULL,
3073         "-", NULL, NULL,
3074         "/", NULL, NULL,
3075     };
3076     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3077 
3078     for (int32_t i=0; i<DATA_length; i+=3) {
3079         const char* ID = DATA[i];
3080         const char* expID = DATA[i+1];
3081         const char* expInvID = DATA[i+2];
3082         UBool expValid = (expInvID != NULL);
3083         if (expID == NULL) {
3084             expID = ID;
3085         }
3086         UParseError pe;
3087         UErrorCode ec = U_ZERO_ERROR;
3088         Transliterator *t =
3089             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3090         if (U_FAILURE(ec)) {
3091             if (!expValid) {
3092                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3093             } else {
3094                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3095             }
3096             delete t;
3097             continue;
3098         }
3099         Transliterator *u = t->createInverse(ec);
3100         if (U_FAILURE(ec)) {
3101             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3102             delete t;
3103             delete u;
3104             continue;
3105         }
3106         if (t->getID() == expID &&
3107             u->getID() == expInvID) {
3108             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3109         } else {
3110             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3111                   t->getID() + " x getInverse() => " + u->getID() +
3112                   ", expected " + expInvID);
3113         }
3114         delete t;
3115         delete u;
3116     }
3117 }
3118 
3119 static const UChar SPACE[]   = {32,0};
3120 static const UChar NEWLINE[] = {10,0};
3121 static const UChar RETURN[]  = {13,0};
3122 static const UChar EMPTY[]   = {0};
3123 
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3124 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3125                                     const UnicodeString& testRulesForward) {
3126     UnicodeString rules2; t2.toRules(rules2, TRUE);
3127     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3128     rules2.findAndReplace(SPACE, EMPTY);
3129     rules2.findAndReplace(NEWLINE, EMPTY);
3130     rules2.findAndReplace(RETURN, EMPTY);
3131 
3132     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3133 
3134     if (rules2 != testRules) {
3135         errln(label);
3136         logln((UnicodeString)"GENERATED RULES: " + rules2);
3137         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3138     }
3139 }
3140 
3141 /**
3142  * Mark's toRules test.
3143  */
TestToRulesMark()3144 void TransliteratorTest::TestToRulesMark() {
3145     const char* testRules =
3146         "::[[:Latin:][:Mark:]];"
3147         "::NFKD (NFC);"
3148         "::Lower (Lower);"
3149         "a <> \\u03B1;" // alpha
3150         "::NFKC (NFD);"
3151         "::Upper (Lower);"
3152         "::Lower ();"
3153         "::([[:Greek:][:Mark:]]);"
3154         ;
3155     const char* testRulesForward =
3156         "::[[:Latin:][:Mark:]];"
3157         "::NFKD(NFC);"
3158         "::Lower(Lower);"
3159         "a > \\u03B1;"
3160         "::NFKC(NFD);"
3161         "::Upper (Lower);"
3162         "::Lower ();"
3163         ;
3164     const char* testRulesBackward =
3165         "::[[:Greek:][:Mark:]];"
3166         "::Lower (Upper);"
3167         "::NFD(NFKC);"
3168         "\\u03B1 > a;"
3169         "::Lower(Lower);"
3170         "::NFC(NFKD);"
3171         ;
3172     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3173     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3174 
3175     UParseError pe;
3176     UErrorCode ec = U_ZERO_ERROR;
3177     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3178     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3179 
3180     if (U_FAILURE(ec)) {
3181         delete t2;
3182         delete t3;
3183         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3184         return;
3185     }
3186 
3187     expect(*t2, source, target);
3188     expect(*t3, target, source);
3189 
3190     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3191     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3192 
3193     delete t2;
3194     delete t3;
3195 }
3196 
3197 /**
3198  * Test Escape and Unescape transliterators.
3199  */
TestEscape()3200 void TransliteratorTest::TestEscape() {
3201     UParseError pe;
3202     UErrorCode ec;
3203     Transliterator *t;
3204 
3205     ec = U_ZERO_ERROR;
3206     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3207     if (U_FAILURE(ec)) {
3208         errln((UnicodeString)"FAIL: createInstance");
3209     } else {
3210         expect(*t,
3211                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3212                "@12Q");
3213     }
3214     delete t;
3215 
3216     ec = U_ZERO_ERROR;
3217     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3218     if (U_FAILURE(ec)) {
3219         errln((UnicodeString)"FAIL: createInstance");
3220     } else {
3221         expect(*t,
3222                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3223                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3224     }
3225     delete t;
3226 
3227     ec = U_ZERO_ERROR;
3228     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3229     if (U_FAILURE(ec)) {
3230         errln((UnicodeString)"FAIL: createInstance");
3231     } else {
3232         expect(*t,
3233                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3234                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3235     }
3236     delete t;
3237 
3238     ec = U_ZERO_ERROR;
3239     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3240     if (U_FAILURE(ec)) {
3241         errln((UnicodeString)"FAIL: createInstance");
3242     } else {
3243         expect(*t,
3244                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3245                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3246     }
3247     delete t;
3248 }
3249 
3250 
TestAnchorMasking()3251 void TransliteratorTest::TestAnchorMasking(){
3252     UnicodeString rule ("^a > Q; a > q;");
3253     UErrorCode status= U_ZERO_ERROR;
3254     UParseError parseError;
3255 
3256     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3257     if(U_FAILURE(status)){
3258         errln(UnicodeString("FAIL: ") + "ID" +
3259               ".createFromRules() => bad rules" +
3260               /*", parse error " + parseError.code +*/
3261               ", line " + parseError.line +
3262               ", offset " + parseError.offset +
3263               ", context " + prettify(parseError.preContext, TRUE) +
3264               ", rules: " + prettify(rule, TRUE));
3265     }
3266     delete t;
3267 }
3268 
3269 /**
3270  * Make sure display names of variants look reasonable.
3271  */
TestDisplayName()3272 void TransliteratorTest::TestDisplayName() {
3273 #if UCONFIG_NO_FORMATTING
3274     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3275     return;
3276 #else
3277     static const char* DATA[] = {
3278         // ID, forward name, reverse name
3279         // Update the text as necessary -- the important thing is
3280         // not the text itself, but how various cases are handled.
3281 
3282         // Basic test
3283         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3284 
3285         // Variants
3286         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3287 
3288         // Target-only IDs
3289         "NFC", "Any to NFC", "Any to NFD",
3290     };
3291 
3292     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3293 
3294     Locale US("en", "US");
3295 
3296     for (int32_t i=0; i<DATA_length; i+=3) {
3297         UnicodeString name;
3298         Transliterator::getDisplayName(DATA[i], US, name);
3299         if (name != DATA[i+1]) {
3300             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3301                   name + ", expected " + DATA[i+1]);
3302         } else {
3303             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3304         }
3305         UErrorCode ec = U_ZERO_ERROR;
3306         UParseError pe;
3307         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3308         if (U_FAILURE(ec)) {
3309             delete t;
3310             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3311             continue;
3312         }
3313         name = Transliterator::getDisplayName(t->getID(), US, name);
3314         if (name != DATA[i+2]) {
3315             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3316                   name + ", expected " + DATA[i+2]);
3317         } else {
3318             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3319         }
3320         delete t;
3321     }
3322 #endif
3323 }
3324 
TestSpecialCases(void)3325 void TransliteratorTest::TestSpecialCases(void) {
3326     const UnicodeString registerRules[] = {
3327         "Any-Dev1", "x > X; y > Y;",
3328         "Any-Dev2", "XY > Z",
3329         "Greek-Latin/FAKE",
3330             CharsToUnicodeString
3331             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3332         "" // END MARKER
3333     };
3334 
3335     const UnicodeString testCases[] = {
3336         // NORMALIZATION
3337         // should add more test cases
3338         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342 
3343         // mp -> b BUG
3344         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346 
3347         // check for devanagari bug
3348         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349 
3350         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3351         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3353 
3354         //TODO: enable this test once Titlecase works right
3355         /*
3356         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3357                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358                  */
3359         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3360                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3361         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3363 
3364         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366 
3367          // FORMS OF S
3368         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3370         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3371                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3372         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3373                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3374         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3375                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376         // Tatiana bug
3377         // Upper: TAT\\u02B9\\u00C2NA
3378         // Lower: tat\\u02B9\\u00E2na
3379         // Title: Tat\\u02B9\\u00E2na
3380         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3381                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3386 
3387         "" // END MARKER
3388     };
3389 
3390     UParseError pos;
3391     int32_t i;
3392     for (i = 0; registerRules[i].length()!=0; i+=2) {
3393         UErrorCode status = U_ZERO_ERROR;
3394 
3395         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3396             registerRules[i+1], UTRANS_FORWARD, pos, status);
3397         if (U_FAILURE(status)) {
3398             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3399         } else {
3400             Transliterator::registerInstance(t);
3401         }
3402     }
3403     for (i = 0; testCases[i].length()!=0; i+=3) {
3404         UErrorCode ec = U_ZERO_ERROR;
3405         UParseError pe;
3406         const UnicodeString& name = testCases[i];
3407         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3408         if (U_FAILURE(ec)) {
3409             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3410             delete t;
3411             continue;
3412         }
3413         const UnicodeString& id = t->getID();
3414         const UnicodeString& source = testCases[i+1];
3415         UnicodeString target;
3416 
3417         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418 
3419         if (testCases[i+2].length() > 0) {
3420             target = testCases[i+2];
3421         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3422             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3423         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3424             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3425         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3426             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3427         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3428             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3429         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3430             target = source;
3431             target.toLower(Locale::getUS());
3432         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3433             target = source;
3434             target.toUpper(Locale::getUS());
3435         }
3436         if (U_FAILURE(ec)) {
3437             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3438             continue;
3439         }
3440 
3441         expect(*t, source, target);
3442         delete t;
3443     }
3444     for (i = 0; registerRules[i].length()!=0; i+=2) {
3445         Transliterator::unregister(registerRules[i]);
3446     }
3447 }
3448 
Char32ToEscapedChars(UChar32 ch,char * buffer)3449 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3450     if (ch <= 0xFFFF) {
3451         sprintf(buffer, "\\u%04x", (int)ch);
3452     } else {
3453         sprintf(buffer, "\\U%08x", (int)ch);
3454     }
3455     return buffer;
3456 }
3457 
TestSurrogateCasing(void)3458 void TransliteratorTest::TestSurrogateCasing (void) {
3459     // check that casing handles surrogates
3460     // titlecase is currently defective
3461     char buffer[20];
3462     UChar buffer2[20];
3463     UChar32 dee;
3464     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3465     UnicodeString DEE(u_totitle(dee));
3466     if (DEE != DESERET_DEE) {
3467         err("Fails titlecase of surrogates");
3468         err(Char32ToEscapedChars(dee, buffer));
3469         err(", ");
3470         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3471     }
3472 
3473     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3474     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3475     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3476     UErrorCode status= U_ZERO_ERROR;
3477 
3478     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3480         errln("Fails: Can't uppercase surrogates.");
3481     }
3482 
3483     status= U_ZERO_ERROR;
3484     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3485     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3486         errln("Fails: Can't lowercase surrogates.");
3487     }
3488 }
3489 
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3490 static void _trans(Transliterator& t, const UnicodeString& src,
3491                    UnicodeString& result) {
3492     result = src;
3493     t.transliterate(result);
3494 }
3495 
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3496 static void _trans(const UnicodeString& id, const UnicodeString& src,
3497                    UnicodeString& result, UErrorCode ec) {
3498     UParseError pe;
3499     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3500     if (U_SUCCESS(ec)) {
3501         _trans(*t, src, result);
3502     }
3503     delete t;
3504 }
3505 
/**
 * Look up source in a flat table of (key, value) pairs.
 *
 * @param source key to search for; compared case-insensitively
 *               (U_FOLD_CASE_DEFAULT)
 * @param pairs  array laid out as key0, value0, key1, value1, ...,
 *               terminated by an empty key
 * @return the value paired with the first matching key, or an empty string
 *         when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    const UnicodeString* entry = pairs;
    while (entry->length() > 0) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
        entry += 2;
    }
    return UnicodeString();
}
3516 
3517 // Check to see that incremental gets at least part way through a reasonable string.
3518 
TestIncrementalProgress(void)3519 void TransliteratorTest::TestIncrementalProgress(void) {
3520     UErrorCode ec = U_ZERO_ERROR;
3521     UnicodeString latinTest = "The Quick Brown Fox.";
3522     UnicodeString devaTest;
3523     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3524     UnicodeString kataTest;
3525     _trans("Latin-Katakana", latinTest, kataTest, ec);
3526     if (U_FAILURE(ec)) {
3527         errln("FAIL: Internal error");
3528         return;
3529     }
3530     const UnicodeString tests[] = {
3531         "Any", latinTest,
3532         "Latin", latinTest,
3533         "Halfwidth", latinTest,
3534         "Devanagari", devaTest,
3535         "Katakana", kataTest,
3536         "" // END MARKER
3537     };
3538 
3539     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3540     int32_t i = 0, j=0, k=0;
3541     int32_t sources = Transliterator::countAvailableSources();
3542     for (i = 0; i < sources; i++) {
3543         UnicodeString source;
3544         Transliterator::getAvailableSource(i, source);
3545         UnicodeString test = _findMatch(source, tests);
3546         if (test.length() == 0) {
3547             logln((UnicodeString)"Skipping " + source + "-X");
3548             continue;
3549         }
3550         int32_t targets = Transliterator::countAvailableTargets(source);
3551         for (j = 0; j < targets; j++) {
3552             UnicodeString target;
3553             Transliterator::getAvailableTarget(j, source, target);
3554             int32_t variants = Transliterator::countAvailableVariants(source, target);
3555             for (k =0; k< variants; k++) {
3556                 UnicodeString variant;
3557                 UParseError err;
3558                 UErrorCode status = U_ZERO_ERROR;
3559 
3560                 Transliterator::getAvailableVariant(k, source, target, variant);
3561                 UnicodeString id = source + "-" + target + "/" + variant;
3562 
3563                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3564                 if (U_FAILURE(status)) {
3565                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3566                     delete t;
3567                     continue;
3568                 }
3569                 status = U_ZERO_ERROR;
3570                 CheckIncrementalAux(t, test);
3571 
3572                 UnicodeString rev;
3573                 _trans(*t, test, rev);
3574                 Transliterator *inv = t->createInverse(status);
3575                 if (U_FAILURE(status)) {
3576 #if UCONFIG_NO_BREAK_ITERATION
3577                     // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3578                     if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3579 #endif
3580                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3581 
3582                     delete t;
3583                     delete inv;
3584                     continue;
3585                 }
3586                 CheckIncrementalAux(inv, rev);
3587                 delete t;
3588                 delete inv;
3589             }
3590         }
3591     }
3592 }
3593 
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3594 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3595                                                       const UnicodeString& input) {
3596     UErrorCode ec = U_ZERO_ERROR;
3597     UTransPosition pos;
3598     UnicodeString test = input;
3599 
3600     pos.contextStart = 0;
3601     pos.contextLimit = input.length();
3602     pos.start = 0;
3603     pos.limit = input.length();
3604 
3605     t->transliterate(test, pos, ec);
3606     if (U_FAILURE(ec)) {
3607         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3608         return;
3609     }
3610     UBool gotError = FALSE;
3611 
3612     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3613 
3614     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3615         errln((UnicodeString)"No Progress, " +
3616               t->getID() + ": " + formatInput(test, input, pos));
3617         gotError = TRUE;
3618     } else {
3619         logln((UnicodeString)"PASS Progress, " +
3620               t->getID() + ": " + formatInput(test, input, pos));
3621     }
3622     t->finishTransliteration(test, pos);
3623     if (pos.start != pos.limit) {
3624         errln((UnicodeString)"Incomplete, " +
3625               t->getID() + ": " + formatInput(test, input, pos));
3626         gotError = TRUE;
3627     }
3628 }
3629 
TestFunction()3630 void TransliteratorTest::TestFunction() {
3631     // Careful with spacing and ';' here:  Phrase this exactly
3632     // as toRules() is going to return it.  If toRules() changes
3633     // with regard to spacing or ';', then adjust this string.
3634     UnicodeString rule =
3635         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3636 
3637     UParseError pe;
3638     UErrorCode ec = U_ZERO_ERROR;
3639     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3640     if (t == NULL) {
3641         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3642         return;
3643     }
3644 
3645     UnicodeString r;
3646     t->toRules(r, TRUE);
3647     if (r == rule) {
3648         logln((UnicodeString)"OK: toRules() => " + r);
3649     } else {
3650         errln((UnicodeString)"FAIL: toRules() => " + r +
3651               ", expected " + rule);
3652     }
3653 
3654     expect(*t, "The Quick Brown Fox",
3655            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3656 
3657     delete t;
3658 }
3659 
TestInvalidBackRef(void)3660 void TransliteratorTest::TestInvalidBackRef(void) {
3661     UnicodeString rule =  ". > $1;";
3662     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3663     UParseError pe;
3664     UErrorCode ec = U_ZERO_ERROR;
3665     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3666     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3667 
3668     if (t != NULL) {
3669         errln("FAIL: createFromRules should have returned NULL");
3670         delete t;
3671     }
3672 
3673     if (t2 != NULL) {
3674         errln("FAIL: createFromRules should have returned NULL");
3675         delete t2;
3676     }
3677 
3678     if (U_SUCCESS(ec)) {
3679         errln("FAIL: Ok: . > $1; => no error");
3680     } else {
3681         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3682     }
3683 }
3684 
TestMulticharStringSet()3685 void TransliteratorTest::TestMulticharStringSet() {
3686     // Basic testing
3687     const char* rule =
3688         "       [{aa}]       > x;"
3689         "         a          > y;"
3690         "       [b{bc}]      > z;"
3691         "[{gd}] { e          > q;"
3692         "         e } [{fg}] > r;" ;
3693 
3694     UParseError pe;
3695     UErrorCode ec = U_ZERO_ERROR;
3696     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3697     if (t == NULL || U_FAILURE(ec)) {
3698         delete t;
3699         errln("FAIL: createFromRules failed");
3700         return;
3701     }
3702 
3703     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3704            "y x yz z d gd de gdq gdqfg ddrfg");
3705     delete t;
3706 
3707     // Overlapped string test.  Make sure that when multiple
3708     // strings can match that the longest one is matched.
3709     rule =
3710         "    [a {ab} {abc}]    > x;"
3711         "           b          > y;"
3712         "           c          > z;"
3713         " q [t {st} {rst}] { e > p;" ;
3714 
3715     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3716     if (t == NULL || U_FAILURE(ec)) {
3717         delete t;
3718         errln("FAIL: createFromRules failed");
3719         return;
3720     }
3721 
3722     expect(*t, "a ab abc qte qste qrste",
3723            "x x x qtp qstp qrstp");
3724     delete t;
3725 }
3726 
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728 // BEGIN TestUserFunction support factory
3729 
3730 Transliterator* _TUFF[4];
3731 UnicodeString* _TUFID[4];
3732 
_TUFFactory(const UnicodeString &,Transliterator::Token context)3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3734                                    Transliterator::Token context) {
3735     return _TUFF[context.integer]->clone();
3736 }
3737 
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3739     _TUFF[n] = t;
3740     _TUFID[n] = new UnicodeString(ID);
3741     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3742 }
3743 
_TUFUnreg(int32_t n)3744 static void _TUFUnreg(int32_t n) {
3745     if (_TUFF[n] != NULL) {
3746         Transliterator::unregister(*_TUFID[n]);
3747         delete _TUFF[n];
3748         delete _TUFID[n];
3749     }
3750 }
3751 
3752 // END TestUserFunction support factory
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3754 
3755 /**
3756  * Test that user-registered transliterators can be used under function
3757  * syntax.
3758  */
TestUserFunction()3759 void TransliteratorTest::TestUserFunction() {
3760 
3761     Transliterator* t;
3762     UParseError pe;
3763     UErrorCode ec = U_ZERO_ERROR;
3764 
3765     // Setup our factory
3766     int32_t i;
3767     for (i=0; i<4; ++i) {
3768         _TUFF[i] = NULL;
3769     }
3770 
3771     // There's no need to register inverses if we don't use them
3772     t = Transliterator::createFromRules("gif",
3773                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774                                         UTRANS_FORWARD, pe, ec);
3775     if (t == NULL || U_FAILURE(ec)) {
3776         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3777         return;
3778     }
3779     _TUFReg("Any-gif", t, 0);
3780 
3781     t = Transliterator::createFromRules("RemoveCurly",
3782                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783                                         UTRANS_FORWARD, pe, ec);
3784     if (t == NULL || U_FAILURE(ec)) {
3785         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3786         goto FAIL;
3787     }
3788     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789     _TUFReg("Any-RemoveCurly", t, 1);
3790 
3791     logln("Trying &hex");
3792     t = Transliterator::createFromRules("hex2",
3793                                         "(.) > &hex($1);",
3794                                         UTRANS_FORWARD, pe, ec);
3795     if (t == NULL || U_FAILURE(ec)) {
3796         errln("FAIL: createFromRules");
3797         goto FAIL;
3798     }
3799     logln("Registering");
3800     _TUFReg("Any-hex2", t, 2);
3801     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3802     if (t == NULL || U_FAILURE(ec)) {
3803         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3804         goto FAIL;
3805     }
3806     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3807     delete t;
3808 
3809     logln("Trying &gif");
3810     t = Transliterator::createFromRules("gif2",
3811                                         "(.) > &Gif(&Hex2($1));",
3812                                         UTRANS_FORWARD, pe, ec);
3813     if (t == NULL || U_FAILURE(ec)) {
3814         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3815         goto FAIL;
3816     }
3817     logln("Registering");
3818     _TUFReg("Any-gif2", t, 3);
3819     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3820     if (t == NULL || U_FAILURE(ec)) {
3821         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3822         goto FAIL;
3823     }
3824     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3826     delete t;
3827 
3828     // Test that filters are allowed after &
3829     t = Transliterator::createFromRules("test",
3830                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831                                         UTRANS_FORWARD, pe, ec);
3832     if (t == NULL || U_FAILURE(ec)) {
3833         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3834         goto FAIL;
3835     }
3836     expect(*t, "abc",
3837            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3838     delete t;
3839 
3840  FAIL:
3841     for (i=0; i<4; ++i) {
3842         _TUFUnreg(i);
3843     }
3844 }
3845 
3846 /**
3847  * Test the Any-X transliterators.
3848  */
TestAnyX(void)3849 void TransliteratorTest::TestAnyX(void) {
3850     UParseError parseError;
3851     UErrorCode status = U_ZERO_ERROR;
3852     Transliterator* anyLatin =
3853         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3854     if (anyLatin==0) {
3855         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3856         delete anyLatin;
3857         return;
3858     }
3859 
3860     expect(*anyLatin,
3861            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3863 
3864     delete anyLatin;
3865 }
3866 
3867 /**
3868  * Test Any-X transliterators with sample letters from all scripts.
3869  */
TestAny(void)3870 void TransliteratorTest::TestAny(void) {
3871     UErrorCode status = U_ZERO_ERROR;
3872     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3873     //       function call parameters going on in this test.
3874     UnicodeSet alphabetic("[:alphabetic:]", status);
3875     if (U_FAILURE(status)) {
3876         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3877         return;
3878     }
3879     alphabetic.freeze();
3880 
3881     UnicodeString testString;
3882     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3883         const char *scriptName = uscript_getShortName((UScriptCode)i);
3884         if (scriptName == NULL) {
3885             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3886             return;
3887         }
3888 
3889         UnicodeSet sample;
3890         sample.applyPropertyAlias("script", scriptName, status);
3891         if (U_FAILURE(status)) {
3892             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3893             return;
3894         }
3895         sample.retainAll(alphabetic);
3896         for (int32_t count=0; count<5; count++) {
3897             UChar32 c = sample.charAt(count);
3898             if (c == -1) {
3899                 break;
3900             }
3901             testString.append(c);
3902         }
3903     }
3904 
3905     UParseError parseError;
3906     Transliterator* anyLatin =
3907         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3908     if (U_FAILURE(status)) {
3909         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3910         return;
3911     }
3912 
3913     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3914     anyLatin->transliterate(testString);
3915     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3916     delete anyLatin;
3917 }
3918 
3919 
3920 /**
3921  * Test the source and target set API.  These are only implemented
3922  * for RBT and CompoundTransliterator at this time.
3923  */
TestSourceTargetSet()3924 void TransliteratorTest::TestSourceTargetSet() {
3925     UErrorCode ec = U_ZERO_ERROR;
3926 
3927     // Rules
3928     const char* r =
3929         "a > b; "
3930         "r [x{lu}] > q;";
3931 
3932     // Expected source
3933     UnicodeSet expSrc("[arx{lu}]", ec);
3934 
3935     // Expected target
3936     UnicodeSet expTrg("[bq]", ec);
3937 
3938     UParseError pe;
3939     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3940 
3941     if (U_FAILURE(ec)) {
3942         delete t;
3943         errln("FAIL: Couldn't set up test");
3944         return;
3945     }
3946 
3947     UnicodeSet src; t->getSourceSet(src);
3948     UnicodeSet trg; t->getTargetSet(trg);
3949 
3950     if (src == expSrc && trg == expTrg) {
3951         UnicodeString a, b;
3952         logln((UnicodeString)"Ok: " +
3953               r + " => source = " + src.toPattern(a, TRUE) +
3954               ", target = " + trg.toPattern(b, TRUE));
3955     } else {
3956         UnicodeString a, b, c, d;
3957         errln((UnicodeString)"FAIL: " +
3958               r + " => source = " + src.toPattern(a, TRUE) +
3959               ", expected " + expSrc.toPattern(b, TRUE) +
3960               "; target = " + trg.toPattern(c, TRUE) +
3961               ", expected " + expTrg.toPattern(d, TRUE));
3962     }
3963 
3964     delete t;
3965 }
3966 
3967 /**
3968  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3969  */
TestPatternWhiteSpace()3970 void TransliteratorTest::TestPatternWhiteSpace() {
3971     // Rules
3972     const char* r = "a > \\u200E b;";
3973 
3974     UErrorCode ec = U_ZERO_ERROR;
3975     UParseError pe;
3976     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3977 
3978     if (U_FAILURE(ec)) {
3979         errln("FAIL: Couldn't set up test");
3980     } else {
3981         expect(*t, "a", "b");
3982     }
3983     delete t;
3984 
3985     // UnicodeSet
3986     ec = U_ZERO_ERROR;
3987     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3988 
3989     if (U_FAILURE(ec)) {
3990         errln("FAIL: Couldn't set up test");
3991     } else {
3992         if (set.contains(0x200E)) {
3993             errln("FAIL: U+200E not being ignored by UnicodeSet");
3994         }
3995     }
3996 }
3997 //======================================================================
3998 // this method is in TestUScript.java
3999 //======================================================================
TestAllCodepoints()4000 void TransliteratorTest::TestAllCodepoints(){
4001     UScriptCode code= USCRIPT_INVALID_CODE;
4002     char id[256]={'\0'};
4003     char abbr[256]={'\0'};
4004     char newId[256]={'\0'};
4005     char newAbbrId[256]={'\0'};
4006     char oldId[256]={'\0'};
4007     char oldAbbrId[256]={'\0'};
4008 
4009     UErrorCode status =U_ZERO_ERROR;
4010     UParseError pe;
4011 
4012     for(uint32_t i = 0; i<=0x10ffff; i++){
4013         code =  uscript_getScript(i,&status);
4014         if(code == USCRIPT_INVALID_CODE){
4015             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4016         }
4017         const char* myId = uscript_getName(code);
4018         if(!myId) {
4019           dataerrln("Valid script code returned NULL name. Check your data!");
4020           return;
4021         }
4022         uprv_strcpy(id,myId);
4023         uprv_strcpy(abbr,uscript_getShortName(code));
4024 
4025         uprv_strcpy(newId,"[:");
4026         uprv_strcat(newId,id);
4027         uprv_strcat(newId,":];NFD");
4028 
4029         uprv_strcpy(newAbbrId,"[:");
4030         uprv_strcat(newAbbrId,abbr);
4031         uprv_strcat(newAbbrId,":];NFD");
4032 
4033         if(uprv_strcmp(newId,oldId)!=0){
4034             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4035             if(t==NULL || U_FAILURE(status)){
4036                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4037             }
4038             delete t;
4039         }
4040         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4041             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4042             if(t==NULL || U_FAILURE(status)){
4043                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4044             }
4045             delete t;
4046         }
4047         uprv_strcpy(oldId,newId);
4048         uprv_strcpy(oldAbbrId, newAbbrId);
4049 
4050     }
4051 
4052 }
4053 
/*
 * TEST_TRANSLIT_ID(id, cls)
 * Creates a forward transliterator from the ID `id` and reports an error if
 * its dynamic class ID differs from cls::getStaticClassID().  A creation
 * failure is reported through dataerrln().
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement and is safe inside if/else.
 */
#define TEST_TRANSLIT_ID(id, cls) do { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
} while (0)
4067 
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 * Creates a forward transliterator named "_" from the rule text `rule` and
 * reports an error if its dynamic class ID differs from
 * cls::getStaticClassID().
 * The rule text is passed to errln() as a %s argument rather than spliced
 * into the format string, so a '%' in a rule is printed literally and `rule`
 * need not be a string literal.  The body is wrapped in do { } while (0) so
 * that an invocation followed by ';' is exactly one statement.
 */
#define TEST_TRANSLIT_RULE(rule, cls) do { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create %s", rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
} while (0)
4082 
TestBoilerplate()4083 void TransliteratorTest::TestBoilerplate() {
4084     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4085     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4086     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4087     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4088     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4089     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4090     TEST_TRANSLIT_ID("Null", NullTransliterator);
4091     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4092     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4093     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4094     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4095     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4096     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4097 }
4098 
TestAlternateSyntax()4099 void TransliteratorTest::TestAlternateSyntax() {
4100     // U+2206 == &
4101     // U+2190 == <
4102     // U+2192 == >
4103     // U+2194 == <>
4104     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4105            "abc",
4106            "xbz");
4107     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4110 }
4111 
4112 static const char* BEGIN_END_RULES[] = {
4113     // [0]
4114     "abc > xy;"
4115     "aba > z;",
4116 
4117     // [1]
4118 /*
4119     "::BEGIN;"
4120     "abc > xy;"
4121     "::END;"
4122     "::BEGIN;"
4123     "aba > z;"
4124     "::END;",
4125 */
4126     "", // test case commented out below, this is here to keep from messing up the indexes
4127 
4128     // [2]
4129 /*
4130     "abc > xy;"
4131     "::BEGIN;"
4132     "aba > z;"
4133     "::END;",
4134 */
4135     "", // test case commented out below, this is here to keep from messing up the indexes
4136 
4137     // [3]
4138 /*
4139     "::BEGIN;"
4140     "abc > xy;"
4141     "::END;"
4142     "aba > z;",
4143 */
4144     "", // test case commented out below, this is here to keep from messing up the indexes
4145 
4146     // [4]
4147     "abc > xy;"
4148     "::Null;"
4149     "aba > z;",
4150 
4151     // [5]
4152     "::Upper;"
4153     "ABC > xy;"
4154     "AB > x;"
4155     "C > z;"
4156     "::Upper;"
4157     "XYZ > p;"
4158     "XY > q;"
4159     "Z > r;"
4160     "::Upper;",
4161 
4162     // [6]
4163     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4164     "$delim = [\\-$ws];"
4165     "$ws $delim* > ' ';"
4166     "'-' $delim* > '-';",
4167 
4168     // [7]
4169     "::Null;"
4170     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4171     "$delim = [\\-$ws];"
4172     "$ws $delim* > ' ';"
4173     "'-' $delim* > '-';",
4174 
4175     // [8]
4176     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4177     "$delim = [\\-$ws];"
4178     "$ws $delim* > ' ';"
4179     "'-' $delim* > '-';"
4180     "::Null;",
4181 
4182     // [9]
4183     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4184     "$delim = [\\-$ws];"
4185     "::Null;"
4186     "$ws $delim* > ' ';"
4187     "'-' $delim* > '-';",
4188 
4189     // [10]
4190 /*
4191     "::BEGIN;"
4192     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4193     "$delim = [\\-$ws];"
4194     "::END;"
4195     "$ws $delim* > ' ';"
4196     "'-' $delim* > '-';",
4197 */
4198     "", // test case commented out below, this is here to keep from messing up the indexes
4199 
4200     // [11]
4201 /*
4202     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4203     "$delim = [\\-$ws];"
4204     "::BEGIN;"
4205     "$ws $delim* > ' ';"
4206     "'-' $delim* > '-';"
4207     "::END;",
4208 */
4209     "", // test case commented out below, this is here to keep from messing up the indexes
4210 
4211     // [12]
4212 /*
4213     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4214     "$delim = [\\-$ws];"
4215     "$ab = [ab];"
4216     "::BEGIN;"
4217     "$ws $delim* > ' ';"
4218     "'-' $delim* > '-';"
4219     "::END;"
4220     "::BEGIN;"
4221     "$ab { ' ' } $ab > '-';"
4222     "c { ' ' > ;"
4223     "::END;"
4224     "::BEGIN;"
4225     "'a-a' > a\\%|a;"
4226     "::END;",
4227 */
4228     "", // test case commented out below, this is here to keep from messing up the indexes
4229 
4230     // [13]
4231     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4232     "$delim = [\\-$ws];"
4233     "$ab = [ab];"
4234     "::Null;"
4235     "$ws $delim* > ' ';"
4236     "'-' $delim* > '-';"
4237     "::Null;"
4238     "$ab { ' ' } $ab > '-';"
4239     "c { ' ' > ;"
4240     "::Null;"
4241     "'a-a' > a\\%|a;",
4242 
4243     // [14]
4244 /*
4245     "::[abc];"
4246     "::BEGIN;"
4247     "abc > xy;"
4248     "::END;"
4249     "::BEGIN;"
4250     "aba > yz;"
4251     "::END;"
4252     "::Upper;",
4253 */
4254     "", // test case commented out below, this is here to keep from messing up the indexes
4255 
4256     // [15]
4257     "::[abc];"
4258     "abc > xy;"
4259     "::Null;"
4260     "aba > yz;"
4261     "::Upper;",
4262 
4263     // [16]
4264 /*
4265     "::[abc];"
4266     "::BEGIN;"
4267     "abc <> xy;"
4268     "::END;"
4269     "::BEGIN;"
4270     "aba <> yz;"
4271     "::END;"
4272     "::Upper(Lower);"
4273     "::([XYZ]);"
4274 */
4275     "", // test case commented out below, this is here to keep from messing up the indexes
4276 
4277     // [17]
4278     "::[abc];"
4279     "abc <> xy;"
4280     "::Null;"
4281     "aba <> yz;"
4282     "::Upper(Lower);"
4283     "::([XYZ]);"
4284 };
4285 static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4286 
4287 /*
4288 (This entire test is commented out below and will need some heavy revision when we re-add
4289 the ::BEGIN/::END stuff)
4290 static const char* BOGUS_BEGIN_END_RULES[] = {
4291     // [7]
4292     "::BEGIN;"
4293     "abc > xy;"
4294     "::BEGIN;"
4295     "aba > z;"
4296     "::END;"
4297     "::END;",
4298 
4299     // [8]
4300     "abc > xy;"
4301     " aba > z;"
4302     "::END;",
4303 
4304     // [9]
4305     "::BEGIN;"
4306     "::Upper;"
4307     "::END;"
4308 };
4309 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4310 */
4311 
4312 static const char* BEGIN_END_TEST_CASES[] = {
4313     // rules             input                   expected output
4314     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4315 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4316 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4317 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4318     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4319     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4320 
4321     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4322     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4323     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4324     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4325 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4326 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4327 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4328 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4329 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4330     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4331     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4332     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4333 
4334 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4335     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4336 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4337     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4338 };
4339 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4340 
TestBeginEnd()4341 void TransliteratorTest::TestBeginEnd() {
4342     // run through the list of test cases above
4343     int32_t i = 0;
4344     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4345         expect((UnicodeString)"Test case #" + (i / 3),
4346                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4347                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4348                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4349     }
4350 
4351     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4352     UParseError parseError;
4353     UErrorCode status = U_ZERO_ERROR;
4354     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4355             UTRANS_REVERSE, parseError, status);
4356     if (reversed == 0 || U_FAILURE(status)) {
4357         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4358     } else {
4359         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4360     }
4361     delete reversed;
4362 
4363     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4364     // that all of them cause errors
4365 /*
4366 (commented out until we have the real ::BEGIN/::END stuff in place
4367     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4368         UParseError parseError;
4369         UErrorCode status = U_ZERO_ERROR;
4370         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4371                 UTRANS_FORWARD, parseError, status);
4372         if (!U_FAILURE(status)) {
4373             delete t;
4374             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4375         }
4376     }
4377 */
4378 }
4379 
TestBeginEndToRules()4380 void TransliteratorTest::TestBeginEndToRules() {
4381     // run through the same list of test cases we used above, but this time, instead of just
4382     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4383     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4384     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4385     // to (i.e., does the same thing as) the original rule set
4386     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4387         UParseError parseError;
4388         UErrorCode status = U_ZERO_ERROR;
4389         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4390                 UTRANS_FORWARD, parseError, status);
4391         if (U_FAILURE(status)) {
4392             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4393         } else {
4394             UnicodeString rules;
4395             t->toRules(rules, TRUE);
4396             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4397                     UTRANS_FORWARD, parseError, status);
4398             if (U_FAILURE(status)) {
4399                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4400                         parseError, status);
4401                 delete t;
4402             } else {
4403                 expect(*t2,
4404                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4405                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4406                 delete t;
4407                 delete t2;
4408             }
4409         }
4410     }
4411 
4412     // do the same thing for the reversible test case
4413     UParseError parseError;
4414     UErrorCode status = U_ZERO_ERROR;
4415     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4416             UTRANS_REVERSE, parseError, status);
4417     if (U_FAILURE(status)) {
4418         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4419     } else {
4420         UnicodeString rules;
4421         reversed->toRules(rules, FALSE);
4422         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4423                 parseError, status);
4424         if (U_FAILURE(status)) {
4425             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4426                     parseError, status);
4427             delete reversed;
4428         } else {
4429             expect(*reversed2,
4430                    UnicodeString("xy XY XYZ yz YZ"),
4431                    UnicodeString("xy abc xaba yz aba"));
4432             delete reversed;
4433             delete reversed2;
4434         }
4435     }
4436 }
4437 
TestRegisterAlias()4438 void TransliteratorTest::TestRegisterAlias() {
4439     UnicodeString longID("Lower;[aeiou]Upper");
4440     UnicodeString shortID("Any-CapVowels");
4441     UnicodeString reallyShortID("CapVowels");
4442 
4443     Transliterator::registerAlias(shortID, longID);
4444 
4445     UErrorCode err = U_ZERO_ERROR;
4446     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4447     if (U_FAILURE(err)) {
4448         errln("Failed to instantiate transliterator with long ID");
4449         Transliterator::unregister(shortID);
4450         return;
4451     }
4452     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4453     if (U_FAILURE(err)) {
4454         errln("Failed to instantiate transliterator with short ID");
4455         delete t1;
4456         Transliterator::unregister(shortID);
4457         return;
4458     }
4459 
4460     if (t1->getID() != longID)
4461         errln("Transliterator instantiated with long ID doesn't have long ID");
4462     if (t2->getID() != reallyShortID)
4463         errln("Transliterator instantiated with short ID doesn't have short ID");
4464 
4465     UnicodeString rules1;
4466     UnicodeString rules2;
4467 
4468     t1->toRules(rules1, TRUE);
4469     t2->toRules(rules2, TRUE);
4470     if (rules1 != rules2)
4471         errln("Alias transliterators aren't the same");
4472 
4473     delete t1;
4474     delete t2;
4475     Transliterator::unregister(shortID);
4476 
4477     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4478     if (U_SUCCESS(err)) {
4479         errln("Instantiation with short ID succeeded after short ID was unregistered");
4480         delete t1;
4481     }
4482 
4483     // try the same thing again, but this time with something other than
4484     // an instance of CompoundTransliterator
4485     UnicodeString realID("Latin-Greek");
4486     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4487     Transliterator::registerAlias(fakeID, realID);
4488 
4489     err = U_ZERO_ERROR;
4490     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4491     if (U_FAILURE(err)) {
4492         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4493         Transliterator::unregister(realID);
4494         return;
4495     }
4496     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4497     if (U_FAILURE(err)) {
4498         errln("Failed to instantiate transliterator with fake ID");
4499         delete t1;
4500         Transliterator::unregister(realID);
4501         return;
4502     }
4503 
4504     t1->toRules(rules1, TRUE);
4505     t2->toRules(rules2, TRUE);
4506     if (rules1 != rules2)
4507         errln("Alias transliterators aren't the same");
4508 
4509     delete t1;
4510     delete t2;
4511     Transliterator::unregister(fakeID);
4512 }
4513 
TestRuleStripping()4514 void TransliteratorTest::TestRuleStripping() {
4515     /*
4516 #
4517 \uE001>\u0C01; # SIGN
4518     */
4519     static const UChar rule[] = {
4520         0x0023,0x0020,0x000D,0x000A,
4521         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4522     };
4523     static const UChar expectedRule[] = {
4524         0xE001,0x003E,0x0C01,0x003B,0
4525     };
4526     UChar result[sizeof(rule)/sizeof(rule[0])];
4527     UErrorCode status = U_ZERO_ERROR;
4528     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4529     if (len != u_strlen(expectedRule)) {
4530         errln("utrans_stripRules return len = %d", len);
4531     }
4532     if (u_strncmp(expectedRule, result, len) != 0) {
4533         errln("utrans_stripRules did not return expected string");
4534     }
4535 }
4536 
4537 /**
4538  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4539  */
TestHalfwidthFullwidth(void)4540 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4541     UParseError parseError;
4542     UErrorCode status = U_ZERO_ERROR;
4543     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4544     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4545     if (hf == 0 || fh == 0) {
4546         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4547         delete hf;
4548         delete fh;
4549         return;
4550     }
4551 
4552     // Array of 2n items
4553     // Each item is
4554     //   "hf"|"fh"|"both",
4555     //   <Halfwidth>,
4556     //   <Fullwidth>
4557     const char* DATA[] = {
4558         "both",
4559         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4560         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4561     };
4562     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4563 
4564     for (int32_t i=0; i<DATA_length; i+=3) {
4565         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4566         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4567         switch (*DATA[i]) {
4568         case 0x68: //'h': // Halfwidth-Fullwidth only
4569             expect(*hf, h, f);
4570             break;
4571         case 0x66: //'f': // Fullwidth-Halfwidth only
4572             expect(*fh, f, h);
4573             break;
4574         case 0x62: //'b': // both directions
4575             expect(*hf, h, f);
4576             expect(*fh, f, h);
4577             break;
4578         }
4579     }
4580     delete hf;
4581     delete fh;
4582 }
4583 
4584 
4585     /**
4586      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4587      *              TODO: confirm that the expected results are correct.
4588      *              For now, test just confirms that C++ and Java give identical results.
4589      */
TestThai(void)4590 void TransliteratorTest::TestThai(void) {
4591 #if !UCONFIG_NO_BREAK_ITERATION
4592     UParseError parseError;
4593     UErrorCode status = U_ZERO_ERROR;
4594     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4595     if (tr == 0) {
4596         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4597         return;
4598     }
4599     if (U_FAILURE(status)) {
4600         errln("FAIL: createInstance failed with %s", u_errorName(status));
4601         return;
4602     }
4603     const char *thaiText =
4604         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4605         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4606         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4607         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4608         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4609         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4610         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4611         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4612         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4613         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4614         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4615         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4616         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4617         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4618         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4619         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4620         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4621         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4622         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4623         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4624         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4625         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4626         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4627         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4628         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4629         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4630         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4631         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4632         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4633         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4634 
4635     const char *latinText =
4636         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4637         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4638         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4639         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4640         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4641         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4642         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4643         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4644         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4645         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4646         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4647         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4648         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4649         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4650         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4651         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4652         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4653         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4654 
4655 
4656     UnicodeString  xlitText(thaiText);
4657     xlitText = xlitText.unescape();
4658     tr->transliterate(xlitText);
4659 
4660     UnicodeString expectedText(latinText);
4661     expectedText = expectedText.unescape();
4662     expect(*tr, xlitText, expectedText);
4663 
4664     delete tr;
4665 #endif
4666 }
4667 
4668 
4669 //======================================================================
4670 // Support methods
4671 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4672 void TransliteratorTest::expectT(const UnicodeString& id,
4673                                  const UnicodeString& source,
4674                                  const UnicodeString& expectedResult) {
4675     UErrorCode ec = U_ZERO_ERROR;
4676     UParseError pe;
4677     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4678     if (U_FAILURE(ec)) {
4679         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4680         delete t;
4681         return;
4682     }
4683     expect(*t, source, expectedResult);
4684     delete t;
4685 }
4686 
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4687 void TransliteratorTest::reportParseError(const UnicodeString& message,
4688                                           const UParseError& parseError,
4689                                           const UErrorCode& status) {
4690     dataerrln(message +
4691           /*", parse error " + parseError.code +*/
4692           ", line " + parseError.line +
4693           ", offset " + parseError.offset +
4694           ", pre-context " + prettify(parseError.preContext, TRUE) +
4695           ", post-context " + prettify(parseError.postContext,TRUE) +
4696           ", Error: " + u_errorName(status));
4697 }
4698 
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4699 void TransliteratorTest::expect(const UnicodeString& rules,
4700                                 const UnicodeString& source,
4701                                 const UnicodeString& expectedResult,
4702                                 UTransPosition *pos) {
4703     expect("<ID>", rules, source, expectedResult, pos);
4704 }
4705 
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4706 void TransliteratorTest::expect(const UnicodeString& id,
4707                                 const UnicodeString& rules,
4708                                 const UnicodeString& source,
4709                                 const UnicodeString& expectedResult,
4710                                 UTransPosition *pos) {
4711     UErrorCode status = U_ZERO_ERROR;
4712     UParseError parseError;
4713     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4714     if (U_FAILURE(status)) {
4715         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4716     } else {
4717         expect(*t, source, expectedResult, pos);
4718     }
4719     delete t;
4720 }
4721 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4722 void TransliteratorTest::expect(const Transliterator& t,
4723                                 const UnicodeString& source,
4724                                 const UnicodeString& expectedResult,
4725                                 const Transliterator& reverseTransliterator) {
4726     expect(t, source, expectedResult);
4727     expect(reverseTransliterator, expectedResult, source);
4728 }
4729 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4730 void TransliteratorTest::expect(const Transliterator& t,
4731                                 const UnicodeString& source,
4732                                 const UnicodeString& expectedResult,
4733                                 UTransPosition *pos) {
4734     if (pos == 0) {
4735         UnicodeString result(source);
4736         t.transliterate(result);
4737         expectAux(t.getID() + ":String", source, result, expectedResult);
4738     }
4739     UTransPosition index={0, 0, 0, 0};
4740     if (pos != 0) {
4741         index = *pos;
4742     }
4743 
4744     UnicodeString rsource(source);
4745     if (pos == 0) {
4746         t.transliterate(rsource);
4747     } else {
4748         // Do it all at once -- below we do it incrementally
4749         t.finishTransliteration(rsource, *pos);
4750     }
4751     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4752 
4753     // Test keyboard (incremental) transliteration -- this result
4754     // must be the same after we finalize (see below).
4755     UnicodeString log;
4756     rsource.remove();
4757     if (pos != 0) {
4758         rsource = source;
4759         formatInput(log, rsource, index);
4760         log.append(" -> ");
4761         UErrorCode status = U_ZERO_ERROR;
4762         t.transliterate(rsource, index, status);
4763         formatInput(log, rsource, index);
4764     } else {
4765         for (int32_t i=0; i<source.length(); ++i) {
4766             if (i != 0) {
4767                 log.append(" + ");
4768             }
4769             log.append(source.charAt(i)).append(" -> ");
4770             UErrorCode status = U_ZERO_ERROR;
4771             t.transliterate(rsource, index, source.charAt(i), status);
4772             formatInput(log, rsource, index);
4773         }
4774     }
4775 
4776     // As a final step in keyboard transliteration, we must call
4777     // transliterate to finish off any pending partial matches that
4778     // were waiting for more input.
4779     t.finishTransliteration(rsource, index);
4780     log.append(" => ").append(rsource);
4781 
4782     expectAux(t.getID() + ":Keyboard", log,
4783               rsource == expectedResult,
4784               expectedResult);
4785 }
4786 
4787 
4788 /**
4789  * @param appendTo result is appended to this param.
4790  * @param input the string being transliterated
4791  * @param pos the index struct
4792  */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4793 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4794                                                const UnicodeString& input,
4795                                                const UTransPosition& pos) {
4796     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4797     // the {} indicate the context start and limit, and the ||
4798     // indicate the start and limit.
4799     if (0 <= pos.contextStart &&
4800         pos.contextStart <= pos.start &&
4801         pos.start <= pos.limit &&
4802         pos.limit <= pos.contextLimit &&
4803         pos.contextLimit <= input.length()) {
4804 
4805         UnicodeString a, b, c, d, e;
4806         input.extractBetween(0, pos.contextStart, a);
4807         input.extractBetween(pos.contextStart, pos.start, b);
4808         input.extractBetween(pos.start, pos.limit, c);
4809         input.extractBetween(pos.limit, pos.contextLimit, d);
4810         input.extractBetween(pos.contextLimit, input.length(), e);
4811         appendTo.append(a).append((UChar)123/*{*/).append(b).
4812             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4813             append((UChar)125/*}*/).append(e);
4814     } else {
4815         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4816                         pos.contextStart + ", s=" + pos.start + ", l=" +
4817                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4818                         input);
4819     }
4820     return appendTo;
4821 }
4822 
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4823 void TransliteratorTest::expectAux(const UnicodeString& tag,
4824                                    const UnicodeString& source,
4825                                    const UnicodeString& result,
4826                                    const UnicodeString& expectedResult) {
4827     expectAux(tag, source + " -> " + result,
4828               result == expectedResult,
4829               expectedResult);
4830 }
4831 
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4832 void TransliteratorTest::expectAux(const UnicodeString& tag,
4833                                    const UnicodeString& summary, UBool pass,
4834                                    const UnicodeString& expectedResult) {
4835     if (pass) {
4836         logln(UnicodeString("(")+tag+") " + prettify(summary));
4837     } else {
4838         dataerrln(UnicodeString("FAIL: (")+tag+") "
4839               + prettify(summary)
4840               + ", expected " + prettify(expectedResult));
4841     }
4842 }
4843 
4844 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4845