• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1999-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/10/99    aliu        Creation.
10 **********************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "transtst.h"
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
29 #include "cpdtrans.h"
30 #include "nultrans.h"
31 #include "rbt.h"
32 #include "rbt_pars.h"
33 #include "anytrans.h"
34 #include "esctrn.h"
35 #include "name2uni.h"
36 #include "nortrans.h"
37 #include "remtrans.h"
38 #include "titletrn.h"
39 #include "tolowtrn.h"
40 #include "toupptrn.h"
41 #include "unesctrn.h"
42 #include "uni2name.h"
43 #include "cstring.h"
44 #include "cmemory.h"
45 #include <stdio.h>
46 
47 /***********************************************************************
48 
49                      HOW TO USE THIS TEST FILE
50                                -or-
51                   How I developed on two platforms
52                 without losing (too much of) my mind
53 
54 
55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
56    any public void method named Test...() taking no parameters becomes
57    a test.  On C++, you need to modify the header and add a line to
58    the runIndexedTest() dispatch method.
59 
60 2. Make liberal use of the expect() method; it is your friend.
61 
62 3. The tests in this file exactly match those in a sister file on the
63    other side.  The two files are:
64 
65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
66    icu4c:  source/test/intltest/transtst.cpp
67 
68                   ==> THIS IS THE IMPORTANT PART <==
69 
70    When you add a test in this file, add it in TransliteratorTest.java
71    too.  Give it the same name and put it in the same relative place.
72    This makes maintenance a lot simpler for any poor soul who ends up
73    trying to synchronize the tests between icu4j and icu4c.
74 
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76    then add it in the special non-mirrored section.  These are
77    labeled
78 
79      "icu4j ONLY"
80 
81    or
82 
83      "icu4c ONLY"
84 
85    Make sure you document the reason the test is here and not there.
86 
87 
88 Thank you.
89 The Management
90 ***********************************************************************/
91 
92 // Define character constants thusly to be EBCDIC-friendly
93 enum {
94     LEFT_BRACE=((UChar)0x007B), /*{*/
95     PIPE      =((UChar)0x007C), /*|*/
96     ZERO      =((UChar)0x0030), /*0*/
97     UPPER_A   =((UChar)0x0041)  /*A*/
98 };
99 
TransliteratorTest()100 TransliteratorTest::TransliteratorTest()
101 :   DESERET_DEE((UChar32)0x10414),
102     DESERET_dee((UChar32)0x1043C)
103 {
104 }
105 
// Destructor; the body is empty.
TransliteratorTest::~TransliteratorTest() {}
107 
108 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
110                                    const char* &name, char* /*par*/) {
111     switch (index) {
112         TESTCASE(0,TestInstantiation);
113         TESTCASE(1,TestSimpleRules);
114         TESTCASE(2,TestRuleBasedInverse);
115         TESTCASE(3,TestKeyboard);
116         TESTCASE(4,TestKeyboard2);
117         TESTCASE(5,TestKeyboard3);
118         TESTCASE(6,TestArabic);
119         TESTCASE(7,TestCompoundKana);
120         TESTCASE(8,TestCompoundHex);
121         TESTCASE(9,TestFiltering);
122         TESTCASE(10,TestInlineSet);
123         TESTCASE(11,TestPatternQuoting);
124         TESTCASE(12,TestJ277);
125         TESTCASE(13,TestJ243);
126         TESTCASE(14,TestJ329);
127         TESTCASE(15,TestSegments);
128         TESTCASE(16,TestCursorOffset);
129         TESTCASE(17,TestArbitraryVariableValues);
130         TESTCASE(18,TestPositionHandling);
131         TESTCASE(19,TestHiraganaKatakana);
132         TESTCASE(20,TestCopyJ476);
133         TESTCASE(21,TestAnchors);
134         TESTCASE(22,TestInterIndic);
135         TESTCASE(23,TestFilterIDs);
136         TESTCASE(24,TestCaseMap);
137         TESTCASE(25,TestNameMap);
138         TESTCASE(26,TestLiberalizedID);
139         TESTCASE(27,TestCreateInstance);
140         TESTCASE(28,TestNormalizationTransliterator);
141         TESTCASE(29,TestCompoundRBT);
142         TESTCASE(30,TestCompoundFilter);
143         TESTCASE(31,TestRemove);
144         TESTCASE(32,TestToRules);
145         TESTCASE(33,TestContext);
146         TESTCASE(34,TestSupplemental);
147         TESTCASE(35,TestQuantifier);
148         TESTCASE(36,TestSTV);
149         TESTCASE(37,TestCompoundInverse);
150         TESTCASE(38,TestNFDChainRBT);
151         TESTCASE(39,TestNullInverse);
152         TESTCASE(40,TestAliasInverseID);
153         TESTCASE(41,TestCompoundInverseID);
154         TESTCASE(42,TestUndefinedVariable);
155         TESTCASE(43,TestEmptyContext);
156         TESTCASE(44,TestCompoundFilterID);
157         TESTCASE(45,TestPropertySet);
158         TESTCASE(46,TestNewEngine);
159         TESTCASE(47,TestQuantifiedSegment);
160         TESTCASE(48,TestDevanagariLatinRT);
161         TESTCASE(49,TestTeluguLatinRT);
162         TESTCASE(50,TestCompoundLatinRT);
163         TESTCASE(51,TestSanskritLatinRT);
164         TESTCASE(52,TestLocaleInstantiation);
165         TESTCASE(53,TestTitleAccents);
166         TESTCASE(54,TestLocaleResource);
167         TESTCASE(55,TestParseError);
168         TESTCASE(56,TestOutputSet);
169         TESTCASE(57,TestVariableRange);
170         TESTCASE(58,TestInvalidPostContext);
171         TESTCASE(59,TestIDForms);
172         TESTCASE(60,TestToRulesMark);
173         TESTCASE(61,TestEscape);
174         TESTCASE(62,TestAnchorMasking);
175         TESTCASE(63,TestDisplayName);
176         TESTCASE(64,TestSpecialCases);
177 #if !UCONFIG_NO_FILE_IO
178         TESTCASE(65,TestIncrementalProgress);
179 #endif
180         TESTCASE(66,TestSurrogateCasing);
181         TESTCASE(67,TestFunction);
182         TESTCASE(68,TestInvalidBackRef);
183         TESTCASE(69,TestMulticharStringSet);
184         TESTCASE(70,TestUserFunction);
185         TESTCASE(71,TestAnyX);
186         TESTCASE(72,TestSourceTargetSet);
187         TESTCASE(73,TestGurmukhiDevanagari);
188         TESTCASE(74,TestPatternWhiteSpace);
189         TESTCASE(75,TestAllCodepoints);
190         TESTCASE(76,TestBoilerplate);
191         TESTCASE(77,TestAlternateSyntax);
192         TESTCASE(78,TestBeginEnd);
193         TESTCASE(79,TestBeginEndToRules);
194         TESTCASE(80,TestRegisterAlias);
195         TESTCASE(81,TestRuleStripping);
196         TESTCASE(82,TestHalfwidthFullwidth);
197         TESTCASE(83,TestThai);
198         TESTCASE(84,TestAny);
199         default: name = ""; break;
200     }
201 }
202 
203 /**
204  * Make sure every system transliterator can be instantiated.
205  *
206  * ALSO test that the result of toRules() for each rule is a valid
207  * rule.  Do this here so we don't have to have another test that
208  * instantiates everything as well.
209  */
TestInstantiation()210 void TransliteratorTest::TestInstantiation() {
211     UErrorCode ec = U_ZERO_ERROR;
212     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
213     assertSuccess("getAvailableIDs()", ec);
214     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
215     int32_t n = Transliterator::countAvailableIDs();
216     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217                avail->count(ec) == n);
218     assertSuccess("count()", ec);
219     UnicodeString name;
220     for (int32_t i=0; i<n; ++i) {
221         const UnicodeString& id = *avail->snext(ec);
222         if (!assertSuccess("snext()", ec) ||
223             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
224             break;
225         }
226         UnicodeString id2 = Transliterator::getAvailableID(i);
227         if (id.length() < 1) {
228             errln(UnicodeString("FAIL: getAvailableID(") +
229                   i + ") returned empty string");
230             continue;
231         }
232         if (id != id2) {
233             errln(UnicodeString("FAIL: getAvailableID(") +
234                   i + ") != getAvailableIDs().snext()");
235             continue;
236         }
237         UParseError parseError;
238         UErrorCode status = U_ZERO_ERROR;
239         Transliterator* t = Transliterator::createInstance(id,
240                               UTRANS_FORWARD, parseError,status);
241         name.truncate(0);
242         Transliterator::getDisplayName(id, name);
243         if (t == 0) {
244 #if UCONFIG_NO_BREAK_ITERATION
245             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246             if (id.compare((UnicodeString)"Thai-Latin") != 0)
247 #endif
248                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
249                       /*", parse error " + parseError.code +*/
250                       ", line " + parseError.line +
251                       ", offset " + parseError.offset +
252                       ", pre-context " + prettify(parseError.preContext, TRUE) +
253                       ", post-context " +prettify(parseError.postContext,TRUE) +
254                       ", Error: " + u_errorName(status));
255                 // When createInstance fails, it deletes the failing
256                 // entry from the available ID list.  We detect this
257                 // here by looking for a change in countAvailableIDs.
258             int32_t nn = Transliterator::countAvailableIDs();
259             if (nn == (n - 1)) {
260                 n = nn;
261                 --i; // Compensate for deleted entry
262             }
263         } else {
264             logln(UnicodeString("OK: ") + name + " (" + id + ")");
265 
266             // Now test toRules
267             UnicodeString rules;
268             t->toRules(rules, TRUE);
269             Transliterator *u = Transliterator::createFromRules("x",
270                                     rules, UTRANS_FORWARD, parseError,status);
271             if (u == 0) {
272                 errln(UnicodeString("FAIL: ") + id +
273                       ".createFromRules() => bad rules" +
274                       /*", parse error " + parseError.code +*/
275                       ", line " + parseError.line +
276                       ", offset " + parseError.offset +
277                       ", context " + prettify(parseError.preContext, TRUE) +
278                       ", rules: " + prettify(rules, TRUE));
279             } else {
280                 delete u;
281             }
282             delete t;
283         }
284     }
285     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
286     assertSuccess("snext()", ec);
287     delete avail;
288 
289     // Now test the failure path
290     UParseError parseError;
291     UErrorCode status = U_ZERO_ERROR;
292     UnicodeString id("<Not a valid Transliterator ID>");
293     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
294     if (t != 0) {
295         errln("FAIL: " + id + " returned a transliterator");
296         delete t;
297     } else {
298         logln("OK: Bogus ID handled properly");
299     }
300 }
301 
TestSimpleRules(void)302 void TransliteratorTest::TestSimpleRules(void) {
303     /* Example: rules 1. ab>x|y
304      *                2. yc>z
305      *
306      * []|eabcd  start - no match, copy e to tranlated buffer
307      * [e]|abcd  match rule 1 - copy output & adjust cursor
308      * [ex|y]cd  match rule 2 - copy output & adjust cursor
309      * [exz]|d   no match, copy d to transliterated buffer
310      * [exzd]|   done
311      */
312     expect(UnicodeString("ab>x|y;", "") +
313            "yc>z",
314            "eabcd", "exzd");
315 
316     /* Another set of rules:
317      *    1. ab>x|yzacw
318      *    2. za>q
319      *    3. qc>r
320      *    4. cw>n
321      *
322      * []|ab       Rule 1
323      * [x|yzacw]   No match
324      * [xy|zacw]   Rule 2
325      * [xyq|cw]    Rule 4
326      * [xyqn]|     Done
327      */
328     expect(UnicodeString("ab>x|yzacw;") +
329            "za>q;" +
330            "qc>r;" +
331            "cw>n",
332            "ab", "xyqn");
333 
334     /* Test categories
335      */
336     UErrorCode status = U_ZERO_ERROR;
337     UParseError parseError;
338     Transliterator *t = Transliterator::createFromRules(
339         "<ID>",
340         UnicodeString("$dummy=").append((UChar)0xE100) +
341         UnicodeString(";"
342                       "$vowel=[aeiouAEIOU];"
343                       "$lu=[:Lu:];"
344                       "$vowel } $lu > '!';"
345                       "$vowel > '&';"
346                       "'!' { $lu > '^';"
347                       "$lu > '*';"
348                       "a > ERROR", ""),
349         UTRANS_FORWARD, parseError,
350         status);
351     if (U_FAILURE(status)) {
352         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
353         return;
354     }
355     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
356     delete t;
357 }
358 
359 /**
360  * Test inline set syntax and set variable syntax.
361  */
TestInlineSet(void)362 void TransliteratorTest::TestInlineSet(void) {
363     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
364     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
365 
366     expect(UnicodeString(
367            "$digit = [0-9];"
368            "$alpha = [a-zA-Z];"
369            "$alphanumeric = [$digit $alpha];" // ***
370            "$special = [^$alphanumeric];"     // ***
371            "$alphanumeric > '-';"
372            "$special > '*';", ""),
373 
374            "thx-1138", "---*----");
375 }
376 
377 /**
378  * Create some inverses and confirm that they work.  We have to be
379  * careful how we do this, since the inverses will not be true
380  * inverses -- we can't throw any random string at the composition
381  * of the transliterators and expect the identity function.  F x
382  * F' != I.  However, if we are careful about the input, we will
383  * get the expected results.
384  */
TestRuleBasedInverse(void)385 void TransliteratorTest::TestRuleBasedInverse(void) {
386     UnicodeString RULES =
387         UnicodeString("abc>zyx;") +
388         "ab>yz;" +
389         "bc>zx;" +
390         "ca>xy;" +
391         "a>x;" +
392         "b>y;" +
393         "c>z;" +
394 
395         "abc<zyx;" +
396         "ab<yz;" +
397         "bc<zx;" +
398         "ca<xy;" +
399         "a<x;" +
400         "b<y;" +
401         "c<z;" +
402 
403         "";
404 
405     const char* DATA[] = {
406         // Careful here -- random strings will not work.  If we keep
407         // the left side to the domain and the right side to the range
408         // we will be okay though (left, abc; right xyz).
409         "a", "x",
410         "abcacab", "zyxxxyy",
411         "caccb", "xyzzy",
412     };
413 
414     int32_t DATA_length = UPRV_LENGTHOF(DATA);
415 
416     UErrorCode status = U_ZERO_ERROR;
417     UParseError parseError;
418     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
419                                 UTRANS_FORWARD, parseError, status);
420     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
421                                 UTRANS_REVERSE, parseError, status);
422     if (U_FAILURE(status)) {
423         errln("FAIL: RBT constructor failed");
424         return;
425     }
426     for (int32_t i=0; i<DATA_length; i+=2) {
427         expect(*fwd, DATA[i], DATA[i+1]);
428         expect(*rev, DATA[i+1], DATA[i]);
429     }
430     delete fwd;
431     delete rev;
432 }
433 
434 /**
435  * Basic test of keyboard.
436  */
TestKeyboard(void)437 void TransliteratorTest::TestKeyboard(void) {
438     UParseError parseError;
439     UErrorCode status = U_ZERO_ERROR;
440     Transliterator *t = Transliterator::createFromRules("<ID>",
441                               UnicodeString("psch>Y;")
442                               +"ps>y;"
443                               +"ch>x;"
444                               +"a>A;",
445                               UTRANS_FORWARD, parseError,
446                               status);
447     if (U_FAILURE(status)) {
448         errln("FAIL: RBT constructor failed");
449         return;
450     }
451     const char* DATA[] = {
452         // insertion, buffer
453         "a", "A",
454         "p", "Ap",
455         "s", "Aps",
456         "c", "Apsc",
457         "a", "AycA",
458         "psch", "AycAY",
459         0, "AycAY", // null means finishKeyboardTransliteration
460     };
461 
462     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
463     delete t;
464 }
465 
466 /**
467  * Basic test of keyboard with cursor.
468  */
TestKeyboard2(void)469 void TransliteratorTest::TestKeyboard2(void) {
470     UParseError parseError;
471     UErrorCode status = U_ZERO_ERROR;
472     Transliterator *t = Transliterator::createFromRules("<ID>",
473                               UnicodeString("ych>Y;")
474                               +"ps>|y;"
475                               +"ch>x;"
476                               +"a>A;",
477                               UTRANS_FORWARD, parseError,
478                               status);
479     if (U_FAILURE(status)) {
480         errln("FAIL: RBT constructor failed");
481         return;
482     }
483     const char* DATA[] = {
484         // insertion, buffer
485         "a", "A",
486         "p", "Ap",
487         "s", "Aps", // modified for rollback - "Ay",
488         "c", "Apsc", // modified for rollback - "Ayc",
489         "a", "AycA",
490         "p", "AycAp",
491         "s", "AycAps", // modified for rollback - "AycAy",
492         "c", "AycApsc", // modified for rollback - "AycAyc",
493         "h", "AycAY",
494         0, "AycAY", // null means finishKeyboardTransliteration
495     };
496 
497     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
498     delete t;
499 }
500 
501 /**
502  * Test keyboard transliteration with back-replacement.
503  */
TestKeyboard3(void)504 void TransliteratorTest::TestKeyboard3(void) {
505     // We want th>z but t>y.  Furthermore, during keyboard
506     // transliteration we want t>y then yh>z if t, then h are
507     // typed.
508     UnicodeString RULES("t>|y;"
509                         "yh>z;");
510 
511     const char* DATA[] = {
512         // Column 1: characters to add to buffer (as if typed)
513         // Column 2: expected appearance of buffer after
514         //           keyboard xliteration.
515         "a", "a",
516         "b", "ab",
517         "t", "abt", // modified for rollback - "aby",
518         "c", "abyc",
519         "t", "abyct", // modified for rollback - "abycy",
520         "h", "abycz",
521         0, "abycz", // null means finishKeyboardTransliteration
522     };
523 
524     UParseError parseError;
525     UErrorCode status = U_ZERO_ERROR;
526     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
527     if (U_FAILURE(status)) {
528         errln("FAIL: RBT constructor failed");
529         return;
530     }
531     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
532     delete t;
533 }
534 
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)535 void TransliteratorTest::keyboardAux(const Transliterator& t,
536                                      const char* DATA[], int32_t DATA_length) {
537     UErrorCode status = U_ZERO_ERROR;
538     UTransPosition index={0, 0, 0, 0};
539     UnicodeString s;
540     for (int32_t i=0; i<DATA_length; i+=2) {
541         UnicodeString log;
542         if (DATA[i] != 0) {
543             log = s + " + "
544                 + DATA[i]
545                 + " -> ";
546             t.transliterate(s, index, DATA[i], status);
547         } else {
548             log = s + " => ";
549             t.finishTransliteration(s, index);
550         }
551         // Show the start index '{' and the cursor '|'
552         UnicodeString a, b, c;
553         s.extractBetween(0, index.contextStart, a);
554         s.extractBetween(index.contextStart, index.start, b);
555         s.extractBetween(index.start, s.length(), c);
556         log.append(a).
557             append((UChar)LEFT_BRACE).
558             append(b).
559             append((UChar)PIPE).
560             append(c);
561         if (s == DATA[i+1] && U_SUCCESS(status)) {
562             logln(log);
563         } else {
564             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
565         }
566     }
567 }
568 
/**
 * Placeholder for the Latin-Arabic test.  The whole body is commented
 * out, so this function currently does nothing; the old code is kept
 * below for reference.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
601 
602 /**
603  * Compose the Kana transliterator forward and reverse and try
604  * some strings that should come out unchanged.
605  */
TestCompoundKana(void)606 void TransliteratorTest::TestCompoundKana(void) {
607     UParseError parseError;
608     UErrorCode status = U_ZERO_ERROR;
609     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
610     if (t == 0) {
611         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
612     } else {
613         expect(*t, "aaaaa", "aaaaa");
614         delete t;
615     }
616 }
617 
618 /**
619  * Compose the hex transliterators forward and reverse.
620  */
TestCompoundHex(void)621 void TransliteratorTest::TestCompoundHex(void) {
622     UParseError parseError;
623     UErrorCode status = U_ZERO_ERROR;
624     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
625     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
626     Transliterator* transab[] = { a, b };
627     Transliterator* transba[] = { b, a };
628     if (a == 0 || b == 0) {
629         errln("FAIL: construction failed");
630         delete a;
631         delete b;
632         return;
633     }
634     // Do some basic tests of a
635     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
636     // Do some basic tests of b
637     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
638 
639     Transliterator* ab = new CompoundTransliterator(transab, 2);
640     UnicodeString s("abcde", "");
641     expect(*ab, s, s);
642 
643     UnicodeString str(s);
644     a->transliterate(str);
645     Transliterator* ba = new CompoundTransliterator(transba, 2);
646     expect(*ba, str, str);
647 
648     delete ab;
649     delete ba;
650     delete a;
651     delete b;
652 }
653 
654 int gTestFilterClassID = 0;
655 /**
656  * Used by TestFiltering().
657  */
658 class TestFilter : public UnicodeFilter {
clone() const659     virtual UnicodeFunctor* clone() const {
660         return new TestFilter(*this);
661     }
contains(UChar32 c) const662     virtual UBool contains(UChar32 c) const {
663         return c != (UChar)0x0063 /*c*/;
664     }
665     // Stubs
toPattern(UnicodeString & result,UBool) const666     virtual UnicodeString& toPattern(UnicodeString& result,
667                                      UBool /*escapeUnprintable*/) const {
668         return result;
669     }
matchesIndexValue(uint8_t) const670     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
671         return FALSE;
672     }
addMatchSetTo(UnicodeSet &) const673     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
674 public:
getDynamicClassID() const675     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
676 };
677 
678 /**
679  * Do some basic tests of filtering.
680  */
TestFiltering(void)681 void TransliteratorTest::TestFiltering(void) {
682     UParseError parseError;
683     UErrorCode status = U_ZERO_ERROR;
684     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
685     if (hex == 0) {
686         errln("FAIL: createInstance(Any-Hex) failed");
687         return;
688     }
689     hex->adoptFilter(new TestFilter());
690     UnicodeString s("abcde");
691     hex->transliterate(s);
692     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
693     if (s == exp) {
694         logln(UnicodeString("Ok:   \"") + exp + "\"");
695     } else {
696         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
697     }
698 
699     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
700     UnicodeFilter *f = hex->orphanFilter();
701     if (f == NULL){
702         errln("FAIL: orphanFilter() should get a UnicodeFilter");
703     } else {
704         delete f;
705     }
706     delete hex;
707 }
708 
709 /**
710  * Test anchors
711  */
TestAnchors(void)712 void TransliteratorTest::TestAnchors(void) {
713     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
714            "aaa",
715            "012");
716     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
717            "aaa",
718            "012");
719     expect(UnicodeString("^ab  > 01 ;"
720            " ab  > |8 ;"
721            "  b  > k ;"
722            " 8x$ > 45 ;"
723            " 8x  > 77 ;", ""),
724 
725            "ababbabxabx",
726            "018k7745");
727     expect(UnicodeString("$s = [z$] ;"
728            "$s{ab    > 01 ;"
729            "   ab    > |8 ;"
730            "    b    > k ;"
731            "   8x}$s > 45 ;"
732            "   8x    > 77 ;", ""),
733 
734            "abzababbabxzabxabx",
735            "01z018k45z01x45");
736 }
737 
738 /**
739  * Test pattern quoting and escape mechanisms.
740  */
TestPatternQuoting(void)741 void TransliteratorTest::TestPatternQuoting(void) {
742     // Array of 3n items
743     // Each item is <rules>, <input>, <expected output>
744     const UnicodeString DATA[] = {
745         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
746         UnicodeString(UChar(0x4E01)),
747         "[male adult]"
748     };
749 
750     for (int32_t i=0; i<3; i+=3) {
751         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
752         UParseError parseError;
753         UErrorCode status = U_ZERO_ERROR;
754         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
755         if (U_FAILURE(status)) {
756             errln("RBT constructor failed");
757         } else {
758             expect(*t, DATA[i+1], DATA[i+2]);
759         }
760         delete t;
761     }
762 }
763 
764 /**
765  * Regression test for bugs found in Greek transliteration.
766  */
TestJ277(void)767 void TransliteratorTest::TestJ277(void) {
768     UErrorCode status = U_ZERO_ERROR;
769     UParseError parseError;
770     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
771     if (gl == NULL) {
772         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
773         return;
774     }
775 
776     UChar sigma = 0x3C3;
777     UChar upsilon = 0x3C5;
778     UChar nu = 0x3BD;
779 //    UChar PHI = 0x3A6;
780     UChar alpha = 0x3B1;
781 //    UChar omega = 0x3C9;
782 //    UChar omicron = 0x3BF;
783 //    UChar epsilon = 0x3B5;
784 
785     // sigma upsilon nu -> syn
786     UnicodeString syn;
787     syn.append(sigma).append(upsilon).append(nu);
788     expect(*gl, syn, "syn");
789 
790     // sigma alpha upsilon nu -> saun
791     UnicodeString sayn;
792     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
793     expect(*gl, sayn, "saun");
794 
795     // Again, using a smaller rule set
796     UnicodeString rules(
797                 "$alpha   = \\u03B1;"
798                 "$nu      = \\u03BD;"
799                 "$sigma   = \\u03C3;"
800                 "$ypsilon = \\u03C5;"
801                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
802                 "s <>           $sigma;"
803                 "a <>           $alpha;"
804                 "u <>  $vowel { $ypsilon;"
805                 "y <>           $ypsilon;"
806                 "n <>           $nu;",
807                 "");
808     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
809     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
810     expect(*mini, syn, "syn");
811     expect(*mini, sayn, "saun");
812     delete mini;
813     mini = NULL;
814 
815 #if !UCONFIG_NO_FORMATTING
816     // Transliterate the Greek locale data
817     Locale el("el");
818     DateFormatSymbols syms(el, status);
819     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
820     int32_t i, count;
821     const UnicodeString* data = syms.getMonths(count);
822     for (i=0; i<count; ++i) {
823         if (data[i].length() == 0) {
824             continue;
825         }
826         UnicodeString out(data[i]);
827         gl->transliterate(out);
828         UBool ok = TRUE;
829         if (data[i].length() >= 2 && out.length() >= 2 &&
830             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
831             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
832                 ok = FALSE;
833             }
834         }
835         if (ok) {
836             logln(prettify(data[i] + " -> " + out));
837         } else {
838             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
839         }
840     }
841 #endif
842 
843     delete gl;
844 }
845 
846 /**
847  * Prefix, suffix support in hex transliterators
848  */
TestJ243(void)849 void TransliteratorTest::TestJ243(void) {
850     UErrorCode ec = U_ZERO_ERROR;
851 
852     // Test default Hex-Any, which should handle
853     // \u, \U, u+, and U+
854     Transliterator *hex =
855         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
856     if (assertSuccess("getInstance", ec)) {
857         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
858     }
859     delete hex;
860 
861 //    // Try a custom Hex-Unicode
862 //    // \uXXXX and &#xXXXX;
863 //    ec = U_ZERO_ERROR;
864 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
865 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
866 //           "abcd5fx012&#x00033;");
867 //    // Try custom Any-Hex (default is tested elsewhere)
868 //    ec = U_ZERO_ERROR;
869 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
870 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
871 }
872 
873 /**
874  * Parsers need better syntax error messages.
875  */
TestJ329(void)876 void TransliteratorTest::TestJ329(void) {
877 
878     struct { UBool containsErrors; const char* rule; } DATA[] = {
879         { FALSE, "a > b; c > d" },
880         { TRUE,  "a > b; no operator; c > d" },
881     };
882     int32_t DATA_length = UPRV_LENGTHOF(DATA);
883 
884     for (int32_t i=0; i<DATA_length; ++i) {
885         UErrorCode status = U_ZERO_ERROR;
886         UParseError parseError;
887         Transliterator *rbt = Transliterator::createFromRules("<ID>",
888                                     DATA[i].rule,
889                                     UTRANS_FORWARD,
890                                     parseError,
891                                     status);
892         UBool gotError = U_FAILURE(status);
893         UnicodeString desc(DATA[i].rule);
894         desc.append(gotError ? " -> error" : " -> no error");
895         if (gotError) {
896             desc = desc + ", ParseError code=" + u_errorName(status) +
897                 " line=" + parseError.line +
898                 " offset=" + parseError.offset +
899                 " context=" + parseError.preContext;
900         }
901         if (gotError == DATA[i].containsErrors) {
902             logln(UnicodeString("Ok:   ") + desc);
903         } else {
904             errln(UnicodeString("FAIL: ") + desc);
905         }
906         delete rbt;
907     }
908 }
909 
910 /**
911  * Test segments and segment references.
912  */
TestSegments(void)913 void TransliteratorTest::TestSegments(void) {
914     // Array of 3n items
915     // Each item is <rules>, <input>, <expected output>
916     UnicodeString DATA[] = {
917         "([a-z]) '.' ([0-9]) > $2 '-' $1",
918         "abc.123.xyz.456",
919         "ab1-c23.xy4-z56",
920 
921         // nested
922         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
923         "a1 b2",
924         "a1.a.1 b2.b.2",
925     };
926     int32_t DATA_length = UPRV_LENGTHOF(DATA);
927 
928     for (int32_t i=0; i<DATA_length; i+=3) {
929         logln("Pattern: " + prettify(DATA[i]));
930         UParseError parseError;
931         UErrorCode status = U_ZERO_ERROR;
932         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
933         if (U_FAILURE(status)) {
934             errln("FAIL: RBT constructor");
935         } else {
936             expect(*t, DATA[i+1], DATA[i+2]);
937         }
938         delete t;
939     }
940 }
941 
942 /**
943  * Test cursor positioning outside of the key
944  */
TestCursorOffset(void)945 void TransliteratorTest::TestCursorOffset(void) {
946     // Array of 3n items
947     // Each item is <rules>, <input>, <expected output>
948     UnicodeString DATA[] = {
949         "pre {alpha} post > | @ ALPHA ;"
950         "eALPHA > beta ;"
951         "pre {beta} post > BETA @@ | ;"
952         "post > xyz",
953 
954         "prealphapost prebetapost",
955 
956         "prbetaxyz preBETApost",
957     };
958     int32_t DATA_length = UPRV_LENGTHOF(DATA);
959 
960     for (int32_t i=0; i<DATA_length; i+=3) {
961         logln("Pattern: " + prettify(DATA[i]));
962         UParseError parseError;
963         UErrorCode status = U_ZERO_ERROR;
964         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
965         if (U_FAILURE(status)) {
966             errln("FAIL: RBT constructor");
967         } else {
968             expect(*t, DATA[i+1], DATA[i+2]);
969         }
970         delete t;
971     }
972 }
973 
974 /**
975  * Test zero length and > 1 char length variable values.  Test
976  * use of variable refs in UnicodeSets.
977  */
TestArbitraryVariableValues(void)978 void TransliteratorTest::TestArbitraryVariableValues(void) {
979     // Array of 3n items
980     // Each item is <rules>, <input>, <expected output>
981     UnicodeString DATA[] = {
982         "$abe = ab;"
983         "$pat = x[yY]z;"
984         "$ll  = 'a-z';"
985         "$llZ = [$ll];"
986         "$llY = [$ll$pat];"
987         "$emp = ;"
988 
989         "$abe > ABE;"
990         "$pat > END;"
991         "$llZ > 1;"
992         "$llY > 2;"
993         "7$emp 8 > 9;"
994         "",
995 
996         "ab xYzxyz stY78",
997         "ABE ENDEND 1129",
998     };
999     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1000 
1001     for (int32_t i=0; i<DATA_length; i+=3) {
1002         logln("Pattern: " + prettify(DATA[i]));
1003         UParseError parseError;
1004         UErrorCode status = U_ZERO_ERROR;
1005         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1006         if (U_FAILURE(status)) {
1007             errln("FAIL: RBT constructor");
1008         } else {
1009             expect(*t, DATA[i+1], DATA[i+2]);
1010         }
1011         delete t;
1012     }
1013 }
1014 
1015 /**
1016  * Confirm that the contextStart, contextLimit, start, and limit
1017  * behave correctly. J474.
1018  */
TestPositionHandling(void)1019 void TransliteratorTest::TestPositionHandling(void) {
1020     // Array of 3n items
1021     // Each item is <rules>, <input>, <expected output>
1022     const char* DATA[] = {
1023         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1024         "xtat txtb", // pos 0,9,0,9
1025         "xTTaSS TTxUUb",
1026 
1027         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1028         "xtat txtb", // pos 2,9,3,8
1029         "xtaSS TTxUUb",
1030 
1031         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1032         "xtat txtb", // pos 3,8,3,8
1033         "xtaTT TTxTTb",
1034     };
1035 
1036     // Array of 4n positions -- these go with the DATA array
1037     // They are: contextStart, contextLimit, start, limit
1038     int32_t POS[] = {
1039         0, 9, 0, 9,
1040         2, 9, 3, 8,
1041         3, 8, 3, 8,
1042     };
1043 
1044     int32_t n = UPRV_LENGTHOF(DATA) / 3;
1045     for (int32_t i=0; i<n; i++) {
1046         UErrorCode status = U_ZERO_ERROR;
1047         UParseError parseError;
1048         Transliterator *t = Transliterator::createFromRules("<ID>",
1049                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1050         if (U_FAILURE(status)) {
1051             delete t;
1052             errln("FAIL: RBT constructor");
1053             return;
1054         }
1055         UTransPosition pos;
1056         pos.contextStart= POS[4*i];
1057         pos.contextLimit = POS[4*i+1];
1058         pos.start = POS[4*i+2];
1059         pos.limit = POS[4*i+3];
1060         UnicodeString rsource(DATA[3*i+1]);
1061         t->transliterate(rsource, pos, status);
1062         if (U_FAILURE(status)) {
1063             delete t;
1064             errln("FAIL: transliterate");
1065             return;
1066         }
1067         t->finishTransliteration(rsource, pos);
1068         expectAux(DATA[3*i],
1069                   DATA[3*i+1],
1070                   rsource,
1071                   DATA[3*i+2]);
1072         delete t;
1073     }
1074 }
1075 
1076 /**
1077  * Test the Hiragana-Katakana transliterator.
1078  */
TestHiraganaKatakana(void)1079 void TransliteratorTest::TestHiraganaKatakana(void) {
1080     UParseError parseError;
1081     UErrorCode status = U_ZERO_ERROR;
1082     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1083     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1084     if (hk == 0 || kh == 0) {
1085         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1086         delete hk;
1087         delete kh;
1088         return;
1089     }
1090 
1091     // Array of 3n items
1092     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1093     const char* DATA[] = {
1094         "both",
1095         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1096         "\\u30A2\\u30F8\\u30F2\\u30B0",
1097 
1098         "kh",
1099         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1100         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1101     };
1102     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1103 
1104     for (int32_t i=0; i<DATA_length; i+=3) {
1105         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1106         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1107         switch (*DATA[i]) {
1108         case 0x68: //'h': // Hiragana-Katakana
1109             expect(*hk, h, k);
1110             break;
1111         case 0x6B: //'k': // Katakana-Hiragana
1112             expect(*kh, k, h);
1113             break;
1114         case 0x62: //'b': // both
1115             expect(*hk, h, k);
1116             expect(*kh, k, h);
1117             break;
1118         }
1119     }
1120     delete hk;
1121     delete kh;
1122 }
1123 
1124 /**
1125  * Test cloning / copy constructor of RBT.
1126  */
TestCopyJ476(void)1127 void TransliteratorTest::TestCopyJ476(void) {
1128     // The real test here is what happens when the destructors are
1129     // called.  So we let one object get destructed, and check to
1130     // see that its copy still works.
1131     Transliterator *t2 = 0;
1132     {
1133         UParseError parseError;
1134         UErrorCode status = U_ZERO_ERROR;
1135         Transliterator *t1 = Transliterator::createFromRules("t1",
1136             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1137         if (U_FAILURE(status)) {
1138             errln("FAIL: RBT constructor");
1139             return;
1140         }
1141         t2 = t1->clone(); // Call copy constructor under the covers.
1142         expect(*t1, "abcfoofoo", "ABcbar");
1143         delete t1;
1144     }
1145     expect(*t2, "abcfoofoo", "ABcbar");
1146     delete t2;
1147 }
1148 
1149 /**
1150  * Test inter-Indic transliterators.  These are composed.
1151  * ICU4C Jitterbug 483.
1152  */
TestInterIndic(void)1153 void TransliteratorTest::TestInterIndic(void) {
1154     UnicodeString ID("Devanagari-Gujarati", "");
1155     UErrorCode status = U_ZERO_ERROR;
1156     UParseError parseError;
1157     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1158     if (dg == 0) {
1159         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1160         return;
1161     }
1162     UnicodeString id = dg->getID();
1163     if (id != ID) {
1164         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1165     }
1166     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1167     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1168     expect(*dg, dev, guj);
1169     delete dg;
1170 }
1171 
1172 /**
1173  * Test filter syntax in IDs. (J918)
1174  */
TestFilterIDs(void)1175 void TransliteratorTest::TestFilterIDs(void) {
1176     // Array of 3n strings:
1177     // <id>, <inverse id>, <input>, <expected output>
1178     const char* DATA[] = {
1179         "[aeiou]Any-Hex", // ID
1180         "[aeiou]Hex-Any", // expected inverse ID
1181         "quizzical",      // src
1182         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1183 
1184         "[aeiou]Any-Hex;[^5]Hex-Any",
1185         "[^5]Any-Hex;[aeiou]Hex-Any",
1186         "quizzical",
1187         "q\\u0075izzical",
1188 
1189         "[abc]Null",
1190         "[abc]Null",
1191         "xyz",
1192         "xyz",
1193     };
1194     enum { DATA_length = UPRV_LENGTHOF(DATA) };
1195 
1196     for (int i=0; i<DATA_length; i+=4) {
1197         UnicodeString ID(DATA[i], "");
1198         UnicodeString uID(DATA[i+1], "");
1199         UnicodeString data2(DATA[i+2], "");
1200         UnicodeString data3(DATA[i+3], "");
1201         UParseError parseError;
1202         UErrorCode status = U_ZERO_ERROR;
1203         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1204         if (t == 0) {
1205             errln("FAIL: createInstance(" + ID + ") returned NULL");
1206             return;
1207         }
1208         expect(*t, data2, data3);
1209 
1210         // Check the ID
1211         if (ID != t->getID()) {
1212             errln("FAIL: createInstance(" + ID + ").getID() => " +
1213                   t->getID());
1214         }
1215 
1216         // Check the inverse
1217         Transliterator *u = t->createInverse(status);
1218         if (u == 0) {
1219             errln("FAIL: " + ID + ".createInverse() returned NULL");
1220         } else if (u->getID() != uID) {
1221             errln("FAIL: " + ID + ".createInverse().getID() => " +
1222                   u->getID() + ", expected " + uID);
1223         }
1224 
1225         delete t;
1226         delete u;
1227     }
1228 }
1229 
1230 /**
1231  * Test the case mapping transliterators.
1232  */
TestCaseMap(void)1233 void TransliteratorTest::TestCaseMap(void) {
1234     UParseError parseError;
1235     UErrorCode status = U_ZERO_ERROR;
1236     Transliterator* toUpper =
1237         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1238     Transliterator* toLower =
1239         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240     Transliterator* toTitle =
1241         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1242     if (toUpper==0 || toLower==0 || toTitle==0) {
1243         errln("FAIL: createInstance returned NULL");
1244         delete toUpper;
1245         delete toLower;
1246         delete toTitle;
1247         return;
1248     }
1249 
1250     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1251            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1252     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1253            "the quick brown foX jumped over the lazY dogs.");
1254     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1255            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1256 
1257     delete toUpper;
1258     delete toLower;
1259     delete toTitle;
1260 }
1261 
1262 /**
1263  * Test the name mapping transliterators.
1264  */
TestNameMap(void)1265 void TransliteratorTest::TestNameMap(void) {
1266     UParseError parseError;
1267     UErrorCode status = U_ZERO_ERROR;
1268     Transliterator* uni2name =
1269         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1270     Transliterator* name2uni =
1271         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1272     if (uni2name==0 || name2uni==0) {
1273         errln("FAIL: createInstance returned NULL");
1274         delete uni2name;
1275         delete name2uni;
1276         return;
1277     }
1278 
1279     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1280     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1281            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1282     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1283            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1284 
1285     delete uni2name;
1286     delete name2uni;
1287 
1288     // round trip
1289     Transliterator* t =
1290         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1291     if (t==0) {
1292         errln("FAIL: createInstance returned NULL");
1293         delete t;
1294         return;
1295     }
1296 
1297     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1298     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1299     expect(*t, s, s);
1300     delete t;
1301 }
1302 
1303 /**
1304  * Test liberalized ID syntax.  1006c
1305  */
TestLiberalizedID(void)1306 void TransliteratorTest::TestLiberalizedID(void) {
1307     // Some test cases have an expected getID() value of NULL.  This
1308     // means I have disabled the test case for now.  This stuff is
1309     // still under development, and I haven't decided whether to make
1310     // getID() return canonical case yet.  It will all get rewritten
1311     // with the move to Source-Target/Variant IDs anyway. [aliu]
1312     const char* DATA[] = {
1313         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1314         "  Null  ", "Null", "whitespace",
1315         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1316         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1317     };
1318     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1319     UParseError parseError;
1320     UErrorCode status= U_ZERO_ERROR;
1321     for (int32_t i=0; i<DATA_length; i+=3) {
1322         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1323         if (t == 0) {
1324             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1325                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1326         } else {
1327             UnicodeString exp;
1328             if (DATA[i+1]) {
1329                 exp = UnicodeString(DATA[i+1], "");
1330             }
1331             // Don't worry about getID() if the expected char*
1332             // is NULL -- see above.
1333             if (exp.length() == 0 || exp == t->getID()) {
1334                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1335                       " create ID \"" + DATA[i] + "\" => \"" +
1336                       exp + "\"");
1337             } else {
1338                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1339                       " create ID \"" + DATA[i] + "\" => \"" +
1340                       t->getID() + "\", exp \"" + exp + "\"");
1341             }
1342             delete t;
1343         }
1344     }
1345 }
1346 
1347 /* test for Jitterbug 912 */
TestCreateInstance()1348 void TransliteratorTest::TestCreateInstance(){
1349     const char* FORWARD = "F";
1350     const char* REVERSE = "R";
1351     const char* DATA[] = {
1352         // Column 1: id
1353         // Column 2: direction
1354         // Column 3: expected ID, or "" if expect failure
1355         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1356 
1357         // JB#2689: bad compound causes crash
1358         "InvalidSource-InvalidTarget", FORWARD, "",
1359         "InvalidSource-InvalidTarget", REVERSE, "",
1360         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1361         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1362         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1363         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1364 
1365         NULL
1366     };
1367 
1368     for (int32_t i=0; DATA[i]; i+=3) {
1369         UParseError err;
1370         UErrorCode ec = U_ZERO_ERROR;
1371         UnicodeString id(DATA[i]);
1372         UTransDirection dir = (DATA[i+1]==FORWARD)?
1373             UTRANS_FORWARD:UTRANS_REVERSE;
1374         UnicodeString expID(DATA[i+2]);
1375         Transliterator* t =
1376             Transliterator::createInstance(id,dir,err,ec);
1377         UnicodeString newID;
1378         if (t) {
1379             newID = t->getID();
1380         }
1381         UBool ok = (newID == expID);
1382         if (!t) {
1383             newID = u_errorName(ec);
1384         }
1385         if (ok) {
1386             logln((UnicodeString)"Ok: createInstance(" +
1387                   id + "," + DATA[i+1] + ") => " + newID);
1388         } else {
1389             dataerrln((UnicodeString)"FAIL: createInstance(" +
1390                   id + "," + DATA[i+1] + ") => " + newID +
1391                   ", expected " + expID);
1392         }
1393         delete t;
1394     }
1395 }
1396 
1397 /**
1398  * Test the normalization transliterator.
1399  */
TestNormalizationTransliterator()1400 void TransliteratorTest::TestNormalizationTransliterator() {
1401     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1402     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1403     const char* CANON[] = {
1404         // Input               Decomposed            Composed
1405         "cat",                "cat",                "cat"               ,
1406         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1407 
1408         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1409         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1410 
1411         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1412         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1413         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1414 
1415         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1416         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1417 
1418         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1419         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1420         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1421 
1422         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1423         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1424 
1425         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1426         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1427 
1428         "Henry IV",           "Henry IV",           "Henry IV"          ,
1429         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1430 
1431         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1432         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1433         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1434         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1435         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1436 
1437         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1438         0 // end
1439     };
1440 
1441     const char* COMPAT[] = {
1442         // Input               Decomposed            Composed
1443         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1444 
1445         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1446         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1447 
1448         "Henry IV",           "Henry IV",           "Henry IV"          ,
1449         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1450 
1451         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1452         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1453 
1454         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1455         0 // end
1456     };
1457 
1458     int32_t i;
1459     UParseError parseError;
1460     UErrorCode status = U_ZERO_ERROR;
1461     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1462     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1463     if (!NFD || !NFC) {
1464         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1465         delete NFD;
1466         delete NFC;
1467         return;
1468     }
1469     for (i=0; CANON[i]; i+=3) {
1470         UnicodeString in = CharsToUnicodeString(CANON[i]);
1471         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1472         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1473         expect(*NFD, in, expd);
1474         expect(*NFC, in, expc);
1475     }
1476     delete NFD;
1477     delete NFC;
1478 
1479     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1480     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1481     if (!NFKD || !NFKC) {
1482         dataerrln("FAIL: createInstance failed");
1483         delete NFKD;
1484         delete NFKC;
1485         return;
1486     }
1487     for (i=0; COMPAT[i]; i+=3) {
1488         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1489         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1490         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1491         expect(*NFKD, in, expkd);
1492         expect(*NFKC, in, expkc);
1493     }
1494     delete NFKD;
1495     delete NFKC;
1496 
1497     UParseError pe;
1498     status = U_ZERO_ERROR;
1499     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1500                                                        UTRANS_FORWARD,
1501                                                        pe, status);
1502     if (t == 0) {
1503         errln("FAIL: createInstance failed");
1504     }
1505     expect(*t, CharsToUnicodeString("\\u010dx"),
1506            CharsToUnicodeString("c\\u030C"));
1507     delete t;
1508 }
1509 
1510 /**
1511  * Test compound RBT rules.
1512  */
TestCompoundRBT(void)1513 void TransliteratorTest::TestCompoundRBT(void) {
1514     // Careful with spacing and ';' here:  Phrase this exactly
1515     // as toRules() is going to return it.  If toRules() changes
1516     // with regard to spacing or ';', then adjust this string.
1517     UnicodeString rule("::Hex-Any;\n"
1518                        "::Any-Lower;\n"
1519                        "a > '.A.';\n"
1520                        "b > '.B.';\n"
1521                        "::[^t]Any-Upper;", "");
1522     UParseError parseError;
1523     UErrorCode status = U_ZERO_ERROR;
1524     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1525     if (t == 0) {
1526         errln("FAIL: createFromRules failed");
1527         return;
1528     }
1529     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1530            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1531     UnicodeString r;
1532     t->toRules(r, TRUE);
1533     if (r == rule) {
1534         logln((UnicodeString)"OK: toRules() => " + r);
1535     } else {
1536         errln((UnicodeString)"FAIL: toRules() => " + r +
1537               ", expected " + rule);
1538     }
1539     delete t;
1540 
1541     // Now test toRules
1542     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1543     if (t == 0) {
1544         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1545         return;
1546     }
1547     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1548     t->toRules(r, TRUE);
1549     if (r != exp) {
1550         errln((UnicodeString)"FAIL: toRules() => " + r +
1551               ", expected " + exp);
1552     } else {
1553         logln((UnicodeString)"OK: toRules() => " + r);
1554     }
1555     delete t;
1556 
1557     // Round trip the result of toRules
1558     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1559     if (t == 0) {
1560         errln("FAIL: createFromRules #2 failed");
1561         return;
1562     } else {
1563         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1564     }
1565 
1566     // Test toRules again
1567     t->toRules(r, TRUE);
1568     if (r != exp) {
1569         errln((UnicodeString)"FAIL: toRules() => " + r +
1570               ", expected " + exp);
1571     } else {
1572         logln((UnicodeString)"OK: toRules() => " + r);
1573     }
1574 
1575     delete t;
1576 
1577     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1578     // to what the regenerated ID will look like.
1579     UnicodeString id("Upper(Lower);(NFKC)", "");
1580     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1581     if (t == 0) {
1582         errln("FAIL: createInstance #2 failed");
1583         return;
1584     }
1585     if (t->getID() == id) {
1586         logln((UnicodeString)"OK: created " + id);
1587     } else {
1588         errln((UnicodeString)"FAIL: createInstance(" + id +
1589               ").getID() => " + t->getID());
1590     }
1591 
1592     Transliterator *u = t->createInverse(status);
1593     if (u == 0) {
1594         errln("FAIL: createInverse failed");
1595         delete t;
1596         return;
1597     }
1598     exp = "NFKC();Lower(Upper)";
1599     if (u->getID() == exp) {
1600         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1601               u->getID());
1602     } else {
1603         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1604               u->getID());
1605     }
1606     delete t;
1607     delete u;
1608 }
1609 
1610 /**
1611  * Compound filter semantics were orginially not implemented
1612  * correctly.  Originally, each component filter f(i) is replaced by
1613  * f'(i) = f(i) && g, where g is the filter for the compound
1614  * transliterator.
1615  *
1616  * From Mark:
1617  *
1618  * Suppose and I have a transliterator X. Internally X is
1619  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1620  *
1621  * The compound should convert all greek characters (through latin) to
1622  * cyrillic, then lowercase the result. The filter should say "don't
1623  * touch 'A' in the original". But because an intermediate result
1624  * happens to go through "A", the Greek Alpha gets hung up.
1625  */
TestCompoundFilter(void)1626 void TransliteratorTest::TestCompoundFilter(void) {
1627     UParseError parseError;
1628     UErrorCode status = U_ZERO_ERROR;
1629     Transliterator *t = Transliterator::createInstance
1630         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1631     if (t == 0) {
1632         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1633         return;
1634     }
1635     t->adoptFilter(new UnicodeSet("[^A]", status));
1636     if (U_FAILURE(status)) {
1637         errln("FAIL: UnicodeSet ct failed");
1638         delete t;
1639         return;
1640     }
1641 
1642     // Only the 'A' at index 1 should remain unchanged
1643     expect(*t,
1644            CharsToUnicodeString("BA\\u039A\\u0391"),
1645            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1646     delete t;
1647 }
1648 
TestRemove(void)1649 void TransliteratorTest::TestRemove(void) {
1650     UParseError parseError;
1651     UErrorCode status = U_ZERO_ERROR;
1652     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1653     if (t == 0) {
1654         errln("FAIL: createInstance failed");
1655         return;
1656     }
1657 
1658     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1659 
1660     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1661     // duplicating the filter
1662     Transliterator* t2 = t->clone();
1663     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1664 
1665     delete t;
1666     delete t2;
1667 }
1668 
TestToRules(void)1669 void TransliteratorTest::TestToRules(void) {
1670     const char* RBT = "rbt";
1671     const char* SET = "set";
1672     static const char* DATA[] = {
1673         RBT,
1674         "$a=\\u4E61; [$a] > A;",
1675         "[\\u4E61] > A;",
1676 
1677         RBT,
1678         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1679         "[[:Zs:][:Zl:]]{a} > A;",
1680 
1681         SET,
1682         "[[:Zs:][:Zl:]]",
1683         "[[:Zs:][:Zl:]]",
1684 
1685         SET,
1686         "[:Ps:]",
1687         "[:Ps:]",
1688 
1689         SET,
1690         "[:L:]",
1691         "[:L:]",
1692 
1693         SET,
1694         "[[:L:]-[A]]",
1695         "[[:L:]-[A]]",
1696 
1697         SET,
1698         "[~[:Lu:][:Ll:]]",
1699         "[~[:Lu:][:Ll:]]",
1700 
1701         SET,
1702         "[~[a-z]]",
1703         "[~[a-z]]",
1704 
1705         RBT,
1706         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1707         "[^[:Zs:]]{a} > A;",
1708 
1709         RBT,
1710         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1711         "[[a-z]-[:Zs:]]{a} > A;",
1712 
1713         RBT,
1714         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1715         "[[:Zs:]&[a-z]]{a} > A;",
1716 
1717         RBT,
1718         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1719         "[x[:Zs:]]{a} > A;",
1720 
1721         RBT,
1722         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1723         "$macron = \\u0304 ;"
1724         "$evowel = [aeiouyAEIOUY] ;"
1725         "$iotasub = \\u0345 ;"
1726         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1727         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1728 
1729         RBT,
1730         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732     };
1733     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1734 
1735     for (int32_t d=0; d < DATA_length; d+=3) {
1736         if (DATA[d] == RBT) {
1737             // Transliterator test
1738             UParseError parseError;
1739             UErrorCode status = U_ZERO_ERROR;
1740             Transliterator *t = Transliterator::createFromRules("ID",
1741                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1742             if (t == 0) {
1743                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1744                 return;
1745             }
1746             UnicodeString rules, escapedRules;
1747             t->toRules(rules, FALSE);
1748             t->toRules(escapedRules, TRUE);
1749             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1750             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1751             if (rules == expRules) {
1752                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1753                       " => " + rules);
1754             } else {
1755                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1756                       " => " + rules + ", exp " + expRules);
1757             }
1758             if (escapedRules == expEscapedRules) {
1759                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1760                       " => " + escapedRules);
1761             } else {
1762                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1763                       " => " + escapedRules + ", exp " + expEscapedRules);
1764             }
1765             delete t;
1766 
1767         } else {
1768             // UnicodeSet test
1769             UErrorCode status = U_ZERO_ERROR;
1770             UnicodeString pat(DATA[d+1], -1, US_INV);
1771             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1772             UnicodeSet set(pat, status);
1773             if (U_FAILURE(status)) {
1774                 errln("FAIL: UnicodeSet ct failed");
1775                 return;
1776             }
1777             // Adjust spacing etc. as necessary.
1778             UnicodeString toPat;
1779             set.toPattern(toPat);
1780             if (expToPat == toPat) {
1781                 logln((UnicodeString)"Ok: " + pat +
1782                       " => " + toPat);
1783             } else {
1784                 errln((UnicodeString)"FAIL: " + pat +
1785                       " => " + prettify(toPat, TRUE) +
1786                       ", exp " + prettify(pat, TRUE));
1787             }
1788         }
1789     }
1790 }
1791 
TestContext()1792 void TransliteratorTest::TestContext() {
1793     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1794     expect("de > x; {d}e > y;",
1795            "de",
1796            "ye",
1797            &pos);
1798 
1799     expect("ab{c} > z;",
1800            "xadabdabcy",
1801            "xadabdabzy");
1802 }
1803 
TestSupplemental()1804 void TransliteratorTest::TestSupplemental() {
1805 
1806     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1807                                 "a > $a; $s > i;"),
1808            CharsToUnicodeString("ab\\U0001030Fx"),
1809            CharsToUnicodeString("\\U00010300bix"));
1810 
1811     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1812                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1813                                 "($a)($b) > $2 $1;"),
1814            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1815            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1816 
1817     // k|ax\\U00010300xm
1818 
1819     // k|a\\U00010400\\U00010300xm
1820     // ky|\\U00010400\\U00010300xm
1821     // ky\\U00010400|\\U00010300xm
1822 
1823     // ky\\U00010400|\\U00010300\\U00010400m
1824     // ky\\U00010400y|\\U00010400m
1825     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1826                                 "$a {x} > | @ \\U00010400;"
1827                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1828            CharsToUnicodeString("kax\\U00010300xm"),
1829            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1830 
1831     expectT("Any-Name",
1832            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1833            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1834 
1835     expectT("Any-Hex/Unicode",
1836            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1837            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1838 
1839     expectT("Any-Hex/C",
1840            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1841            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1842 
1843     expectT("Any-Hex/Perl",
1844            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1845            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1846 
1847     expectT("Any-Hex/Java",
1848            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1849            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1850 
1851     expectT("Any-Hex/XML",
1852            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1853            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1854 
1855     expectT("Any-Hex/XML10",
1856            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1857            "&#66352;&#1113856;&#917601;&#160;");
1858 
1859     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1860            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1861            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1862 }
1863 
TestQuantifier()1864 void TransliteratorTest::TestQuantifier() {
1865 
1866     // Make sure @ in a quantified anteContext works
1867     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1868            "AAAAAb",
1869            "aaa(aac)");
1870 
1871     // Make sure @ in a quantified postContext works
1872     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1873            "baaaaa",
1874            "caa(aaa)");
1875 
1876     // Make sure @ in a quantified postContext with seg ref works
1877     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1878            "baaaaa",
1879            "baa(aaa)");
1880 
1881     // Make sure @ past ante context doesn't enter ante context
1882     UTransPosition pos = {0, 5, 3, 5};
1883     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1884            "xxxab",
1885            "xxx(ac)",
1886            &pos);
1887 
1888     // Make sure @ past post context doesn't pass limit
1889     UTransPosition pos2 = {0, 4, 0, 2};
1890     expect("{b} a+ > c @@ |; x > y; a > A;",
1891            "baxx",
1892            "caxx",
1893            &pos2);
1894 
1895     // Make sure @ past post context doesn't enter post context
1896     expect("{b} a+ > c @@ |; x > y; a > A;",
1897            "baxx",
1898            "cayy");
1899 
1900     expect("(ab)? c > d;",
1901            "c abc ababc",
1902            "d d abd");
1903 
1904     // NOTE: The (ab)+ when referenced just yields a single "ab",
1905     // not the full sequence of them.  This accords with perl behavior.
1906     expect("(ab)+ {x} > '(' $1 ')';",
1907            "x abx ababxy",
1908            "x ab(ab) abab(ab)y");
1909 
1910     expect("b+ > x;",
1911            "ac abc abbc abbbc",
1912            "ac axc axc axc");
1913 
1914     expect("[abc]+ > x;",
1915            "qac abrc abbcs abtbbc",
1916            "qx xrx xs xtx");
1917 
1918     expect("q{(ab)+} > x;",
1919            "qa qab qaba qababc qaba",
1920            "qa qx qxa qxc qxa");
1921 
1922     expect("q(ab)* > x;",
1923            "qa qab qaba qababc",
1924            "xa x xa xc");
1925 
1926     // NOTE: The (ab)+ when referenced just yields a single "ab",
1927     // not the full sequence of them.  This accords with perl behavior.
1928     expect("q(ab)* > '(' $1 ')';",
1929            "qa qab qaba qababc",
1930            "()a (ab) (ab)a (ab)c");
1931 
1932     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1933     // quoted string
1934     expect("'ab'+ > x;",
1935            "bb ab ababb",
1936            "bb x xb");
1937 
1938     // $foo+ and $foo* -- the quantifier should apply to the entire
1939     // variable reference
1940     expect("$var = ab; $var+ > x;",
1941            "bb ab ababb",
1942            "bb x xb");
1943 }
1944 
1945 class TestTrans : public Transliterator {
1946 public:
TestTrans(const UnicodeString & id)1947     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1948     }
clone(void) const1949     virtual Transliterator* clone(void) const {
1950         return new TestTrans(getID());
1951     }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1952     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1953         UBool /*isIncremental*/) const
1954     {
1955         offsets.start = offsets.limit;
1956     }
1957     virtual UClassID getDynamicClassID() const;
1958     static UClassID U_EXPORT2 getStaticClassID();
1959 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1960 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1961 
1962 /**
1963  * Test Source-Target/Variant.
1964  */
1965 void TransliteratorTest::TestSTV(void) {
1966     int32_t ns = Transliterator::countAvailableSources();
1967     if (ns < 0 || ns > 255) {
1968         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1969         return;
1970     }
1971     int32_t i, j;
1972     for (i=0; i<ns; ++i) {
1973         UnicodeString source;
1974         Transliterator::getAvailableSource(i, source);
1975         logln((UnicodeString)"" + i + ": " + source);
1976         if (source.length() == 0) {
1977             errln("FAIL: empty source");
1978             continue;
1979         }
1980         int32_t nt = Transliterator::countAvailableTargets(source);
1981         if (nt < 0 || nt > 255) {
1982             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1983             continue;
1984         }
1985         for (int32_t j=0; j<nt; ++j) {
1986             UnicodeString target;
1987             Transliterator::getAvailableTarget(j, source, target);
1988             logln((UnicodeString)" " + j + ": " + target);
1989             if (target.length() == 0) {
1990                 errln("FAIL: empty target");
1991                 continue;
1992             }
1993             int32_t nv = Transliterator::countAvailableVariants(source, target);
1994             if (nv < 0 || nv > 255) {
1995                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1996                 continue;
1997             }
1998             for (int32_t k=0; k<nv; ++k) {
1999                 UnicodeString variant;
2000                 Transliterator::getAvailableVariant(k, source, target, variant);
2001                 if (variant.length() == 0) {
2002                     logln((UnicodeString)"  " + k + ": <empty>");
2003                 } else {
2004                     logln((UnicodeString)"  " + k + ": " + variant);
2005                 }
2006             }
2007         }
2008     }
2009 
2010     // Test registration
2011     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2014     for (i=0; i<3; ++i) {
2015         Transliterator *t = new TestTrans(IDS[i]);
2016         if (t == 0) {
2017             errln("FAIL: out of memory");
2018             return;
2019         }
2020         if (t->getID() != IDS[i]) {
2021             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2022             delete t;
2023             return;
2024         }
2025         Transliterator::registerInstance(t);
2026         UErrorCode status = U_ZERO_ERROR;
2027         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2028         if (t == NULL) {
2029             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2030                   IDS[i]);
2031         } else {
2032             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2033                   IDS[i]);
2034             delete t;
2035         }
2036         Transliterator::unregister(IDS[i]);
2037         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2038         if (t != NULL) {
2039             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2040                   IDS[i]);
2041             delete t;
2042         }
2043     }
2044 
2045     // Make sure getAvailable API reflects removal
2046     int32_t n = Transliterator::countAvailableIDs();
2047     for (i=0; i<n; ++i) {
2048         UnicodeString id = Transliterator::getAvailableID(i);
2049         for (j=0; j<3; ++j) {
2050             if (id.caseCompare(FULL_IDS[j],0)==0) {
2051                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2052             }
2053         }
2054     }
2055     n = Transliterator::countAvailableTargets("Any");
2056     for (i=0; i<n; ++i) {
2057         UnicodeString t;
2058         Transliterator::getAvailableTarget(i, "Any", t);
2059         if (t.caseCompare(IDS[0],0)==0) {
2060             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2061         }
2062     }
2063     n = Transliterator::countAvailableSources();
2064     for (i=0; i<n; ++i) {
2065         UnicodeString s;
2066         Transliterator::getAvailableSource(i, s);
2067         for (j=0; j<3; ++j) {
2068             if (SOURCES[j] == NULL) continue;
2069             if (s.caseCompare(SOURCES[j],0)==0) {
2070                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2071             }
2072         }
2073     }
2074 }
2075 
2076 /**
2077  * Test inverse of Greek-Latin; Title()
2078  */
TestCompoundInverse(void)2079 void TransliteratorTest::TestCompoundInverse(void) {
2080     UParseError parseError;
2081     UErrorCode status = U_ZERO_ERROR;
2082     Transliterator *t = Transliterator::createInstance
2083         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2084     if (t == 0) {
2085         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2086         return;
2087     }
2088     UnicodeString exp("(Title);Latin-Greek");
2089     if (t->getID() == exp) {
2090         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2091               t->getID());
2092     } else {
2093         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2094               t->getID() + "\", expected \"" + exp + "\"");
2095     }
2096     delete t;
2097 }
2098 
2099 /**
2100  * Test NFD chaining with RBT
2101  */
TestNFDChainRBT()2102 void TransliteratorTest::TestNFDChainRBT() {
2103     UParseError pe;
2104     UErrorCode ec = U_ZERO_ERROR;
2105     Transliterator* t = Transliterator::createFromRules(
2106                                "TEST", "::NFD; aa > Q; a > q;",
2107                                UTRANS_FORWARD, pe, ec);
2108     if (t == NULL || U_FAILURE(ec)) {
2109         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2110         return;
2111     }
2112     expect(*t, "aa", "Q");
2113     delete t;
2114 
2115     // TEMPORARY TESTS -- BEING DEBUGGED
2116 //=-    UnicodeString s, s2;
2117 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2118 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2119 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2120 //=-    expect(*t, s, s2);
2121 //=-    delete t;
2122 //=-
2123 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2124 //=-    expect(*t, s2, s);
2125 //=-    delete t;
2126 //=-
2127 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2128 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2129 //=-    expect(*t, s, s);
2130 //=-    delete t;
2131 
2132 //    const char* source[] = {
2133 //        /*
2134 //        "\\u015Br\\u012Bmad",
2135 //        "bhagavadg\\u012Bt\\u0101",
2136 //        "adhy\\u0101ya",
2137 //        "arjuna",
2138 //        "vi\\u1E63\\u0101da",
2139 //        "y\\u014Dga",
2140 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2141 //        "uv\\u0101cr\\u0325",
2142 //        */
2143 //        "rmk\\u1E63\\u0113t",
2144 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2145 //        /*
2146 //        "kuruk\\u1E63\\u0113tr\\u0113",
2147 //        "samav\\u0113t\\u0101",
2148 //        "yuyutsava-\\u1E25",
2149 //        "m\\u0101mak\\u0101-\\u1E25",
2150 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2151 //        "kimakurvata",
2152 //        "san\\u0304java",
2153 //        */
2154 //
2155 //        0
2156 //    };
2157 //    const char* expected[] = {
2158 //        /*
2159 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2160 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2161 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2162 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2163 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2164 //        "\\u092f\\u094b\\u0917",
2165 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2166 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2167 //        */
2168 //        "\\u0927",
2169 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170 //        /*
2171 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2172 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2173 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2174 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2175 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2176 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2177 //        "\\u0938\\u0902\\u091c\\u0935",
2178 //        */
2179 //        0
2180 //    };
2181 //    UErrorCode status = U_ZERO_ERROR;
2182 //    UParseError parseError;
2183 //    UnicodeString message;
2184 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2185 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2186 //    if(U_FAILURE(status)){
2187 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2188 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2189 //        delete latinToDevToLatin;
2190 //        delete devToLatinToDev;
2191 //        return;
2192 //    }
2193 //    UnicodeString gotResult;
2194 //    for(int i= 0; source[i] != 0; i++){
2195 //        gotResult = source[i];
2196 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2197 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2198 //    }
2199 //    delete latinToDevToLatin;
2200 //    delete devToLatinToDev;
2201 }
2202 
2203 /**
2204  * Inverse of "Null" should be "Null". (J21)
2205  */
TestNullInverse()2206 void TransliteratorTest::TestNullInverse() {
2207     UParseError pe;
2208     UErrorCode ec = U_ZERO_ERROR;
2209     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2210     if (t == 0 || U_FAILURE(ec)) {
2211         errln("FAIL: createInstance");
2212         return;
2213     }
2214     Transliterator *u = t->createInverse(ec);
2215     if (u == 0 || U_FAILURE(ec)) {
2216         errln("FAIL: createInverse");
2217         delete t;
2218         return;
2219     }
2220     if (u->getID() != "Null") {
2221         errln("FAIL: Inverse of Null should be Null");
2222     }
2223     delete t;
2224     delete u;
2225 }
2226 
2227 /**
2228  * Check ID of inverse of alias. (J22)
2229  */
TestAliasInverseID()2230 void TransliteratorTest::TestAliasInverseID() {
2231     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2232     UParseError pe;
2233     UErrorCode ec = U_ZERO_ERROR;
2234     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2235     if (t == 0 || U_FAILURE(ec)) {
2236         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2237         return;
2238     }
2239     Transliterator *u = t->createInverse(ec);
2240     if (u == 0 || U_FAILURE(ec)) {
2241         errln("FAIL: createInverse");
2242         delete t;
2243         return;
2244     }
2245     UnicodeString exp = "Hangul-Latin";
2246     UnicodeString got = u->getID();
2247     if (got != exp) {
2248         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2249               ", expected " + exp);
2250     }
2251     delete t;
2252     delete u;
2253 }
2254 
2255 /**
2256  * Test IDs of inverses of compound transliterators. (J20)
2257  */
TestCompoundInverseID()2258 void TransliteratorTest::TestCompoundInverseID() {
2259     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2260     UParseError pe;
2261     UErrorCode ec = U_ZERO_ERROR;
2262     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2263     if (t == 0 || U_FAILURE(ec)) {
2264         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2265         return;
2266     }
2267     Transliterator *u = t->createInverse(ec);
2268     if (u == 0 || U_FAILURE(ec)) {
2269         errln("FAIL: createInverse");
2270         delete t;
2271         return;
2272     }
2273     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2274     UnicodeString got = u->getID();
2275     if (got != exp) {
2276         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2277               ", expected " + exp);
2278     }
2279     delete t;
2280     delete u;
2281 }
2282 
2283 /**
2284  * Test undefined variable.
2285 
2286  */
TestUndefinedVariable()2287 void TransliteratorTest::TestUndefinedVariable() {
2288     UnicodeString rule = "$initial } a <> \\u1161;";
2289     UParseError pe;
2290     UErrorCode ec = U_ZERO_ERROR;
2291     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2292     delete t;
2293     if (U_FAILURE(ec)) {
2294         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2295               u_errorName(ec));
2296         return;
2297     }
2298     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2299           u_errorName(ec));
2300 }
2301 
2302 /**
2303  * Test empty context.
2304  */
TestEmptyContext()2305 void TransliteratorTest::TestEmptyContext() {
2306     expect(" { a } > b;", "xay a ", "xby b ");
2307 }
2308 
2309 /**
2310 * Test compound filter ID syntax
2311 */
TestCompoundFilterID(void)2312 void TransliteratorTest::TestCompoundFilterID(void) {
2313     static const char* DATA[] = {
2314         // Col. 1 = ID or rule set (latter must start with #)
2315 
2316         // = columns > 1 are null if expect col. 1 to be illegal =
2317 
2318         // Col. 2 = direction, "F..." or "R..."
2319         // Col. 3 = source string
2320         // Col. 4 = exp result
2321 
2322         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2323         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2324         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2325         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2326         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2327         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2328         NULL,
2329     };
2330 
2331     for (int32_t i=0; DATA[i]; i+=4) {
2332         UnicodeString id = CharsToUnicodeString(DATA[i]);
2333         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2334             UTRANS_REVERSE : UTRANS_FORWARD;
2335         UnicodeString source;
2336         UnicodeString exp;
2337         if (DATA[i+2] != NULL) {
2338             source = CharsToUnicodeString(DATA[i+2]);
2339             exp = CharsToUnicodeString(DATA[i+3]);
2340         }
2341         UBool expOk = (DATA[i+1] != NULL);
2342         Transliterator* t = NULL;
2343         UParseError pe;
2344         UErrorCode ec = U_ZERO_ERROR;
2345         if (id.charAt(0) == 0x23/*#*/) {
2346             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2347         } else {
2348             t = Transliterator::createInstance(id, direction, pe, ec);
2349         }
2350         UBool ok = (t != NULL && U_SUCCESS(ec));
2351         UnicodeString transID;
2352         if (t!=0) {
2353             transID = t->getID();
2354         }
2355         else {
2356             transID = UnicodeString("NULL", "");
2357         }
2358         if (ok == expOk) {
2359             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2360                   u_errorName(ec));
2361             if (source.length() != 0) {
2362                 expect(*t, source, exp);
2363             }
2364             delete t;
2365         } else {
2366             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2367                   u_errorName(ec));
2368         }
2369     }
2370 }
2371 
2372 /**
2373  * Test new property set syntax
2374  */
TestPropertySet()2375 void TransliteratorTest::TestPropertySet() {
2376     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2377     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2378            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2379 }
2380 
2381 /**
2382  * Test various failure points of the new 2.0 engine.
2383  */
TestNewEngine()2384 void TransliteratorTest::TestNewEngine() {
2385     UParseError pe;
2386     UErrorCode ec = U_ZERO_ERROR;
2387     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2388     if (t == 0 || U_FAILURE(ec)) {
2389         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2390         return;
2391     }
2392     // Katakana should be untouched
2393     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2394            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2395 
2396     delete t;
2397 
2398 #if 1
2399     // This test will only work if Transliterator.ROLLBACK is
2400     // true.  Otherwise, this test will fail, revealing a
2401     // limitation of global filters in incremental mode.
2402     Transliterator *a =
2403         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2404     Transliterator *A =
2405         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2406     if (U_FAILURE(ec)) {
2407         delete a;
2408         delete A;
2409         return;
2410     }
2411 
2412     Transliterator* array[3];
2413     array[0] = a;
2414     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2415     array[2] = A;
2416     if (U_FAILURE(ec)) {
2417         errln("FAIL: createInstance NFD");
2418         delete a;
2419         delete A;
2420         delete array[1];
2421         return;
2422     }
2423 
2424     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2425     if (U_FAILURE(ec)) {
2426         errln("FAIL: UnicodeSet constructor");
2427         delete a;
2428         delete A;
2429         delete array[1];
2430         delete t;
2431         return;
2432     }
2433 
2434     expect(*t, "aAaA", "bAbA");
2435 
2436     assertTrue("countElements", t->countElements() == 3);
2437     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2438     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2439     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2440     assertSuccess("getElement", ec);
2441 
2442     delete a;
2443     delete A;
2444     delete array[1];
2445     delete t;
2446 #endif
2447 
2448     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2449            "a",
2450            "ax");
2451 
2452     UnicodeString gr = CharsToUnicodeString(
2453         "$ddot = \\u0308 ;"
2454         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2455         "$rough = \\u0314 ;"
2456         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2457         "\\u03b1 <> a ;"
2458         "$rough <> h ;");
2459 
2460     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2461 }
2462 
2463 /**
2464  * Test quantified segment behavior.  We want:
2465  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2466  */
TestQuantifiedSegment(void)2467 void TransliteratorTest::TestQuantifiedSegment(void) {
2468     // The normal case
2469     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2470 
2471     // The tricky case; the quantifier is around the segment
2472     expect("([abc])+ > x $1 x;", "cba", "xax");
2473 
2474     // Tricky case in reverse direction
2475     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2476 
2477     // Check post-context segment
2478     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2479 
2480     // Test toRule/toPattern for non-quantified segment.
2481     // Careful with spacing here.
2482     UnicodeString r("([a-c]){q} > x $1 x;");
2483     UParseError pe;
2484     UErrorCode ec = U_ZERO_ERROR;
2485     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2486     if (U_FAILURE(ec)) {
2487         errln("FAIL: createFromRules");
2488         delete t;
2489         return;
2490     }
2491     UnicodeString rr;
2492     t->toRules(rr, TRUE);
2493     if (r != rr) {
2494         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2495     } else {
2496         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2497     }
2498     delete t;
2499 
2500     // Test toRule/toPattern for quantified segment.
2501     // Careful with spacing here.
2502     r = "([a-c])+{q} > x $1 x;";
2503     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2504     if (U_FAILURE(ec)) {
2505         errln("FAIL: createFromRules");
2506         delete t;
2507         return;
2508     }
2509     t->toRules(rr, TRUE);
2510     if (r != rr) {
2511         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2512     } else {
2513         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2514     }
2515     delete t;
2516 }
2517 
2518 //======================================================================
2519 // Ram's tests
2520 //======================================================================
TestDevanagariLatinRT()2521 void TransliteratorTest::TestDevanagariLatinRT(){
2522     const int MAX_LEN= 52;
2523     const char* const source[MAX_LEN] = {
2524         "bh\\u0101rata",
2525         "kra",
2526         "k\\u1E63a",
2527         "khra",
2528         "gra",
2529         "\\u1E45ra",
2530         "cra",
2531         "chra",
2532         "j\\u00F1a",
2533         "jhra",
2534         "\\u00F1ra",
2535         "\\u1E6Dya",
2536         "\\u1E6Dhra",
2537         "\\u1E0Dya",
2538       //"r\\u0323ya", // \u095c is not valid in Devanagari
2539         "\\u1E0Dhya",
2540         "\\u1E5Bhra",
2541         "\\u1E47ra",
2542         "tta",
2543         "thra",
2544         "dda",
2545         "dhra",
2546         "nna",
2547         "pra",
2548         "phra",
2549         "bra",
2550         "bhra",
2551         "mra",
2552         "\\u1E49ra",
2553       //"l\\u0331ra",
2554         "yra",
2555         "\\u1E8Fra",
2556       //"l-",
2557         "vra",
2558         "\\u015Bra",
2559         "\\u1E63ra",
2560         "sra",
2561         "hma",
2562         "\\u1E6D\\u1E6Da",
2563         "\\u1E6D\\u1E6Dha",
2564         "\\u1E6Dh\\u1E6Dha",
2565         "\\u1E0D\\u1E0Da",
2566         "\\u1E0D\\u1E0Dha",
2567         "\\u1E6Dya",
2568         "\\u1E6Dhya",
2569         "\\u1E0Dya",
2570         "\\u1E0Dhya",
2571         // Not roundtrippable --
2572         // \\u0939\\u094d\\u094d\\u092E  - hma
2573         // \\u0939\\u094d\\u092E         - hma
2574         // CharsToUnicodeString("hma"),
2575         "hya",
2576         "\\u015Br\\u0325",
2577         "\\u015Bca",
2578         "\\u0115",
2579         "san\\u0304j\\u012Bb s\\u0113nagupta",
2580         "\\u0101nand vaddir\\u0101ju",
2581         "\\u0101",
2582         "a"
2583     };
2584     const char* const expected[MAX_LEN] = {
2585         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2586         "\\u0915\\u094D\\u0930",          /* kra         */
2587         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2588         "\\u0916\\u094D\\u0930",          /* khra        */
2589         "\\u0917\\u094D\\u0930",          /* gra         */
2590         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2591         "\\u091A\\u094D\\u0930",          /* cra         */
2592         "\\u091B\\u094D\\u0930",          /* chra        */
2593         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2594         "\\u091D\\u094D\\u0930",          /* jhra        */
2595         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2596         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2597         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2598         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2599       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2600         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2601         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2602         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2603         "\\u0924\\u094D\\u0924",          /* tta         */
2604         "\\u0925\\u094D\\u0930",          /* thra        */
2605         "\\u0926\\u094D\\u0926",          /* dda         */
2606         "\\u0927\\u094D\\u0930",          /* dhra        */
2607         "\\u0928\\u094D\\u0928",          /* nna         */
2608         "\\u092A\\u094D\\u0930",          /* pra         */
2609         "\\u092B\\u094D\\u0930",          /* phra        */
2610         "\\u092C\\u094D\\u0930",          /* bra         */
2611         "\\u092D\\u094D\\u0930",          /* bhra        */
2612         "\\u092E\\u094D\\u0930",          /* mra         */
2613         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2614       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2615         "\\u092F\\u094D\\u0930",          /* yra         */
2616         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2617       //"l-",
2618         "\\u0935\\u094D\\u0930",          /* vra         */
2619         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2620         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2621         "\\u0938\\u094D\\u0930",          /* sra         */
2622         "\\u0939\\u094d\\u092E",          /* hma         */
2623         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2624         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2625         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2626         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2627         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2628         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2629         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2630         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2631         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2632      // "hma",                         /* hma         */
2633         "\\u0939\\u094D\\u092F",          /* hya         */
2634         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2635         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2636         "\\u090d",                        /* e\\u0306    */
2637         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2638         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2639         "\\u0906",
2640         "\\u0905",
2641     };
2642     UErrorCode status = U_ZERO_ERROR;
2643     UParseError parseError;
2644     UnicodeString message;
2645     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2646     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2647     if(U_FAILURE(status)){
2648         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2649         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2650         return;
2651     }
2652     UnicodeString gotResult;
2653     for(int i= 0; i<MAX_LEN; i++){
2654         gotResult = source[i];
2655         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2656         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2657     }
2658     delete latinToDev;
2659     delete devToLatin;
2660 }
2661 
TestTeluguLatinRT()2662 void TransliteratorTest::TestTeluguLatinRT(){
2663     const int MAX_LEN=10;
2664     const char* const source[MAX_LEN] = {
2665         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2666         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2667         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2668         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2669         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2670         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2671         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2672         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2673         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2674         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2675     };
2676 
2677     const char* const expected[MAX_LEN] = {
2678         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2679         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2680         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2683         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2684         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2685         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2686         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688     };
2689 
2690     UErrorCode status = U_ZERO_ERROR;
2691     UParseError parseError;
2692     UnicodeString message;
2693     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2694     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2695     if(U_FAILURE(status)){
2696         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2697         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2698         return;
2699     }
2700     UnicodeString gotResult;
2701     for(int i= 0; i<MAX_LEN; i++){
2702         gotResult = source[i];
2703         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2704         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2705     }
2706     delete latinToDev;
2707     delete devToLatin;
2708 }
2709 
TestSanskritLatinRT()2710 void TransliteratorTest::TestSanskritLatinRT(){
2711     const int MAX_LEN =16;
2712     const char* const source[MAX_LEN] = {
2713         "rmk\\u1E63\\u0113t",
2714         "\\u015Br\\u012Bmad",
2715         "bhagavadg\\u012Bt\\u0101",
2716         "adhy\\u0101ya",
2717         "arjuna",
2718         "vi\\u1E63\\u0101da",
2719         "y\\u014Dga",
2720         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2721         "uv\\u0101cr\\u0325",
2722         "dharmak\\u1E63\\u0113tr\\u0113",
2723         "kuruk\\u1E63\\u0113tr\\u0113",
2724         "samav\\u0113t\\u0101",
2725         "yuyutsava\\u1E25",
2726         "m\\u0101mak\\u0101\\u1E25",
2727     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2728         "kimakurvata",
2729         "san\\u0304java",
2730     };
2731     const char* const expected[MAX_LEN] = {
2732         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2733         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2734         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2735         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2736         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2737         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2738         "\\u092f\\u094b\\u0917",
2739         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2740         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2741         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2744         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2745         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2746     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2747         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2748         "\\u0938\\u0902\\u091c\\u0935",
2749     };
2750     UErrorCode status = U_ZERO_ERROR;
2751     UParseError parseError;
2752     UnicodeString message;
2753     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2754     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2755     if(U_FAILURE(status)){
2756         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2757         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2758         return;
2759     }
2760     UnicodeString gotResult;
2761     for(int i= 0; i<MAX_LEN; i++){
2762         gotResult = source[i];
2763         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2764         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2765     }
2766     delete latinToDev;
2767     delete devToLatin;
2768 }
2769 
2770 
TestCompoundLatinRT()2771 void TransliteratorTest::TestCompoundLatinRT(){
2772     const char* const source[] = {
2773         "rmk\\u1E63\\u0113t",
2774         "\\u015Br\\u012Bmad",
2775         "bhagavadg\\u012Bt\\u0101",
2776         "adhy\\u0101ya",
2777         "arjuna",
2778         "vi\\u1E63\\u0101da",
2779         "y\\u014Dga",
2780         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2781         "uv\\u0101cr\\u0325",
2782         "dharmak\\u1E63\\u0113tr\\u0113",
2783         "kuruk\\u1E63\\u0113tr\\u0113",
2784         "samav\\u0113t\\u0101",
2785         "yuyutsava\\u1E25",
2786         "m\\u0101mak\\u0101\\u1E25",
2787      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2788         "kimakurvata",
2789         "san\\u0304java"
2790     };
2791     const int MAX_LEN = UPRV_LENGTHOF(source);
2792     const char* const expected[MAX_LEN] = {
2793         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2794         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2795         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2796         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2797         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2798         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2799         "\\u092f\\u094b\\u0917",
2800         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2801         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2802         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2805         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2806         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2807     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2808         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2809         "\\u0938\\u0902\\u091c\\u0935"
2810     };
2811     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2812         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2813         return;
2814     }
2815 
2816     UErrorCode status = U_ZERO_ERROR;
2817     UParseError parseError;
2818     UnicodeString message;
2819     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2820     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2821     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2822     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2823 
2824     if(U_FAILURE(status)){
2825         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2826         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2827         return;
2828     }
2829     UnicodeString gotResult;
2830     for(int i= 0; i<MAX_LEN; i++){
2831         gotResult = source[i];
2832         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2833         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835 
2836     }
2837     delete(latinToDevToLatin);
2838     delete(devToLatinToDev);
2839     delete(devToTelToDev);
2840     delete(latinToTelToLatin);
2841 }
2842 
2843 /**
2844  * Test Gurmukhi-Devanagari Tippi and Bindi
2845  */
TestGurmukhiDevanagari()2846 void TransliteratorTest::TestGurmukhiDevanagari(){
2847     // the rule says:
2848     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2849     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2850     UErrorCode status = U_ZERO_ERROR;
2851     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2852     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2853     UParseError parseError;
2854 
2855     UnicodeSetIterator vIter(vowel);
2856     UnicodeSetIterator nvIter(non_vowel);
2857     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2858     if(U_FAILURE(status)) {
2859       dataerrln("Error creating transliterator %s", u_errorName(status));
2860       delete trans;
2861       return;
2862     }
2863     UnicodeString src (" \\u0902", -1, US_INV);
2864     UnicodeString expected(" \\u0A02", -1, US_INV);
2865     src = src.unescape();
2866     expected= expected.unescape();
2867 
2868     while(vIter.next()){
2869         src.setCharAt(0,(UChar) vIter.getCodepoint());
2870         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2871         expect(*trans,src,expected);
2872     }
2873 
2874     expected.setCharAt(1,0x0A70);
2875     while(nvIter.next()){
2876         //src.setCharAt(0,(char) nvIter.codepoint);
2877         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2878         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2879         expect(*trans,src,expected);
2880     }
2881     delete trans;
2882 }
2883 /**
2884  * Test instantiation from a locale.
2885  */
TestLocaleInstantiation(void)2886 void TransliteratorTest::TestLocaleInstantiation(void) {
2887     UParseError pe;
2888     UErrorCode ec = U_ZERO_ERROR;
2889     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2890     if (U_FAILURE(ec)) {
2891         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2892         delete t;
2893         return;
2894     }
2895     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2896     delete t;
2897 
2898     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2899     if (U_FAILURE(ec)) {
2900         errln("FAIL: createInstance(en-el)");
2901         delete t;
2902         return;
2903     }
2904     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2905     delete t;
2906 }
2907 
2908 /**
2909  * Test title case handling of accent (should ignore accents)
2910  */
TestTitleAccents(void)2911 void TransliteratorTest::TestTitleAccents(void) {
2912     UParseError pe;
2913     UErrorCode ec = U_ZERO_ERROR;
2914     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2915     if (U_FAILURE(ec)) {
2916         errln("FAIL: createInstance(Title)");
2917         delete t;
2918         return;
2919     }
2920     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2921     delete t;
2922 }
2923 
2924 /**
2925  * Basic test of a locale resource based rule.
2926  */
TestLocaleResource()2927 void TransliteratorTest::TestLocaleResource() {
2928     const char* DATA[] = {
2929         // id                    from               to
2930         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2931         "Latin-el",              "b",               "\\u03bc\\u03c0",
2932         "Latin-Greek",           "b",               "\\u03B2",
2933         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2934         "el-Latin",              "\\u03B2",         "v",
2935         "Greek-Latin",           "\\u03B2",         "b",
2936     };
2937     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2938     for (int32_t i=0; i<DATA_length; i+=3) {
2939         UParseError pe;
2940         UErrorCode ec = U_ZERO_ERROR;
2941         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2942         if (U_FAILURE(ec)) {
2943             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2944             delete t;
2945             continue;
2946         }
2947         expect(*t, CharsToUnicodeString(DATA[i+1]),
2948                CharsToUnicodeString(DATA[i+2]));
2949         delete t;
2950     }
2951 }
2952 
2953 /**
2954  * Make sure parse errors reference the right line.
2955  */
TestParseError()2956 void TransliteratorTest::TestParseError() {
2957     static const char* rule =
2958         "a > b;\n"
2959         "# more stuff\n"
2960         "d << b;";
2961     UErrorCode ec = U_ZERO_ERROR;
2962     UParseError pe;
2963     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2964     delete t;
2965     if (U_FAILURE(ec)) {
2966         UnicodeString err(pe.preContext);
2967         err.append((UChar)124/*|*/).append(pe.postContext);
2968         if (err.indexOf("d << b") >= 0) {
2969             logln("Ok: " + err);
2970         } else {
2971             errln("FAIL: " + err);
2972         }
2973     }
2974     else {
2975         errln("FAIL: no syntax error");
2976     }
2977     static const char* maskingRule =
2978         "a>x;\n"
2979         "# more stuff\n"
2980         "ab>y;";
2981     ec = U_ZERO_ERROR;
2982     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2983     if (ec != U_RULE_MASK_ERROR) {
2984         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2985     }
2986     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2987         errln("FAIL: did not get expected precontext");
2988     }
2989     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2990         errln("FAIL: did not get expected postcontext");
2991     }
2992 }
2993 
2994 /**
2995  * Make sure sets on output are disallowed.
2996  */
TestOutputSet()2997 void TransliteratorTest::TestOutputSet() {
2998     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2999     UErrorCode ec = U_ZERO_ERROR;
3000     UParseError pe;
3001     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3002     delete t;
3003     if (U_FAILURE(ec)) {
3004         UnicodeString err(pe.preContext);
3005         err.append((UChar)124/*|*/).append(pe.postContext);
3006         logln("Ok: " + err);
3007         return;
3008     }
3009     errln("FAIL: No syntax error");
3010 }
3011 
3012 /**
3013  * Test the use variable range pragma, making sure that use of
3014  * variable range characters is detected and flagged as an error.
3015  */
TestVariableRange()3016 void TransliteratorTest::TestVariableRange() {
3017     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3018     UErrorCode ec = U_ZERO_ERROR;
3019     UParseError pe;
3020     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3021     delete t;
3022     if (U_FAILURE(ec)) {
3023         UnicodeString err(pe.preContext);
3024         err.append((UChar)124/*|*/).append(pe.postContext);
3025         logln("Ok: " + err);
3026         return;
3027     }
3028     errln("FAIL: No syntax error");
3029 }
3030 
3031 /**
3032  * Test invalid post context error handling
3033  */
TestInvalidPostContext()3034 void TransliteratorTest::TestInvalidPostContext() {
3035     UnicodeString rule = "a}b{c>d;";
3036     UErrorCode ec = U_ZERO_ERROR;
3037     UParseError pe;
3038     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3039     delete t;
3040     if (U_FAILURE(ec)) {
3041         UnicodeString err(pe.preContext);
3042         err.append((UChar)124/*|*/).append(pe.postContext);
3043         if (err.indexOf("a}b{c") >= 0) {
3044             logln("Ok: " + err);
3045         } else {
3046             errln("FAIL: " + err);
3047         }
3048         return;
3049     }
3050     errln("FAIL: No syntax error");
3051 }
3052 
3053 /**
3054  * Test ID form variants
3055  */
TestIDForms()3056 void TransliteratorTest::TestIDForms() {
3057     const char* DATA[] = {
3058         "NFC", NULL, "NFD",
3059         "nfd", NULL, "NFC", // make sure case is ignored
3060         "Any-NFKD", NULL, "Any-NFKC",
3061         "Null", NULL, "Null",
3062         "-nfkc", "nfkc", "NFKD",
3063         "-nfkc/", "nfkc", "NFKD",
3064         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3065         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3066         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3067         "Source-", NULL, NULL,
3068         "Source/Variant-", NULL, NULL,
3069         "Source-/Variant", NULL, NULL,
3070         "/Variant", NULL, NULL,
3071         "/Variant-", NULL, NULL,
3072         "-/Variant", NULL, NULL,
3073         "-/", NULL, NULL,
3074         "-", NULL, NULL,
3075         "/", NULL, NULL,
3076     };
3077     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3078 
3079     for (int32_t i=0; i<DATA_length; i+=3) {
3080         const char* ID = DATA[i];
3081         const char* expID = DATA[i+1];
3082         const char* expInvID = DATA[i+2];
3083         UBool expValid = (expInvID != NULL);
3084         if (expID == NULL) {
3085             expID = ID;
3086         }
3087         UParseError pe;
3088         UErrorCode ec = U_ZERO_ERROR;
3089         Transliterator *t =
3090             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3091         if (U_FAILURE(ec)) {
3092             if (!expValid) {
3093                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3094             } else {
3095                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3096             }
3097             delete t;
3098             continue;
3099         }
3100         Transliterator *u = t->createInverse(ec);
3101         if (U_FAILURE(ec)) {
3102             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3103             delete t;
3104             delete u;
3105             continue;
3106         }
3107         if (t->getID() == expID &&
3108             u->getID() == expInvID) {
3109             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3110         } else {
3111             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3112                   t->getID() + " x getInverse() => " + u->getID() +
3113                   ", expected " + expInvID);
3114         }
3115         delete t;
3116         delete u;
3117     }
3118 }
3119 
3120 static const UChar SPACE[]   = {32,0};
3121 static const UChar NEWLINE[] = {10,0};
3122 static const UChar RETURN[]  = {13,0};
3123 static const UChar EMPTY[]   = {0};
3124 
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3125 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3126                                     const UnicodeString& testRulesForward) {
3127     UnicodeString rules2; t2.toRules(rules2, TRUE);
3128     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3129     rules2.findAndReplace(SPACE, EMPTY);
3130     rules2.findAndReplace(NEWLINE, EMPTY);
3131     rules2.findAndReplace(RETURN, EMPTY);
3132 
3133     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3134 
3135     if (rules2 != testRules) {
3136         errln(label);
3137         logln((UnicodeString)"GENERATED RULES: " + rules2);
3138         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3139     }
3140 }
3141 
3142 /**
3143  * Mark's toRules test.
3144  */
TestToRulesMark()3145 void TransliteratorTest::TestToRulesMark() {
3146     const char* testRules =
3147         "::[[:Latin:][:Mark:]];"
3148         "::NFKD (NFC);"
3149         "::Lower (Lower);"
3150         "a <> \\u03B1;" // alpha
3151         "::NFKC (NFD);"
3152         "::Upper (Lower);"
3153         "::Lower ();"
3154         "::([[:Greek:][:Mark:]]);"
3155         ;
3156     const char* testRulesForward =
3157         "::[[:Latin:][:Mark:]];"
3158         "::NFKD(NFC);"
3159         "::Lower(Lower);"
3160         "a > \\u03B1;"
3161         "::NFKC(NFD);"
3162         "::Upper (Lower);"
3163         "::Lower ();"
3164         ;
3165     const char* testRulesBackward =
3166         "::[[:Greek:][:Mark:]];"
3167         "::Lower (Upper);"
3168         "::NFD(NFKC);"
3169         "\\u03B1 > a;"
3170         "::Lower(Lower);"
3171         "::NFC(NFKD);"
3172         ;
3173     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3174     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3175 
3176     UParseError pe;
3177     UErrorCode ec = U_ZERO_ERROR;
3178     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3179     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3180 
3181     if (U_FAILURE(ec)) {
3182         delete t2;
3183         delete t3;
3184         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3185         return;
3186     }
3187 
3188     expect(*t2, source, target);
3189     expect(*t3, target, source);
3190 
3191     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3192     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3193 
3194     delete t2;
3195     delete t3;
3196 }
3197 
3198 /**
3199  * Test Escape and Unescape transliterators.
3200  */
TestEscape()3201 void TransliteratorTest::TestEscape() {
3202     UParseError pe;
3203     UErrorCode ec;
3204     Transliterator *t;
3205 
3206     ec = U_ZERO_ERROR;
3207     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3208     if (U_FAILURE(ec)) {
3209         errln((UnicodeString)"FAIL: createInstance");
3210     } else {
3211         expect(*t,
3212                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3213                "@12Q");
3214     }
3215     delete t;
3216 
3217     ec = U_ZERO_ERROR;
3218     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3219     if (U_FAILURE(ec)) {
3220         errln((UnicodeString)"FAIL: createInstance");
3221     } else {
3222         expect(*t,
3223                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3224                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3225     }
3226     delete t;
3227 
3228     ec = U_ZERO_ERROR;
3229     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3230     if (U_FAILURE(ec)) {
3231         errln((UnicodeString)"FAIL: createInstance");
3232     } else {
3233         expect(*t,
3234                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3235                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3236     }
3237     delete t;
3238 
3239     ec = U_ZERO_ERROR;
3240     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3241     if (U_FAILURE(ec)) {
3242         errln((UnicodeString)"FAIL: createInstance");
3243     } else {
3244         expect(*t,
3245                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3246                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3247     }
3248     delete t;
3249 }
3250 
3251 
TestAnchorMasking()3252 void TransliteratorTest::TestAnchorMasking(){
3253     UnicodeString rule ("^a > Q; a > q;");
3254     UErrorCode status= U_ZERO_ERROR;
3255     UParseError parseError;
3256 
3257     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3258     if(U_FAILURE(status)){
3259         errln(UnicodeString("FAIL: ") + "ID" +
3260               ".createFromRules() => bad rules" +
3261               /*", parse error " + parseError.code +*/
3262               ", line " + parseError.line +
3263               ", offset " + parseError.offset +
3264               ", context " + prettify(parseError.preContext, TRUE) +
3265               ", rules: " + prettify(rule, TRUE));
3266     }
3267     delete t;
3268 }
3269 
3270 /**
3271  * Make sure display names of variants look reasonable.
3272  */
TestDisplayName()3273 void TransliteratorTest::TestDisplayName() {
3274 #if UCONFIG_NO_FORMATTING
3275     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3276     return;
3277 #else
3278     static const char* DATA[] = {
3279         // ID, forward name, reverse name
3280         // Update the text as necessary -- the important thing is
3281         // not the text itself, but how various cases are handled.
3282 
3283         // Basic test
3284         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3285 
3286         // Variants
3287         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3288 
3289         // Target-only IDs
3290         "NFC", "Any to NFC", "Any to NFD",
3291     };
3292 
3293     int32_t DATA_length = UPRV_LENGTHOF(DATA);
3294 
3295     Locale US("en", "US");
3296 
3297     for (int32_t i=0; i<DATA_length; i+=3) {
3298         UnicodeString name;
3299         Transliterator::getDisplayName(DATA[i], US, name);
3300         if (name != DATA[i+1]) {
3301             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3302                   name + ", expected " + DATA[i+1]);
3303         } else {
3304             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3305         }
3306         UErrorCode ec = U_ZERO_ERROR;
3307         UParseError pe;
3308         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3309         if (U_FAILURE(ec)) {
3310             delete t;
3311             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3312             continue;
3313         }
3314         name = Transliterator::getDisplayName(t->getID(), US, name);
3315         if (name != DATA[i+2]) {
3316             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3317                   name + ", expected " + DATA[i+2]);
3318         } else {
3319             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3320         }
3321         delete t;
3322     }
3323 #endif
3324 }
3325 
TestSpecialCases(void)3326 void TransliteratorTest::TestSpecialCases(void) {
3327     const UnicodeString registerRules[] = {
3328         "Any-Dev1", "x > X; y > Y;",
3329         "Any-Dev2", "XY > Z",
3330         "Greek-Latin/FAKE",
3331             CharsToUnicodeString
3332             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3333         "" // END MARKER
3334     };
3335 
3336     const UnicodeString testCases[] = {
3337         // NORMALIZATION
3338         // should add more test cases
3339         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 
3344         // mp -> b BUG
3345         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347 
3348         // check for devanagari bug
3349         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3350 
3351         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3352         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3353                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3354 
3355         //TODO: enable this test once Titlecase works right
3356         /*
3357         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3358                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3359                  */
3360         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3362         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3363                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3364 
3365         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367 
3368          // FORMS OF S
3369         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3370                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3371         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3372                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3373         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3374                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3375         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3376                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3377         // Tatiana bug
3378         // Upper: TAT\\u02B9\\u00C2NA
3379         // Lower: tat\\u02B9\\u00E2na
3380         // Title: Tat\\u02B9\\u00E2na
3381         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3382                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3387 
3388         "" // END MARKER
3389     };
3390 
3391     UParseError pos;
3392     int32_t i;
3393     for (i = 0; registerRules[i].length()!=0; i+=2) {
3394         UErrorCode status = U_ZERO_ERROR;
3395 
3396         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3397             registerRules[i+1], UTRANS_FORWARD, pos, status);
3398         if (U_FAILURE(status)) {
3399             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3400         } else {
3401             Transliterator::registerInstance(t);
3402         }
3403     }
3404     for (i = 0; testCases[i].length()!=0; i+=3) {
3405         UErrorCode ec = U_ZERO_ERROR;
3406         UParseError pe;
3407         const UnicodeString& name = testCases[i];
3408         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3409         if (U_FAILURE(ec)) {
3410             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3411             delete t;
3412             continue;
3413         }
3414         const UnicodeString& id = t->getID();
3415         const UnicodeString& source = testCases[i+1];
3416         UnicodeString target;
3417 
3418         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3419 
3420         if (testCases[i+2].length() > 0) {
3421             target = testCases[i+2];
3422         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3423             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3424         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3425             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3426         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3427             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3428         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3429             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3430         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3431             target = source;
3432             target.toLower(Locale::getUS());
3433         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3434             target = source;
3435             target.toUpper(Locale::getUS());
3436         }
3437         if (U_FAILURE(ec)) {
3438             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3439             continue;
3440         }
3441 
3442         expect(*t, source, target);
3443         delete t;
3444     }
3445     for (i = 0; registerRules[i].length()!=0; i+=2) {
3446         Transliterator::unregister(registerRules[i]);
3447     }
3448 }
3449 
Char32ToEscapedChars(UChar32 ch,char * buffer)3450 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3451     if (ch <= 0xFFFF) {
3452         sprintf(buffer, "\\u%04x", (int)ch);
3453     } else {
3454         sprintf(buffer, "\\U%08x", (int)ch);
3455     }
3456     return buffer;
3457 }
3458 
TestSurrogateCasing(void)3459 void TransliteratorTest::TestSurrogateCasing (void) {
3460     // check that casing handles surrogates
3461     // titlecase is currently defective
3462     char buffer[20];
3463     UChar buffer2[20];
3464     UChar32 dee;
3465     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3466     UnicodeString DEE(u_totitle(dee));
3467     if (DEE != DESERET_DEE) {
3468         err("Fails titlecase of surrogates");
3469         err(Char32ToEscapedChars(dee, buffer));
3470         err(", ");
3471         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3472     }
3473 
3474     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3475     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3476     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3477     UErrorCode status= U_ZERO_ERROR;
3478 
3479     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3480     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3481         errln("Fails: Can't uppercase surrogates.");
3482     }
3483 
3484     status= U_ZERO_ERROR;
3485     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3486     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3487         errln("Fails: Can't lowercase surrogates.");
3488     }
3489 }
3490 
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3491 static void _trans(Transliterator& t, const UnicodeString& src,
3492                    UnicodeString& result) {
3493     result = src;
3494     t.transliterate(result);
3495 }
3496 
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3497 static void _trans(const UnicodeString& id, const UnicodeString& src,
3498                    UnicodeString& result, UErrorCode ec) {
3499     UParseError pe;
3500     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3501     if (U_SUCCESS(ec)) {
3502         _trans(*t, src, result);
3503     }
3504     delete t;
3505 }
3506 
/**
 * Looks up source in a flat list of (key, value) pairs using a
 * case-insensitive comparison of the keys.
 *
 * @param source key to look for
 * @param pairs  array of key/value strings terminated by an empty key
 * @return the value paired with the first matching key, or an empty
 *         string when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    const UnicodeString* entry = pairs;
    while (entry->length() > 0) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
        entry += 2;   // step over the key and its value
    }
    return UnicodeString();
}
3517 
3518 // Check to see that incremental gets at least part way through a reasonable string.
3519 
TestIncrementalProgress(void)3520 void TransliteratorTest::TestIncrementalProgress(void) {
3521     UErrorCode ec = U_ZERO_ERROR;
3522     UnicodeString latinTest = "The Quick Brown Fox.";
3523     UnicodeString devaTest;
3524     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3525     UnicodeString kataTest;
3526     _trans("Latin-Katakana", latinTest, kataTest, ec);
3527     if (U_FAILURE(ec)) {
3528         errln("FAIL: Internal error");
3529         return;
3530     }
3531     const UnicodeString tests[] = {
3532         "Any", latinTest,
3533         "Latin", latinTest,
3534         "Halfwidth", latinTest,
3535         "Devanagari", devaTest,
3536         "Katakana", kataTest,
3537         "" // END MARKER
3538     };
3539 
3540     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3541     int32_t i = 0, j=0, k=0;
3542     int32_t sources = Transliterator::countAvailableSources();
3543     for (i = 0; i < sources; i++) {
3544         UnicodeString source;
3545         Transliterator::getAvailableSource(i, source);
3546         UnicodeString test = _findMatch(source, tests);
3547         if (test.length() == 0) {
3548             logln((UnicodeString)"Skipping " + source + "-X");
3549             continue;
3550         }
3551         int32_t targets = Transliterator::countAvailableTargets(source);
3552         for (j = 0; j < targets; j++) {
3553             UnicodeString target;
3554             Transliterator::getAvailableTarget(j, source, target);
3555             int32_t variants = Transliterator::countAvailableVariants(source, target);
3556             for (k =0; k< variants; k++) {
3557                 UnicodeString variant;
3558                 UParseError err;
3559                 UErrorCode status = U_ZERO_ERROR;
3560 
3561                 Transliterator::getAvailableVariant(k, source, target, variant);
3562                 UnicodeString id = source + "-" + target + "/" + variant;
3563 
3564                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3565                 if (U_FAILURE(status)) {
3566                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3567                     delete t;
3568                     continue;
3569                 }
3570                 status = U_ZERO_ERROR;
3571                 CheckIncrementalAux(t, test);
3572 
3573                 UnicodeString rev;
3574                 _trans(*t, test, rev);
3575                 Transliterator *inv = t->createInverse(status);
3576                 if (U_FAILURE(status)) {
3577                     // The following are forward-only, it is OK that creating an inverse will not work:
3578                     // 1. Devanagari-Arabic
3579                     // 2. Any-*/BGN
3580                     // 3. Any-*/UNGEGN
3581                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3582                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3583                          && !(id.startsWith((UnicodeString)"Any-") &&
3584                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3585                              )
3586 #if UCONFIG_NO_BREAK_ITERATION
3587                          && id.compare((UnicodeString)"Latin-Thai/") != 0
3588 #endif
3589                        )
3590                     {
3591                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3592                     }
3593                     delete t;
3594                     delete inv;
3595                     continue;
3596                 }
3597                 CheckIncrementalAux(inv, rev);
3598                 delete t;
3599                 delete inv;
3600             }
3601         }
3602     }
3603 }
3604 
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3605 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3606                                                       const UnicodeString& input) {
3607     UErrorCode ec = U_ZERO_ERROR;
3608     UTransPosition pos;
3609     UnicodeString test = input;
3610 
3611     pos.contextStart = 0;
3612     pos.contextLimit = input.length();
3613     pos.start = 0;
3614     pos.limit = input.length();
3615 
3616     t->transliterate(test, pos, ec);
3617     if (U_FAILURE(ec)) {
3618         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3619         return;
3620     }
3621     UBool gotError = FALSE;
3622     (void)gotError;    // Suppress set but not used warning.
3623 
3624     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3625 
3626     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3627         errln((UnicodeString)"No Progress, " +
3628               t->getID() + ": " + formatInput(test, input, pos));
3629         gotError = TRUE;
3630     } else {
3631         logln((UnicodeString)"PASS Progress, " +
3632               t->getID() + ": " + formatInput(test, input, pos));
3633     }
3634     t->finishTransliteration(test, pos);
3635     if (pos.start != pos.limit) {
3636         errln((UnicodeString)"Incomplete, " +
3637               t->getID() + ": " + formatInput(test, input, pos));
3638         gotError = TRUE;
3639     }
3640 }
3641 
TestFunction()3642 void TransliteratorTest::TestFunction() {
3643     // Careful with spacing and ';' here:  Phrase this exactly
3644     // as toRules() is going to return it.  If toRules() changes
3645     // with regard to spacing or ';', then adjust this string.
3646     UnicodeString rule =
3647         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3648 
3649     UParseError pe;
3650     UErrorCode ec = U_ZERO_ERROR;
3651     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3652     if (t == NULL) {
3653         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3654         return;
3655     }
3656 
3657     UnicodeString r;
3658     t->toRules(r, TRUE);
3659     if (r == rule) {
3660         logln((UnicodeString)"OK: toRules() => " + r);
3661     } else {
3662         errln((UnicodeString)"FAIL: toRules() => " + r +
3663               ", expected " + rule);
3664     }
3665 
3666     expect(*t, "The Quick Brown Fox",
3667            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3668 
3669     delete t;
3670 }
3671 
TestInvalidBackRef(void)3672 void TransliteratorTest::TestInvalidBackRef(void) {
3673     UnicodeString rule =  ". > $1;";
3674     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3675     UParseError pe;
3676     UErrorCode ec = U_ZERO_ERROR;
3677     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3678     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3679 
3680     if (t != NULL) {
3681         errln("FAIL: createFromRules should have returned NULL");
3682         delete t;
3683     }
3684 
3685     if (t2 != NULL) {
3686         errln("FAIL: createFromRules should have returned NULL");
3687         delete t2;
3688     }
3689 
3690     if (U_SUCCESS(ec)) {
3691         errln("FAIL: Ok: . > $1; => no error");
3692     } else {
3693         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3694     }
3695 }
3696 
TestMulticharStringSet()3697 void TransliteratorTest::TestMulticharStringSet() {
3698     // Basic testing
3699     const char* rule =
3700         "       [{aa}]       > x;"
3701         "         a          > y;"
3702         "       [b{bc}]      > z;"
3703         "[{gd}] { e          > q;"
3704         "         e } [{fg}] > r;" ;
3705 
3706     UParseError pe;
3707     UErrorCode ec = U_ZERO_ERROR;
3708     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3709     if (t == NULL || U_FAILURE(ec)) {
3710         delete t;
3711         errln("FAIL: createFromRules failed");
3712         return;
3713     }
3714 
3715     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3716            "y x yz z d gd de gdq gdqfg ddrfg");
3717     delete t;
3718 
3719     // Overlapped string test.  Make sure that when multiple
3720     // strings can match that the longest one is matched.
3721     rule =
3722         "    [a {ab} {abc}]    > x;"
3723         "           b          > y;"
3724         "           c          > z;"
3725         " q [t {st} {rst}] { e > p;" ;
3726 
3727     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3728     if (t == NULL || U_FAILURE(ec)) {
3729         delete t;
3730         errln("FAIL: createFromRules failed");
3731         return;
3732     }
3733 
3734     expect(*t, "a ab abc qte qste qrste",
3735            "x x x qtp qstp qrstp");
3736     delete t;
3737 }
3738 
3739 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3740 // BEGIN TestUserFunction support factory
3741 
// Prototype transliterators, one per slot; _TUFFactory() clones the entry
// selected by the integer token given at registration time.
Transliterator* _TUFF[4];
// ID under which each slot was registered; _TUFUnreg() uses it to
// unregister the slot.  Entries are heap-allocated by _TUFReg().
UnicodeString* _TUFID[4];
3744 
_TUFFactory(const UnicodeString &,Transliterator::Token context)3745 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3746                                    Transliterator::Token context) {
3747     return _TUFF[context.integer]->clone();
3748 }
3749 
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3750 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3751     _TUFF[n] = t;
3752     _TUFID[n] = new UnicodeString(ID);
3753     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3754 }
3755 
_TUFUnreg(int32_t n)3756 static void _TUFUnreg(int32_t n) {
3757     if (_TUFF[n] != NULL) {
3758         Transliterator::unregister(*_TUFID[n]);
3759         delete _TUFF[n];
3760         delete _TUFID[n];
3761     }
3762 }
3763 
3764 // END TestUserFunction support factory
3765 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3766 
3767 /**
3768  * Test that user-registered transliterators can be used under function
3769  * syntax.
3770  */
TestUserFunction()3771 void TransliteratorTest::TestUserFunction() {
3772 
3773     Transliterator* t;
3774     UParseError pe;
3775     UErrorCode ec = U_ZERO_ERROR;
3776 
3777     // Setup our factory
3778     int32_t i;
3779     for (i=0; i<4; ++i) {
3780         _TUFF[i] = NULL;
3781     }
3782 
3783     // There's no need to register inverses if we don't use them
3784     t = Transliterator::createFromRules("gif",
3785                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3786                                         UTRANS_FORWARD, pe, ec);
3787     if (t == NULL || U_FAILURE(ec)) {
3788         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3789         return;
3790     }
3791     _TUFReg("Any-gif", t, 0);
3792 
3793     t = Transliterator::createFromRules("RemoveCurly",
3794                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3795                                         UTRANS_FORWARD, pe, ec);
3796     if (t == NULL || U_FAILURE(ec)) {
3797         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3798         goto FAIL;
3799     }
3800     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3801     _TUFReg("Any-RemoveCurly", t, 1);
3802 
3803     logln("Trying &hex");
3804     t = Transliterator::createFromRules("hex2",
3805                                         "(.) > &hex($1);",
3806                                         UTRANS_FORWARD, pe, ec);
3807     if (t == NULL || U_FAILURE(ec)) {
3808         errln("FAIL: createFromRules");
3809         goto FAIL;
3810     }
3811     logln("Registering");
3812     _TUFReg("Any-hex2", t, 2);
3813     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3814     if (t == NULL || U_FAILURE(ec)) {
3815         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3816         goto FAIL;
3817     }
3818     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3819     delete t;
3820 
3821     logln("Trying &gif");
3822     t = Transliterator::createFromRules("gif2",
3823                                         "(.) > &Gif(&Hex2($1));",
3824                                         UTRANS_FORWARD, pe, ec);
3825     if (t == NULL || U_FAILURE(ec)) {
3826         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3827         goto FAIL;
3828     }
3829     logln("Registering");
3830     _TUFReg("Any-gif2", t, 3);
3831     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3832     if (t == NULL || U_FAILURE(ec)) {
3833         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3834         goto FAIL;
3835     }
3836     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3837            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3838     delete t;
3839 
3840     // Test that filters are allowed after &
3841     t = Transliterator::createFromRules("test",
3842                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3843                                         UTRANS_FORWARD, pe, ec);
3844     if (t == NULL || U_FAILURE(ec)) {
3845         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3846         goto FAIL;
3847     }
3848     expect(*t, "abc",
3849            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3850     delete t;
3851 
3852  FAIL:
3853     for (i=0; i<4; ++i) {
3854         _TUFUnreg(i);
3855     }
3856 }
3857 
3858 /**
3859  * Test the Any-X transliterators.
3860  */
TestAnyX(void)3861 void TransliteratorTest::TestAnyX(void) {
3862     UParseError parseError;
3863     UErrorCode status = U_ZERO_ERROR;
3864     Transliterator* anyLatin =
3865         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3866     if (anyLatin==0) {
3867         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3868         delete anyLatin;
3869         return;
3870     }
3871 
3872     expect(*anyLatin,
3873            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3874            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3875 
3876     delete anyLatin;
3877 }
3878 
3879 /**
3880  * Test Any-X transliterators with sample letters from all scripts.
3881  */
TestAny(void)3882 void TransliteratorTest::TestAny(void) {
3883     UErrorCode status = U_ZERO_ERROR;
3884     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3885     //       function call parameters going on in this test.
3886     UnicodeSet alphabetic("[:alphabetic:]", status);
3887     if (U_FAILURE(status)) {
3888         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3889         return;
3890     }
3891     alphabetic.freeze();
3892 
3893     UnicodeString testString;
3894     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3895         const char *scriptName = uscript_getShortName((UScriptCode)i);
3896         if (scriptName == NULL) {
3897             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3898             return;
3899         }
3900 
3901         UnicodeSet sample;
3902         sample.applyPropertyAlias("script", scriptName, status);
3903         if (U_FAILURE(status)) {
3904             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3905             return;
3906         }
3907         sample.retainAll(alphabetic);
3908         for (int32_t count=0; count<5; count++) {
3909             UChar32 c = sample.charAt(count);
3910             if (c == -1) {
3911                 break;
3912             }
3913             testString.append(c);
3914         }
3915     }
3916 
3917     UParseError parseError;
3918     Transliterator* anyLatin =
3919         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3920     if (U_FAILURE(status)) {
3921         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3922         return;
3923     }
3924 
3925     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3926     anyLatin->transliterate(testString);
3927     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3928     delete anyLatin;
3929 }
3930 
3931 
3932 /**
3933  * Test the source and target set API.  These are only implemented
3934  * for RBT and CompoundTransliterator at this time.
3935  */
TestSourceTargetSet()3936 void TransliteratorTest::TestSourceTargetSet() {
3937     UErrorCode ec = U_ZERO_ERROR;
3938 
3939     // Rules
3940     const char* r =
3941         "a > b; "
3942         "r [x{lu}] > q;";
3943 
3944     // Expected source
3945     UnicodeSet expSrc("[arx{lu}]", ec);
3946 
3947     // Expected target
3948     UnicodeSet expTrg("[bq]", ec);
3949 
3950     UParseError pe;
3951     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3952 
3953     if (U_FAILURE(ec)) {
3954         delete t;
3955         errln("FAIL: Couldn't set up test");
3956         return;
3957     }
3958 
3959     UnicodeSet src; t->getSourceSet(src);
3960     UnicodeSet trg; t->getTargetSet(trg);
3961 
3962     if (src == expSrc && trg == expTrg) {
3963         UnicodeString a, b;
3964         logln((UnicodeString)"Ok: " +
3965               r + " => source = " + src.toPattern(a, TRUE) +
3966               ", target = " + trg.toPattern(b, TRUE));
3967     } else {
3968         UnicodeString a, b, c, d;
3969         errln((UnicodeString)"FAIL: " +
3970               r + " => source = " + src.toPattern(a, TRUE) +
3971               ", expected " + expSrc.toPattern(b, TRUE) +
3972               "; target = " + trg.toPattern(c, TRUE) +
3973               ", expected " + expTrg.toPattern(d, TRUE));
3974     }
3975 
3976     delete t;
3977 }
3978 
3979 /**
3980  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3981  */
TestPatternWhiteSpace()3982 void TransliteratorTest::TestPatternWhiteSpace() {
3983     // Rules
3984     const char* r = "a > \\u200E b;";
3985 
3986     UErrorCode ec = U_ZERO_ERROR;
3987     UParseError pe;
3988     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3989 
3990     if (U_FAILURE(ec)) {
3991         errln("FAIL: Couldn't set up test");
3992     } else {
3993         expect(*t, "a", "b");
3994     }
3995     delete t;
3996 
3997     // UnicodeSet
3998     ec = U_ZERO_ERROR;
3999     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4000 
4001     if (U_FAILURE(ec)) {
4002         errln("FAIL: Couldn't set up test");
4003     } else {
4004         if (set.contains(0x200E)) {
4005             errln("FAIL: U+200E not being ignored by UnicodeSet");
4006         }
4007     }
4008 }
4009 //======================================================================
4010 // this method is in TestUScript.java
4011 //======================================================================
TestAllCodepoints()4012 void TransliteratorTest::TestAllCodepoints(){
4013     UScriptCode code= USCRIPT_INVALID_CODE;
4014     char id[256]={'\0'};
4015     char abbr[256]={'\0'};
4016     char newId[256]={'\0'};
4017     char newAbbrId[256]={'\0'};
4018     char oldId[256]={'\0'};
4019     char oldAbbrId[256]={'\0'};
4020 
4021     UErrorCode status =U_ZERO_ERROR;
4022     UParseError pe;
4023 
4024     for(uint32_t i = 0; i<=0x10ffff; i++){
4025         code =  uscript_getScript(i,&status);
4026         if(code == USCRIPT_INVALID_CODE){
4027             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4028         }
4029         const char* myId = uscript_getName(code);
4030         if(!myId) {
4031           dataerrln("Valid script code returned NULL name. Check your data!");
4032           return;
4033         }
4034         uprv_strcpy(id,myId);
4035         uprv_strcpy(abbr,uscript_getShortName(code));
4036 
4037         uprv_strcpy(newId,"[:");
4038         uprv_strcat(newId,id);
4039         uprv_strcat(newId,":];NFD");
4040 
4041         uprv_strcpy(newAbbrId,"[:");
4042         uprv_strcat(newAbbrId,abbr);
4043         uprv_strcat(newAbbrId,":];NFD");
4044 
4045         if(uprv_strcmp(newId,oldId)!=0){
4046             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4047             if(t==NULL || U_FAILURE(status)){
4048                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4049             }
4050             delete t;
4051         }
4052         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4053             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4054             if(t==NULL || U_FAILURE(status)){
4055                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4056             }
4057             delete t;
4058         }
4059         uprv_strcpy(oldId,newId);
4060         uprv_strcpy(oldAbbrId, newAbbrId);
4061 
4062     }
4063 
4064 }
4065 
/*
 * TEST_TRANSLIT_ID(id, cls): creates the forward transliterator registered
 * under `id` and reports an error unless its getDynamicClassID() equals
 * cls::getStaticClassID(). A creation failure is reported via dataerrln().
 * (A self-assignment coverage check, *t = *t, cannot be done here.)
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode idStatus = U_ZERO_ERROR; \
  Transliterator* byId = Transliterator::createInstance(id, UTRANS_FORWARD, idStatus); \
  if (U_SUCCESS(idStatus)) { \
    if (byId->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(idStatus)); \
  } \
  delete byId; \
}
4079 
/*
 * TEST_TRANSLIT_RULE(rule, cls): builds a forward transliterator from the
 * string literal `rule` and reports an error unless its getDynamicClassID()
 * equals cls::getStaticClassID(). `rule` must be a string literal because it
 * is pasted onto the failure message.
 * (A self-assignment coverage check, *t = *t, cannot be done here.)
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UParseError ruleParseErr; \
  UErrorCode ruleStatus = U_ZERO_ERROR; \
  Transliterator* byRule = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, ruleParseErr, ruleStatus); \
  if (U_SUCCESS(ruleStatus)) { \
    if (byRule->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete byRule; \
}
4094 
TestBoilerplate()4095 void TransliteratorTest::TestBoilerplate() {
4096     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4097     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4098     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4099     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4100     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4101     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4102     TEST_TRANSLIT_ID("Null", NullTransliterator);
4103     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4104     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4105     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4106     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4107     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4108     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4109 }
4110 
TestAlternateSyntax()4111 void TransliteratorTest::TestAlternateSyntax() {
4112     // U+2206 == &
4113     // U+2190 == <
4114     // U+2192 == >
4115     // U+2194 == <>
4116     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4117            "abc",
4118            "xbz");
4119     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4120            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4121            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4122 }
4123 
/**
 * Rule sets referenced by index from BEGIN_END_TEST_CASES and by the
 * begin/end tests.
 *
 * Entries whose rule text uses ::BEGIN/::END are disabled: their text is
 * kept only as a comment and an empty string holds the slot so that the
 * indexes of the remaining entries do not shift.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1] (disabled)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "", // placeholder: keeps later indexes stable

    // [2] (disabled)
    //    "abc > xy;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "", // placeholder: keeps later indexes stable

    // [3] (disabled)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "aba > z;",
    "", // placeholder: keeps later indexes stable

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10] (disabled)
    //    "::BEGIN;"
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::END;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';",
    "", // placeholder: keeps later indexes stable

    // [11] (disabled)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;",
    "", // placeholder: keeps later indexes stable

    // [12] (disabled)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "$ab = [ab];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;"
    //    "::BEGIN;"
    //    "$ab { ' ' } $ab > '-';"
    //    "c { ' ' > ;"
    //    "::END;"
    //    "::BEGIN;"
    //    "'a-a' > a\\%|a;"
    //    "::END;",
    "", // placeholder: keeps later indexes stable

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14] (disabled)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > yz;"
    //    "::END;"
    //    "::Upper;",
    "", // placeholder: keeps later indexes stable

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16] (disabled)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc <> xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba <> yz;"
    //    "::END;"
    //    "::Upper(Lower);"
    //    "::([XYZ]);"
    "", // placeholder: keeps later indexes stable

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4297 
4298 /*
4299 (This entire test is commented out below and will need some heavy revision when we re-add
4300 the ::BEGIN/::END stuff)
4301 static const char* BOGUS_BEGIN_END_RULES[] = {
4302     // [7]
4303     "::BEGIN;"
4304     "abc > xy;"
4305     "::BEGIN;"
4306     "aba > z;"
4307     "::END;"
4308     "::END;",
4309 
4310     // [8]
4311     "abc > xy;"
4312     " aba > z;"
4313     "::END;",
4314 
4315     // [9]
4316     "::BEGIN;"
4317     "::Upper;"
4318     "::END;"
4319 };
4320 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4321 */
4322 
4323 static const char* BEGIN_END_TEST_CASES[] = {
4324     // rules             input                   expected output
4325     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4326 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4327 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4328 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4329     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4330     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4331 
4332     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4333     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4334     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4335     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4336 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4337 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4338 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4339 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4340 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4341     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4342     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4343     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4344 
4345 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4346     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4347 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4348     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4349 };
4350 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4351 
TestBeginEnd()4352 void TransliteratorTest::TestBeginEnd() {
4353     // run through the list of test cases above
4354     int32_t i = 0;
4355     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4356         expect((UnicodeString)"Test case #" + (i / 3),
4357                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4358                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4359                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4360     }
4361 
4362     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4363     UParseError parseError;
4364     UErrorCode status = U_ZERO_ERROR;
4365     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4366             UTRANS_REVERSE, parseError, status);
4367     if (reversed == 0 || U_FAILURE(status)) {
4368         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4369     } else {
4370         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4371     }
4372     delete reversed;
4373 
4374     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4375     // that all of them cause errors
4376 /*
4377 (commented out until we have the real ::BEGIN/::END stuff in place
4378     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4379         UParseError parseError;
4380         UErrorCode status = U_ZERO_ERROR;
4381         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4382                 UTRANS_FORWARD, parseError, status);
4383         if (!U_FAILURE(status)) {
4384             delete t;
4385             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4386         }
4387     }
4388 */
4389 }
4390 
TestBeginEndToRules()4391 void TransliteratorTest::TestBeginEndToRules() {
4392     // run through the same list of test cases we used above, but this time, instead of just
4393     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4394     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4395     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4396     // to (i.e., does the same thing as) the original rule set
4397     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4398         UParseError parseError;
4399         UErrorCode status = U_ZERO_ERROR;
4400         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4401                 UTRANS_FORWARD, parseError, status);
4402         if (U_FAILURE(status)) {
4403             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4404         } else {
4405             UnicodeString rules;
4406             t->toRules(rules, TRUE);
4407             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4408                     UTRANS_FORWARD, parseError, status);
4409             if (U_FAILURE(status)) {
4410                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4411                         parseError, status);
4412                 delete t;
4413             } else {
4414                 expect(*t2,
4415                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4416                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4417                 delete t;
4418                 delete t2;
4419             }
4420         }
4421     }
4422 
4423     // do the same thing for the reversible test case
4424     UParseError parseError;
4425     UErrorCode status = U_ZERO_ERROR;
4426     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4427             UTRANS_REVERSE, parseError, status);
4428     if (U_FAILURE(status)) {
4429         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4430     } else {
4431         UnicodeString rules;
4432         reversed->toRules(rules, FALSE);
4433         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4434                 parseError, status);
4435         if (U_FAILURE(status)) {
4436             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4437                     parseError, status);
4438             delete reversed;
4439         } else {
4440             expect(*reversed2,
4441                    UnicodeString("xy XY XYZ yz YZ"),
4442                    UnicodeString("xy abc xaba yz aba"));
4443             delete reversed;
4444             delete reversed2;
4445         }
4446     }
4447 }
4448 
TestRegisterAlias()4449 void TransliteratorTest::TestRegisterAlias() {
4450     UnicodeString longID("Lower;[aeiou]Upper");
4451     UnicodeString shortID("Any-CapVowels");
4452     UnicodeString reallyShortID("CapVowels");
4453 
4454     Transliterator::registerAlias(shortID, longID);
4455 
4456     UErrorCode err = U_ZERO_ERROR;
4457     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4458     if (U_FAILURE(err)) {
4459         errln("Failed to instantiate transliterator with long ID");
4460         Transliterator::unregister(shortID);
4461         return;
4462     }
4463     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4464     if (U_FAILURE(err)) {
4465         errln("Failed to instantiate transliterator with short ID");
4466         delete t1;
4467         Transliterator::unregister(shortID);
4468         return;
4469     }
4470 
4471     if (t1->getID() != longID)
4472         errln("Transliterator instantiated with long ID doesn't have long ID");
4473     if (t2->getID() != reallyShortID)
4474         errln("Transliterator instantiated with short ID doesn't have short ID");
4475 
4476     UnicodeString rules1;
4477     UnicodeString rules2;
4478 
4479     t1->toRules(rules1, TRUE);
4480     t2->toRules(rules2, TRUE);
4481     if (rules1 != rules2)
4482         errln("Alias transliterators aren't the same");
4483 
4484     delete t1;
4485     delete t2;
4486     Transliterator::unregister(shortID);
4487 
4488     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4489     if (U_SUCCESS(err)) {
4490         errln("Instantiation with short ID succeeded after short ID was unregistered");
4491         delete t1;
4492     }
4493 
4494     // try the same thing again, but this time with something other than
4495     // an instance of CompoundTransliterator
4496     UnicodeString realID("Latin-Greek");
4497     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4498     Transliterator::registerAlias(fakeID, realID);
4499 
4500     err = U_ZERO_ERROR;
4501     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4502     if (U_FAILURE(err)) {
4503         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4504         Transliterator::unregister(realID);
4505         return;
4506     }
4507     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4508     if (U_FAILURE(err)) {
4509         errln("Failed to instantiate transliterator with fake ID");
4510         delete t1;
4511         Transliterator::unregister(realID);
4512         return;
4513     }
4514 
4515     t1->toRules(rules1, TRUE);
4516     t2->toRules(rules2, TRUE);
4517     if (rules1 != rules2)
4518         errln("Alias transliterators aren't the same");
4519 
4520     delete t1;
4521     delete t2;
4522     Transliterator::unregister(fakeID);
4523 }
4524 
TestRuleStripping()4525 void TransliteratorTest::TestRuleStripping() {
4526     /*
4527 #
4528 \uE001>\u0C01; # SIGN
4529     */
4530     static const UChar rule[] = {
4531         0x0023,0x0020,0x000D,0x000A,
4532         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4533     };
4534     static const UChar expectedRule[] = {
4535         0xE001,0x003E,0x0C01,0x003B,0
4536     };
4537     UChar result[UPRV_LENGTHOF(rule)];
4538     UErrorCode status = U_ZERO_ERROR;
4539     int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4540     if (len != u_strlen(expectedRule)) {
4541         errln("utrans_stripRules return len = %d", len);
4542     }
4543     if (u_strncmp(expectedRule, result, len) != 0) {
4544         errln("utrans_stripRules did not return expected string");
4545     }
4546 }
4547 
4548 /**
4549  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4550  */
TestHalfwidthFullwidth(void)4551 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4552     UParseError parseError;
4553     UErrorCode status = U_ZERO_ERROR;
4554     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4555     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4556     if (hf == 0 || fh == 0) {
4557         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4558         delete hf;
4559         delete fh;
4560         return;
4561     }
4562 
4563     // Array of 2n items
4564     // Each item is
4565     //   "hf"|"fh"|"both",
4566     //   <Halfwidth>,
4567     //   <Fullwidth>
4568     const char* DATA[] = {
4569         "both",
4570         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4571         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4572     };
4573     int32_t DATA_length = UPRV_LENGTHOF(DATA);
4574 
4575     for (int32_t i=0; i<DATA_length; i+=3) {
4576         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4577         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4578         switch (*DATA[i]) {
4579         case 0x68: //'h': // Halfwidth-Fullwidth only
4580             expect(*hf, h, f);
4581             break;
4582         case 0x66: //'f': // Fullwidth-Halfwidth only
4583             expect(*fh, f, h);
4584             break;
4585         case 0x62: //'b': // both directions
4586             expect(*hf, h, f);
4587             expect(*fh, f, h);
4588             break;
4589         }
4590     }
4591     delete hf;
4592     delete fh;
4593 }
4594 
4595 
4596     /**
4597      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4598      *              TODO: confirm that the expected results are correct.
4599      *              For now, test just confirms that C++ and Java give identical results.
4600      */
TestThai(void)4601 void TransliteratorTest::TestThai(void) {
4602 #if !UCONFIG_NO_BREAK_ITERATION
4603     UParseError parseError;
4604     UErrorCode status = U_ZERO_ERROR;
4605     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4606     if (tr == 0) {
4607         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4608         return;
4609     }
4610     if (U_FAILURE(status)) {
4611         errln("FAIL: createInstance failed with %s", u_errorName(status));
4612         return;
4613     }
4614     const char *thaiText =
4615         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4616         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4617         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4618         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4619         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4620         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4621         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4622         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4623         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4624         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4625         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4626         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4627         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4628         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4629         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4630         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4631         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4632         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4633         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4634         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4635         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4636         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4637         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4638         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4639         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4640         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4641         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4642         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4643         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4644         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4645 
4646     const char *latinText =
4647         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4648         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4649         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4650         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4651         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4652         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4653         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4654         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4655         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4656         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4657         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4658         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4659         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4660         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4661         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4662         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4663         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4664         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4665 
4666 
4667     UnicodeString  xlitText(thaiText);
4668     xlitText = xlitText.unescape();
4669     tr->transliterate(xlitText);
4670 
4671     UnicodeString expectedText(latinText);
4672     expectedText = expectedText.unescape();
4673     expect(*tr, xlitText, expectedText);
4674 
4675     delete tr;
4676 #endif
4677 }
4678 
4679 
4680 //======================================================================
4681 // Support methods
4682 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4683 void TransliteratorTest::expectT(const UnicodeString& id,
4684                                  const UnicodeString& source,
4685                                  const UnicodeString& expectedResult) {
4686     UErrorCode ec = U_ZERO_ERROR;
4687     UParseError pe;
4688     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4689     if (U_FAILURE(ec)) {
4690         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4691         delete t;
4692         return;
4693     }
4694     expect(*t, source, expectedResult);
4695     delete t;
4696 }
4697 
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4698 void TransliteratorTest::reportParseError(const UnicodeString& message,
4699                                           const UParseError& parseError,
4700                                           const UErrorCode& status) {
4701     dataerrln(message +
4702           /*", parse error " + parseError.code +*/
4703           ", line " + parseError.line +
4704           ", offset " + parseError.offset +
4705           ", pre-context " + prettify(parseError.preContext, TRUE) +
4706           ", post-context " + prettify(parseError.postContext,TRUE) +
4707           ", Error: " + u_errorName(status));
4708 }
4709 
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4710 void TransliteratorTest::expect(const UnicodeString& rules,
4711                                 const UnicodeString& source,
4712                                 const UnicodeString& expectedResult,
4713                                 UTransPosition *pos) {
4714     expect("<ID>", rules, source, expectedResult, pos);
4715 }
4716 
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4717 void TransliteratorTest::expect(const UnicodeString& id,
4718                                 const UnicodeString& rules,
4719                                 const UnicodeString& source,
4720                                 const UnicodeString& expectedResult,
4721                                 UTransPosition *pos) {
4722     UErrorCode status = U_ZERO_ERROR;
4723     UParseError parseError;
4724     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4725     if (U_FAILURE(status)) {
4726         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4727     } else {
4728         expect(*t, source, expectedResult, pos);
4729     }
4730     delete t;
4731 }
4732 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4733 void TransliteratorTest::expect(const Transliterator& t,
4734                                 const UnicodeString& source,
4735                                 const UnicodeString& expectedResult,
4736                                 const Transliterator& reverseTransliterator) {
4737     expect(t, source, expectedResult);
4738     expect(reverseTransliterator, expectedResult, source);
4739 }
4740 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4741 void TransliteratorTest::expect(const Transliterator& t,
4742                                 const UnicodeString& source,
4743                                 const UnicodeString& expectedResult,
4744                                 UTransPosition *pos) {
4745     if (pos == 0) {
4746         UnicodeString result(source);
4747         t.transliterate(result);
4748         expectAux(t.getID() + ":String", source, result, expectedResult);
4749     }
4750     UTransPosition index={0, 0, 0, 0};
4751     if (pos != 0) {
4752         index = *pos;
4753     }
4754 
4755     UnicodeString rsource(source);
4756     if (pos == 0) {
4757         t.transliterate(rsource);
4758     } else {
4759         // Do it all at once -- below we do it incrementally
4760         t.finishTransliteration(rsource, *pos);
4761     }
4762     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4763 
4764     // Test keyboard (incremental) transliteration -- this result
4765     // must be the same after we finalize (see below).
4766     UnicodeString log;
4767     rsource.remove();
4768     if (pos != 0) {
4769         rsource = source;
4770         formatInput(log, rsource, index);
4771         log.append(" -> ");
4772         UErrorCode status = U_ZERO_ERROR;
4773         t.transliterate(rsource, index, status);
4774         formatInput(log, rsource, index);
4775     } else {
4776         for (int32_t i=0; i<source.length(); ++i) {
4777             if (i != 0) {
4778                 log.append(" + ");
4779             }
4780             log.append(source.charAt(i)).append(" -> ");
4781             UErrorCode status = U_ZERO_ERROR;
4782             t.transliterate(rsource, index, source.charAt(i), status);
4783             formatInput(log, rsource, index);
4784         }
4785     }
4786 
4787     // As a final step in keyboard transliteration, we must call
4788     // transliterate to finish off any pending partial matches that
4789     // were waiting for more input.
4790     t.finishTransliteration(rsource, index);
4791     log.append(" => ").append(rsource);
4792 
4793     expectAux(t.getID() + ":Keyboard", log,
4794               rsource == expectedResult,
4795               expectedResult);
4796 }
4797 
4798 
4799 /**
4800  * @param appendTo result is appended to this param.
4801  * @param input the string being transliterated
4802  * @param pos the index struct
4803  */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4804 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4805                                                const UnicodeString& input,
4806                                                const UTransPosition& pos) {
4807     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4808     // the {} indicate the context start and limit, and the ||
4809     // indicate the start and limit.
4810     if (0 <= pos.contextStart &&
4811         pos.contextStart <= pos.start &&
4812         pos.start <= pos.limit &&
4813         pos.limit <= pos.contextLimit &&
4814         pos.contextLimit <= input.length()) {
4815 
4816         UnicodeString a, b, c, d, e;
4817         input.extractBetween(0, pos.contextStart, a);
4818         input.extractBetween(pos.contextStart, pos.start, b);
4819         input.extractBetween(pos.start, pos.limit, c);
4820         input.extractBetween(pos.limit, pos.contextLimit, d);
4821         input.extractBetween(pos.contextLimit, input.length(), e);
4822         appendTo.append(a).append((UChar)123/*{*/).append(b).
4823             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4824             append((UChar)125/*}*/).append(e);
4825     } else {
4826         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4827                         pos.contextStart + ", s=" + pos.start + ", l=" +
4828                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4829                         input);
4830     }
4831     return appendTo;
4832 }
4833 
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4834 void TransliteratorTest::expectAux(const UnicodeString& tag,
4835                                    const UnicodeString& source,
4836                                    const UnicodeString& result,
4837                                    const UnicodeString& expectedResult) {
4838     expectAux(tag, source + " -> " + result,
4839               result == expectedResult,
4840               expectedResult);
4841 }
4842 
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4843 void TransliteratorTest::expectAux(const UnicodeString& tag,
4844                                    const UnicodeString& summary, UBool pass,
4845                                    const UnicodeString& expectedResult) {
4846     if (pass) {
4847         logln(UnicodeString("(")+tag+") " + prettify(summary));
4848     } else {
4849         dataerrln(UnicodeString("FAIL: (")+tag+") "
4850               + prettify(summary)
4851               + ", expected " + prettify(expectedResult));
4852     }
4853 }
4854 
4855 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4856