• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1999-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/10/99    aliu        Creation.
10 **********************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "transtst.h"
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
29 #include "cpdtrans.h"
30 #include "nultrans.h"
31 #include "rbt.h"
32 #include "rbt_pars.h"
33 #include "anytrans.h"
34 #include "esctrn.h"
35 #include "name2uni.h"
36 #include "nortrans.h"
37 #include "remtrans.h"
38 #include "titletrn.h"
39 #include "tolowtrn.h"
40 #include "toupptrn.h"
41 #include "unesctrn.h"
42 #include "uni2name.h"
43 #include "cstring.h"
44 #include "cmemory.h"
45 #include <stdio.h>
46 
47 /***********************************************************************
48 
49                      HOW TO USE THIS TEST FILE
50                                -or-
51                   How I developed on two platforms
52                 without losing (too much of) my mind
53 
54 
55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
56    any public void method named Test...() taking no parameters becomes
57    a test.  On C++, you need to modify the header and add a line to
58    the runIndexedTest() dispatch method.
59 
60 2. Make liberal use of the expect() method; it is your friend.
61 
62 3. The tests in this file exactly match those in a sister file on the
63    other side.  The two files are:
64 
65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
66    icu4c:  source/test/intltest/transtst.cpp
67 
68                   ==> THIS IS THE IMPORTANT PART <==
69 
70    When you add a test in this file, add it in TransliteratorTest.java
71    too.  Give it the same name and put it in the same relative place.
72    This makes maintenance a lot simpler for any poor soul who ends up
73    trying to synchronize the tests between icu4j and icu4c.
74 
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76    then add it in the special non-mirrored section.  These are
77    labeled
78 
79      "icu4j ONLY"
80 
81    or
82 
83      "icu4c ONLY"
84 
85    Make sure you document the reason the test is here and not there.
86 
87 
88 Thank you.
89 The Management
90 ***********************************************************************/
91 
92 // Define character constants thusly to be EBCDIC-friendly
93 enum {
94     LEFT_BRACE=((UChar)0x007B), /*{*/
95     PIPE      =((UChar)0x007C), /*|*/
96     ZERO      =((UChar)0x0030), /*0*/
97     UPPER_A   =((UChar)0x0041)  /*A*/
98 };
99 
TransliteratorTest()100 TransliteratorTest::TransliteratorTest()
101 :   DESERET_DEE((UChar32)0x10414),
102     DESERET_dee((UChar32)0x1043C)
103 {
104 }
105 
~TransliteratorTest()106 TransliteratorTest::~TransliteratorTest() {}
107 
108 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
110                                    const char* &name, char* /*par*/) {
111     switch (index) {
112         TESTCASE(0,TestInstantiation);
113         TESTCASE(1,TestSimpleRules);
114         TESTCASE(2,TestRuleBasedInverse);
115         TESTCASE(3,TestKeyboard);
116         TESTCASE(4,TestKeyboard2);
117         TESTCASE(5,TestKeyboard3);
118         TESTCASE(6,TestArabic);
119         TESTCASE(7,TestCompoundKana);
120         TESTCASE(8,TestCompoundHex);
121         TESTCASE(9,TestFiltering);
122         TESTCASE(10,TestInlineSet);
123         TESTCASE(11,TestPatternQuoting);
124         TESTCASE(12,TestJ277);
125         TESTCASE(13,TestJ243);
126         TESTCASE(14,TestJ329);
127         TESTCASE(15,TestSegments);
128         TESTCASE(16,TestCursorOffset);
129         TESTCASE(17,TestArbitraryVariableValues);
130         TESTCASE(18,TestPositionHandling);
131         TESTCASE(19,TestHiraganaKatakana);
132         TESTCASE(20,TestCopyJ476);
133         TESTCASE(21,TestAnchors);
134         TESTCASE(22,TestInterIndic);
135         TESTCASE(23,TestFilterIDs);
136         TESTCASE(24,TestCaseMap);
137         TESTCASE(25,TestNameMap);
138         TESTCASE(26,TestLiberalizedID);
139         TESTCASE(27,TestCreateInstance);
140         TESTCASE(28,TestNormalizationTransliterator);
141         TESTCASE(29,TestCompoundRBT);
142         TESTCASE(30,TestCompoundFilter);
143         TESTCASE(31,TestRemove);
144         TESTCASE(32,TestToRules);
145         TESTCASE(33,TestContext);
146         TESTCASE(34,TestSupplemental);
147         TESTCASE(35,TestQuantifier);
148         TESTCASE(36,TestSTV);
149         TESTCASE(37,TestCompoundInverse);
150         TESTCASE(38,TestNFDChainRBT);
151         TESTCASE(39,TestNullInverse);
152         TESTCASE(40,TestAliasInverseID);
153         TESTCASE(41,TestCompoundInverseID);
154         TESTCASE(42,TestUndefinedVariable);
155         TESTCASE(43,TestEmptyContext);
156         TESTCASE(44,TestCompoundFilterID);
157         TESTCASE(45,TestPropertySet);
158         TESTCASE(46,TestNewEngine);
159         TESTCASE(47,TestQuantifiedSegment);
160         TESTCASE(48,TestDevanagariLatinRT);
161         TESTCASE(49,TestTeluguLatinRT);
162         TESTCASE(50,TestCompoundLatinRT);
163         TESTCASE(51,TestSanskritLatinRT);
164         TESTCASE(52,TestLocaleInstantiation);
165         TESTCASE(53,TestTitleAccents);
166         TESTCASE(54,TestLocaleResource);
167         TESTCASE(55,TestParseError);
168         TESTCASE(56,TestOutputSet);
169         TESTCASE(57,TestVariableRange);
170         TESTCASE(58,TestInvalidPostContext);
171         TESTCASE(59,TestIDForms);
172         TESTCASE(60,TestToRulesMark);
173         TESTCASE(61,TestEscape);
174         TESTCASE(62,TestAnchorMasking);
175         TESTCASE(63,TestDisplayName);
176         TESTCASE(64,TestSpecialCases);
177 #if !UCONFIG_NO_FILE_IO
178         TESTCASE(65,TestIncrementalProgress);
179 #endif
180         TESTCASE(66,TestSurrogateCasing);
181         TESTCASE(67,TestFunction);
182         TESTCASE(68,TestInvalidBackRef);
183         TESTCASE(69,TestMulticharStringSet);
184         TESTCASE(70,TestUserFunction);
185         TESTCASE(71,TestAnyX);
186         TESTCASE(72,TestSourceTargetSet);
187         TESTCASE(73,TestGurmukhiDevanagari);
188         TESTCASE(74,TestPatternWhiteSpace);
189         TESTCASE(75,TestAllCodepoints);
190         TESTCASE(76,TestBoilerplate);
191         TESTCASE(77,TestAlternateSyntax);
192         TESTCASE(78,TestBeginEnd);
193         TESTCASE(79,TestBeginEndToRules);
194         TESTCASE(80,TestRegisterAlias);
195         TESTCASE(81,TestRuleStripping);
196         TESTCASE(82,TestHalfwidthFullwidth);
197         TESTCASE(83,TestThai);
198         TESTCASE(84,TestAny);
199         default: name = ""; break;
200     }
201 }
202 
203 /**
204  * Make sure every system transliterator can be instantiated.
205  *
206  * ALSO test that the result of toRules() for each rule is a valid
207  * rule.  Do this here so we don't have to have another test that
208  * instantiates everything as well.
209  */
TestInstantiation()210 void TransliteratorTest::TestInstantiation() {
211     UErrorCode ec = U_ZERO_ERROR;
212     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
213     assertSuccess("getAvailableIDs()", ec);
214     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
215     int32_t n = Transliterator::countAvailableIDs();
216     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217                avail->count(ec) == n);
218     assertSuccess("count()", ec);
219     UnicodeString name;
220     for (int32_t i=0; i<n; ++i) {
221         const UnicodeString& id = *avail->snext(ec);
222         if (!assertSuccess("snext()", ec) ||
223             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
224             break;
225         }
226         UnicodeString id2 = Transliterator::getAvailableID(i);
227         if (id.length() < 1) {
228             errln(UnicodeString("FAIL: getAvailableID(") +
229                   i + ") returned empty string");
230             continue;
231         }
232         if (id != id2) {
233             errln(UnicodeString("FAIL: getAvailableID(") +
234                   i + ") != getAvailableIDs().snext()");
235             continue;
236         }
237         UParseError parseError;
238         UErrorCode status = U_ZERO_ERROR;
239         Transliterator* t = Transliterator::createInstance(id,
240                               UTRANS_FORWARD, parseError,status);
241         name.truncate(0);
242         Transliterator::getDisplayName(id, name);
243         if (t == 0) {
244 #if UCONFIG_NO_BREAK_ITERATION
245             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246             if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
247                 id.compare((UnicodeString)"Thai-Latin") != 0)
248 #endif
249                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
250                       /*", parse error " + parseError.code +*/
251                       ", line " + parseError.line +
252                       ", offset " + parseError.offset +
253                       ", pre-context " + prettify(parseError.preContext, TRUE) +
254                       ", post-context " +prettify(parseError.postContext,TRUE) +
255                       ", Error: " + u_errorName(status));
256                 // When createInstance fails, it deletes the failing
257                 // entry from the available ID list.  We detect this
258                 // here by looking for a change in countAvailableIDs.
259             int32_t nn = Transliterator::countAvailableIDs();
260             if (nn == (n - 1)) {
261                 n = nn;
262                 --i; // Compensate for deleted entry
263             }
264         } else {
265             logln(UnicodeString("OK: ") + name + " (" + id + ")");
266 
267             // Now test toRules
268             UnicodeString rules;
269             t->toRules(rules, TRUE);
270             Transliterator *u = Transliterator::createFromRules("x",
271                                     rules, UTRANS_FORWARD, parseError,status);
272             if (u == 0) {
273                 errln(UnicodeString("FAIL: ") + id +
274                       ".createFromRules() => bad rules" +
275                       /*", parse error " + parseError.code +*/
276                       ", line " + parseError.line +
277                       ", offset " + parseError.offset +
278                       ", context " + prettify(parseError.preContext, TRUE) +
279                       ", rules: " + prettify(rules, TRUE));
280             } else {
281                 delete u;
282             }
283             delete t;
284         }
285     }
286     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
287     assertSuccess("snext()", ec);
288     delete avail;
289 
290     // Now test the failure path
291     UParseError parseError;
292     UErrorCode status = U_ZERO_ERROR;
293     UnicodeString id("<Not a valid Transliterator ID>");
294     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
295     if (t != 0) {
296         errln("FAIL: " + id + " returned a transliterator");
297         delete t;
298     } else {
299         logln("OK: Bogus ID handled properly");
300     }
301 }
302 
TestSimpleRules(void)303 void TransliteratorTest::TestSimpleRules(void) {
304     /* Example: rules 1. ab>x|y
305      *                2. yc>z
306      *
307      * []|eabcd  start - no match, copy e to tranlated buffer
308      * [e]|abcd  match rule 1 - copy output & adjust cursor
309      * [ex|y]cd  match rule 2 - copy output & adjust cursor
310      * [exz]|d   no match, copy d to transliterated buffer
311      * [exzd]|   done
312      */
313     expect(UnicodeString("ab>x|y;", "") +
314            "yc>z",
315            "eabcd", "exzd");
316 
317     /* Another set of rules:
318      *    1. ab>x|yzacw
319      *    2. za>q
320      *    3. qc>r
321      *    4. cw>n
322      *
323      * []|ab       Rule 1
324      * [x|yzacw]   No match
325      * [xy|zacw]   Rule 2
326      * [xyq|cw]    Rule 4
327      * [xyqn]|     Done
328      */
329     expect(UnicodeString("ab>x|yzacw;") +
330            "za>q;" +
331            "qc>r;" +
332            "cw>n",
333            "ab", "xyqn");
334 
335     /* Test categories
336      */
337     UErrorCode status = U_ZERO_ERROR;
338     UParseError parseError;
339     Transliterator *t = Transliterator::createFromRules(
340         "<ID>",
341         UnicodeString("$dummy=").append((UChar)0xE100) +
342         UnicodeString(";"
343                       "$vowel=[aeiouAEIOU];"
344                       "$lu=[:Lu:];"
345                       "$vowel } $lu > '!';"
346                       "$vowel > '&';"
347                       "'!' { $lu > '^';"
348                       "$lu > '*';"
349                       "a > ERROR", ""),
350         UTRANS_FORWARD, parseError,
351         status);
352     if (U_FAILURE(status)) {
353         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
354         return;
355     }
356     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
357     delete t;
358 }
359 
360 /**
361  * Test inline set syntax and set variable syntax.
362  */
TestInlineSet(void)363 void TransliteratorTest::TestInlineSet(void) {
364     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
365     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
366 
367     expect(UnicodeString(
368            "$digit = [0-9];"
369            "$alpha = [a-zA-Z];"
370            "$alphanumeric = [$digit $alpha];" // ***
371            "$special = [^$alphanumeric];"     // ***
372            "$alphanumeric > '-';"
373            "$special > '*';", ""),
374 
375            "thx-1138", "---*----");
376 }
377 
378 /**
379  * Create some inverses and confirm that they work.  We have to be
380  * careful how we do this, since the inverses will not be true
381  * inverses -- we can't throw any random string at the composition
382  * of the transliterators and expect the identity function.  F x
383  * F' != I.  However, if we are careful about the input, we will
384  * get the expected results.
385  */
TestRuleBasedInverse(void)386 void TransliteratorTest::TestRuleBasedInverse(void) {
387     UnicodeString RULES =
388         UnicodeString("abc>zyx;") +
389         "ab>yz;" +
390         "bc>zx;" +
391         "ca>xy;" +
392         "a>x;" +
393         "b>y;" +
394         "c>z;" +
395 
396         "abc<zyx;" +
397         "ab<yz;" +
398         "bc<zx;" +
399         "ca<xy;" +
400         "a<x;" +
401         "b<y;" +
402         "c<z;" +
403 
404         "";
405 
406     const char* DATA[] = {
407         // Careful here -- random strings will not work.  If we keep
408         // the left side to the domain and the right side to the range
409         // we will be okay though (left, abc; right xyz).
410         "a", "x",
411         "abcacab", "zyxxxyy",
412         "caccb", "xyzzy",
413     };
414 
415     int32_t DATA_length = UPRV_LENGTHOF(DATA);
416 
417     UErrorCode status = U_ZERO_ERROR;
418     UParseError parseError;
419     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
420                                 UTRANS_FORWARD, parseError, status);
421     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
422                                 UTRANS_REVERSE, parseError, status);
423     if (U_FAILURE(status)) {
424         errln("FAIL: RBT constructor failed");
425         return;
426     }
427     for (int32_t i=0; i<DATA_length; i+=2) {
428         expect(*fwd, DATA[i], DATA[i+1]);
429         expect(*rev, DATA[i+1], DATA[i]);
430     }
431     delete fwd;
432     delete rev;
433 }
434 
435 /**
436  * Basic test of keyboard.
437  */
TestKeyboard(void)438 void TransliteratorTest::TestKeyboard(void) {
439     UParseError parseError;
440     UErrorCode status = U_ZERO_ERROR;
441     Transliterator *t = Transliterator::createFromRules("<ID>",
442                               UnicodeString("psch>Y;")
443                               +"ps>y;"
444                               +"ch>x;"
445                               +"a>A;",
446                               UTRANS_FORWARD, parseError,
447                               status);
448     if (U_FAILURE(status)) {
449         errln("FAIL: RBT constructor failed");
450         return;
451     }
452     const char* DATA[] = {
453         // insertion, buffer
454         "a", "A",
455         "p", "Ap",
456         "s", "Aps",
457         "c", "Apsc",
458         "a", "AycA",
459         "psch", "AycAY",
460         0, "AycAY", // null means finishKeyboardTransliteration
461     };
462 
463     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
464     delete t;
465 }
466 
467 /**
468  * Basic test of keyboard with cursor.
469  */
TestKeyboard2(void)470 void TransliteratorTest::TestKeyboard2(void) {
471     UParseError parseError;
472     UErrorCode status = U_ZERO_ERROR;
473     Transliterator *t = Transliterator::createFromRules("<ID>",
474                               UnicodeString("ych>Y;")
475                               +"ps>|y;"
476                               +"ch>x;"
477                               +"a>A;",
478                               UTRANS_FORWARD, parseError,
479                               status);
480     if (U_FAILURE(status)) {
481         errln("FAIL: RBT constructor failed");
482         return;
483     }
484     const char* DATA[] = {
485         // insertion, buffer
486         "a", "A",
487         "p", "Ap",
488         "s", "Aps", // modified for rollback - "Ay",
489         "c", "Apsc", // modified for rollback - "Ayc",
490         "a", "AycA",
491         "p", "AycAp",
492         "s", "AycAps", // modified for rollback - "AycAy",
493         "c", "AycApsc", // modified for rollback - "AycAyc",
494         "h", "AycAY",
495         0, "AycAY", // null means finishKeyboardTransliteration
496     };
497 
498     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
499     delete t;
500 }
501 
502 /**
503  * Test keyboard transliteration with back-replacement.
504  */
TestKeyboard3(void)505 void TransliteratorTest::TestKeyboard3(void) {
506     // We want th>z but t>y.  Furthermore, during keyboard
507     // transliteration we want t>y then yh>z if t, then h are
508     // typed.
509     UnicodeString RULES("t>|y;"
510                         "yh>z;");
511 
512     const char* DATA[] = {
513         // Column 1: characters to add to buffer (as if typed)
514         // Column 2: expected appearance of buffer after
515         //           keyboard xliteration.
516         "a", "a",
517         "b", "ab",
518         "t", "abt", // modified for rollback - "aby",
519         "c", "abyc",
520         "t", "abyct", // modified for rollback - "abycy",
521         "h", "abycz",
522         0, "abycz", // null means finishKeyboardTransliteration
523     };
524 
525     UParseError parseError;
526     UErrorCode status = U_ZERO_ERROR;
527     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
528     if (U_FAILURE(status)) {
529         errln("FAIL: RBT constructor failed");
530         return;
531     }
532     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
533     delete t;
534 }
535 
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)536 void TransliteratorTest::keyboardAux(const Transliterator& t,
537                                      const char* DATA[], int32_t DATA_length) {
538     UErrorCode status = U_ZERO_ERROR;
539     UTransPosition index={0, 0, 0, 0};
540     UnicodeString s;
541     for (int32_t i=0; i<DATA_length; i+=2) {
542         UnicodeString log;
543         if (DATA[i] != 0) {
544             log = s + " + "
545                 + DATA[i]
546                 + " -> ";
547             t.transliterate(s, index, DATA[i], status);
548         } else {
549             log = s + " => ";
550             t.finishTransliteration(s, index);
551         }
552         // Show the start index '{' and the cursor '|'
553         UnicodeString a, b, c;
554         s.extractBetween(0, index.contextStart, a);
555         s.extractBetween(index.contextStart, index.start, b);
556         s.extractBetween(index.start, s.length(), c);
557         log.append(a).
558             append((UChar)LEFT_BRACE).
559             append(b).
560             append((UChar)PIPE).
561             append(c);
562         if (s == DATA[i+1] && U_SUCCESS(status)) {
563             logln(log);
564         } else {
565             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
566         }
567     }
568 }
569 
TestArabic(void)570 void TransliteratorTest::TestArabic(void) {
571 // Test disabled for 2.0 until new Arabic transliterator can be written.
572 //    /*
573 //    const char* DATA[] = {
574 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
575 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
576 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
577 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
578 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
579 //                  "\u062c\u0645\u064a\u0644\u0629",
580 //    };
581 //    */
582 //
583 //    UChar ar_raw[] = {
584 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
585 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
586 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
588 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
589 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
590 //    };
591 //    UnicodeString ar(ar_raw);
592 //    UErrorCode status=U_ZERO_ERROR;
593 //    UParseError parseError;
594 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
595 //    if (t == 0) {
596 //        errln("FAIL: createInstance failed");
597 //        return;
598 //    }
599 //    expect(*t, "Arabic", ar);
600 //    delete t;
601 }
602 
603 /**
604  * Compose the Kana transliterator forward and reverse and try
605  * some strings that should come out unchanged.
606  */
TestCompoundKana(void)607 void TransliteratorTest::TestCompoundKana(void) {
608     UParseError parseError;
609     UErrorCode status = U_ZERO_ERROR;
610     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
611     if (t == 0) {
612         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
613     } else {
614         expect(*t, "aaaaa", "aaaaa");
615         delete t;
616     }
617 }
618 
619 /**
620  * Compose the hex transliterators forward and reverse.
621  */
TestCompoundHex(void)622 void TransliteratorTest::TestCompoundHex(void) {
623     UParseError parseError;
624     UErrorCode status = U_ZERO_ERROR;
625     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
626     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
627     Transliterator* transab[] = { a, b };
628     Transliterator* transba[] = { b, a };
629     if (a == 0 || b == 0) {
630         errln("FAIL: construction failed");
631         delete a;
632         delete b;
633         return;
634     }
635     // Do some basic tests of a
636     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
637     // Do some basic tests of b
638     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
639 
640     Transliterator* ab = new CompoundTransliterator(transab, 2);
641     UnicodeString s("abcde", "");
642     expect(*ab, s, s);
643 
644     UnicodeString str(s);
645     a->transliterate(str);
646     Transliterator* ba = new CompoundTransliterator(transba, 2);
647     expect(*ba, str, str);
648 
649     delete ab;
650     delete ba;
651     delete a;
652     delete b;
653 }
654 
655 int gTestFilterClassID = 0;
656 /**
657  * Used by TestFiltering().
658  */
659 class TestFilter : public UnicodeFilter {
clone() const660     virtual UnicodeFunctor* clone() const {
661         return new TestFilter(*this);
662     }
contains(UChar32 c) const663     virtual UBool contains(UChar32 c) const {
664         return c != (UChar)0x0063 /*c*/;
665     }
666     // Stubs
toPattern(UnicodeString & result,UBool) const667     virtual UnicodeString& toPattern(UnicodeString& result,
668                                      UBool /*escapeUnprintable*/) const {
669         return result;
670     }
matchesIndexValue(uint8_t) const671     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
672         return FALSE;
673     }
addMatchSetTo(UnicodeSet &) const674     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
675 public:
getDynamicClassID() const676     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
677 };
678 
679 /**
680  * Do some basic tests of filtering.
681  */
TestFiltering(void)682 void TransliteratorTest::TestFiltering(void) {
683     UParseError parseError;
684     UErrorCode status = U_ZERO_ERROR;
685     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
686     if (hex == 0) {
687         errln("FAIL: createInstance(Any-Hex) failed");
688         return;
689     }
690     hex->adoptFilter(new TestFilter());
691     UnicodeString s("abcde");
692     hex->transliterate(s);
693     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
694     if (s == exp) {
695         logln(UnicodeString("Ok:   \"") + exp + "\"");
696     } else {
697         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
698     }
699 
700     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
701     UnicodeFilter *f = hex->orphanFilter();
702     if (f == NULL){
703         errln("FAIL: orphanFilter() should get a UnicodeFilter");
704     } else {
705         delete f;
706     }
707     delete hex;
708 }
709 
710 /**
711  * Test anchors
712  */
TestAnchors(void)713 void TransliteratorTest::TestAnchors(void) {
714     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
715            "aaa",
716            "012");
717     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
718            "aaa",
719            "012");
720     expect(UnicodeString("^ab  > 01 ;"
721            " ab  > |8 ;"
722            "  b  > k ;"
723            " 8x$ > 45 ;"
724            " 8x  > 77 ;", ""),
725 
726            "ababbabxabx",
727            "018k7745");
728     expect(UnicodeString("$s = [z$] ;"
729            "$s{ab    > 01 ;"
730            "   ab    > |8 ;"
731            "    b    > k ;"
732            "   8x}$s > 45 ;"
733            "   8x    > 77 ;", ""),
734 
735            "abzababbabxzabxabx",
736            "01z018k45z01x45");
737 }
738 
739 /**
740  * Test pattern quoting and escape mechanisms.
741  */
TestPatternQuoting(void)742 void TransliteratorTest::TestPatternQuoting(void) {
743     // Array of 3n items
744     // Each item is <rules>, <input>, <expected output>
745     const UnicodeString DATA[] = {
746         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
747         UnicodeString(UChar(0x4E01)),
748         "[male adult]"
749     };
750 
751     for (int32_t i=0; i<3; i+=3) {
752         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
753         UParseError parseError;
754         UErrorCode status = U_ZERO_ERROR;
755         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
756         if (U_FAILURE(status)) {
757             errln("RBT constructor failed");
758         } else {
759             expect(*t, DATA[i+1], DATA[i+2]);
760         }
761         delete t;
762     }
763 }
764 
765 /**
766  * Regression test for bugs found in Greek transliteration.
767  */
TestJ277(void)768 void TransliteratorTest::TestJ277(void) {
769     UErrorCode status = U_ZERO_ERROR;
770     UParseError parseError;
771     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
772     if (gl == NULL) {
773         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
774         return;
775     }
776 
777     UChar sigma = 0x3C3;
778     UChar upsilon = 0x3C5;
779     UChar nu = 0x3BD;
780 //    UChar PHI = 0x3A6;
781     UChar alpha = 0x3B1;
782 //    UChar omega = 0x3C9;
783 //    UChar omicron = 0x3BF;
784 //    UChar epsilon = 0x3B5;
785 
786     // sigma upsilon nu -> syn
787     UnicodeString syn;
788     syn.append(sigma).append(upsilon).append(nu);
789     expect(*gl, syn, "syn");
790 
791     // sigma alpha upsilon nu -> saun
792     UnicodeString sayn;
793     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
794     expect(*gl, sayn, "saun");
795 
796     // Again, using a smaller rule set
797     UnicodeString rules(
798                 "$alpha   = \\u03B1;"
799                 "$nu      = \\u03BD;"
800                 "$sigma   = \\u03C3;"
801                 "$ypsilon = \\u03C5;"
802                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
803                 "s <>           $sigma;"
804                 "a <>           $alpha;"
805                 "u <>  $vowel { $ypsilon;"
806                 "y <>           $ypsilon;"
807                 "n <>           $nu;",
808                 "");
809     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
810     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
811     expect(*mini, syn, "syn");
812     expect(*mini, sayn, "saun");
813     delete mini;
814     mini = NULL;
815 
816 #if !UCONFIG_NO_FORMATTING
817     // Transliterate the Greek locale data
818     Locale el("el");
819     DateFormatSymbols syms(el, status);
820     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
821     int32_t i, count;
822     const UnicodeString* data = syms.getMonths(count);
823     for (i=0; i<count; ++i) {
824         if (data[i].length() == 0) {
825             continue;
826         }
827         UnicodeString out(data[i]);
828         gl->transliterate(out);
829         UBool ok = TRUE;
830         if (data[i].length() >= 2 && out.length() >= 2 &&
831             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
832             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
833                 ok = FALSE;
834             }
835         }
836         if (ok) {
837             logln(prettify(data[i] + " -> " + out));
838         } else {
839             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
840         }
841     }
842 #endif
843 
844     delete gl;
845 }
846 
847 /**
848  * Prefix, suffix support in hex transliterators
849  */
TestJ243(void)850 void TransliteratorTest::TestJ243(void) {
851     UErrorCode ec = U_ZERO_ERROR;
852 
853     // Test default Hex-Any, which should handle
854     // \u, \U, u+, and U+
855     Transliterator *hex =
856         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
857     if (assertSuccess("getInstance", ec)) {
858         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
859     }
860     delete hex;
861 
862 //    // Try a custom Hex-Unicode
863 //    // \uXXXX and &#xXXXX;
864 //    ec = U_ZERO_ERROR;
865 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
866 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
867 //           "abcd5fx012&#x00033;");
868 //    // Try custom Any-Hex (default is tested elsewhere)
869 //    ec = U_ZERO_ERROR;
870 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
871 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
872 }
873 
874 /**
875  * Parsers need better syntax error messages.
876  */
TestJ329(void)877 void TransliteratorTest::TestJ329(void) {
878 
879     struct { UBool containsErrors; const char* rule; } DATA[] = {
880         { FALSE, "a > b; c > d" },
881         { TRUE,  "a > b; no operator; c > d" },
882     };
883     int32_t DATA_length = UPRV_LENGTHOF(DATA);
884 
885     for (int32_t i=0; i<DATA_length; ++i) {
886         UErrorCode status = U_ZERO_ERROR;
887         UParseError parseError;
888         Transliterator *rbt = Transliterator::createFromRules("<ID>",
889                                     DATA[i].rule,
890                                     UTRANS_FORWARD,
891                                     parseError,
892                                     status);
893         UBool gotError = U_FAILURE(status);
894         UnicodeString desc(DATA[i].rule);
895         desc.append(gotError ? " -> error" : " -> no error");
896         if (gotError) {
897             desc = desc + ", ParseError code=" + u_errorName(status) +
898                 " line=" + parseError.line +
899                 " offset=" + parseError.offset +
900                 " context=" + parseError.preContext;
901         }
902         if (gotError == DATA[i].containsErrors) {
903             logln(UnicodeString("Ok:   ") + desc);
904         } else {
905             errln(UnicodeString("FAIL: ") + desc);
906         }
907         delete rbt;
908     }
909 }
910 
911 /**
912  * Test segments and segment references.
913  */
TestSegments(void)914 void TransliteratorTest::TestSegments(void) {
915     // Array of 3n items
916     // Each item is <rules>, <input>, <expected output>
917     UnicodeString DATA[] = {
918         "([a-z]) '.' ([0-9]) > $2 '-' $1",
919         "abc.123.xyz.456",
920         "ab1-c23.xy4-z56",
921 
922         // nested
923         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
924         "a1 b2",
925         "a1.a.1 b2.b.2",
926     };
927     int32_t DATA_length = UPRV_LENGTHOF(DATA);
928 
929     for (int32_t i=0; i<DATA_length; i+=3) {
930         logln("Pattern: " + prettify(DATA[i]));
931         UParseError parseError;
932         UErrorCode status = U_ZERO_ERROR;
933         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
934         if (U_FAILURE(status)) {
935             errln("FAIL: RBT constructor");
936         } else {
937             expect(*t, DATA[i+1], DATA[i+2]);
938         }
939         delete t;
940     }
941 }
942 
943 /**
944  * Test cursor positioning outside of the key
945  */
TestCursorOffset(void)946 void TransliteratorTest::TestCursorOffset(void) {
947     // Array of 3n items
948     // Each item is <rules>, <input>, <expected output>
949     UnicodeString DATA[] = {
950         "pre {alpha} post > | @ ALPHA ;"
951         "eALPHA > beta ;"
952         "pre {beta} post > BETA @@ | ;"
953         "post > xyz",
954 
955         "prealphapost prebetapost",
956 
957         "prbetaxyz preBETApost",
958     };
959     int32_t DATA_length = UPRV_LENGTHOF(DATA);
960 
961     for (int32_t i=0; i<DATA_length; i+=3) {
962         logln("Pattern: " + prettify(DATA[i]));
963         UParseError parseError;
964         UErrorCode status = U_ZERO_ERROR;
965         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
966         if (U_FAILURE(status)) {
967             errln("FAIL: RBT constructor");
968         } else {
969             expect(*t, DATA[i+1], DATA[i+2]);
970         }
971         delete t;
972     }
973 }
974 
975 /**
976  * Test zero length and > 1 char length variable values.  Test
977  * use of variable refs in UnicodeSets.
978  */
TestArbitraryVariableValues(void)979 void TransliteratorTest::TestArbitraryVariableValues(void) {
980     // Array of 3n items
981     // Each item is <rules>, <input>, <expected output>
982     UnicodeString DATA[] = {
983         "$abe = ab;"
984         "$pat = x[yY]z;"
985         "$ll  = 'a-z';"
986         "$llZ = [$ll];"
987         "$llY = [$ll$pat];"
988         "$emp = ;"
989 
990         "$abe > ABE;"
991         "$pat > END;"
992         "$llZ > 1;"
993         "$llY > 2;"
994         "7$emp 8 > 9;"
995         "",
996 
997         "ab xYzxyz stY78",
998         "ABE ENDEND 1129",
999     };
1000     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1001 
1002     for (int32_t i=0; i<DATA_length; i+=3) {
1003         logln("Pattern: " + prettify(DATA[i]));
1004         UParseError parseError;
1005         UErrorCode status = U_ZERO_ERROR;
1006         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1007         if (U_FAILURE(status)) {
1008             errln("FAIL: RBT constructor");
1009         } else {
1010             expect(*t, DATA[i+1], DATA[i+2]);
1011         }
1012         delete t;
1013     }
1014 }
1015 
1016 /**
1017  * Confirm that the contextStart, contextLimit, start, and limit
1018  * behave correctly. J474.
1019  */
TestPositionHandling(void)1020 void TransliteratorTest::TestPositionHandling(void) {
1021     // Array of 3n items
1022     // Each item is <rules>, <input>, <expected output>
1023     const char* DATA[] = {
1024         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025         "xtat txtb", // pos 0,9,0,9
1026         "xTTaSS TTxUUb",
1027 
1028         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029         "xtat txtb", // pos 2,9,3,8
1030         "xtaSS TTxUUb",
1031 
1032         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033         "xtat txtb", // pos 3,8,3,8
1034         "xtaTT TTxTTb",
1035     };
1036 
1037     // Array of 4n positions -- these go with the DATA array
1038     // They are: contextStart, contextLimit, start, limit
1039     int32_t POS[] = {
1040         0, 9, 0, 9,
1041         2, 9, 3, 8,
1042         3, 8, 3, 8,
1043     };
1044 
1045     int32_t n = UPRV_LENGTHOF(DATA) / 3;
1046     for (int32_t i=0; i<n; i++) {
1047         UErrorCode status = U_ZERO_ERROR;
1048         UParseError parseError;
1049         Transliterator *t = Transliterator::createFromRules("<ID>",
1050                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1051         if (U_FAILURE(status)) {
1052             delete t;
1053             errln("FAIL: RBT constructor");
1054             return;
1055         }
1056         UTransPosition pos;
1057         pos.contextStart= POS[4*i];
1058         pos.contextLimit = POS[4*i+1];
1059         pos.start = POS[4*i+2];
1060         pos.limit = POS[4*i+3];
1061         UnicodeString rsource(DATA[3*i+1]);
1062         t->transliterate(rsource, pos, status);
1063         if (U_FAILURE(status)) {
1064             delete t;
1065             errln("FAIL: transliterate");
1066             return;
1067         }
1068         t->finishTransliteration(rsource, pos);
1069         expectAux(DATA[3*i],
1070                   DATA[3*i+1],
1071                   rsource,
1072                   DATA[3*i+2]);
1073         delete t;
1074     }
1075 }
1076 
1077 /**
1078  * Test the Hiragana-Katakana transliterator.
1079  */
TestHiraganaKatakana(void)1080 void TransliteratorTest::TestHiraganaKatakana(void) {
1081     UParseError parseError;
1082     UErrorCode status = U_ZERO_ERROR;
1083     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1084     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1085     if (hk == 0 || kh == 0) {
1086         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1087         delete hk;
1088         delete kh;
1089         return;
1090     }
1091 
1092     // Array of 3n items
1093     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094     const char* DATA[] = {
1095         "both",
1096         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097         "\\u30A2\\u30F8\\u30F2\\u30B0",
1098 
1099         "kh",
1100         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1102     };
1103     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1104 
1105     for (int32_t i=0; i<DATA_length; i+=3) {
1106         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1107         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1108         switch (*DATA[i]) {
1109         case 0x68: //'h': // Hiragana-Katakana
1110             expect(*hk, h, k);
1111             break;
1112         case 0x6B: //'k': // Katakana-Hiragana
1113             expect(*kh, k, h);
1114             break;
1115         case 0x62: //'b': // both
1116             expect(*hk, h, k);
1117             expect(*kh, k, h);
1118             break;
1119         }
1120     }
1121     delete hk;
1122     delete kh;
1123 }
1124 
1125 /**
1126  * Test cloning / copy constructor of RBT.
1127  */
TestCopyJ476(void)1128 void TransliteratorTest::TestCopyJ476(void) {
1129     // The real test here is what happens when the destructors are
1130     // called.  So we let one object get destructed, and check to
1131     // see that its copy still works.
1132     Transliterator *t2 = 0;
1133     {
1134         UParseError parseError;
1135         UErrorCode status = U_ZERO_ERROR;
1136         Transliterator *t1 = Transliterator::createFromRules("t1",
1137             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1138         if (U_FAILURE(status)) {
1139             errln("FAIL: RBT constructor");
1140             return;
1141         }
1142         t2 = t1->clone(); // Call copy constructor under the covers.
1143         expect(*t1, "abcfoofoo", "ABcbar");
1144         delete t1;
1145     }
1146     expect(*t2, "abcfoofoo", "ABcbar");
1147     delete t2;
1148 }
1149 
1150 /**
1151  * Test inter-Indic transliterators.  These are composed.
1152  * ICU4C Jitterbug 483.
1153  */
TestInterIndic(void)1154 void TransliteratorTest::TestInterIndic(void) {
1155     UnicodeString ID("Devanagari-Gujarati", "");
1156     UErrorCode status = U_ZERO_ERROR;
1157     UParseError parseError;
1158     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1159     if (dg == 0) {
1160         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1161         return;
1162     }
1163     UnicodeString id = dg->getID();
1164     if (id != ID) {
1165         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1166     }
1167     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169     expect(*dg, dev, guj);
1170     delete dg;
1171 }
1172 
1173 /**
1174  * Test filter syntax in IDs. (J918)
1175  */
TestFilterIDs(void)1176 void TransliteratorTest::TestFilterIDs(void) {
1177     // Array of 3n strings:
1178     // <id>, <inverse id>, <input>, <expected output>
1179     const char* DATA[] = {
1180         "[aeiou]Any-Hex", // ID
1181         "[aeiou]Hex-Any", // expected inverse ID
1182         "quizzical",      // src
1183         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1184 
1185         "[aeiou]Any-Hex;[^5]Hex-Any",
1186         "[^5]Any-Hex;[aeiou]Hex-Any",
1187         "quizzical",
1188         "q\\u0075izzical",
1189 
1190         "[abc]Null",
1191         "[abc]Null",
1192         "xyz",
1193         "xyz",
1194     };
1195     enum { DATA_length = UPRV_LENGTHOF(DATA) };
1196 
1197     for (int i=0; i<DATA_length; i+=4) {
1198         UnicodeString ID(DATA[i], "");
1199         UnicodeString uID(DATA[i+1], "");
1200         UnicodeString data2(DATA[i+2], "");
1201         UnicodeString data3(DATA[i+3], "");
1202         UParseError parseError;
1203         UErrorCode status = U_ZERO_ERROR;
1204         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1205         if (t == 0) {
1206             errln("FAIL: createInstance(" + ID + ") returned NULL");
1207             return;
1208         }
1209         expect(*t, data2, data3);
1210 
1211         // Check the ID
1212         if (ID != t->getID()) {
1213             errln("FAIL: createInstance(" + ID + ").getID() => " +
1214                   t->getID());
1215         }
1216 
1217         // Check the inverse
1218         Transliterator *u = t->createInverse(status);
1219         if (u == 0) {
1220             errln("FAIL: " + ID + ".createInverse() returned NULL");
1221         } else if (u->getID() != uID) {
1222             errln("FAIL: " + ID + ".createInverse().getID() => " +
1223                   u->getID() + ", expected " + uID);
1224         }
1225 
1226         delete t;
1227         delete u;
1228     }
1229 }
1230 
1231 /**
1232  * Test the case mapping transliterators.
1233  */
TestCaseMap(void)1234 void TransliteratorTest::TestCaseMap(void) {
1235     UParseError parseError;
1236     UErrorCode status = U_ZERO_ERROR;
1237     Transliterator* toUpper =
1238         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239     Transliterator* toLower =
1240         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241     Transliterator* toTitle =
1242         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1243     if (toUpper==0 || toLower==0 || toTitle==0) {
1244         errln("FAIL: createInstance returned NULL");
1245         delete toUpper;
1246         delete toLower;
1247         delete toTitle;
1248         return;
1249     }
1250 
1251     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1252            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254            "the quick brown foX jumped over the lazY dogs.");
1255     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1256            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1257 
1258     delete toUpper;
1259     delete toLower;
1260     delete toTitle;
1261 }
1262 
1263 /**
1264  * Test the name mapping transliterators.
1265  */
TestNameMap(void)1266 void TransliteratorTest::TestNameMap(void) {
1267     UParseError parseError;
1268     UErrorCode status = U_ZERO_ERROR;
1269     Transliterator* uni2name =
1270         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1271     Transliterator* name2uni =
1272         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1273     if (uni2name==0 || name2uni==0) {
1274         errln("FAIL: createInstance returned NULL");
1275         delete uni2name;
1276         delete name2uni;
1277         return;
1278     }
1279 
1280     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1283     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1285 
1286     delete uni2name;
1287     delete name2uni;
1288 
1289     // round trip
1290     Transliterator* t =
1291         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1292     if (t==0) {
1293         errln("FAIL: createInstance returned NULL");
1294         delete t;
1295         return;
1296     }
1297 
1298     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1300     expect(*t, s, s);
1301     delete t;
1302 }
1303 
1304 /**
1305  * Test liberalized ID syntax.  1006c
1306  */
TestLiberalizedID(void)1307 void TransliteratorTest::TestLiberalizedID(void) {
1308     // Some test cases have an expected getID() value of NULL.  This
1309     // means I have disabled the test case for now.  This stuff is
1310     // still under development, and I haven't decided whether to make
1311     // getID() return canonical case yet.  It will all get rewritten
1312     // with the move to Source-Target/Variant IDs anyway. [aliu]
1313     const char* DATA[] = {
1314         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1315         "  Null  ", "Null", "whitespace",
1316         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1317         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1318     };
1319     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1320     UParseError parseError;
1321     UErrorCode status= U_ZERO_ERROR;
1322     for (int32_t i=0; i<DATA_length; i+=3) {
1323         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1324         if (t == 0) {
1325             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1326                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1327         } else {
1328             UnicodeString exp;
1329             if (DATA[i+1]) {
1330                 exp = UnicodeString(DATA[i+1], "");
1331             }
1332             // Don't worry about getID() if the expected char*
1333             // is NULL -- see above.
1334             if (exp.length() == 0 || exp == t->getID()) {
1335                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1336                       " create ID \"" + DATA[i] + "\" => \"" +
1337                       exp + "\"");
1338             } else {
1339                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1340                       " create ID \"" + DATA[i] + "\" => \"" +
1341                       t->getID() + "\", exp \"" + exp + "\"");
1342             }
1343             delete t;
1344         }
1345     }
1346 }
1347 
1348 /* test for Jitterbug 912 */
TestCreateInstance()1349 void TransliteratorTest::TestCreateInstance(){
1350     const char* FORWARD = "F";
1351     const char* REVERSE = "R";
1352     const char* DATA[] = {
1353         // Column 1: id
1354         // Column 2: direction
1355         // Column 3: expected ID, or "" if expect failure
1356         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1357 
1358         // JB#2689: bad compound causes crash
1359         "InvalidSource-InvalidTarget", FORWARD, "",
1360         "InvalidSource-InvalidTarget", REVERSE, "",
1361         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1362         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1363         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1364         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1365 
1366         NULL
1367     };
1368 
1369     for (int32_t i=0; DATA[i]; i+=3) {
1370         UParseError err;
1371         UErrorCode ec = U_ZERO_ERROR;
1372         UnicodeString id(DATA[i]);
1373         UTransDirection dir = (DATA[i+1]==FORWARD)?
1374             UTRANS_FORWARD:UTRANS_REVERSE;
1375         UnicodeString expID(DATA[i+2]);
1376         Transliterator* t =
1377             Transliterator::createInstance(id,dir,err,ec);
1378         UnicodeString newID;
1379         if (t) {
1380             newID = t->getID();
1381         }
1382         UBool ok = (newID == expID);
1383         if (!t) {
1384             newID = u_errorName(ec);
1385         }
1386         if (ok) {
1387             logln((UnicodeString)"Ok: createInstance(" +
1388                   id + "," + DATA[i+1] + ") => " + newID);
1389         } else {
1390             dataerrln((UnicodeString)"FAIL: createInstance(" +
1391                   id + "," + DATA[i+1] + ") => " + newID +
1392                   ", expected " + expID);
1393         }
1394         delete t;
1395     }
1396 }
1397 
1398 /**
1399  * Test the normalization transliterator.
1400  */
TestNormalizationTransliterator()1401 void TransliteratorTest::TestNormalizationTransliterator() {
1402     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404     const char* CANON[] = {
1405         // Input               Decomposed            Composed
1406         "cat",                "cat",                "cat"               ,
1407         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1408 
1409         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1410         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1411 
1412         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1413         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1414         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1415 
1416         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1418 
1419         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1420         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1421         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1422 
1423         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1424         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1425 
1426         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1427         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1428 
1429         "Henry IV",           "Henry IV",           "Henry IV"          ,
1430         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1431 
1432         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1433         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1434         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1435         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1436         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1437 
1438         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1439         0 // end
1440     };
1441 
1442     const char* COMPAT[] = {
1443         // Input               Decomposed            Composed
1444         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1445 
1446         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1447         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1448 
1449         "Henry IV",           "Henry IV",           "Henry IV"          ,
1450         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1451 
1452         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1453         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1454 
1455         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1456         0 // end
1457     };
1458 
1459     int32_t i;
1460     UParseError parseError;
1461     UErrorCode status = U_ZERO_ERROR;
1462     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1463     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1464     if (!NFD || !NFC) {
1465         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1466         delete NFD;
1467         delete NFC;
1468         return;
1469     }
1470     for (i=0; CANON[i]; i+=3) {
1471         UnicodeString in = CharsToUnicodeString(CANON[i]);
1472         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1473         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1474         expect(*NFD, in, expd);
1475         expect(*NFC, in, expc);
1476     }
1477     delete NFD;
1478     delete NFC;
1479 
1480     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1481     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1482     if (!NFKD || !NFKC) {
1483         dataerrln("FAIL: createInstance failed");
1484         delete NFKD;
1485         delete NFKC;
1486         return;
1487     }
1488     for (i=0; COMPAT[i]; i+=3) {
1489         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1490         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1491         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1492         expect(*NFKD, in, expkd);
1493         expect(*NFKC, in, expkc);
1494     }
1495     delete NFKD;
1496     delete NFKC;
1497 
1498     UParseError pe;
1499     status = U_ZERO_ERROR;
1500     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1501                                                        UTRANS_FORWARD,
1502                                                        pe, status);
1503     if (t == 0) {
1504         errln("FAIL: createInstance failed");
1505     }
1506     expect(*t, CharsToUnicodeString("\\u010dx"),
1507            CharsToUnicodeString("c\\u030C"));
1508     delete t;
1509 }
1510 
1511 /**
1512  * Test compound RBT rules.
1513  */
TestCompoundRBT(void)1514 void TransliteratorTest::TestCompoundRBT(void) {
1515     // Careful with spacing and ';' here:  Phrase this exactly
1516     // as toRules() is going to return it.  If toRules() changes
1517     // with regard to spacing or ';', then adjust this string.
1518     UnicodeString rule("::Hex-Any;\n"
1519                        "::Any-Lower;\n"
1520                        "a > '.A.';\n"
1521                        "b > '.B.';\n"
1522                        "::[^t]Any-Upper;", "");
1523     UParseError parseError;
1524     UErrorCode status = U_ZERO_ERROR;
1525     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1526     if (t == 0) {
1527         errln("FAIL: createFromRules failed");
1528         return;
1529     }
1530     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1532     UnicodeString r;
1533     t->toRules(r, TRUE);
1534     if (r == rule) {
1535         logln((UnicodeString)"OK: toRules() => " + r);
1536     } else {
1537         errln((UnicodeString)"FAIL: toRules() => " + r +
1538               ", expected " + rule);
1539     }
1540     delete t;
1541 
1542     // Now test toRules
1543     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1544     if (t == 0) {
1545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1546         return;
1547     }
1548     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549     t->toRules(r, TRUE);
1550     if (r != exp) {
1551         errln((UnicodeString)"FAIL: toRules() => " + r +
1552               ", expected " + exp);
1553     } else {
1554         logln((UnicodeString)"OK: toRules() => " + r);
1555     }
1556     delete t;
1557 
1558     // Round trip the result of toRules
1559     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1560     if (t == 0) {
1561         errln("FAIL: createFromRules #2 failed");
1562         return;
1563     } else {
1564         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1565     }
1566 
1567     // Test toRules again
1568     t->toRules(r, TRUE);
1569     if (r != exp) {
1570         errln((UnicodeString)"FAIL: toRules() => " + r +
1571               ", expected " + exp);
1572     } else {
1573         logln((UnicodeString)"OK: toRules() => " + r);
1574     }
1575 
1576     delete t;
1577 
1578     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1579     // to what the regenerated ID will look like.
1580     UnicodeString id("Upper(Lower);(NFKC)", "");
1581     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1582     if (t == 0) {
1583         errln("FAIL: createInstance #2 failed");
1584         return;
1585     }
1586     if (t->getID() == id) {
1587         logln((UnicodeString)"OK: created " + id);
1588     } else {
1589         errln((UnicodeString)"FAIL: createInstance(" + id +
1590               ").getID() => " + t->getID());
1591     }
1592 
1593     Transliterator *u = t->createInverse(status);
1594     if (u == 0) {
1595         errln("FAIL: createInverse failed");
1596         delete t;
1597         return;
1598     }
1599     exp = "NFKC();Lower(Upper)";
1600     if (u->getID() == exp) {
1601         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1602               u->getID());
1603     } else {
1604         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1605               u->getID());
1606     }
1607     delete t;
1608     delete u;
1609 }
1610 
1611 /**
1612  * Compound filter semantics were orginially not implemented
1613  * correctly.  Originally, each component filter f(i) is replaced by
1614  * f'(i) = f(i) && g, where g is the filter for the compound
1615  * transliterator.
1616  *
1617  * From Mark:
1618  *
1619  * Suppose and I have a transliterator X. Internally X is
1620  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1621  *
1622  * The compound should convert all greek characters (through latin) to
1623  * cyrillic, then lowercase the result. The filter should say "don't
1624  * touch 'A' in the original". But because an intermediate result
1625  * happens to go through "A", the Greek Alpha gets hung up.
1626  */
TestCompoundFilter(void)1627 void TransliteratorTest::TestCompoundFilter(void) {
1628     UParseError parseError;
1629     UErrorCode status = U_ZERO_ERROR;
1630     Transliterator *t = Transliterator::createInstance
1631         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1632     if (t == 0) {
1633         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1634         return;
1635     }
1636     t->adoptFilter(new UnicodeSet("[^A]", status));
1637     if (U_FAILURE(status)) {
1638         errln("FAIL: UnicodeSet ct failed");
1639         delete t;
1640         return;
1641     }
1642 
1643     // Only the 'A' at index 1 should remain unchanged
1644     expect(*t,
1645            CharsToUnicodeString("BA\\u039A\\u0391"),
1646            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1647     delete t;
1648 }
1649 
TestRemove(void)1650 void TransliteratorTest::TestRemove(void) {
1651     UParseError parseError;
1652     UErrorCode status = U_ZERO_ERROR;
1653     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1654     if (t == 0) {
1655         errln("FAIL: createInstance failed");
1656         return;
1657     }
1658 
1659     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1660 
1661     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662     // duplicating the filter
1663     Transliterator* t2 = t->clone();
1664     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1665 
1666     delete t;
1667     delete t2;
1668 }
1669 
TestToRules(void)1670 void TransliteratorTest::TestToRules(void) {
1671     const char* RBT = "rbt";
1672     const char* SET = "set";
1673     static const char* DATA[] = {
1674         RBT,
1675         "$a=\\u4E61; [$a] > A;",
1676         "[\\u4E61] > A;",
1677 
1678         RBT,
1679         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680         "[[:Zs:][:Zl:]]{a} > A;",
1681 
1682         SET,
1683         "[[:Zs:][:Zl:]]",
1684         "[[:Zs:][:Zl:]]",
1685 
1686         SET,
1687         "[:Ps:]",
1688         "[:Ps:]",
1689 
1690         SET,
1691         "[:L:]",
1692         "[:L:]",
1693 
1694         SET,
1695         "[[:L:]-[A]]",
1696         "[[:L:]-[A]]",
1697 
1698         SET,
1699         "[~[:Lu:][:Ll:]]",
1700         "[~[:Lu:][:Ll:]]",
1701 
1702         SET,
1703         "[~[a-z]]",
1704         "[~[a-z]]",
1705 
1706         RBT,
1707         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708         "[^[:Zs:]]{a} > A;",
1709 
1710         RBT,
1711         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712         "[[a-z]-[:Zs:]]{a} > A;",
1713 
1714         RBT,
1715         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716         "[[:Zs:]&[a-z]]{a} > A;",
1717 
1718         RBT,
1719         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720         "[x[:Zs:]]{a} > A;",
1721 
1722         RBT,
1723         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724         "$macron = \\u0304 ;"
1725         "$evowel = [aeiouyAEIOUY] ;"
1726         "$iotasub = \\u0345 ;"
1727         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729 
1730         RBT,
1731         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733     };
1734     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1735 
1736     for (int32_t d=0; d < DATA_length; d+=3) {
1737         if (DATA[d] == RBT) {
1738             // Transliterator test
1739             UParseError parseError;
1740             UErrorCode status = U_ZERO_ERROR;
1741             Transliterator *t = Transliterator::createFromRules("ID",
1742                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1743             if (t == 0) {
1744                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1745                 return;
1746             }
1747             UnicodeString rules, escapedRules;
1748             t->toRules(rules, FALSE);
1749             t->toRules(escapedRules, TRUE);
1750             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1751             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1752             if (rules == expRules) {
1753                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                       " => " + rules);
1755             } else {
1756                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1757                       " => " + rules + ", exp " + expRules);
1758             }
1759             if (escapedRules == expEscapedRules) {
1760                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                       " => " + escapedRules);
1762             } else {
1763                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1764                       " => " + escapedRules + ", exp " + expEscapedRules);
1765             }
1766             delete t;
1767 
1768         } else {
1769             // UnicodeSet test
1770             UErrorCode status = U_ZERO_ERROR;
1771             UnicodeString pat(DATA[d+1], -1, US_INV);
1772             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1773             UnicodeSet set(pat, status);
1774             if (U_FAILURE(status)) {
1775                 errln("FAIL: UnicodeSet ct failed");
1776                 return;
1777             }
1778             // Adjust spacing etc. as necessary.
1779             UnicodeString toPat;
1780             set.toPattern(toPat);
1781             if (expToPat == toPat) {
1782                 logln((UnicodeString)"Ok: " + pat +
1783                       " => " + toPat);
1784             } else {
1785                 errln((UnicodeString)"FAIL: " + pat +
1786                       " => " + prettify(toPat, TRUE) +
1787                       ", exp " + prettify(pat, TRUE));
1788             }
1789         }
1790     }
1791 }
1792 
TestContext()1793 void TransliteratorTest::TestContext() {
1794     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1795     expect("de > x; {d}e > y;",
1796            "de",
1797            "ye",
1798            &pos);
1799 
1800     expect("ab{c} > z;",
1801            "xadabdabcy",
1802            "xadabdabzy");
1803 }
1804 
TestSupplemental()1805 void TransliteratorTest::TestSupplemental() {
1806 
1807     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1808                                 "a > $a; $s > i;"),
1809            CharsToUnicodeString("ab\\U0001030Fx"),
1810            CharsToUnicodeString("\\U00010300bix"));
1811 
1812     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1814                                 "($a)($b) > $2 $1;"),
1815            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1817 
1818     // k|ax\\U00010300xm
1819 
1820     // k|a\\U00010400\\U00010300xm
1821     // ky|\\U00010400\\U00010300xm
1822     // ky\\U00010400|\\U00010300xm
1823 
1824     // ky\\U00010400|\\U00010300\\U00010400m
1825     // ky\\U00010400y|\\U00010400m
1826     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827                                 "$a {x} > | @ \\U00010400;"
1828                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829            CharsToUnicodeString("kax\\U00010300xm"),
1830            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1831 
1832     expectT("Any-Name",
1833            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1835 
1836     expectT("Any-Hex/Unicode",
1837            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1839 
1840     expectT("Any-Hex/C",
1841            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1843 
1844     expectT("Any-Hex/Perl",
1845            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1847 
1848     expectT("Any-Hex/Java",
1849            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1851 
1852     expectT("Any-Hex/XML",
1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1855 
1856     expectT("Any-Hex/XML10",
1857            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858            "&#66352;&#1113856;&#917601;&#160;");
1859 
1860     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1863 }
1864 
TestQuantifier()1865 void TransliteratorTest::TestQuantifier() {
1866 
1867     // Make sure @ in a quantified anteContext works
1868     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869            "AAAAAb",
1870            "aaa(aac)");
1871 
1872     // Make sure @ in a quantified postContext works
1873     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874            "baaaaa",
1875            "caa(aaa)");
1876 
1877     // Make sure @ in a quantified postContext with seg ref works
1878     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879            "baaaaa",
1880            "baa(aaa)");
1881 
1882     // Make sure @ past ante context doesn't enter ante context
1883     UTransPosition pos = {0, 5, 3, 5};
1884     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1885            "xxxab",
1886            "xxx(ac)",
1887            &pos);
1888 
1889     // Make sure @ past post context doesn't pass limit
1890     UTransPosition pos2 = {0, 4, 0, 2};
1891     expect("{b} a+ > c @@ |; x > y; a > A;",
1892            "baxx",
1893            "caxx",
1894            &pos2);
1895 
1896     // Make sure @ past post context doesn't enter post context
1897     expect("{b} a+ > c @@ |; x > y; a > A;",
1898            "baxx",
1899            "cayy");
1900 
1901     expect("(ab)? c > d;",
1902            "c abc ababc",
1903            "d d abd");
1904 
1905     // NOTE: The (ab)+ when referenced just yields a single "ab",
1906     // not the full sequence of them.  This accords with perl behavior.
1907     expect("(ab)+ {x} > '(' $1 ')';",
1908            "x abx ababxy",
1909            "x ab(ab) abab(ab)y");
1910 
1911     expect("b+ > x;",
1912            "ac abc abbc abbbc",
1913            "ac axc axc axc");
1914 
1915     expect("[abc]+ > x;",
1916            "qac abrc abbcs abtbbc",
1917            "qx xrx xs xtx");
1918 
1919     expect("q{(ab)+} > x;",
1920            "qa qab qaba qababc qaba",
1921            "qa qx qxa qxc qxa");
1922 
1923     expect("q(ab)* > x;",
1924            "qa qab qaba qababc",
1925            "xa x xa xc");
1926 
1927     // NOTE: The (ab)+ when referenced just yields a single "ab",
1928     // not the full sequence of them.  This accords with perl behavior.
1929     expect("q(ab)* > '(' $1 ')';",
1930            "qa qab qaba qababc",
1931            "()a (ab) (ab)a (ab)c");
1932 
1933     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1934     // quoted string
1935     expect("'ab'+ > x;",
1936            "bb ab ababb",
1937            "bb x xb");
1938 
1939     // $foo+ and $foo* -- the quantifier should apply to the entire
1940     // variable reference
1941     expect("$var = ab; $var+ > x;",
1942            "bb ab ababb",
1943            "bb x xb");
1944 }
1945 
1946 class TestTrans : public Transliterator {
1947 public:
TestTrans(const UnicodeString & id)1948     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1949     }
clone(void) const1950     virtual Transliterator* clone(void) const {
1951         return new TestTrans(getID());
1952     }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1953     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1954         UBool /*isIncremental*/) const
1955     {
1956         offsets.start = offsets.limit;
1957     }
1958     virtual UClassID getDynamicClassID() const;
1959     static UClassID U_EXPORT2 getStaticClassID();
1960 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1962 
1963 /**
1964  * Test Source-Target/Variant.
1965  */
1966 void TransliteratorTest::TestSTV(void) {
1967     int32_t ns = Transliterator::countAvailableSources();
1968     if (ns < 0 || ns > 255) {
1969         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1970         return;
1971     }
1972     int32_t i, j;
1973     for (i=0; i<ns; ++i) {
1974         UnicodeString source;
1975         Transliterator::getAvailableSource(i, source);
1976         logln((UnicodeString)"" + i + ": " + source);
1977         if (source.length() == 0) {
1978             errln("FAIL: empty source");
1979             continue;
1980         }
1981         int32_t nt = Transliterator::countAvailableTargets(source);
1982         if (nt < 0 || nt > 255) {
1983             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1984             continue;
1985         }
1986         for (int32_t j=0; j<nt; ++j) {
1987             UnicodeString target;
1988             Transliterator::getAvailableTarget(j, source, target);
1989             logln((UnicodeString)" " + j + ": " + target);
1990             if (target.length() == 0) {
1991                 errln("FAIL: empty target");
1992                 continue;
1993             }
1994             int32_t nv = Transliterator::countAvailableVariants(source, target);
1995             if (nv < 0 || nv > 255) {
1996                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1997                 continue;
1998             }
1999             for (int32_t k=0; k<nv; ++k) {
2000                 UnicodeString variant;
2001                 Transliterator::getAvailableVariant(k, source, target, variant);
2002                 if (variant.length() == 0) {
2003                     logln((UnicodeString)"  " + k + ": <empty>");
2004                 } else {
2005                     logln((UnicodeString)"  " + k + ": " + variant);
2006                 }
2007             }
2008         }
2009     }
2010 
2011     // Test registration
2012     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2015     for (i=0; i<3; ++i) {
2016         Transliterator *t = new TestTrans(IDS[i]);
2017         if (t == 0) {
2018             errln("FAIL: out of memory");
2019             return;
2020         }
2021         if (t->getID() != IDS[i]) {
2022             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2023             delete t;
2024             return;
2025         }
2026         Transliterator::registerInstance(t);
2027         UErrorCode status = U_ZERO_ERROR;
2028         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029         if (t == NULL) {
2030             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2031                   IDS[i]);
2032         } else {
2033             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2034                   IDS[i]);
2035             delete t;
2036         }
2037         Transliterator::unregister(IDS[i]);
2038         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2039         if (t != NULL) {
2040             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2041                   IDS[i]);
2042             delete t;
2043         }
2044     }
2045 
2046     // Make sure getAvailable API reflects removal
2047     int32_t n = Transliterator::countAvailableIDs();
2048     for (i=0; i<n; ++i) {
2049         UnicodeString id = Transliterator::getAvailableID(i);
2050         for (j=0; j<3; ++j) {
2051             if (id.caseCompare(FULL_IDS[j],0)==0) {
2052                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2053             }
2054         }
2055     }
2056     n = Transliterator::countAvailableTargets("Any");
2057     for (i=0; i<n; ++i) {
2058         UnicodeString t;
2059         Transliterator::getAvailableTarget(i, "Any", t);
2060         if (t.caseCompare(IDS[0],0)==0) {
2061             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2062         }
2063     }
2064     n = Transliterator::countAvailableSources();
2065     for (i=0; i<n; ++i) {
2066         UnicodeString s;
2067         Transliterator::getAvailableSource(i, s);
2068         for (j=0; j<3; ++j) {
2069             if (SOURCES[j] == NULL) continue;
2070             if (s.caseCompare(SOURCES[j],0)==0) {
2071                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2072             }
2073         }
2074     }
2075 }
2076 
2077 /**
2078  * Test inverse of Greek-Latin; Title()
2079  */
TestCompoundInverse(void)2080 void TransliteratorTest::TestCompoundInverse(void) {
2081     UParseError parseError;
2082     UErrorCode status = U_ZERO_ERROR;
2083     Transliterator *t = Transliterator::createInstance
2084         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2085     if (t == 0) {
2086         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2087         return;
2088     }
2089     UnicodeString exp("(Title);Latin-Greek");
2090     if (t->getID() == exp) {
2091         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092               t->getID());
2093     } else {
2094         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095               t->getID() + "\", expected \"" + exp + "\"");
2096     }
2097     delete t;
2098 }
2099 
2100 /**
2101  * Test NFD chaining with RBT
2102  */
TestNFDChainRBT()2103 void TransliteratorTest::TestNFDChainRBT() {
2104     UParseError pe;
2105     UErrorCode ec = U_ZERO_ERROR;
2106     Transliterator* t = Transliterator::createFromRules(
2107                                "TEST", "::NFD; aa > Q; a > q;",
2108                                UTRANS_FORWARD, pe, ec);
2109     if (t == NULL || U_FAILURE(ec)) {
2110         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2111         return;
2112     }
2113     expect(*t, "aa", "Q");
2114     delete t;
2115 
2116     // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=-    UnicodeString s, s2;
2118 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=-    expect(*t, s, s2);
2122 //=-    delete t;
2123 //=-
2124 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=-    expect(*t, s2, s);
2126 //=-    delete t;
2127 //=-
2128 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=-    expect(*t, s, s);
2131 //=-    delete t;
2132 
2133 //    const char* source[] = {
2134 //        /*
2135 //        "\\u015Br\\u012Bmad",
2136 //        "bhagavadg\\u012Bt\\u0101",
2137 //        "adhy\\u0101ya",
2138 //        "arjuna",
2139 //        "vi\\u1E63\\u0101da",
2140 //        "y\\u014Dga",
2141 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 //        "uv\\u0101cr\\u0325",
2143 //        */
2144 //        "rmk\\u1E63\\u0113t",
2145 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2146 //        /*
2147 //        "kuruk\\u1E63\\u0113tr\\u0113",
2148 //        "samav\\u0113t\\u0101",
2149 //        "yuyutsava-\\u1E25",
2150 //        "m\\u0101mak\\u0101-\\u1E25",
2151 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2152 //        "kimakurvata",
2153 //        "san\\u0304java",
2154 //        */
2155 //
2156 //        0
2157 //    };
2158 //    const char* expected[] = {
2159 //        /*
2160 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 //        "\\u092f\\u094b\\u0917",
2166 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168 //        */
2169 //        "\\u0927",
2170 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 //        /*
2172 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 //        "\\u0938\\u0902\\u091c\\u0935",
2179 //        */
2180 //        0
2181 //    };
2182 //    UErrorCode status = U_ZERO_ERROR;
2183 //    UParseError parseError;
2184 //    UnicodeString message;
2185 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 //    if(U_FAILURE(status)){
2188 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2189 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 //        delete latinToDevToLatin;
2191 //        delete devToLatinToDev;
2192 //        return;
2193 //    }
2194 //    UnicodeString gotResult;
2195 //    for(int i= 0; source[i] != 0; i++){
2196 //        gotResult = source[i];
2197 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2199 //    }
2200 //    delete latinToDevToLatin;
2201 //    delete devToLatinToDev;
2202 }
2203 
2204 /**
2205  * Inverse of "Null" should be "Null". (J21)
2206  */
TestNullInverse()2207 void TransliteratorTest::TestNullInverse() {
2208     UParseError pe;
2209     UErrorCode ec = U_ZERO_ERROR;
2210     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2211     if (t == 0 || U_FAILURE(ec)) {
2212         errln("FAIL: createInstance");
2213         return;
2214     }
2215     Transliterator *u = t->createInverse(ec);
2216     if (u == 0 || U_FAILURE(ec)) {
2217         errln("FAIL: createInverse");
2218         delete t;
2219         return;
2220     }
2221     if (u->getID() != "Null") {
2222         errln("FAIL: Inverse of Null should be Null");
2223     }
2224     delete t;
2225     delete u;
2226 }
2227 
2228 /**
2229  * Check ID of inverse of alias. (J22)
2230  */
TestAliasInverseID()2231 void TransliteratorTest::TestAliasInverseID() {
2232     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2233     UParseError pe;
2234     UErrorCode ec = U_ZERO_ERROR;
2235     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2236     if (t == 0 || U_FAILURE(ec)) {
2237         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2238         return;
2239     }
2240     Transliterator *u = t->createInverse(ec);
2241     if (u == 0 || U_FAILURE(ec)) {
2242         errln("FAIL: createInverse");
2243         delete t;
2244         return;
2245     }
2246     UnicodeString exp = "Hangul-Latin";
2247     UnicodeString got = u->getID();
2248     if (got != exp) {
2249         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2250               ", expected " + exp);
2251     }
2252     delete t;
2253     delete u;
2254 }
2255 
2256 /**
2257  * Test IDs of inverses of compound transliterators. (J20)
2258  */
TestCompoundInverseID()2259 void TransliteratorTest::TestCompoundInverseID() {
2260     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2261     UParseError pe;
2262     UErrorCode ec = U_ZERO_ERROR;
2263     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2264     if (t == 0 || U_FAILURE(ec)) {
2265         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2266         return;
2267     }
2268     Transliterator *u = t->createInverse(ec);
2269     if (u == 0 || U_FAILURE(ec)) {
2270         errln("FAIL: createInverse");
2271         delete t;
2272         return;
2273     }
2274     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2275     UnicodeString got = u->getID();
2276     if (got != exp) {
2277         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2278               ", expected " + exp);
2279     }
2280     delete t;
2281     delete u;
2282 }
2283 
2284 /**
2285  * Test undefined variable.
2286 
2287  */
TestUndefinedVariable()2288 void TransliteratorTest::TestUndefinedVariable() {
2289     UnicodeString rule = "$initial } a <> \\u1161;";
2290     UParseError pe;
2291     UErrorCode ec = U_ZERO_ERROR;
2292     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2293     delete t;
2294     if (U_FAILURE(ec)) {
2295         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2296               u_errorName(ec));
2297         return;
2298     }
2299     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2300           u_errorName(ec));
2301 }
2302 
2303 /**
2304  * Test empty context.
2305  */
TestEmptyContext()2306 void TransliteratorTest::TestEmptyContext() {
2307     expect(" { a } > b;", "xay a ", "xby b ");
2308 }
2309 
2310 /**
2311 * Test compound filter ID syntax
2312 */
TestCompoundFilterID(void)2313 void TransliteratorTest::TestCompoundFilterID(void) {
2314     static const char* DATA[] = {
2315         // Col. 1 = ID or rule set (latter must start with #)
2316 
2317         // = columns > 1 are null if expect col. 1 to be illegal =
2318 
2319         // Col. 2 = direction, "F..." or "R..."
2320         // Col. 3 = source string
2321         // Col. 4 = exp result
2322 
2323         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2324         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2325         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329         NULL,
2330     };
2331 
2332     for (int32_t i=0; DATA[i]; i+=4) {
2333         UnicodeString id = CharsToUnicodeString(DATA[i]);
2334         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2335             UTRANS_REVERSE : UTRANS_FORWARD;
2336         UnicodeString source;
2337         UnicodeString exp;
2338         if (DATA[i+2] != NULL) {
2339             source = CharsToUnicodeString(DATA[i+2]);
2340             exp = CharsToUnicodeString(DATA[i+3]);
2341         }
2342         UBool expOk = (DATA[i+1] != NULL);
2343         Transliterator* t = NULL;
2344         UParseError pe;
2345         UErrorCode ec = U_ZERO_ERROR;
2346         if (id.charAt(0) == 0x23/*#*/) {
2347             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2348         } else {
2349             t = Transliterator::createInstance(id, direction, pe, ec);
2350         }
2351         UBool ok = (t != NULL && U_SUCCESS(ec));
2352         UnicodeString transID;
2353         if (t!=0) {
2354             transID = t->getID();
2355         }
2356         else {
2357             transID = UnicodeString("NULL", "");
2358         }
2359         if (ok == expOk) {
2360             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2361                   u_errorName(ec));
2362             if (source.length() != 0) {
2363                 expect(*t, source, exp);
2364             }
2365             delete t;
2366         } else {
2367             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368                   u_errorName(ec));
2369         }
2370     }
2371 }
2372 
2373 /**
2374  * Test new property set syntax
2375  */
TestPropertySet()2376 void TransliteratorTest::TestPropertySet() {
2377     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380 }
2381 
2382 /**
2383  * Test various failure points of the new 2.0 engine.
2384  */
TestNewEngine()2385 void TransliteratorTest::TestNewEngine() {
2386     UParseError pe;
2387     UErrorCode ec = U_ZERO_ERROR;
2388     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389     if (t == 0 || U_FAILURE(ec)) {
2390         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391         return;
2392     }
2393     // Katakana should be untouched
2394     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396 
2397     delete t;
2398 
2399 #if 1
2400     // This test will only work if Transliterator.ROLLBACK is
2401     // true.  Otherwise, this test will fail, revealing a
2402     // limitation of global filters in incremental mode.
2403     Transliterator *a =
2404         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405     Transliterator *A =
2406         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407     if (U_FAILURE(ec)) {
2408         delete a;
2409         delete A;
2410         return;
2411     }
2412 
2413     Transliterator* array[3];
2414     array[0] = a;
2415     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416     array[2] = A;
2417     if (U_FAILURE(ec)) {
2418         errln("FAIL: createInstance NFD");
2419         delete a;
2420         delete A;
2421         delete array[1];
2422         return;
2423     }
2424 
2425     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426     if (U_FAILURE(ec)) {
2427         errln("FAIL: UnicodeSet constructor");
2428         delete a;
2429         delete A;
2430         delete array[1];
2431         delete t;
2432         return;
2433     }
2434 
2435     expect(*t, "aAaA", "bAbA");
2436 
2437     assertTrue("countElements", t->countElements() == 3);
2438     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441     assertSuccess("getElement", ec);
2442 
2443     delete a;
2444     delete A;
2445     delete array[1];
2446     delete t;
2447 #endif
2448 
2449     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450            "a",
2451            "ax");
2452 
2453     UnicodeString gr = CharsToUnicodeString(
2454         "$ddot = \\u0308 ;"
2455         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456         "$rough = \\u0314 ;"
2457         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458         "\\u03b1 <> a ;"
2459         "$rough <> h ;");
2460 
2461     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 }
2463 
2464 /**
2465  * Test quantified segment behavior.  We want:
2466  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467  */
TestQuantifiedSegment(void)2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2469     // The normal case
2470     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471 
2472     // The tricky case; the quantifier is around the segment
2473     expect("([abc])+ > x $1 x;", "cba", "xax");
2474 
2475     // Tricky case in reverse direction
2476     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477 
2478     // Check post-context segment
2479     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480 
2481     // Test toRule/toPattern for non-quantified segment.
2482     // Careful with spacing here.
2483     UnicodeString r("([a-c]){q} > x $1 x;");
2484     UParseError pe;
2485     UErrorCode ec = U_ZERO_ERROR;
2486     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487     if (U_FAILURE(ec)) {
2488         errln("FAIL: createFromRules");
2489         delete t;
2490         return;
2491     }
2492     UnicodeString rr;
2493     t->toRules(rr, TRUE);
2494     if (r != rr) {
2495         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496     } else {
2497         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498     }
2499     delete t;
2500 
2501     // Test toRule/toPattern for quantified segment.
2502     // Careful with spacing here.
2503     r = "([a-c])+{q} > x $1 x;";
2504     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505     if (U_FAILURE(ec)) {
2506         errln("FAIL: createFromRules");
2507         delete t;
2508         return;
2509     }
2510     t->toRules(rr, TRUE);
2511     if (r != rr) {
2512         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513     } else {
2514         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515     }
2516     delete t;
2517 }
2518 
2519 //======================================================================
2520 // Ram's tests
2521 //======================================================================
TestDevanagariLatinRT()2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523     const int MAX_LEN= 52;
2524     const char* const source[MAX_LEN] = {
2525         "bh\\u0101rata",
2526         "kra",
2527         "k\\u1E63a",
2528         "khra",
2529         "gra",
2530         "\\u1E45ra",
2531         "cra",
2532         "chra",
2533         "j\\u00F1a",
2534         "jhra",
2535         "\\u00F1ra",
2536         "\\u1E6Dya",
2537         "\\u1E6Dhra",
2538         "\\u1E0Dya",
2539       //"r\\u0323ya", // \u095c is not valid in Devanagari
2540         "\\u1E0Dhya",
2541         "\\u1E5Bhra",
2542         "\\u1E47ra",
2543         "tta",
2544         "thra",
2545         "dda",
2546         "dhra",
2547         "nna",
2548         "pra",
2549         "phra",
2550         "bra",
2551         "bhra",
2552         "mra",
2553         "\\u1E49ra",
2554       //"l\\u0331ra",
2555         "yra",
2556         "\\u1E8Fra",
2557       //"l-",
2558         "vra",
2559         "\\u015Bra",
2560         "\\u1E63ra",
2561         "sra",
2562         "hma",
2563         "\\u1E6D\\u1E6Da",
2564         "\\u1E6D\\u1E6Dha",
2565         "\\u1E6Dh\\u1E6Dha",
2566         "\\u1E0D\\u1E0Da",
2567         "\\u1E0D\\u1E0Dha",
2568         "\\u1E6Dya",
2569         "\\u1E6Dhya",
2570         "\\u1E0Dya",
2571         "\\u1E0Dhya",
2572         // Not roundtrippable --
2573         // \\u0939\\u094d\\u094d\\u092E  - hma
2574         // \\u0939\\u094d\\u092E         - hma
2575         // CharsToUnicodeString("hma"),
2576         "hya",
2577         "\\u015Br\\u0325",
2578         "\\u015Bca",
2579         "\\u0115",
2580         "san\\u0304j\\u012Bb s\\u0113nagupta",
2581         "\\u0101nand vaddir\\u0101ju",
2582         "\\u0101",
2583         "a"
2584     };
2585     const char* const expected[MAX_LEN] = {
2586         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2587         "\\u0915\\u094D\\u0930",          /* kra         */
2588         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2589         "\\u0916\\u094D\\u0930",          /* khra        */
2590         "\\u0917\\u094D\\u0930",          /* gra         */
2591         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2592         "\\u091A\\u094D\\u0930",          /* cra         */
2593         "\\u091B\\u094D\\u0930",          /* chra        */
2594         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2595         "\\u091D\\u094D\\u0930",          /* jhra        */
2596         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2597         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2598         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2599         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2600       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2601         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2602         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2603         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2604         "\\u0924\\u094D\\u0924",          /* tta         */
2605         "\\u0925\\u094D\\u0930",          /* thra        */
2606         "\\u0926\\u094D\\u0926",          /* dda         */
2607         "\\u0927\\u094D\\u0930",          /* dhra        */
2608         "\\u0928\\u094D\\u0928",          /* nna         */
2609         "\\u092A\\u094D\\u0930",          /* pra         */
2610         "\\u092B\\u094D\\u0930",          /* phra        */
2611         "\\u092C\\u094D\\u0930",          /* bra         */
2612         "\\u092D\\u094D\\u0930",          /* bhra        */
2613         "\\u092E\\u094D\\u0930",          /* mra         */
2614         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2615       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2616         "\\u092F\\u094D\\u0930",          /* yra         */
2617         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2618       //"l-",
2619         "\\u0935\\u094D\\u0930",          /* vra         */
2620         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2621         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2622         "\\u0938\\u094D\\u0930",          /* sra         */
2623         "\\u0939\\u094d\\u092E",          /* hma         */
2624         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2625         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2626         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2627         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2628         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2629         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2630         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2631         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2632         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2633      // "hma",                         /* hma         */
2634         "\\u0939\\u094D\\u092F",          /* hya         */
2635         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2636         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2637         "\\u090d",                        /* e\\u0306    */
2638         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640         "\\u0906",
2641         "\\u0905",
2642     };
2643     UErrorCode status = U_ZERO_ERROR;
2644     UParseError parseError;
2645     UnicodeString message;
2646     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648     if(U_FAILURE(status)){
2649         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2650         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651         return;
2652     }
2653     UnicodeString gotResult;
2654     for(int i= 0; i<MAX_LEN; i++){
2655         gotResult = source[i];
2656         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658     }
2659     delete latinToDev;
2660     delete devToLatin;
2661 }
2662 
TestTeluguLatinRT()2663 void TransliteratorTest::TestTeluguLatinRT(){
2664     const int MAX_LEN=10;
2665     const char* const source[MAX_LEN] = {
2666         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2667         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2668         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2669         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2670         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2671         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2672         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2673         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2674         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2675         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2676     };
2677 
2678     const char* const expected[MAX_LEN] = {
2679         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689     };
2690 
2691     UErrorCode status = U_ZERO_ERROR;
2692     UParseError parseError;
2693     UnicodeString message;
2694     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696     if(U_FAILURE(status)){
2697         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2698         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699         return;
2700     }
2701     UnicodeString gotResult;
2702     for(int i= 0; i<MAX_LEN; i++){
2703         gotResult = source[i];
2704         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706     }
2707     delete latinToDev;
2708     delete devToLatin;
2709 }
2710 
TestSanskritLatinRT()2711 void TransliteratorTest::TestSanskritLatinRT(){
2712     const int MAX_LEN =16;
2713     const char* const source[MAX_LEN] = {
2714         "rmk\\u1E63\\u0113t",
2715         "\\u015Br\\u012Bmad",
2716         "bhagavadg\\u012Bt\\u0101",
2717         "adhy\\u0101ya",
2718         "arjuna",
2719         "vi\\u1E63\\u0101da",
2720         "y\\u014Dga",
2721         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722         "uv\\u0101cr\\u0325",
2723         "dharmak\\u1E63\\u0113tr\\u0113",
2724         "kuruk\\u1E63\\u0113tr\\u0113",
2725         "samav\\u0113t\\u0101",
2726         "yuyutsava\\u1E25",
2727         "m\\u0101mak\\u0101\\u1E25",
2728     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729         "kimakurvata",
2730         "san\\u0304java",
2731     };
2732     const char* const expected[MAX_LEN] = {
2733         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739         "\\u092f\\u094b\\u0917",
2740         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749         "\\u0938\\u0902\\u091c\\u0935",
2750     };
2751     UErrorCode status = U_ZERO_ERROR;
2752     UParseError parseError;
2753     UnicodeString message;
2754     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756     if(U_FAILURE(status)){
2757         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2758         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759         return;
2760     }
2761     UnicodeString gotResult;
2762     for(int i= 0; i<MAX_LEN; i++){
2763         gotResult = source[i];
2764         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766     }
2767     delete latinToDev;
2768     delete devToLatin;
2769 }
2770 
2771 
TestCompoundLatinRT()2772 void TransliteratorTest::TestCompoundLatinRT(){
2773     const char* const source[] = {
2774         "rmk\\u1E63\\u0113t",
2775         "\\u015Br\\u012Bmad",
2776         "bhagavadg\\u012Bt\\u0101",
2777         "adhy\\u0101ya",
2778         "arjuna",
2779         "vi\\u1E63\\u0101da",
2780         "y\\u014Dga",
2781         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782         "uv\\u0101cr\\u0325",
2783         "dharmak\\u1E63\\u0113tr\\u0113",
2784         "kuruk\\u1E63\\u0113tr\\u0113",
2785         "samav\\u0113t\\u0101",
2786         "yuyutsava\\u1E25",
2787         "m\\u0101mak\\u0101\\u1E25",
2788      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789         "kimakurvata",
2790         "san\\u0304java"
2791     };
2792     const int MAX_LEN = UPRV_LENGTHOF(source);
2793     const char* const expected[MAX_LEN] = {
2794         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800         "\\u092f\\u094b\\u0917",
2801         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810         "\\u0938\\u0902\\u091c\\u0935"
2811     };
2812     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814         return;
2815     }
2816 
2817     UErrorCode status = U_ZERO_ERROR;
2818     UParseError parseError;
2819     UnicodeString message;
2820     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824 
2825     if(U_FAILURE(status)){
2826         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2827         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828         return;
2829     }
2830     UnicodeString gotResult;
2831     for(int i= 0; i<MAX_LEN; i++){
2832         gotResult = source[i];
2833         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836 
2837     }
2838     delete(latinToDevToLatin);
2839     delete(devToLatinToDev);
2840     delete(devToTelToDev);
2841     delete(latinToTelToLatin);
2842 }
2843 
2844 /**
2845  * Test Gurmukhi-Devanagari Tippi and Bindi
2846  */
TestGurmukhiDevanagari()2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848     // the rule says:
2849     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2850     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2851     UErrorCode status = U_ZERO_ERROR;
2852     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854     UParseError parseError;
2855 
2856     UnicodeSetIterator vIter(vowel);
2857     UnicodeSetIterator nvIter(non_vowel);
2858     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859     if(U_FAILURE(status)) {
2860       dataerrln("Error creating transliterator %s", u_errorName(status));
2861       delete trans;
2862       return;
2863     }
2864     UnicodeString src (" \\u0902", -1, US_INV);
2865     UnicodeString expected(" \\u0A02", -1, US_INV);
2866     src = src.unescape();
2867     expected= expected.unescape();
2868 
2869     while(vIter.next()){
2870         src.setCharAt(0,(UChar) vIter.getCodepoint());
2871         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872         expect(*trans,src,expected);
2873     }
2874 
2875     expected.setCharAt(1,0x0A70);
2876     while(nvIter.next()){
2877         //src.setCharAt(0,(char) nvIter.codepoint);
2878         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880         expect(*trans,src,expected);
2881     }
2882     delete trans;
2883 }
2884 /**
2885  * Test instantiation from a locale.
2886  */
TestLocaleInstantiation(void)2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2888     UParseError pe;
2889     UErrorCode ec = U_ZERO_ERROR;
2890     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891     if (U_FAILURE(ec)) {
2892         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893         delete t;
2894         return;
2895     }
2896     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897     delete t;
2898 
2899     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900     if (U_FAILURE(ec)) {
2901         errln("FAIL: createInstance(en-el)");
2902         delete t;
2903         return;
2904     }
2905     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906     delete t;
2907 }
2908 
2909 /**
2910  * Test title case handling of accent (should ignore accents)
2911  */
TestTitleAccents(void)2912 void TransliteratorTest::TestTitleAccents(void) {
2913     UParseError pe;
2914     UErrorCode ec = U_ZERO_ERROR;
2915     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916     if (U_FAILURE(ec)) {
2917         errln("FAIL: createInstance(Title)");
2918         delete t;
2919         return;
2920     }
2921     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922     delete t;
2923 }
2924 
2925 /**
2926  * Basic test of a locale resource based rule.
2927  */
TestLocaleResource()2928 void TransliteratorTest::TestLocaleResource() {
2929     const char* DATA[] = {
2930         // id                    from               to
2931         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2932         "Latin-el",              "b",               "\\u03bc\\u03c0",
2933         "Latin-Greek",           "b",               "\\u03B2",
2934         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2935         "el-Latin",              "\\u03B2",         "v",
2936         "Greek-Latin",           "\\u03B2",         "b",
2937     };
2938     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939     for (int32_t i=0; i<DATA_length; i+=3) {
2940         UParseError pe;
2941         UErrorCode ec = U_ZERO_ERROR;
2942         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943         if (U_FAILURE(ec)) {
2944             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945             delete t;
2946             continue;
2947         }
2948         expect(*t, CharsToUnicodeString(DATA[i+1]),
2949                CharsToUnicodeString(DATA[i+2]));
2950         delete t;
2951     }
2952 }
2953 
2954 /**
2955  * Make sure parse errors reference the right line.
2956  */
TestParseError()2957 void TransliteratorTest::TestParseError() {
2958     static const char* rule =
2959         "a > b;\n"
2960         "# more stuff\n"
2961         "d << b;";
2962     UErrorCode ec = U_ZERO_ERROR;
2963     UParseError pe;
2964     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965     delete t;
2966     if (U_FAILURE(ec)) {
2967         UnicodeString err(pe.preContext);
2968         err.append((UChar)124/*|*/).append(pe.postContext);
2969         if (err.indexOf("d << b") >= 0) {
2970             logln("Ok: " + err);
2971         } else {
2972             errln("FAIL: " + err);
2973         }
2974     }
2975     else {
2976         errln("FAIL: no syntax error");
2977     }
2978     static const char* maskingRule =
2979         "a>x;\n"
2980         "# more stuff\n"
2981         "ab>y;";
2982     ec = U_ZERO_ERROR;
2983     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984     if (ec != U_RULE_MASK_ERROR) {
2985         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986     }
2987     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988         errln("FAIL: did not get expected precontext");
2989     }
2990     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991         errln("FAIL: did not get expected postcontext");
2992     }
2993 }
2994 
2995 /**
2996  * Make sure sets on output are disallowed.
2997  */
TestOutputSet()2998 void TransliteratorTest::TestOutputSet() {
2999     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000     UErrorCode ec = U_ZERO_ERROR;
3001     UParseError pe;
3002     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003     delete t;
3004     if (U_FAILURE(ec)) {
3005         UnicodeString err(pe.preContext);
3006         err.append((UChar)124/*|*/).append(pe.postContext);
3007         logln("Ok: " + err);
3008         return;
3009     }
3010     errln("FAIL: No syntax error");
3011 }
3012 
3013 /**
3014  * Test the use variable range pragma, making sure that use of
3015  * variable range characters is detected and flagged as an error.
3016  */
TestVariableRange()3017 void TransliteratorTest::TestVariableRange() {
3018     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019     UErrorCode ec = U_ZERO_ERROR;
3020     UParseError pe;
3021     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022     delete t;
3023     if (U_FAILURE(ec)) {
3024         UnicodeString err(pe.preContext);
3025         err.append((UChar)124/*|*/).append(pe.postContext);
3026         logln("Ok: " + err);
3027         return;
3028     }
3029     errln("FAIL: No syntax error");
3030 }
3031 
3032 /**
3033  * Test invalid post context error handling
3034  */
TestInvalidPostContext()3035 void TransliteratorTest::TestInvalidPostContext() {
3036     UnicodeString rule = "a}b{c>d;";
3037     UErrorCode ec = U_ZERO_ERROR;
3038     UParseError pe;
3039     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040     delete t;
3041     if (U_FAILURE(ec)) {
3042         UnicodeString err(pe.preContext);
3043         err.append((UChar)124/*|*/).append(pe.postContext);
3044         if (err.indexOf("a}b{c") >= 0) {
3045             logln("Ok: " + err);
3046         } else {
3047             errln("FAIL: " + err);
3048         }
3049         return;
3050     }
3051     errln("FAIL: No syntax error");
3052 }
3053 
3054 /**
3055  * Test ID form variants
3056  */
TestIDForms()3057 void TransliteratorTest::TestIDForms() {
3058     const char* DATA[] = {
3059         "NFC", NULL, "NFD",
3060         "nfd", NULL, "NFC", // make sure case is ignored
3061         "Any-NFKD", NULL, "Any-NFKC",
3062         "Null", NULL, "Null",
3063         "-nfkc", "nfkc", "NFKD",
3064         "-nfkc/", "nfkc", "NFKD",
3065         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068         "Source-", NULL, NULL,
3069         "Source/Variant-", NULL, NULL,
3070         "Source-/Variant", NULL, NULL,
3071         "/Variant", NULL, NULL,
3072         "/Variant-", NULL, NULL,
3073         "-/Variant", NULL, NULL,
3074         "-/", NULL, NULL,
3075         "-", NULL, NULL,
3076         "/", NULL, NULL,
3077     };
3078     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079 
3080     for (int32_t i=0; i<DATA_length; i+=3) {
3081         const char* ID = DATA[i];
3082         const char* expID = DATA[i+1];
3083         const char* expInvID = DATA[i+2];
3084         UBool expValid = (expInvID != NULL);
3085         if (expID == NULL) {
3086             expID = ID;
3087         }
3088         UParseError pe;
3089         UErrorCode ec = U_ZERO_ERROR;
3090         Transliterator *t =
3091             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092         if (U_FAILURE(ec)) {
3093             if (!expValid) {
3094                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095             } else {
3096                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097             }
3098             delete t;
3099             continue;
3100         }
3101         Transliterator *u = t->createInverse(ec);
3102         if (U_FAILURE(ec)) {
3103             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104             delete t;
3105             delete u;
3106             continue;
3107         }
3108         if (t->getID() == expID &&
3109             u->getID() == expInvID) {
3110             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111         } else {
3112             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113                   t->getID() + " x getInverse() => " + u->getID() +
3114                   ", expected " + expInvID);
3115         }
3116         delete t;
3117         delete u;
3118     }
3119 }
3120 
3121 static const UChar SPACE[]   = {32,0};
3122 static const UChar NEWLINE[] = {10,0};
3123 static const UChar RETURN[]  = {13,0};
3124 static const UChar EMPTY[]   = {0};
3125 
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127                                     const UnicodeString& testRulesForward) {
3128     UnicodeString rules2; t2.toRules(rules2, TRUE);
3129     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130     rules2.findAndReplace(SPACE, EMPTY);
3131     rules2.findAndReplace(NEWLINE, EMPTY);
3132     rules2.findAndReplace(RETURN, EMPTY);
3133 
3134     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135 
3136     if (rules2 != testRules) {
3137         errln(label);
3138         logln((UnicodeString)"GENERATED RULES: " + rules2);
3139         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3140     }
3141 }
3142 
3143 /**
3144  * Mark's toRules test.
3145  */
TestToRulesMark()3146 void TransliteratorTest::TestToRulesMark() {
3147     const char* testRules =
3148         "::[[:Latin:][:Mark:]];"
3149         "::NFKD (NFC);"
3150         "::Lower (Lower);"
3151         "a <> \\u03B1;" // alpha
3152         "::NFKC (NFD);"
3153         "::Upper (Lower);"
3154         "::Lower ();"
3155         "::([[:Greek:][:Mark:]]);"
3156         ;
3157     const char* testRulesForward =
3158         "::[[:Latin:][:Mark:]];"
3159         "::NFKD(NFC);"
3160         "::Lower(Lower);"
3161         "a > \\u03B1;"
3162         "::NFKC(NFD);"
3163         "::Upper (Lower);"
3164         "::Lower ();"
3165         ;
3166     const char* testRulesBackward =
3167         "::[[:Greek:][:Mark:]];"
3168         "::Lower (Upper);"
3169         "::NFD(NFKC);"
3170         "\\u03B1 > a;"
3171         "::Lower(Lower);"
3172         "::NFC(NFKD);"
3173         ;
3174     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176 
3177     UParseError pe;
3178     UErrorCode ec = U_ZERO_ERROR;
3179     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3180     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3181 
3182     if (U_FAILURE(ec)) {
3183         delete t2;
3184         delete t3;
3185         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186         return;
3187     }
3188 
3189     expect(*t2, source, target);
3190     expect(*t3, target, source);
3191 
3192     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194 
3195     delete t2;
3196     delete t3;
3197 }
3198 
3199 /**
3200  * Test Escape and Unescape transliterators.
3201  */
TestEscape()3202 void TransliteratorTest::TestEscape() {
3203     UParseError pe;
3204     UErrorCode ec;
3205     Transliterator *t;
3206 
3207     ec = U_ZERO_ERROR;
3208     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3209     if (U_FAILURE(ec)) {
3210         errln((UnicodeString)"FAIL: createInstance");
3211     } else {
3212         expect(*t,
3213                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3214                "@12Q");
3215     }
3216     delete t;
3217 
3218     ec = U_ZERO_ERROR;
3219     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3220     if (U_FAILURE(ec)) {
3221         errln((UnicodeString)"FAIL: createInstance");
3222     } else {
3223         expect(*t,
3224                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3226     }
3227     delete t;
3228 
3229     ec = U_ZERO_ERROR;
3230     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3231     if (U_FAILURE(ec)) {
3232         errln((UnicodeString)"FAIL: createInstance");
3233     } else {
3234         expect(*t,
3235                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3237     }
3238     delete t;
3239 
3240     ec = U_ZERO_ERROR;
3241     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3242     if (U_FAILURE(ec)) {
3243         errln((UnicodeString)"FAIL: createInstance");
3244     } else {
3245         expect(*t,
3246                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3248     }
3249     delete t;
3250 }
3251 
3252 
TestAnchorMasking()3253 void TransliteratorTest::TestAnchorMasking(){
3254     UnicodeString rule ("^a > Q; a > q;");
3255     UErrorCode status= U_ZERO_ERROR;
3256     UParseError parseError;
3257 
3258     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3259     if(U_FAILURE(status)){
3260         errln(UnicodeString("FAIL: ") + "ID" +
3261               ".createFromRules() => bad rules" +
3262               /*", parse error " + parseError.code +*/
3263               ", line " + parseError.line +
3264               ", offset " + parseError.offset +
3265               ", context " + prettify(parseError.preContext, TRUE) +
3266               ", rules: " + prettify(rule, TRUE));
3267     }
3268     delete t;
3269 }
3270 
3271 /**
3272  * Make sure display names of variants look reasonable.
3273  */
TestDisplayName()3274 void TransliteratorTest::TestDisplayName() {
3275 #if UCONFIG_NO_FORMATTING
3276     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277     return;
3278 #else
3279     static const char* DATA[] = {
3280         // ID, forward name, reverse name
3281         // Update the text as necessary -- the important thing is
3282         // not the text itself, but how various cases are handled.
3283 
3284         // Basic test
3285         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286 
3287         // Variants
3288         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289 
3290         // Target-only IDs
3291         "NFC", "Any to NFC", "Any to NFD",
3292     };
3293 
3294     int32_t DATA_length = UPRV_LENGTHOF(DATA);
3295 
3296     Locale US("en", "US");
3297 
3298     for (int32_t i=0; i<DATA_length; i+=3) {
3299         UnicodeString name;
3300         Transliterator::getDisplayName(DATA[i], US, name);
3301         if (name != DATA[i+1]) {
3302             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3303                   name + ", expected " + DATA[i+1]);
3304         } else {
3305             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3306         }
3307         UErrorCode ec = U_ZERO_ERROR;
3308         UParseError pe;
3309         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3310         if (U_FAILURE(ec)) {
3311             delete t;
3312             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3313             continue;
3314         }
3315         name = Transliterator::getDisplayName(t->getID(), US, name);
3316         if (name != DATA[i+2]) {
3317             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3318                   name + ", expected " + DATA[i+2]);
3319         } else {
3320             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3321         }
3322         delete t;
3323     }
3324 #endif
3325 }
3326 
TestSpecialCases(void)3327 void TransliteratorTest::TestSpecialCases(void) {
3328     const UnicodeString registerRules[] = {
3329         "Any-Dev1", "x > X; y > Y;",
3330         "Any-Dev2", "XY > Z",
3331         "Greek-Latin/FAKE",
3332             CharsToUnicodeString
3333             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334         "" // END MARKER
3335     };
3336 
3337     const UnicodeString testCases[] = {
3338         // NORMALIZATION
3339         // should add more test cases
3340         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344 
3345         // mp -> b BUG
3346         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3348 
3349         // check for devanagari bug
3350         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3351 
3352         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3355 
3356         //TODO: enable this test once Titlecase works right
3357         /*
3358         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3360                  */
3361         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3363         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3364                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3365 
3366         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3368 
3369          // FORMS OF S
3370         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3378         // Tatiana bug
3379         // Upper: TAT\\u02B9\\u00C2NA
3380         // Lower: tat\\u02B9\\u00E2na
3381         // Title: Tat\\u02B9\\u00E2na
3382         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3388 
3389         "" // END MARKER
3390     };
3391 
3392     UParseError pos;
3393     int32_t i;
3394     for (i = 0; registerRules[i].length()!=0; i+=2) {
3395         UErrorCode status = U_ZERO_ERROR;
3396 
3397         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3398             registerRules[i+1], UTRANS_FORWARD, pos, status);
3399         if (U_FAILURE(status)) {
3400             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3401         } else {
3402             Transliterator::registerInstance(t);
3403         }
3404     }
3405     for (i = 0; testCases[i].length()!=0; i+=3) {
3406         UErrorCode ec = U_ZERO_ERROR;
3407         UParseError pe;
3408         const UnicodeString& name = testCases[i];
3409         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3410         if (U_FAILURE(ec)) {
3411             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3412             delete t;
3413             continue;
3414         }
3415         const UnicodeString& id = t->getID();
3416         const UnicodeString& source = testCases[i+1];
3417         UnicodeString target;
3418 
3419         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3420 
3421         if (testCases[i+2].length() > 0) {
3422             target = testCases[i+2];
3423         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3424             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3425         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3426             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3427         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3428             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3429         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3430             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3431         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3432             target = source;
3433             target.toLower(Locale::getUS());
3434         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3435             target = source;
3436             target.toUpper(Locale::getUS());
3437         }
3438         if (U_FAILURE(ec)) {
3439             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3440             continue;
3441         }
3442 
3443         expect(*t, source, target);
3444         delete t;
3445     }
3446     for (i = 0; registerRules[i].length()!=0; i+=2) {
3447         Transliterator::unregister(registerRules[i]);
3448     }
3449 }
3450 
Char32ToEscapedChars(UChar32 ch,char * buffer)3451 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3452     if (ch <= 0xFFFF) {
3453         sprintf(buffer, "\\u%04x", (int)ch);
3454     } else {
3455         sprintf(buffer, "\\U%08x", (int)ch);
3456     }
3457     return buffer;
3458 }
3459 
TestSurrogateCasing(void)3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461     // check that casing handles surrogates
3462     // titlecase is currently defective
3463     char buffer[20];
3464     UChar buffer2[20];
3465     UChar32 dee;
3466     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3467     UnicodeString DEE(u_totitle(dee));
3468     if (DEE != DESERET_DEE) {
3469         err("Fails titlecase of surrogates");
3470         err(Char32ToEscapedChars(dee, buffer));
3471         err(", ");
3472         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3473     }
3474 
3475     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3476     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3477     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3478     UErrorCode status= U_ZERO_ERROR;
3479 
3480     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3481     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3482         errln("Fails: Can't uppercase surrogates.");
3483     }
3484 
3485     status= U_ZERO_ERROR;
3486     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3487     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3488         errln("Fails: Can't lowercase surrogates.");
3489     }
3490 }
3491 
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3492 static void _trans(Transliterator& t, const UnicodeString& src,
3493                    UnicodeString& result) {
3494     result = src;
3495     t.transliterate(result);
3496 }
3497 
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3498 static void _trans(const UnicodeString& id, const UnicodeString& src,
3499                    UnicodeString& result, UErrorCode ec) {
3500     UParseError pe;
3501     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3502     if (U_SUCCESS(ec)) {
3503         _trans(*t, src, result);
3504     }
3505     delete t;
3506 }
3507 
/**
 * Look source up in a flat table of (key, value) strings.
 *
 * @param source key to search for; compared with caseCompare() using
 *               U_FOLD_CASE_DEFAULT
 * @param pairs  key at even index, value at the following odd index; the
 *               table ends at the first empty key
 * @return the value stored after the first matching key, or an empty string
 *         when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                       const UnicodeString* pairs) {
    for (const UnicodeString* entry = pairs; entry->length() > 0; entry += 2) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
    }
    return UnicodeString();
}
3518 
3519 // Check to see that incremental gets at least part way through a reasonable string.
3520 
TestIncrementalProgress(void)3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522     UErrorCode ec = U_ZERO_ERROR;
3523     UnicodeString latinTest = "The Quick Brown Fox.";
3524     UnicodeString devaTest;
3525     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3526     UnicodeString kataTest;
3527     _trans("Latin-Katakana", latinTest, kataTest, ec);
3528     if (U_FAILURE(ec)) {
3529         errln("FAIL: Internal error");
3530         return;
3531     }
3532     const UnicodeString tests[] = {
3533         "Any", latinTest,
3534         "Latin", latinTest,
3535         "Halfwidth", latinTest,
3536         "Devanagari", devaTest,
3537         "Katakana", kataTest,
3538         "" // END MARKER
3539     };
3540 
3541     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542     int32_t i = 0, j=0, k=0;
3543     int32_t sources = Transliterator::countAvailableSources();
3544     for (i = 0; i < sources; i++) {
3545         UnicodeString source;
3546         Transliterator::getAvailableSource(i, source);
3547         UnicodeString test = _findMatch(source, tests);
3548         if (test.length() == 0) {
3549             logln((UnicodeString)"Skipping " + source + "-X");
3550             continue;
3551         }
3552         int32_t targets = Transliterator::countAvailableTargets(source);
3553         for (j = 0; j < targets; j++) {
3554             UnicodeString target;
3555             Transliterator::getAvailableTarget(j, source, target);
3556             int32_t variants = Transliterator::countAvailableVariants(source, target);
3557             for (k =0; k< variants; k++) {
3558                 UnicodeString variant;
3559                 UParseError err;
3560                 UErrorCode status = U_ZERO_ERROR;
3561 
3562                 Transliterator::getAvailableVariant(k, source, target, variant);
3563                 UnicodeString id = source + "-" + target + "/" + variant;
3564 
3565                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3566                 if (U_FAILURE(status)) {
3567                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3568                     delete t;
3569                     continue;
3570                 }
3571                 status = U_ZERO_ERROR;
3572                 CheckIncrementalAux(t, test);
3573 
3574                 UnicodeString rev;
3575                 _trans(*t, test, rev);
3576                 Transliterator *inv = t->createInverse(status);
3577                 if (U_FAILURE(status)) {
3578                     // The following are forward-only, it is OK that creating an inverse will not work:
3579                     // 1. Devanagari-Arabic
3580                     // 2. Any-*/BGN
3581                     // 2a. Any-*/BGN_1981
3582                     // 3. Any-*/UNGEGN
3583                     // 4. Any-*/MNS
3584                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3585                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3586                          && !(id.startsWith((UnicodeString)"Any-") &&
3587                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3588                              )
3589 #if UCONFIG_NO_BREAK_ITERATION
3590                          && id.compare((UnicodeString)"Latin-Thai/") != 0
3591 #endif
3592                        )
3593                     {
3594                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3595                     }
3596                     delete t;
3597                     delete inv;
3598                     continue;
3599                 }
3600                 CheckIncrementalAux(inv, rev);
3601                 delete t;
3602                 delete inv;
3603             }
3604         }
3605     }
3606 }
3607 
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3608 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3609                                                       const UnicodeString& input) {
3610     UErrorCode ec = U_ZERO_ERROR;
3611     UTransPosition pos;
3612     UnicodeString test = input;
3613 
3614     pos.contextStart = 0;
3615     pos.contextLimit = input.length();
3616     pos.start = 0;
3617     pos.limit = input.length();
3618 
3619     t->transliterate(test, pos, ec);
3620     if (U_FAILURE(ec)) {
3621         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3622         return;
3623     }
3624     UBool gotError = FALSE;
3625     (void)gotError;    // Suppress set but not used warning.
3626 
3627     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3628 
3629     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3630         errln((UnicodeString)"No Progress, " +
3631               t->getID() + ": " + formatInput(test, input, pos));
3632         gotError = TRUE;
3633     } else {
3634         logln((UnicodeString)"PASS Progress, " +
3635               t->getID() + ": " + formatInput(test, input, pos));
3636     }
3637     t->finishTransliteration(test, pos);
3638     if (pos.start != pos.limit) {
3639         errln((UnicodeString)"Incomplete, " +
3640               t->getID() + ": " + formatInput(test, input, pos));
3641         gotError = TRUE;
3642     }
3643 }
3644 
TestFunction()3645 void TransliteratorTest::TestFunction() {
3646     // Careful with spacing and ';' here:  Phrase this exactly
3647     // as toRules() is going to return it.  If toRules() changes
3648     // with regard to spacing or ';', then adjust this string.
3649     UnicodeString rule =
3650         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3651 
3652     UParseError pe;
3653     UErrorCode ec = U_ZERO_ERROR;
3654     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3655     if (t == NULL) {
3656         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3657         return;
3658     }
3659 
3660     UnicodeString r;
3661     t->toRules(r, TRUE);
3662     if (r == rule) {
3663         logln((UnicodeString)"OK: toRules() => " + r);
3664     } else {
3665         errln((UnicodeString)"FAIL: toRules() => " + r +
3666               ", expected " + rule);
3667     }
3668 
3669     expect(*t, "The Quick Brown Fox",
3670            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3671 
3672     delete t;
3673 }
3674 
TestInvalidBackRef(void)3675 void TransliteratorTest::TestInvalidBackRef(void) {
3676     UnicodeString rule =  ". > $1;";
3677     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3678     UParseError pe;
3679     UErrorCode ec = U_ZERO_ERROR;
3680     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3681     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3682 
3683     if (t != NULL) {
3684         errln("FAIL: createFromRules should have returned NULL");
3685         delete t;
3686     }
3687 
3688     if (t2 != NULL) {
3689         errln("FAIL: createFromRules should have returned NULL");
3690         delete t2;
3691     }
3692 
3693     if (U_SUCCESS(ec)) {
3694         errln("FAIL: Ok: . > $1; => no error");
3695     } else {
3696         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3697     }
3698 }
3699 
TestMulticharStringSet()3700 void TransliteratorTest::TestMulticharStringSet() {
3701     // Basic testing
3702     const char* rule =
3703         "       [{aa}]       > x;"
3704         "         a          > y;"
3705         "       [b{bc}]      > z;"
3706         "[{gd}] { e          > q;"
3707         "         e } [{fg}] > r;" ;
3708 
3709     UParseError pe;
3710     UErrorCode ec = U_ZERO_ERROR;
3711     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3712     if (t == NULL || U_FAILURE(ec)) {
3713         delete t;
3714         errln("FAIL: createFromRules failed");
3715         return;
3716     }
3717 
3718     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3719            "y x yz z d gd de gdq gdqfg ddrfg");
3720     delete t;
3721 
3722     // Overlapped string test.  Make sure that when multiple
3723     // strings can match that the longest one is matched.
3724     rule =
3725         "    [a {ab} {abc}]    > x;"
3726         "           b          > y;"
3727         "           c          > z;"
3728         " q [t {st} {rst}] { e > p;" ;
3729 
3730     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3731     if (t == NULL || U_FAILURE(ec)) {
3732         delete t;
3733         errln("FAIL: createFromRules failed");
3734         return;
3735     }
3736 
3737     expect(*t, "a ab abc qte qste qrste",
3738            "x x x qtp qstp qrstp");
3739     delete t;
3740 }
3741 
3742 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3743 // BEGIN TestUserFunction support factory
3744 
3745 Transliterator* _TUFF[4];
3746 UnicodeString* _TUFID[4];
3747 
_TUFFactory(const UnicodeString &,Transliterator::Token context)3748 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3749                                    Transliterator::Token context) {
3750     return _TUFF[context.integer]->clone();
3751 }
3752 
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3753 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3754     _TUFF[n] = t;
3755     _TUFID[n] = new UnicodeString(ID);
3756     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3757 }
3758 
_TUFUnreg(int32_t n)3759 static void _TUFUnreg(int32_t n) {
3760     if (_TUFF[n] != NULL) {
3761         Transliterator::unregister(*_TUFID[n]);
3762         delete _TUFF[n];
3763         delete _TUFID[n];
3764     }
3765 }
3766 
3767 // END TestUserFunction support factory
3768 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3769 
3770 /**
3771  * Test that user-registered transliterators can be used under function
3772  * syntax.
3773  */
TestUserFunction()3774 void TransliteratorTest::TestUserFunction() {
3775 
3776     Transliterator* t;
3777     UParseError pe;
3778     UErrorCode ec = U_ZERO_ERROR;
3779 
3780     // Setup our factory
3781     int32_t i;
3782     for (i=0; i<4; ++i) {
3783         _TUFF[i] = NULL;
3784     }
3785 
3786     // There's no need to register inverses if we don't use them
3787     t = Transliterator::createFromRules("gif",
3788                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3789                                         UTRANS_FORWARD, pe, ec);
3790     if (t == NULL || U_FAILURE(ec)) {
3791         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3792         return;
3793     }
3794     _TUFReg("Any-gif", t, 0);
3795 
3796     t = Transliterator::createFromRules("RemoveCurly",
3797                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3798                                         UTRANS_FORWARD, pe, ec);
3799     if (t == NULL || U_FAILURE(ec)) {
3800         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3801         goto FAIL;
3802     }
3803     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3804     _TUFReg("Any-RemoveCurly", t, 1);
3805 
3806     logln("Trying &hex");
3807     t = Transliterator::createFromRules("hex2",
3808                                         "(.) > &hex($1);",
3809                                         UTRANS_FORWARD, pe, ec);
3810     if (t == NULL || U_FAILURE(ec)) {
3811         errln("FAIL: createFromRules");
3812         goto FAIL;
3813     }
3814     logln("Registering");
3815     _TUFReg("Any-hex2", t, 2);
3816     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3817     if (t == NULL || U_FAILURE(ec)) {
3818         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3819         goto FAIL;
3820     }
3821     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3822     delete t;
3823 
3824     logln("Trying &gif");
3825     t = Transliterator::createFromRules("gif2",
3826                                         "(.) > &Gif(&Hex2($1));",
3827                                         UTRANS_FORWARD, pe, ec);
3828     if (t == NULL || U_FAILURE(ec)) {
3829         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3830         goto FAIL;
3831     }
3832     logln("Registering");
3833     _TUFReg("Any-gif2", t, 3);
3834     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3835     if (t == NULL || U_FAILURE(ec)) {
3836         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3837         goto FAIL;
3838     }
3839     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3840            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3841     delete t;
3842 
3843     // Test that filters are allowed after &
3844     t = Transliterator::createFromRules("test",
3845                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3846                                         UTRANS_FORWARD, pe, ec);
3847     if (t == NULL || U_FAILURE(ec)) {
3848         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3849         goto FAIL;
3850     }
3851     expect(*t, "abc",
3852            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3853     delete t;
3854 
3855  FAIL:
3856     for (i=0; i<4; ++i) {
3857         _TUFUnreg(i);
3858     }
3859 }
3860 
3861 /**
3862  * Test the Any-X transliterators.
3863  */
TestAnyX(void)3864 void TransliteratorTest::TestAnyX(void) {
3865     UParseError parseError;
3866     UErrorCode status = U_ZERO_ERROR;
3867     Transliterator* anyLatin =
3868         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3869     if (anyLatin==0) {
3870         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3871         delete anyLatin;
3872         return;
3873     }
3874 
3875     expect(*anyLatin,
3876            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3877            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3878 
3879     delete anyLatin;
3880 }
3881 
3882 /**
3883  * Test Any-X transliterators with sample letters from all scripts.
3884  */
TestAny(void)3885 void TransliteratorTest::TestAny(void) {
3886     UErrorCode status = U_ZERO_ERROR;
3887     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3888     //       function call parameters going on in this test.
3889     UnicodeSet alphabetic("[:alphabetic:]", status);
3890     if (U_FAILURE(status)) {
3891         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3892         return;
3893     }
3894     alphabetic.freeze();
3895 
3896     UnicodeString testString;
3897     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3898         const char *scriptName = uscript_getShortName((UScriptCode)i);
3899         if (scriptName == NULL) {
3900             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3901             return;
3902         }
3903 
3904         UnicodeSet sample;
3905         sample.applyPropertyAlias("script", scriptName, status);
3906         if (U_FAILURE(status)) {
3907             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3908             return;
3909         }
3910         sample.retainAll(alphabetic);
3911         for (int32_t count=0; count<5; count++) {
3912             UChar32 c = sample.charAt(count);
3913             if (c == -1) {
3914                 break;
3915             }
3916             testString.append(c);
3917         }
3918     }
3919 
3920     UParseError parseError;
3921     Transliterator* anyLatin =
3922         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3923     if (U_FAILURE(status)) {
3924         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3925         return;
3926     }
3927 
3928     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3929     anyLatin->transliterate(testString);
3930     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3931     delete anyLatin;
3932 }
3933 
3934 
3935 /**
3936  * Test the source and target set API.  These are only implemented
3937  * for RBT and CompoundTransliterator at this time.
3938  */
TestSourceTargetSet()3939 void TransliteratorTest::TestSourceTargetSet() {
3940     UErrorCode ec = U_ZERO_ERROR;
3941 
3942     // Rules
3943     const char* r =
3944         "a > b; "
3945         "r [x{lu}] > q;";
3946 
3947     // Expected source
3948     UnicodeSet expSrc("[arx{lu}]", ec);
3949 
3950     // Expected target
3951     UnicodeSet expTrg("[bq]", ec);
3952 
3953     UParseError pe;
3954     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3955 
3956     if (U_FAILURE(ec)) {
3957         delete t;
3958         errln("FAIL: Couldn't set up test");
3959         return;
3960     }
3961 
3962     UnicodeSet src; t->getSourceSet(src);
3963     UnicodeSet trg; t->getTargetSet(trg);
3964 
3965     if (src == expSrc && trg == expTrg) {
3966         UnicodeString a, b;
3967         logln((UnicodeString)"Ok: " +
3968               r + " => source = " + src.toPattern(a, TRUE) +
3969               ", target = " + trg.toPattern(b, TRUE));
3970     } else {
3971         UnicodeString a, b, c, d;
3972         errln((UnicodeString)"FAIL: " +
3973               r + " => source = " + src.toPattern(a, TRUE) +
3974               ", expected " + expSrc.toPattern(b, TRUE) +
3975               "; target = " + trg.toPattern(c, TRUE) +
3976               ", expected " + expTrg.toPattern(d, TRUE));
3977     }
3978 
3979     delete t;
3980 }
3981 
3982 /**
3983  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3984  */
TestPatternWhiteSpace()3985 void TransliteratorTest::TestPatternWhiteSpace() {
3986     // Rules
3987     const char* r = "a > \\u200E b;";
3988 
3989     UErrorCode ec = U_ZERO_ERROR;
3990     UParseError pe;
3991     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3992 
3993     if (U_FAILURE(ec)) {
3994         errln("FAIL: Couldn't set up test");
3995     } else {
3996         expect(*t, "a", "b");
3997     }
3998     delete t;
3999 
4000     // UnicodeSet
4001     ec = U_ZERO_ERROR;
4002     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4003 
4004     if (U_FAILURE(ec)) {
4005         errln("FAIL: Couldn't set up test");
4006     } else {
4007         if (set.contains(0x200E)) {
4008             errln("FAIL: U+200E not being ignored by UnicodeSet");
4009         }
4010     }
4011 }
4012 //======================================================================
4013 // this method is in TestUScript.java
4014 //======================================================================
TestAllCodepoints()4015 void TransliteratorTest::TestAllCodepoints(){
4016     UScriptCode code= USCRIPT_INVALID_CODE;
4017     char id[256]={'\0'};
4018     char abbr[256]={'\0'};
4019     char newId[256]={'\0'};
4020     char newAbbrId[256]={'\0'};
4021     char oldId[256]={'\0'};
4022     char oldAbbrId[256]={'\0'};
4023 
4024     UErrorCode status =U_ZERO_ERROR;
4025     UParseError pe;
4026 
4027     for(uint32_t i = 0; i<=0x10ffff; i++){
4028         code =  uscript_getScript(i,&status);
4029         if(code == USCRIPT_INVALID_CODE){
4030             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4031         }
4032         const char* myId = uscript_getName(code);
4033         if(!myId) {
4034           dataerrln("Valid script code returned NULL name. Check your data!");
4035           return;
4036         }
4037         uprv_strcpy(id,myId);
4038         uprv_strcpy(abbr,uscript_getShortName(code));
4039 
4040         uprv_strcpy(newId,"[:");
4041         uprv_strcat(newId,id);
4042         uprv_strcat(newId,":];NFD");
4043 
4044         uprv_strcpy(newAbbrId,"[:");
4045         uprv_strcat(newAbbrId,abbr);
4046         uprv_strcat(newAbbrId,":];NFD");
4047 
4048         if(uprv_strcmp(newId,oldId)!=0){
4049             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4050             if(t==NULL || U_FAILURE(status)){
4051                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4052             }
4053             delete t;
4054         }
4055         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4056             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4057             if(t==NULL || U_FAILURE(status)){
4058                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4059             }
4060             delete t;
4061         }
4062         uprv_strcpy(oldId,newId);
4063         uprv_strcpy(oldAbbrId, newAbbrId);
4064 
4065     }
4066 
4067 }
4068 
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates a forward transliterator from the ID `id` and reports an error
 * unless its dynamic class ID equals cls::getStaticClassID().  A creation
 * failure is reported through dataerrln().  The instance is deleted at the
 * end of the block.
 *
 * `id` is also passed to a "%s" format argument, so it has to be a C string.
 * `cls` is stringified into the mismatch message.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082 
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *
 * Creates a forward transliterator named "_" from the rule text `rule` and
 * reports an error unless its dynamic class ID equals
 * cls::getStaticClassID().  The instance is deleted at the end of the block.
 *
 * `rule` is pasted next to a string literal in the failure message, so it
 * has to be a string literal itself.  `cls` is stringified into the mismatch
 * message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4097 
/**
 * Check the class-ID boilerplate of the transliterator implementations:
 * each line instantiates a transliterator (by ID, or from rules for the last
 * one) and verifies that the instance's dynamic class ID matches the static
 * class ID of the class named next to it.
 */
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Rule-based instance: created from rule text rather than a registered ID.
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4113 
TestAlternateSyntax()4114 void TransliteratorTest::TestAlternateSyntax() {
4115     // U+2206 == &
4116     // U+2190 == <
4117     // U+2192 == >
4118     // U+2194 == <>
4119     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4120            "abc",
4121            "xbz");
4122     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4123            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4124            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4125 }
4126 
/*
 * Rule sets used by TestBeginEnd() and TestBeginEndToRules().  They are
 * referenced by index, both from BEGIN_END_TEST_CASES and directly
 * (BEGIN_END_RULES[17] is the one instantiated in the reverse direction).
 *
 * Each entry is one string assembled from adjacent literals.  Entries whose
 * ::BEGIN/::END rule text is commented out are kept as empty strings so the
 * indexes of the remaining entries do not shift.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4300 
4301 /*
4302 (This entire test is commented out below and will need some heavy revision when we re-add
4303 the ::BEGIN/::END stuff)
4304 static const char* BOGUS_BEGIN_END_RULES[] = {
4305     // [7]
4306     "::BEGIN;"
4307     "abc > xy;"
4308     "::BEGIN;"
4309     "aba > z;"
4310     "::END;"
4311     "::END;",
4312 
4313     // [8]
4314     "abc > xy;"
4315     " aba > z;"
4316     "::END;",
4317 
4318     // [9]
4319     "::BEGIN;"
4320     "::Upper;"
4321     "::END;"
4322 };
4323 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4324 */
4325 
/*
 * Test cases for TestBeginEnd() and TestBeginEndToRules(), stored as a flat
 * array in which every case takes three consecutive entries: the rule set
 * (an entry of BEGIN_END_RULES), the input text, and the expected output.
 * Both tests step through the array three entries at a time, so the number
 * of entries has to stay a multiple of three.  Rows for the disabled
 * ::BEGIN/::END rule sets are commented out.
 */
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of array entries (three per test case), not the number of cases.
static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4354 
TestBeginEnd()4355 void TransliteratorTest::TestBeginEnd() {
4356     // run through the list of test cases above
4357     int32_t i = 0;
4358     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4359         expect((UnicodeString)"Test case #" + (i / 3),
4360                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4361                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4362                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4363     }
4364 
4365     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4366     UParseError parseError;
4367     UErrorCode status = U_ZERO_ERROR;
4368     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4369             UTRANS_REVERSE, parseError, status);
4370     if (reversed == 0 || U_FAILURE(status)) {
4371         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4372     } else {
4373         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4374     }
4375     delete reversed;
4376 
4377     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4378     // that all of them cause errors
4379 /*
4380 (commented out until we have the real ::BEGIN/::END stuff in place
4381     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4382         UParseError parseError;
4383         UErrorCode status = U_ZERO_ERROR;
4384         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4385                 UTRANS_FORWARD, parseError, status);
4386         if (!U_FAILURE(status)) {
4387             delete t;
4388             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4389         }
4390     }
4391 */
4392 }
4393 
TestBeginEndToRules()4394 void TransliteratorTest::TestBeginEndToRules() {
4395     // run through the same list of test cases we used above, but this time, instead of just
4396     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4397     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4398     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4399     // to (i.e., does the same thing as) the original rule set
4400     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4401         UParseError parseError;
4402         UErrorCode status = U_ZERO_ERROR;
4403         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4404                 UTRANS_FORWARD, parseError, status);
4405         if (U_FAILURE(status)) {
4406             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4407         } else {
4408             UnicodeString rules;
4409             t->toRules(rules, TRUE);
4410             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4411                     UTRANS_FORWARD, parseError, status);
4412             if (U_FAILURE(status)) {
4413                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4414                         parseError, status);
4415                 delete t;
4416             } else {
4417                 expect(*t2,
4418                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4419                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4420                 delete t;
4421                 delete t2;
4422             }
4423         }
4424     }
4425 
4426     // do the same thing for the reversible test case
4427     UParseError parseError;
4428     UErrorCode status = U_ZERO_ERROR;
4429     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4430             UTRANS_REVERSE, parseError, status);
4431     if (U_FAILURE(status)) {
4432         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4433     } else {
4434         UnicodeString rules;
4435         reversed->toRules(rules, FALSE);
4436         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4437                 parseError, status);
4438         if (U_FAILURE(status)) {
4439             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4440                     parseError, status);
4441             delete reversed;
4442         } else {
4443             expect(*reversed2,
4444                    UnicodeString("xy XY XYZ yz YZ"),
4445                    UnicodeString("xy abc xaba yz aba"));
4446             delete reversed;
4447             delete reversed2;
4448         }
4449     }
4450 }
4451 
TestRegisterAlias()4452 void TransliteratorTest::TestRegisterAlias() {
4453     UnicodeString longID("Lower;[aeiou]Upper");
4454     UnicodeString shortID("Any-CapVowels");
4455     UnicodeString reallyShortID("CapVowels");
4456 
4457     Transliterator::registerAlias(shortID, longID);
4458 
4459     UErrorCode err = U_ZERO_ERROR;
4460     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4461     if (U_FAILURE(err)) {
4462         errln("Failed to instantiate transliterator with long ID");
4463         Transliterator::unregister(shortID);
4464         return;
4465     }
4466     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4467     if (U_FAILURE(err)) {
4468         errln("Failed to instantiate transliterator with short ID");
4469         delete t1;
4470         Transliterator::unregister(shortID);
4471         return;
4472     }
4473 
4474     if (t1->getID() != longID)
4475         errln("Transliterator instantiated with long ID doesn't have long ID");
4476     if (t2->getID() != reallyShortID)
4477         errln("Transliterator instantiated with short ID doesn't have short ID");
4478 
4479     UnicodeString rules1;
4480     UnicodeString rules2;
4481 
4482     t1->toRules(rules1, TRUE);
4483     t2->toRules(rules2, TRUE);
4484     if (rules1 != rules2)
4485         errln("Alias transliterators aren't the same");
4486 
4487     delete t1;
4488     delete t2;
4489     Transliterator::unregister(shortID);
4490 
4491     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4492     if (U_SUCCESS(err)) {
4493         errln("Instantiation with short ID succeeded after short ID was unregistered");
4494         delete t1;
4495     }
4496 
4497     // try the same thing again, but this time with something other than
4498     // an instance of CompoundTransliterator
4499     UnicodeString realID("Latin-Greek");
4500     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4501     Transliterator::registerAlias(fakeID, realID);
4502 
4503     err = U_ZERO_ERROR;
4504     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4505     if (U_FAILURE(err)) {
4506         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4507         Transliterator::unregister(realID);
4508         return;
4509     }
4510     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4511     if (U_FAILURE(err)) {
4512         errln("Failed to instantiate transliterator with fake ID");
4513         delete t1;
4514         Transliterator::unregister(realID);
4515         return;
4516     }
4517 
4518     t1->toRules(rules1, TRUE);
4519     t2->toRules(rules2, TRUE);
4520     if (rules1 != rules2)
4521         errln("Alias transliterators aren't the same");
4522 
4523     delete t1;
4524     delete t2;
4525     Transliterator::unregister(fakeID);
4526 }
4527 
TestRuleStripping()4528 void TransliteratorTest::TestRuleStripping() {
4529     /*
4530 #
4531 \uE001>\u0C01; # SIGN
4532     */
4533     static const UChar rule[] = {
4534         0x0023,0x0020,0x000D,0x000A,
4535         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4536     };
4537     static const UChar expectedRule[] = {
4538         0xE001,0x003E,0x0C01,0x003B,0
4539     };
4540     UChar result[UPRV_LENGTHOF(rule)];
4541     UErrorCode status = U_ZERO_ERROR;
4542     int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4543     if (len != u_strlen(expectedRule)) {
4544         errln("utrans_stripRules return len = %d", len);
4545     }
4546     if (u_strncmp(expectedRule, result, len) != 0) {
4547         errln("utrans_stripRules did not return expected string");
4548     }
4549 }
4550 
4551 /**
4552  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4553  */
TestHalfwidthFullwidth(void)4554 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4555     UParseError parseError;
4556     UErrorCode status = U_ZERO_ERROR;
4557     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4558     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4559     if (hf == 0 || fh == 0) {
4560         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4561         delete hf;
4562         delete fh;
4563         return;
4564     }
4565 
4566     // Array of 2n items
4567     // Each item is
4568     //   "hf"|"fh"|"both",
4569     //   <Halfwidth>,
4570     //   <Fullwidth>
4571     const char* DATA[] = {
4572         "both",
4573         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4574         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4575     };
4576     int32_t DATA_length = UPRV_LENGTHOF(DATA);
4577 
4578     for (int32_t i=0; i<DATA_length; i+=3) {
4579         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4580         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4581         switch (*DATA[i]) {
4582         case 0x68: //'h': // Halfwidth-Fullwidth only
4583             expect(*hf, h, f);
4584             break;
4585         case 0x66: //'f': // Fullwidth-Halfwidth only
4586             expect(*fh, f, h);
4587             break;
4588         case 0x62: //'b': // both directions
4589             expect(*hf, h, f);
4590             expect(*fh, f, h);
4591             break;
4592         }
4593     }
4594     delete hf;
4595     delete fh;
4596 }
4597 
4598 
4599     /**
4600      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4601      *              TODO: confirm that the expected results are correct.
4602      *              For now, test just confirms that C++ and Java give identical results.
4603      */
TestThai(void)4604 void TransliteratorTest::TestThai(void) {
4605 #if !UCONFIG_NO_BREAK_ITERATION
4606     UParseError parseError;
4607     UErrorCode status = U_ZERO_ERROR;
4608     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4609     if (tr == 0) {
4610         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4611         return;
4612     }
4613     if (U_FAILURE(status)) {
4614         errln("FAIL: createInstance failed with %s", u_errorName(status));
4615         return;
4616     }
4617     const char *thaiText =
4618         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4619         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4620         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4621         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4622         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4623         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4624         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4625         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4626         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4627         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4628         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4629         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4630         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4631         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4632         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4633         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4634         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4635         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4636         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4637         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4638         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4639         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4640         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4641         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4642         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4643         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4644         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4645         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4646         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4647         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4648 
4649     const char *latinText =
4650         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4651         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4652         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4653         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4654         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4655         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4656         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4657         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4658         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4659         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4660         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4661         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4662         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4663         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4664         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4665         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4666         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4667         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4668 
4669 
4670     UnicodeString  xlitText(thaiText);
4671     xlitText = xlitText.unescape();
4672     tr->transliterate(xlitText);
4673 
4674     UnicodeString expectedText(latinText);
4675     expectedText = expectedText.unescape();
4676     expect(*tr, xlitText, expectedText);
4677 
4678     delete tr;
4679 #endif
4680 }
4681 
4682 
4683 //======================================================================
4684 // Support methods
4685 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4686 void TransliteratorTest::expectT(const UnicodeString& id,
4687                                  const UnicodeString& source,
4688                                  const UnicodeString& expectedResult) {
4689     UErrorCode ec = U_ZERO_ERROR;
4690     UParseError pe;
4691     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4692     if (U_FAILURE(ec)) {
4693         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4694         delete t;
4695         return;
4696     }
4697     expect(*t, source, expectedResult);
4698     delete t;
4699 }
4700 
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4701 void TransliteratorTest::reportParseError(const UnicodeString& message,
4702                                           const UParseError& parseError,
4703                                           const UErrorCode& status) {
4704     dataerrln(message +
4705           /*", parse error " + parseError.code +*/
4706           ", line " + parseError.line +
4707           ", offset " + parseError.offset +
4708           ", pre-context " + prettify(parseError.preContext, TRUE) +
4709           ", post-context " + prettify(parseError.postContext,TRUE) +
4710           ", Error: " + u_errorName(status));
4711 }
4712 
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4713 void TransliteratorTest::expect(const UnicodeString& rules,
4714                                 const UnicodeString& source,
4715                                 const UnicodeString& expectedResult,
4716                                 UTransPosition *pos) {
4717     expect("<ID>", rules, source, expectedResult, pos);
4718 }
4719 
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4720 void TransliteratorTest::expect(const UnicodeString& id,
4721                                 const UnicodeString& rules,
4722                                 const UnicodeString& source,
4723                                 const UnicodeString& expectedResult,
4724                                 UTransPosition *pos) {
4725     UErrorCode status = U_ZERO_ERROR;
4726     UParseError parseError;
4727     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4728     if (U_FAILURE(status)) {
4729         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4730     } else {
4731         expect(*t, source, expectedResult, pos);
4732     }
4733     delete t;
4734 }
4735 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4736 void TransliteratorTest::expect(const Transliterator& t,
4737                                 const UnicodeString& source,
4738                                 const UnicodeString& expectedResult,
4739                                 const Transliterator& reverseTransliterator) {
4740     expect(t, source, expectedResult);
4741     expect(reverseTransliterator, expectedResult, source);
4742 }
4743 
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4744 void TransliteratorTest::expect(const Transliterator& t,
4745                                 const UnicodeString& source,
4746                                 const UnicodeString& expectedResult,
4747                                 UTransPosition *pos) {
4748     if (pos == 0) {
4749         UnicodeString result(source);
4750         t.transliterate(result);
4751         expectAux(t.getID() + ":String", source, result, expectedResult);
4752     }
4753     UTransPosition index={0, 0, 0, 0};
4754     if (pos != 0) {
4755         index = *pos;
4756     }
4757 
4758     UnicodeString rsource(source);
4759     if (pos == 0) {
4760         t.transliterate(rsource);
4761     } else {
4762         // Do it all at once -- below we do it incrementally
4763         t.finishTransliteration(rsource, *pos);
4764     }
4765     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4766 
4767     // Test keyboard (incremental) transliteration -- this result
4768     // must be the same after we finalize (see below).
4769     UnicodeString log;
4770     rsource.remove();
4771     if (pos != 0) {
4772         rsource = source;
4773         formatInput(log, rsource, index);
4774         log.append(" -> ");
4775         UErrorCode status = U_ZERO_ERROR;
4776         t.transliterate(rsource, index, status);
4777         formatInput(log, rsource, index);
4778     } else {
4779         for (int32_t i=0; i<source.length(); ++i) {
4780             if (i != 0) {
4781                 log.append(" + ");
4782             }
4783             log.append(source.charAt(i)).append(" -> ");
4784             UErrorCode status = U_ZERO_ERROR;
4785             t.transliterate(rsource, index, source.charAt(i), status);
4786             formatInput(log, rsource, index);
4787         }
4788     }
4789 
4790     // As a final step in keyboard transliteration, we must call
4791     // transliterate to finish off any pending partial matches that
4792     // were waiting for more input.
4793     t.finishTransliteration(rsource, index);
4794     log.append(" => ").append(rsource);
4795 
4796     expectAux(t.getID() + ":Keyboard", log,
4797               rsource == expectedResult,
4798               expectedResult);
4799 }
4800 
4801 
4802 /**
4803  * @param appendTo result is appended to this param.
4804  * @param input the string being transliterated
4805  * @param pos the index struct
4806  */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4807 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4808                                                const UnicodeString& input,
4809                                                const UTransPosition& pos) {
4810     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4811     // the {} indicate the context start and limit, and the ||
4812     // indicate the start and limit.
4813     if (0 <= pos.contextStart &&
4814         pos.contextStart <= pos.start &&
4815         pos.start <= pos.limit &&
4816         pos.limit <= pos.contextLimit &&
4817         pos.contextLimit <= input.length()) {
4818 
4819         UnicodeString a, b, c, d, e;
4820         input.extractBetween(0, pos.contextStart, a);
4821         input.extractBetween(pos.contextStart, pos.start, b);
4822         input.extractBetween(pos.start, pos.limit, c);
4823         input.extractBetween(pos.limit, pos.contextLimit, d);
4824         input.extractBetween(pos.contextLimit, input.length(), e);
4825         appendTo.append(a).append((UChar)123/*{*/).append(b).
4826             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4827             append((UChar)125/*}*/).append(e);
4828     } else {
4829         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4830                         pos.contextStart + ", s=" + pos.start + ", l=" +
4831                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4832                         input);
4833     }
4834     return appendTo;
4835 }
4836 
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4837 void TransliteratorTest::expectAux(const UnicodeString& tag,
4838                                    const UnicodeString& source,
4839                                    const UnicodeString& result,
4840                                    const UnicodeString& expectedResult) {
4841     expectAux(tag, source + " -> " + result,
4842               result == expectedResult,
4843               expectedResult);
4844 }
4845 
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4846 void TransliteratorTest::expectAux(const UnicodeString& tag,
4847                                    const UnicodeString& summary, UBool pass,
4848                                    const UnicodeString& expectedResult) {
4849     if (pass) {
4850         logln(UnicodeString("(")+tag+") " + prettify(summary));
4851     } else {
4852         dataerrln(UnicodeString("FAIL: (")+tag+") "
4853               + prettify(summary)
4854               + ", expected " + prettify(expectedResult));
4855     }
4856 }
4857 
4858 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4859