1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "rbt_pars.h"
30 #include "anytrans.h"
31 #include "esctrn.h"
32 #include "name2uni.h"
33 #include "nortrans.h"
34 #include "remtrans.h"
35 #include "titletrn.h"
36 #include "tolowtrn.h"
37 #include "toupptrn.h"
38 #include "unesctrn.h"
39 #include "uni2name.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include <stdio.h>
43
44 /***********************************************************************
45
46 HOW TO USE THIS TEST FILE
47 -or-
48 How I developed on two platforms
49 without losing (too much of) my mind
50
51
52 1. Add new tests by copying/pasting/changing existing tests. On Java,
53 any public void method named Test...() taking no parameters becomes
54 a test. On C++, you need to modify the header and add a line to
55 the runIndexedTest() dispatch method.
56
57 2. Make liberal use of the expect() method; it is your friend.
58
59 3. The tests in this file exactly match those in a sister file on the
60 other side. The two files are:
61
62 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
63 icu4c: source/test/intltest/transtst.cpp
64
65 ==> THIS IS THE IMPORTANT PART <==
66
67 When you add a test in this file, add it in TransliteratorTest.java
68 too. Give it the same name and put it in the same relative place.
69 This makes maintenance a lot simpler for any poor soul who ends up
70 trying to synchronize the tests between icu4j and icu4c.
71
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73 then add it in the special non-mirrored section. These are
74 labeled
75
76 "icu4j ONLY"
77
78 or
79
80 "icu4c ONLY"
81
82 Make sure you document the reason the test is here and not there.
83
84
85 Thank you.
86 The Management
87 ***********************************************************************/
88
89 // Define character constants thusly to be EBCDIC-friendly
90 enum {
91 LEFT_BRACE=((UChar)0x007B), /*{*/
92 PIPE =((UChar)0x007C), /*|*/
93 ZERO =((UChar)0x0030), /*0*/
94 UPPER_A =((UChar)0x0041) /*A*/
95 };
96
TransliteratorTest()97 TransliteratorTest::TransliteratorTest()
98 : DESERET_DEE((UChar32)0x10414),
99 DESERET_dee((UChar32)0x1043C)
100 {
101 }
102
~TransliteratorTest()103 TransliteratorTest::~TransliteratorTest() {}
104
105 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)106 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
107 const char* &name, char* /*par*/) {
108 switch (index) {
109 TESTCASE(0,TestInstantiation);
110 TESTCASE(1,TestSimpleRules);
111 TESTCASE(2,TestRuleBasedInverse);
112 TESTCASE(3,TestKeyboard);
113 TESTCASE(4,TestKeyboard2);
114 TESTCASE(5,TestKeyboard3);
115 TESTCASE(6,TestArabic);
116 TESTCASE(7,TestCompoundKana);
117 TESTCASE(8,TestCompoundHex);
118 TESTCASE(9,TestFiltering);
119 TESTCASE(10,TestInlineSet);
120 TESTCASE(11,TestPatternQuoting);
121 TESTCASE(12,TestJ277);
122 TESTCASE(13,TestJ243);
123 TESTCASE(14,TestJ329);
124 TESTCASE(15,TestSegments);
125 TESTCASE(16,TestCursorOffset);
126 TESTCASE(17,TestArbitraryVariableValues);
127 TESTCASE(18,TestPositionHandling);
128 TESTCASE(19,TestHiraganaKatakana);
129 TESTCASE(20,TestCopyJ476);
130 TESTCASE(21,TestAnchors);
131 TESTCASE(22,TestInterIndic);
132 TESTCASE(23,TestFilterIDs);
133 TESTCASE(24,TestCaseMap);
134 TESTCASE(25,TestNameMap);
135 TESTCASE(26,TestLiberalizedID);
136 TESTCASE(27,TestCreateInstance);
137 TESTCASE(28,TestNormalizationTransliterator);
138 TESTCASE(29,TestCompoundRBT);
139 TESTCASE(30,TestCompoundFilter);
140 TESTCASE(31,TestRemove);
141 TESTCASE(32,TestToRules);
142 TESTCASE(33,TestContext);
143 TESTCASE(34,TestSupplemental);
144 TESTCASE(35,TestQuantifier);
145 TESTCASE(36,TestSTV);
146 TESTCASE(37,TestCompoundInverse);
147 TESTCASE(38,TestNFDChainRBT);
148 TESTCASE(39,TestNullInverse);
149 TESTCASE(40,TestAliasInverseID);
150 TESTCASE(41,TestCompoundInverseID);
151 TESTCASE(42,TestUndefinedVariable);
152 TESTCASE(43,TestEmptyContext);
153 TESTCASE(44,TestCompoundFilterID);
154 TESTCASE(45,TestPropertySet);
155 TESTCASE(46,TestNewEngine);
156 TESTCASE(47,TestQuantifiedSegment);
157 TESTCASE(48,TestDevanagariLatinRT);
158 TESTCASE(49,TestTeluguLatinRT);
159 TESTCASE(50,TestCompoundLatinRT);
160 TESTCASE(51,TestSanskritLatinRT);
161 TESTCASE(52,TestLocaleInstantiation);
162 TESTCASE(53,TestTitleAccents);
163 TESTCASE(54,TestLocaleResource);
164 TESTCASE(55,TestParseError);
165 TESTCASE(56,TestOutputSet);
166 TESTCASE(57,TestVariableRange);
167 TESTCASE(58,TestInvalidPostContext);
168 TESTCASE(59,TestIDForms);
169 TESTCASE(60,TestToRulesMark);
170 TESTCASE(61,TestEscape);
171 TESTCASE(62,TestAnchorMasking);
172 TESTCASE(63,TestDisplayName);
173 TESTCASE(64,TestSpecialCases);
174 TESTCASE(65,TestIncrementalProgress);
175 TESTCASE(66,TestSurrogateCasing);
176 TESTCASE(67,TestFunction);
177 TESTCASE(68,TestInvalidBackRef);
178 TESTCASE(69,TestMulticharStringSet);
179 TESTCASE(70,TestUserFunction);
180 TESTCASE(71,TestAnyX);
181 TESTCASE(72,TestSourceTargetSet);
182 TESTCASE(73,TestGurmukhiDevanagari);
183 TESTCASE(74,TestRuleWhitespace);
184 TESTCASE(75,TestAllCodepoints);
185 TESTCASE(76,TestBoilerplate);
186 TESTCASE(77,TestAlternateSyntax);
187 TESTCASE(78,TestBeginEnd);
188 TESTCASE(79,TestBeginEndToRules);
189 TESTCASE(80,TestRegisterAlias);
190 TESTCASE(81,TestRuleStripping);
191 default: name = ""; break;
192 }
193 }
194
195 static const UVersionInfo ICU_39 = {3,9,0,0};
196 /**
197 * Make sure every system transliterator can be instantiated.
198 *
199 * ALSO test that the result of toRules() for each rule is a valid
200 * rule. Do this here so we don't have to have another test that
201 * instantiates everything as well.
202 */
TestInstantiation()203 void TransliteratorTest::TestInstantiation() {
204 UErrorCode ec = U_ZERO_ERROR;
205 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
206 assertSuccess("getAvailableIDs()", ec);
207 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
208 int32_t n = Transliterator::countAvailableIDs();
209 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
210 avail->count(ec) == n);
211 assertSuccess("count()", ec);
212 UnicodeString name;
213 for (int32_t i=0; i<n; ++i) {
214 const UnicodeString& id = *avail->snext(ec);
215 if (!assertSuccess("snext()", ec) ||
216 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
217 break;
218 }
219 UnicodeString id2 = Transliterator::getAvailableID(i);
220 if (id.length() < 1) {
221 errln(UnicodeString("FAIL: getAvailableID(") +
222 i + ") returned empty string");
223 continue;
224 }
225 if (id != id2) {
226 errln(UnicodeString("FAIL: getAvailableID(") +
227 i + ") != getAvailableIDs().snext()");
228 continue;
229 }
230 if(id2.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_39)){
231 /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
232 continue;
233 }
234 UParseError parseError;
235 UErrorCode status = U_ZERO_ERROR;
236 Transliterator* t = Transliterator::createInstance(id,
237 UTRANS_FORWARD, parseError,status);
238 name.truncate(0);
239 Transliterator::getDisplayName(id, name);
240 if (t == 0) {
241 errln(UnicodeString("FAIL: Couldn't create ") + id +
242 /*", parse error " + parseError.code +*/
243 ", line " + parseError.line +
244 ", offset " + parseError.offset +
245 ", pre-context " + prettify(parseError.preContext, TRUE) +
246 ", post-context " +prettify(parseError.postContext,TRUE) +
247 ", Error: " + u_errorName(status));
248 // When createInstance fails, it deletes the failing
249 // entry from the available ID list. We detect this
250 // here by looking for a change in countAvailableIDs.
251 int32_t nn = Transliterator::countAvailableIDs();
252 if (nn == (n - 1)) {
253 n = nn;
254 --i; // Compensate for deleted entry
255 }
256 } else {
257 logln(UnicodeString("OK: ") + name + " (" + id + ")");
258
259 // Now test toRules
260 UnicodeString rules;
261 t->toRules(rules, TRUE);
262 Transliterator *u = Transliterator::createFromRules("x",
263 rules, UTRANS_FORWARD, parseError,status);
264 if (u == 0) {
265 errln(UnicodeString("FAIL: ") + id +
266 ".createFromRules() => bad rules" +
267 /*", parse error " + parseError.code +*/
268 ", line " + parseError.line +
269 ", offset " + parseError.offset +
270 ", context " + prettify(parseError.preContext, TRUE) +
271 ", rules: " + prettify(rules, TRUE));
272 } else {
273 delete u;
274 }
275 delete t;
276 }
277 }
278 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
279 assertSuccess("snext()", ec);
280 delete avail;
281
282 // Now test the failure path
283 UParseError parseError;
284 UErrorCode status = U_ZERO_ERROR;
285 UnicodeString id("<Not a valid Transliterator ID>");
286 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
287 if (t != 0) {
288 errln("FAIL: " + id + " returned a transliterator");
289 delete t;
290 } else {
291 logln("OK: Bogus ID handled properly");
292 }
293 }
294
TestSimpleRules(void)295 void TransliteratorTest::TestSimpleRules(void) {
296 /* Example: rules 1. ab>x|y
297 * 2. yc>z
298 *
299 * []|eabcd start - no match, copy e to tranlated buffer
300 * [e]|abcd match rule 1 - copy output & adjust cursor
301 * [ex|y]cd match rule 2 - copy output & adjust cursor
302 * [exz]|d no match, copy d to transliterated buffer
303 * [exzd]| done
304 */
305 expect(UnicodeString("ab>x|y;", "") +
306 "yc>z",
307 "eabcd", "exzd");
308
309 /* Another set of rules:
310 * 1. ab>x|yzacw
311 * 2. za>q
312 * 3. qc>r
313 * 4. cw>n
314 *
315 * []|ab Rule 1
316 * [x|yzacw] No match
317 * [xy|zacw] Rule 2
318 * [xyq|cw] Rule 4
319 * [xyqn]| Done
320 */
321 expect(UnicodeString("ab>x|yzacw;") +
322 "za>q;" +
323 "qc>r;" +
324 "cw>n",
325 "ab", "xyqn");
326
327 /* Test categories
328 */
329 UErrorCode status = U_ZERO_ERROR;
330 UParseError parseError;
331 Transliterator *t = Transliterator::createFromRules(
332 "<ID>",
333 UnicodeString("$dummy=").append((UChar)0xE100) +
334 UnicodeString(";"
335 "$vowel=[aeiouAEIOU];"
336 "$lu=[:Lu:];"
337 "$vowel } $lu > '!';"
338 "$vowel > '&';"
339 "'!' { $lu > '^';"
340 "$lu > '*';"
341 "a > ERROR", ""),
342 UTRANS_FORWARD, parseError,
343 status);
344 if (U_FAILURE(status)) {
345 errln("FAIL: RBT constructor failed");
346 return;
347 }
348 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
349 delete t;
350 }
351
352 /**
353 * Test inline set syntax and set variable syntax.
354 */
TestInlineSet(void)355 void TransliteratorTest::TestInlineSet(void) {
356 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
357 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
358
359 expect(UnicodeString(
360 "$digit = [0-9];"
361 "$alpha = [a-zA-Z];"
362 "$alphanumeric = [$digit $alpha];" // ***
363 "$special = [^$alphanumeric];" // ***
364 "$alphanumeric > '-';"
365 "$special > '*';", ""),
366
367 "thx-1138", "---*----");
368 }
369
370 /**
371 * Create some inverses and confirm that they work. We have to be
372 * careful how we do this, since the inverses will not be true
373 * inverses -- we can't throw any random string at the composition
374 * of the transliterators and expect the identity function. F x
375 * F' != I. However, if we are careful about the input, we will
376 * get the expected results.
377 */
TestRuleBasedInverse(void)378 void TransliteratorTest::TestRuleBasedInverse(void) {
379 UnicodeString RULES =
380 UnicodeString("abc>zyx;") +
381 "ab>yz;" +
382 "bc>zx;" +
383 "ca>xy;" +
384 "a>x;" +
385 "b>y;" +
386 "c>z;" +
387
388 "abc<zyx;" +
389 "ab<yz;" +
390 "bc<zx;" +
391 "ca<xy;" +
392 "a<x;" +
393 "b<y;" +
394 "c<z;" +
395
396 "";
397
398 const char* DATA[] = {
399 // Careful here -- random strings will not work. If we keep
400 // the left side to the domain and the right side to the range
401 // we will be okay though (left, abc; right xyz).
402 "a", "x",
403 "abcacab", "zyxxxyy",
404 "caccb", "xyzzy",
405 };
406
407 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
408
409 UErrorCode status = U_ZERO_ERROR;
410 UParseError parseError;
411 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
412 UTRANS_FORWARD, parseError, status);
413 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
414 UTRANS_REVERSE, parseError, status);
415 if (U_FAILURE(status)) {
416 errln("FAIL: RBT constructor failed");
417 return;
418 }
419 for (int32_t i=0; i<DATA_length; i+=2) {
420 expect(*fwd, DATA[i], DATA[i+1]);
421 expect(*rev, DATA[i+1], DATA[i]);
422 }
423 delete fwd;
424 delete rev;
425 }
426
427 /**
428 * Basic test of keyboard.
429 */
TestKeyboard(void)430 void TransliteratorTest::TestKeyboard(void) {
431 UParseError parseError;
432 UErrorCode status = U_ZERO_ERROR;
433 Transliterator *t = Transliterator::createFromRules("<ID>",
434 UnicodeString("psch>Y;")
435 +"ps>y;"
436 +"ch>x;"
437 +"a>A;",
438 UTRANS_FORWARD, parseError,
439 status);
440 if (U_FAILURE(status)) {
441 errln("FAIL: RBT constructor failed");
442 return;
443 }
444 const char* DATA[] = {
445 // insertion, buffer
446 "a", "A",
447 "p", "Ap",
448 "s", "Aps",
449 "c", "Apsc",
450 "a", "AycA",
451 "psch", "AycAY",
452 0, "AycAY", // null means finishKeyboardTransliteration
453 };
454
455 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
456 delete t;
457 }
458
459 /**
460 * Basic test of keyboard with cursor.
461 */
TestKeyboard2(void)462 void TransliteratorTest::TestKeyboard2(void) {
463 UParseError parseError;
464 UErrorCode status = U_ZERO_ERROR;
465 Transliterator *t = Transliterator::createFromRules("<ID>",
466 UnicodeString("ych>Y;")
467 +"ps>|y;"
468 +"ch>x;"
469 +"a>A;",
470 UTRANS_FORWARD, parseError,
471 status);
472 if (U_FAILURE(status)) {
473 errln("FAIL: RBT constructor failed");
474 return;
475 }
476 const char* DATA[] = {
477 // insertion, buffer
478 "a", "A",
479 "p", "Ap",
480 "s", "Aps", // modified for rollback - "Ay",
481 "c", "Apsc", // modified for rollback - "Ayc",
482 "a", "AycA",
483 "p", "AycAp",
484 "s", "AycAps", // modified for rollback - "AycAy",
485 "c", "AycApsc", // modified for rollback - "AycAyc",
486 "h", "AycAY",
487 0, "AycAY", // null means finishKeyboardTransliteration
488 };
489
490 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
491 delete t;
492 }
493
494 /**
495 * Test keyboard transliteration with back-replacement.
496 */
TestKeyboard3(void)497 void TransliteratorTest::TestKeyboard3(void) {
498 // We want th>z but t>y. Furthermore, during keyboard
499 // transliteration we want t>y then yh>z if t, then h are
500 // typed.
501 UnicodeString RULES("t>|y;"
502 "yh>z;");
503
504 const char* DATA[] = {
505 // Column 1: characters to add to buffer (as if typed)
506 // Column 2: expected appearance of buffer after
507 // keyboard xliteration.
508 "a", "a",
509 "b", "ab",
510 "t", "abt", // modified for rollback - "aby",
511 "c", "abyc",
512 "t", "abyct", // modified for rollback - "abycy",
513 "h", "abycz",
514 0, "abycz", // null means finishKeyboardTransliteration
515 };
516
517 UParseError parseError;
518 UErrorCode status = U_ZERO_ERROR;
519 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
520 if (U_FAILURE(status)) {
521 errln("FAIL: RBT constructor failed");
522 return;
523 }
524 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
525 delete t;
526 }
527
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)528 void TransliteratorTest::keyboardAux(const Transliterator& t,
529 const char* DATA[], int32_t DATA_length) {
530 UErrorCode status = U_ZERO_ERROR;
531 UTransPosition index={0, 0, 0, 0};
532 UnicodeString s;
533 for (int32_t i=0; i<DATA_length; i+=2) {
534 UnicodeString log;
535 if (DATA[i] != 0) {
536 log = s + " + "
537 + DATA[i]
538 + " -> ";
539 t.transliterate(s, index, DATA[i], status);
540 } else {
541 log = s + " => ";
542 t.finishTransliteration(s, index);
543 }
544 // Show the start index '{' and the cursor '|'
545 UnicodeString a, b, c;
546 s.extractBetween(0, index.contextStart, a);
547 s.extractBetween(index.contextStart, index.start, b);
548 s.extractBetween(index.start, s.length(), c);
549 log.append(a).
550 append((UChar)LEFT_BRACE).
551 append(b).
552 append((UChar)PIPE).
553 append(c);
554 if (s == DATA[i+1] && U_SUCCESS(status)) {
555 logln(log);
556 } else {
557 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
558 }
559 }
560 }
561
TestArabic(void)562 void TransliteratorTest::TestArabic(void) {
563 // Test disabled for 2.0 until new Arabic transliterator can be written.
564 // /*
565 // const char* DATA[] = {
566 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
567 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
568 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
569 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
570 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
571 // "\u062c\u0645\u064a\u0644\u0629",
572 // };
573 // */
574 //
575 // UChar ar_raw[] = {
576 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
577 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
578 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
579 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
580 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
581 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
582 // };
583 // UnicodeString ar(ar_raw);
584 // UErrorCode status=U_ZERO_ERROR;
585 // UParseError parseError;
586 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
587 // if (t == 0) {
588 // errln("FAIL: createInstance failed");
589 // return;
590 // }
591 // expect(*t, "Arabic", ar);
592 // delete t;
593 }
594
595 /**
596 * Compose the Kana transliterator forward and reverse and try
597 * some strings that should come out unchanged.
598 */
TestCompoundKana(void)599 void TransliteratorTest::TestCompoundKana(void) {
600 UParseError parseError;
601 UErrorCode status = U_ZERO_ERROR;
602 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
603 if (t == 0) {
604 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
605 } else {
606 expect(*t, "aaaaa", "aaaaa");
607 delete t;
608 }
609 }
610
611 /**
612 * Compose the hex transliterators forward and reverse.
613 */
TestCompoundHex(void)614 void TransliteratorTest::TestCompoundHex(void) {
615 UParseError parseError;
616 UErrorCode status = U_ZERO_ERROR;
617 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
618 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
619 Transliterator* transab[] = { a, b };
620 Transliterator* transba[] = { b, a };
621 if (a == 0 || b == 0) {
622 errln("FAIL: construction failed");
623 delete a;
624 delete b;
625 return;
626 }
627 // Do some basic tests of a
628 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
629 // Do some basic tests of b
630 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
631
632 Transliterator* ab = new CompoundTransliterator(transab, 2);
633 UnicodeString s("abcde", "");
634 expect(*ab, s, s);
635
636 UnicodeString str(s);
637 a->transliterate(str);
638 Transliterator* ba = new CompoundTransliterator(transba, 2);
639 expect(*ba, str, str);
640
641 delete ab;
642 delete ba;
643 delete a;
644 delete b;
645 }
646
647 int gTestFilterClassID = 0;
648 /**
649 * Used by TestFiltering().
650 */
651 class TestFilter : public UnicodeFilter {
clone() const652 virtual UnicodeFunctor* clone() const {
653 return new TestFilter(*this);
654 }
contains(UChar32 c) const655 virtual UBool contains(UChar32 c) const {
656 return c != (UChar)0x0063 /*c*/;
657 }
658 // Stubs
toPattern(UnicodeString & result,UBool) const659 virtual UnicodeString& toPattern(UnicodeString& result,
660 UBool /*escapeUnprintable*/) const {
661 return result;
662 }
matchesIndexValue(uint8_t) const663 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
664 return FALSE;
665 }
addMatchSetTo(UnicodeSet &) const666 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
667 public:
getDynamicClassID() const668 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
669 };
670
671 /**
672 * Do some basic tests of filtering.
673 */
TestFiltering(void)674 void TransliteratorTest::TestFiltering(void) {
675 UParseError parseError;
676 UErrorCode status = U_ZERO_ERROR;
677 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
678 if (hex == 0) {
679 errln("FAIL: createInstance(Any-Hex) failed");
680 return;
681 }
682 hex->adoptFilter(new TestFilter());
683 UnicodeString s("abcde");
684 hex->transliterate(s);
685 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
686 if (s == exp) {
687 logln(UnicodeString("Ok: \"") + exp + "\"");
688 } else {
689 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
690 }
691
692 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
693 UnicodeFilter *f = hex->orphanFilter();
694 if (f == NULL){
695 errln("FAIL: orphanFilter() should get a UnicodeFilter");
696 } else {
697 delete f;
698 }
699 delete hex;
700 }
701
702 /**
703 * Test anchors
704 */
TestAnchors(void)705 void TransliteratorTest::TestAnchors(void) {
706 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
707 "aaa",
708 "012");
709 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
710 "aaa",
711 "012");
712 expect(UnicodeString("^ab > 01 ;"
713 " ab > |8 ;"
714 " b > k ;"
715 " 8x$ > 45 ;"
716 " 8x > 77 ;", ""),
717
718 "ababbabxabx",
719 "018k7745");
720 expect(UnicodeString("$s = [z$] ;"
721 "$s{ab > 01 ;"
722 " ab > |8 ;"
723 " b > k ;"
724 " 8x}$s > 45 ;"
725 " 8x > 77 ;", ""),
726
727 "abzababbabxzabxabx",
728 "01z018k45z01x45");
729 }
730
731 /**
732 * Test pattern quoting and escape mechanisms.
733 */
TestPatternQuoting(void)734 void TransliteratorTest::TestPatternQuoting(void) {
735 // Array of 3n items
736 // Each item is <rules>, <input>, <expected output>
737 const UnicodeString DATA[] = {
738 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
739 UnicodeString(UChar(0x4E01)),
740 "[male adult]"
741 };
742
743 for (int32_t i=0; i<3; i+=3) {
744 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
745 UParseError parseError;
746 UErrorCode status = U_ZERO_ERROR;
747 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
748 if (U_FAILURE(status)) {
749 errln("RBT constructor failed");
750 } else {
751 expect(*t, DATA[i+1], DATA[i+2]);
752 }
753 delete t;
754 }
755 }
756
757 /**
758 * Regression test for bugs found in Greek transliteration.
759 */
TestJ277(void)760 void TransliteratorTest::TestJ277(void) {
761 UErrorCode status = U_ZERO_ERROR;
762 UParseError parseError;
763 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
764 if (gl == NULL) {
765 errln("FAIL: createInstance(Greek-Latin) returned NULL");
766 return;
767 }
768
769 UChar sigma = 0x3C3;
770 UChar upsilon = 0x3C5;
771 UChar nu = 0x3BD;
772 // UChar PHI = 0x3A6;
773 UChar alpha = 0x3B1;
774 // UChar omega = 0x3C9;
775 // UChar omicron = 0x3BF;
776 // UChar epsilon = 0x3B5;
777
778 // sigma upsilon nu -> syn
779 UnicodeString syn;
780 syn.append(sigma).append(upsilon).append(nu);
781 expect(*gl, syn, "syn");
782
783 // sigma alpha upsilon nu -> saun
784 UnicodeString sayn;
785 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
786 expect(*gl, sayn, "saun");
787
788 // Again, using a smaller rule set
789 UnicodeString rules(
790 "$alpha = \\u03B1;"
791 "$nu = \\u03BD;"
792 "$sigma = \\u03C3;"
793 "$ypsilon = \\u03C5;"
794 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
795 "s <> $sigma;"
796 "a <> $alpha;"
797 "u <> $vowel { $ypsilon;"
798 "y <> $ypsilon;"
799 "n <> $nu;",
800 "");
801 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
802 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
803 expect(*mini, syn, "syn");
804 expect(*mini, sayn, "saun");
805 delete mini;
806 mini = NULL;
807
808 #if !UCONFIG_NO_FORMATTING
809 // Transliterate the Greek locale data
810 Locale el("el");
811 DateFormatSymbols syms(el, status);
812 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
813 int32_t i, count;
814 const UnicodeString* data = syms.getMonths(count);
815 for (i=0; i<count; ++i) {
816 if (data[i].length() == 0) {
817 continue;
818 }
819 UnicodeString out(data[i]);
820 gl->transliterate(out);
821 UBool ok = TRUE;
822 if (data[i].length() >= 2 && out.length() >= 2 &&
823 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
824 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
825 ok = FALSE;
826 }
827 }
828 if (ok) {
829 logln(prettify(data[i] + " -> " + out));
830 } else {
831 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
832 }
833 }
834 #endif
835
836 delete gl;
837 }
838
839 /**
840 * Prefix, suffix support in hex transliterators
841 */
TestJ243(void)842 void TransliteratorTest::TestJ243(void) {
843 UErrorCode ec = U_ZERO_ERROR;
844
845 // Test default Hex-Any, which should handle
846 // \u, \U, u+, and U+
847 Transliterator *hex =
848 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
849 if (assertSuccess("getInstance", ec)) {
850 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
851 }
852 delete hex;
853
854 // // Try a custom Hex-Unicode
855 // // \uXXXX and &#xXXXX;
856 // ec = U_ZERO_ERROR;
857 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
858 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
859 // "abcd5fx0123");
860 // // Try custom Any-Hex (default is tested elsewhere)
861 // ec = U_ZERO_ERROR;
862 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
863 // expect(hex3, "012", "012");
864 }
865
866 /**
867 * Parsers need better syntax error messages.
868 */
TestJ329(void)869 void TransliteratorTest::TestJ329(void) {
870
871 struct { UBool containsErrors; const char* rule; } DATA[] = {
872 { FALSE, "a > b; c > d" },
873 { TRUE, "a > b; no operator; c > d" },
874 };
875 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
876
877 for (int32_t i=0; i<DATA_length; ++i) {
878 UErrorCode status = U_ZERO_ERROR;
879 UParseError parseError;
880 Transliterator *rbt = Transliterator::createFromRules("<ID>",
881 DATA[i].rule,
882 UTRANS_FORWARD,
883 parseError,
884 status);
885 UBool gotError = U_FAILURE(status);
886 UnicodeString desc(DATA[i].rule);
887 desc.append(gotError ? " -> error" : " -> no error");
888 if (gotError) {
889 desc = desc + ", ParseError code=" + u_errorName(status) +
890 " line=" + parseError.line +
891 " offset=" + parseError.offset +
892 " context=" + parseError.preContext;
893 }
894 if (gotError == DATA[i].containsErrors) {
895 logln(UnicodeString("Ok: ") + desc);
896 } else {
897 errln(UnicodeString("FAIL: ") + desc);
898 }
899 delete rbt;
900 }
901 }
902
903 /**
904 * Test segments and segment references.
905 */
TestSegments(void)906 void TransliteratorTest::TestSegments(void) {
907 // Array of 3n items
908 // Each item is <rules>, <input>, <expected output>
909 UnicodeString DATA[] = {
910 "([a-z]) '.' ([0-9]) > $2 '-' $1",
911 "abc.123.xyz.456",
912 "ab1-c23.xy4-z56",
913
914 // nested
915 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
916 "a1 b2",
917 "a1.a.1 b2.b.2",
918 };
919 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
920
921 for (int32_t i=0; i<DATA_length; i+=3) {
922 logln("Pattern: " + prettify(DATA[i]));
923 UParseError parseError;
924 UErrorCode status = U_ZERO_ERROR;
925 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
926 if (U_FAILURE(status)) {
927 errln("FAIL: RBT constructor");
928 } else {
929 expect(*t, DATA[i+1], DATA[i+2]);
930 }
931 delete t;
932 }
933 }
934
935 /**
936 * Test cursor positioning outside of the key
937 */
TestCursorOffset(void)938 void TransliteratorTest::TestCursorOffset(void) {
939 // Array of 3n items
940 // Each item is <rules>, <input>, <expected output>
941 UnicodeString DATA[] = {
942 "pre {alpha} post > | @ ALPHA ;"
943 "eALPHA > beta ;"
944 "pre {beta} post > BETA @@ | ;"
945 "post > xyz",
946
947 "prealphapost prebetapost",
948
949 "prbetaxyz preBETApost",
950 };
951 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
952
953 for (int32_t i=0; i<DATA_length; i+=3) {
954 logln("Pattern: " + prettify(DATA[i]));
955 UParseError parseError;
956 UErrorCode status = U_ZERO_ERROR;
957 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
958 if (U_FAILURE(status)) {
959 errln("FAIL: RBT constructor");
960 } else {
961 expect(*t, DATA[i+1], DATA[i+2]);
962 }
963 delete t;
964 }
965 }
966
967 /**
968 * Test zero length and > 1 char length variable values. Test
969 * use of variable refs in UnicodeSets.
970 */
TestArbitraryVariableValues(void)971 void TransliteratorTest::TestArbitraryVariableValues(void) {
972 // Array of 3n items
973 // Each item is <rules>, <input>, <expected output>
974 UnicodeString DATA[] = {
975 "$abe = ab;"
976 "$pat = x[yY]z;"
977 "$ll = 'a-z';"
978 "$llZ = [$ll];"
979 "$llY = [$ll$pat];"
980 "$emp = ;"
981
982 "$abe > ABE;"
983 "$pat > END;"
984 "$llZ > 1;"
985 "$llY > 2;"
986 "7$emp 8 > 9;"
987 "",
988
989 "ab xYzxyz stY78",
990 "ABE ENDEND 1129",
991 };
992 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
993
994 for (int32_t i=0; i<DATA_length; i+=3) {
995 logln("Pattern: " + prettify(DATA[i]));
996 UParseError parseError;
997 UErrorCode status = U_ZERO_ERROR;
998 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
999 if (U_FAILURE(status)) {
1000 errln("FAIL: RBT constructor");
1001 } else {
1002 expect(*t, DATA[i+1], DATA[i+2]);
1003 }
1004 delete t;
1005 }
1006 }
1007
1008 /**
1009 * Confirm that the contextStart, contextLimit, start, and limit
1010 * behave correctly. J474.
1011 */
TestPositionHandling(void)1012 void TransliteratorTest::TestPositionHandling(void) {
1013 // Array of 3n items
1014 // Each item is <rules>, <input>, <expected output>
1015 const char* DATA[] = {
1016 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1017 "xtat txtb", // pos 0,9,0,9
1018 "xTTaSS TTxUUb",
1019
1020 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1021 "xtat txtb", // pos 2,9,3,8
1022 "xtaSS TTxUUb",
1023
1024 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1025 "xtat txtb", // pos 3,8,3,8
1026 "xtaTT TTxTTb",
1027 };
1028
1029 // Array of 4n positions -- these go with the DATA array
1030 // They are: contextStart, contextLimit, start, limit
1031 int32_t POS[] = {
1032 0, 9, 0, 9,
1033 2, 9, 3, 8,
1034 3, 8, 3, 8,
1035 };
1036
1037 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1038 for (int32_t i=0; i<n; i++) {
1039 UErrorCode status = U_ZERO_ERROR;
1040 UParseError parseError;
1041 Transliterator *t = Transliterator::createFromRules("<ID>",
1042 DATA[3*i], UTRANS_FORWARD, parseError, status);
1043 if (U_FAILURE(status)) {
1044 delete t;
1045 errln("FAIL: RBT constructor");
1046 return;
1047 }
1048 UTransPosition pos;
1049 pos.contextStart= POS[4*i];
1050 pos.contextLimit = POS[4*i+1];
1051 pos.start = POS[4*i+2];
1052 pos.limit = POS[4*i+3];
1053 UnicodeString rsource(DATA[3*i+1]);
1054 t->transliterate(rsource, pos, status);
1055 if (U_FAILURE(status)) {
1056 delete t;
1057 errln("FAIL: transliterate");
1058 return;
1059 }
1060 t->finishTransliteration(rsource, pos);
1061 expectAux(DATA[3*i],
1062 DATA[3*i+1],
1063 rsource,
1064 DATA[3*i+2]);
1065 delete t;
1066 }
1067 }
1068
1069 /**
1070 * Test the Hiragana-Katakana transliterator.
1071 */
TestHiraganaKatakana(void)1072 void TransliteratorTest::TestHiraganaKatakana(void) {
1073 UParseError parseError;
1074 UErrorCode status = U_ZERO_ERROR;
1075 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1076 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1077 if (hk == 0 || kh == 0) {
1078 errln("FAIL: createInstance failed");
1079 delete hk;
1080 delete kh;
1081 return;
1082 }
1083
1084 // Array of 3n items
1085 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1086 const char* DATA[] = {
1087 "both",
1088 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1089 "\\u30A2\\u30F8\\u30F2\\u30B0",
1090
1091 "kh",
1092 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1093 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1094 };
1095 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1096
1097 for (int32_t i=0; i<DATA_length; i+=3) {
1098 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1099 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1100 switch (*DATA[i]) {
1101 case 0x68: //'h': // Hiragana-Katakana
1102 expect(*hk, h, k);
1103 break;
1104 case 0x6B: //'k': // Katakana-Hiragana
1105 expect(*kh, k, h);
1106 break;
1107 case 0x62: //'b': // both
1108 expect(*hk, h, k);
1109 expect(*kh, k, h);
1110 break;
1111 }
1112 }
1113 delete hk;
1114 delete kh;
1115 }
1116
1117 /**
1118 * Test cloning / copy constructor of RBT.
1119 */
TestCopyJ476(void)1120 void TransliteratorTest::TestCopyJ476(void) {
1121 // The real test here is what happens when the destructors are
1122 // called. So we let one object get destructed, and check to
1123 // see that its copy still works.
1124 Transliterator *t2 = 0;
1125 {
1126 UParseError parseError;
1127 UErrorCode status = U_ZERO_ERROR;
1128 Transliterator *t1 = Transliterator::createFromRules("t1",
1129 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1130 if (U_FAILURE(status)) {
1131 errln("FAIL: RBT constructor");
1132 return;
1133 }
1134 t2 = t1->clone(); // Call copy constructor under the covers.
1135 expect(*t1, "abcfoofoo", "ABcbar");
1136 delete t1;
1137 }
1138 expect(*t2, "abcfoofoo", "ABcbar");
1139 delete t2;
1140 }
1141
1142 /**
1143 * Test inter-Indic transliterators. These are composed.
1144 * ICU4C Jitterbug 483.
1145 */
TestInterIndic(void)1146 void TransliteratorTest::TestInterIndic(void) {
1147 UnicodeString ID("Devanagari-Gujarati", "");
1148 UErrorCode status = U_ZERO_ERROR;
1149 UParseError parseError;
1150 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1151 if (dg == 0) {
1152 errln("FAIL: createInstance(" + ID + ") returned NULL");
1153 return;
1154 }
1155 UnicodeString id = dg->getID();
1156 if (id != ID) {
1157 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1158 }
1159 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1160 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1161 expect(*dg, dev, guj);
1162 delete dg;
1163 }
1164
1165 /**
1166 * Test filter syntax in IDs. (J918)
1167 */
TestFilterIDs(void)1168 void TransliteratorTest::TestFilterIDs(void) {
1169 // Array of 3n strings:
1170 // <id>, <inverse id>, <input>, <expected output>
1171 const char* DATA[] = {
1172 "[aeiou]Any-Hex", // ID
1173 "[aeiou]Hex-Any", // expected inverse ID
1174 "quizzical", // src
1175 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1176
1177 "[aeiou]Any-Hex;[^5]Hex-Any",
1178 "[^5]Any-Hex;[aeiou]Hex-Any",
1179 "quizzical",
1180 "q\\u0075izzical",
1181
1182 "[abc]Null",
1183 "[abc]Null",
1184 "xyz",
1185 "xyz",
1186 };
1187 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1188
1189 for (int i=0; i<DATA_length; i+=4) {
1190 UnicodeString ID(DATA[i], "");
1191 UnicodeString uID(DATA[i+1], "");
1192 UnicodeString data2(DATA[i+2], "");
1193 UnicodeString data3(DATA[i+3], "");
1194 UParseError parseError;
1195 UErrorCode status = U_ZERO_ERROR;
1196 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1197 if (t == 0) {
1198 errln("FAIL: createInstance(" + ID + ") returned NULL");
1199 return;
1200 }
1201 expect(*t, data2, data3);
1202
1203 // Check the ID
1204 if (ID != t->getID()) {
1205 errln("FAIL: createInstance(" + ID + ").getID() => " +
1206 t->getID());
1207 }
1208
1209 // Check the inverse
1210 Transliterator *u = t->createInverse(status);
1211 if (u == 0) {
1212 errln("FAIL: " + ID + ".createInverse() returned NULL");
1213 } else if (u->getID() != uID) {
1214 errln("FAIL: " + ID + ".createInverse().getID() => " +
1215 u->getID() + ", expected " + uID);
1216 }
1217
1218 delete t;
1219 delete u;
1220 }
1221 }
1222
1223 /**
1224 * Test the case mapping transliterators.
1225 */
TestCaseMap(void)1226 void TransliteratorTest::TestCaseMap(void) {
1227 UParseError parseError;
1228 UErrorCode status = U_ZERO_ERROR;
1229 Transliterator* toUpper =
1230 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1231 Transliterator* toLower =
1232 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1233 Transliterator* toTitle =
1234 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1235 if (toUpper==0 || toLower==0 || toTitle==0) {
1236 errln("FAIL: createInstance returned NULL");
1237 delete toUpper;
1238 delete toLower;
1239 delete toTitle;
1240 return;
1241 }
1242
1243 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1244 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1245 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1246 "the quick brown foX jumped over the lazY dogs.");
1247 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1248 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1249
1250 delete toUpper;
1251 delete toLower;
1252 delete toTitle;
1253 }
1254
1255 /**
1256 * Test the name mapping transliterators.
1257 */
TestNameMap(void)1258 void TransliteratorTest::TestNameMap(void) {
1259 UParseError parseError;
1260 UErrorCode status = U_ZERO_ERROR;
1261 Transliterator* uni2name =
1262 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1263 Transliterator* name2uni =
1264 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1265 if (uni2name==0 || name2uni==0) {
1266 errln("FAIL: createInstance returned NULL");
1267 delete uni2name;
1268 delete name2uni;
1269 return;
1270 }
1271
1272 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1273 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1274 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1275 expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
1276 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1277
1278 delete uni2name;
1279 delete name2uni;
1280
1281 // round trip
1282 Transliterator* t =
1283 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1284 if (t==0) {
1285 errln("FAIL: createInstance returned NULL");
1286 delete t;
1287 return;
1288 }
1289
1290 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1291 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1292 expect(*t, s, s);
1293 delete t;
1294 }
1295
1296 /**
1297 * Test liberalized ID syntax. 1006c
1298 */
TestLiberalizedID(void)1299 void TransliteratorTest::TestLiberalizedID(void) {
1300 // Some test cases have an expected getID() value of NULL. This
1301 // means I have disabled the test case for now. This stuff is
1302 // still under development, and I haven't decided whether to make
1303 // getID() return canonical case yet. It will all get rewritten
1304 // with the move to Source-Target/Variant IDs anyway. [aliu]
1305 const char* DATA[] = {
1306 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1307 " Null ", "Null", "whitespace",
1308 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1309 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1310 };
1311 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1312 UParseError parseError;
1313 UErrorCode status= U_ZERO_ERROR;
1314 for (int32_t i=0; i<DATA_length; i+=3) {
1315 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1316 if (t == 0) {
1317 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1318 " cannot create ID \"" + DATA[i] + "\"");
1319 } else {
1320 UnicodeString exp;
1321 if (DATA[i+1]) {
1322 exp = UnicodeString(DATA[i+1], "");
1323 }
1324 // Don't worry about getID() if the expected char*
1325 // is NULL -- see above.
1326 if (exp.length() == 0 || exp == t->getID()) {
1327 logln(UnicodeString("Ok: ") + DATA[i+2] +
1328 " create ID \"" + DATA[i] + "\" => \"" +
1329 exp + "\"");
1330 } else {
1331 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1332 " create ID \"" + DATA[i] + "\" => \"" +
1333 t->getID() + "\", exp \"" + exp + "\"");
1334 }
1335 delete t;
1336 }
1337 }
1338 }
1339
1340 /* test for Jitterbug 912 */
TestCreateInstance()1341 void TransliteratorTest::TestCreateInstance(){
1342 const char* FORWARD = "F";
1343 const char* REVERSE = "R";
1344 const char* DATA[] = {
1345 // Column 1: id
1346 // Column 2: direction
1347 // Column 3: expected ID, or "" if expect failure
1348 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1349
1350 // JB#2689: bad compound causes crash
1351 "InvalidSource-InvalidTarget", FORWARD, "",
1352 "InvalidSource-InvalidTarget", REVERSE, "",
1353 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1354 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1355 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1356 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1357
1358 NULL
1359 };
1360
1361 for (int32_t i=0; DATA[i]; i+=3) {
1362 UParseError err;
1363 UErrorCode ec = U_ZERO_ERROR;
1364 UnicodeString id(DATA[i]);
1365 UTransDirection dir = (DATA[i+1]==FORWARD)?
1366 UTRANS_FORWARD:UTRANS_REVERSE;
1367 UnicodeString expID(DATA[i+2]);
1368 Transliterator* t =
1369 Transliterator::createInstance(id,dir,err,ec);
1370 UnicodeString newID;
1371 if (t) {
1372 newID = t->getID();
1373 }
1374 UBool ok = (newID == expID);
1375 if (!t) {
1376 newID = u_errorName(ec);
1377 }
1378 if (ok) {
1379 logln((UnicodeString)"Ok: createInstance(" +
1380 id + "," + DATA[i+1] + ") => " + newID);
1381 } else {
1382 errln((UnicodeString)"FAIL: createInstance(" +
1383 id + "," + DATA[i+1] + ") => " + newID +
1384 ", expected " + expID);
1385 }
1386 delete t;
1387 }
1388 }
1389
1390 /**
1391 * Test the normalization transliterator.
1392 */
TestNormalizationTransliterator()1393 void TransliteratorTest::TestNormalizationTransliterator() {
1394 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1395 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1396 const char* CANON[] = {
1397 // Input Decomposed Composed
1398 "cat", "cat", "cat" ,
1399 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1400
1401 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1402 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1403
1404 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1405 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1406 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1407
1408 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1409 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1410
1411 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1412 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1413 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1414
1415 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1416 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1417
1418 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1419 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1420
1421 "Henry IV", "Henry IV", "Henry IV" ,
1422 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1423
1424 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1425 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1426 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1427 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1428 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1429
1430 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1431 0 // end
1432 };
1433
1434 const char* COMPAT[] = {
1435 // Input Decomposed Composed
1436 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1437
1438 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1439 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1440
1441 "Henry IV", "Henry IV", "Henry IV" ,
1442 "Henry \\u2163", "Henry IV", "Henry IV" ,
1443
1444 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1445 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1446
1447 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1448 0 // end
1449 };
1450
1451 int32_t i;
1452 UParseError parseError;
1453 UErrorCode status = U_ZERO_ERROR;
1454 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1455 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1456 if (!NFD || !NFC) {
1457 errln("FAIL: createInstance failed");
1458 delete NFD;
1459 delete NFC;
1460 return;
1461 }
1462 for (i=0; CANON[i]; i+=3) {
1463 UnicodeString in = CharsToUnicodeString(CANON[i]);
1464 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1465 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1466 expect(*NFD, in, expd);
1467 expect(*NFC, in, expc);
1468 }
1469 delete NFD;
1470 delete NFC;
1471
1472 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1473 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1474 if (!NFKD || !NFKC) {
1475 errln("FAIL: createInstance failed");
1476 delete NFKD;
1477 delete NFKC;
1478 return;
1479 }
1480 for (i=0; COMPAT[i]; i+=3) {
1481 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1482 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1483 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1484 expect(*NFKD, in, expkd);
1485 expect(*NFKC, in, expkc);
1486 }
1487 delete NFKD;
1488 delete NFKC;
1489
1490 UParseError pe;
1491 status = U_ZERO_ERROR;
1492 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1493 UTRANS_FORWARD,
1494 pe, status);
1495 if (t == 0) {
1496 errln("FAIL: createInstance failed");
1497 }
1498 expect(*t, CharsToUnicodeString("\\u010dx"),
1499 CharsToUnicodeString("c\\u030C"));
1500 delete t;
1501 }
1502
1503 /**
1504 * Test compound RBT rules.
1505 */
TestCompoundRBT(void)1506 void TransliteratorTest::TestCompoundRBT(void) {
1507 // Careful with spacing and ';' here: Phrase this exactly
1508 // as toRules() is going to return it. If toRules() changes
1509 // with regard to spacing or ';', then adjust this string.
1510 UnicodeString rule("::Hex-Any;\n"
1511 "::Any-Lower;\n"
1512 "a > '.A.';\n"
1513 "b > '.B.';\n"
1514 "::[^t]Any-Upper;", "");
1515 UParseError parseError;
1516 UErrorCode status = U_ZERO_ERROR;
1517 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1518 if (t == 0) {
1519 errln("FAIL: createFromRules failed");
1520 return;
1521 }
1522 expect(*t, "\\u0043at in the hat, bat on the mat",
1523 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1524 UnicodeString r;
1525 t->toRules(r, TRUE);
1526 if (r == rule) {
1527 logln((UnicodeString)"OK: toRules() => " + r);
1528 } else {
1529 errln((UnicodeString)"FAIL: toRules() => " + r +
1530 ", expected " + rule);
1531 }
1532 delete t;
1533
1534 // Now test toRules
1535 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1536 if (t == 0) {
1537 errln("FAIL: createInstance failed");
1538 return;
1539 }
1540 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1541 t->toRules(r, TRUE);
1542 if (r != exp) {
1543 errln((UnicodeString)"FAIL: toRules() => " + r +
1544 ", expected " + exp);
1545 } else {
1546 logln((UnicodeString)"OK: toRules() => " + r);
1547 }
1548 delete t;
1549
1550 // Round trip the result of toRules
1551 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1552 if (t == 0) {
1553 errln("FAIL: createFromRules #2 failed");
1554 return;
1555 } else {
1556 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1557 }
1558
1559 // Test toRules again
1560 t->toRules(r, TRUE);
1561 if (r != exp) {
1562 errln((UnicodeString)"FAIL: toRules() => " + r +
1563 ", expected " + exp);
1564 } else {
1565 logln((UnicodeString)"OK: toRules() => " + r);
1566 }
1567
1568 delete t;
1569
1570 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1571 // to what the regenerated ID will look like.
1572 UnicodeString id("Upper(Lower);(NFKC)", "");
1573 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1574 if (t == 0) {
1575 errln("FAIL: createInstance #2 failed");
1576 return;
1577 }
1578 if (t->getID() == id) {
1579 logln((UnicodeString)"OK: created " + id);
1580 } else {
1581 errln((UnicodeString)"FAIL: createInstance(" + id +
1582 ").getID() => " + t->getID());
1583 }
1584
1585 Transliterator *u = t->createInverse(status);
1586 if (u == 0) {
1587 errln("FAIL: createInverse failed");
1588 delete t;
1589 return;
1590 }
1591 exp = "NFKC();Lower(Upper)";
1592 if (u->getID() == exp) {
1593 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1594 u->getID());
1595 } else {
1596 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1597 u->getID());
1598 }
1599 delete t;
1600 delete u;
1601 }
1602
1603 /**
1604 * Compound filter semantics were orginially not implemented
1605 * correctly. Originally, each component filter f(i) is replaced by
1606 * f'(i) = f(i) && g, where g is the filter for the compound
1607 * transliterator.
1608 *
1609 * From Mark:
1610 *
1611 * Suppose and I have a transliterator X. Internally X is
1612 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1613 *
1614 * The compound should convert all greek characters (through latin) to
1615 * cyrillic, then lowercase the result. The filter should say "don't
1616 * touch 'A' in the original". But because an intermediate result
1617 * happens to go through "A", the Greek Alpha gets hung up.
1618 */
TestCompoundFilter(void)1619 void TransliteratorTest::TestCompoundFilter(void) {
1620 UParseError parseError;
1621 UErrorCode status = U_ZERO_ERROR;
1622 Transliterator *t = Transliterator::createInstance
1623 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1624 if (t == 0) {
1625 errln("FAIL: createInstance failed");
1626 return;
1627 }
1628 t->adoptFilter(new UnicodeSet("[^A]", status));
1629 if (U_FAILURE(status)) {
1630 errln("FAIL: UnicodeSet ct failed");
1631 delete t;
1632 return;
1633 }
1634
1635 // Only the 'A' at index 1 should remain unchanged
1636 expect(*t,
1637 CharsToUnicodeString("BA\\u039A\\u0391"),
1638 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1639 delete t;
1640 }
1641
TestRemove(void)1642 void TransliteratorTest::TestRemove(void) {
1643 UParseError parseError;
1644 UErrorCode status = U_ZERO_ERROR;
1645 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1646 if (t == 0) {
1647 errln("FAIL: createInstance failed");
1648 return;
1649 }
1650
1651 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1652
1653 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1654 // duplicating the filter
1655 Transliterator* t2 = t->clone();
1656 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1657
1658 delete t;
1659 delete t2;
1660 }
1661
TestToRules(void)1662 void TransliteratorTest::TestToRules(void) {
1663 const char* RBT = "rbt";
1664 const char* SET = "set";
1665 static const char* DATA[] = {
1666 RBT,
1667 "$a=\\u4E61; [$a] > A;",
1668 "[\\u4E61] > A;",
1669
1670 RBT,
1671 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1672 "[[:Zs:][:Zl:]]{a} > A;",
1673
1674 SET,
1675 "[[:Zs:][:Zl:]]",
1676 "[[:Zs:][:Zl:]]",
1677
1678 SET,
1679 "[:Ps:]",
1680 "[:Ps:]",
1681
1682 SET,
1683 "[:L:]",
1684 "[:L:]",
1685
1686 SET,
1687 "[[:L:]-[A]]",
1688 "[[:L:]-[A]]",
1689
1690 SET,
1691 "[~[:Lu:][:Ll:]]",
1692 "[~[:Lu:][:Ll:]]",
1693
1694 SET,
1695 "[~[a-z]]",
1696 "[~[a-z]]",
1697
1698 RBT,
1699 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1700 "[^[:Zs:]]{a} > A;",
1701
1702 RBT,
1703 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1704 "[[a-z]-[:Zs:]]{a} > A;",
1705
1706 RBT,
1707 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1708 "[[:Zs:]&[a-z]]{a} > A;",
1709
1710 RBT,
1711 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1712 "[x[:Zs:]]{a} > A;",
1713
1714 RBT,
1715 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1716 "$macron = \\u0304 ;"
1717 "$evowel = [aeiouyAEIOUY] ;"
1718 "$iotasub = \\u0345 ;"
1719 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1720 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1721
1722 RBT,
1723 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1724 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1725 };
1726 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1727
1728 for (int32_t d=0; d < DATA_length; d+=3) {
1729 if (DATA[d] == RBT) {
1730 // Transliterator test
1731 UParseError parseError;
1732 UErrorCode status = U_ZERO_ERROR;
1733 Transliterator *t = Transliterator::createFromRules("ID",
1734 DATA[d+1], UTRANS_FORWARD, parseError, status);
1735 if (t == 0) {
1736 errln("FAIL: createFromRules failed");
1737 return;
1738 }
1739 UnicodeString rules, escapedRules;
1740 t->toRules(rules, FALSE);
1741 t->toRules(escapedRules, TRUE);
1742 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1743 UnicodeString expEscapedRules(DATA[d+2]);
1744 if (rules == expRules) {
1745 logln((UnicodeString)"Ok: " + DATA[d+1] +
1746 " => " + rules);
1747 } else {
1748 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1749 " => " + rules + ", exp " + expRules);
1750 }
1751 if (escapedRules == expEscapedRules) {
1752 logln((UnicodeString)"Ok: " + DATA[d+1] +
1753 " => " + escapedRules);
1754 } else {
1755 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1756 " => " + escapedRules + ", exp " + expEscapedRules);
1757 }
1758 delete t;
1759
1760 } else {
1761 // UnicodeSet test
1762 UErrorCode status = U_ZERO_ERROR;
1763 UnicodeString pat(DATA[d+1]);
1764 UnicodeString expToPat(DATA[d+2]);
1765 UnicodeSet set(pat, status);
1766 if (U_FAILURE(status)) {
1767 errln("FAIL: UnicodeSet ct failed");
1768 return;
1769 }
1770 // Adjust spacing etc. as necessary.
1771 UnicodeString toPat;
1772 set.toPattern(toPat);
1773 if (expToPat == toPat) {
1774 logln((UnicodeString)"Ok: " + pat +
1775 " => " + toPat);
1776 } else {
1777 errln((UnicodeString)"FAIL: " + pat +
1778 " => " + prettify(toPat, TRUE) +
1779 ", exp " + prettify(pat, TRUE));
1780 }
1781 }
1782 }
1783 }
1784
TestContext()1785 void TransliteratorTest::TestContext() {
1786 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1787 expect("de > x; {d}e > y;",
1788 "de",
1789 "ye",
1790 &pos);
1791
1792 expect("ab{c} > z;",
1793 "xadabdabcy",
1794 "xadabdabzy");
1795 }
1796
TestSupplemental()1797 void TransliteratorTest::TestSupplemental() {
1798
1799 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1800 "a > $a; $s > i;"),
1801 CharsToUnicodeString("ab\\U0001030Fx"),
1802 CharsToUnicodeString("\\U00010300bix"));
1803
1804 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1805 "$b=[A-Z\\U00010400-\\U0001044D];"
1806 "($a)($b) > $2 $1;"),
1807 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1808 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1809
1810 // k|ax\\U00010300xm
1811
1812 // k|a\\U00010400\\U00010300xm
1813 // ky|\\U00010400\\U00010300xm
1814 // ky\\U00010400|\\U00010300xm
1815
1816 // ky\\U00010400|\\U00010300\\U00010400m
1817 // ky\\U00010400y|\\U00010400m
1818 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1819 "$a {x} > | @ \\U00010400;"
1820 "{$a} [^\\u0000-\\uFFFF] > y;"),
1821 CharsToUnicodeString("kax\\U00010300xm"),
1822 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1823
1824 expectT("Any-Name",
1825 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1826 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1827
1828 expectT("Any-Hex/Unicode",
1829 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1830 "U+10330U+10FF00U+E0061U+00A0");
1831
1832 expectT("Any-Hex/C",
1833 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1834 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1835
1836 expectT("Any-Hex/Perl",
1837 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1839
1840 expectT("Any-Hex/Java",
1841 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1843
1844 expectT("Any-Hex/XML",
1845 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846 "𐌰􏼀󠁡 ");
1847
1848 expectT("Any-Hex/XML10",
1849 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850 "𐌰􏼀󠁡 ");
1851
1852 expectT("[\\U000E0000-\\U000E0FFF] Remove",
1853 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1855 }
1856
TestQuantifier()1857 void TransliteratorTest::TestQuantifier() {
1858
1859 // Make sure @ in a quantified anteContext works
1860 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1861 "AAAAAb",
1862 "aaa(aac)");
1863
1864 // Make sure @ in a quantified postContext works
1865 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1866 "baaaaa",
1867 "caa(aaa)");
1868
1869 // Make sure @ in a quantified postContext with seg ref works
1870 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1871 "baaaaa",
1872 "baa(aaa)");
1873
1874 // Make sure @ past ante context doesn't enter ante context
1875 UTransPosition pos = {0, 5, 3, 5};
1876 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1877 "xxxab",
1878 "xxx(ac)",
1879 &pos);
1880
1881 // Make sure @ past post context doesn't pass limit
1882 UTransPosition pos2 = {0, 4, 0, 2};
1883 expect("{b} a+ > c @@ |; x > y; a > A;",
1884 "baxx",
1885 "caxx",
1886 &pos2);
1887
1888 // Make sure @ past post context doesn't enter post context
1889 expect("{b} a+ > c @@ |; x > y; a > A;",
1890 "baxx",
1891 "cayy");
1892
1893 expect("(ab)? c > d;",
1894 "c abc ababc",
1895 "d d abd");
1896
1897 // NOTE: The (ab)+ when referenced just yields a single "ab",
1898 // not the full sequence of them. This accords with perl behavior.
1899 expect("(ab)+ {x} > '(' $1 ')';",
1900 "x abx ababxy",
1901 "x ab(ab) abab(ab)y");
1902
1903 expect("b+ > x;",
1904 "ac abc abbc abbbc",
1905 "ac axc axc axc");
1906
1907 expect("[abc]+ > x;",
1908 "qac abrc abbcs abtbbc",
1909 "qx xrx xs xtx");
1910
1911 expect("q{(ab)+} > x;",
1912 "qa qab qaba qababc qaba",
1913 "qa qx qxa qxc qxa");
1914
1915 expect("q(ab)* > x;",
1916 "qa qab qaba qababc",
1917 "xa x xa xc");
1918
1919 // NOTE: The (ab)+ when referenced just yields a single "ab",
1920 // not the full sequence of them. This accords with perl behavior.
1921 expect("q(ab)* > '(' $1 ')';",
1922 "qa qab qaba qababc",
1923 "()a (ab) (ab)a (ab)c");
1924
1925 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1926 // quoted string
1927 expect("'ab'+ > x;",
1928 "bb ab ababb",
1929 "bb x xb");
1930
1931 // $foo+ and $foo* -- the quantifier should apply to the entire
1932 // variable reference
1933 expect("$var = ab; $var+ > x;",
1934 "bb ab ababb",
1935 "bb x xb");
1936 }
1937
1938 class TestTrans : public Transliterator {
1939 public:
TestTrans(const UnicodeString & id)1940 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1941 }
clone(void) const1942 virtual Transliterator* clone(void) const {
1943 return new TestTrans(getID());
1944 }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1945 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1946 UBool /*isIncremental*/) const
1947 {
1948 offsets.start = offsets.limit;
1949 }
1950 virtual UClassID getDynamicClassID() const;
1951 static UClassID U_EXPORT2 getStaticClassID();
1952 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1953 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1954
1955 /**
1956 * Test Source-Target/Variant.
1957 */
1958 void TransliteratorTest::TestSTV(void) {
1959 int32_t ns = Transliterator::countAvailableSources();
1960 if (ns < 0 || ns > 255) {
1961 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1962 return;
1963 }
1964 int32_t i, j;
1965 for (i=0; i<ns; ++i) {
1966 UnicodeString source;
1967 Transliterator::getAvailableSource(i, source);
1968 logln((UnicodeString)"" + i + ": " + source);
1969 if (source.length() == 0) {
1970 errln("FAIL: empty source");
1971 continue;
1972 }
1973 int32_t nt = Transliterator::countAvailableTargets(source);
1974 if (nt < 0 || nt > 255) {
1975 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1976 continue;
1977 }
1978 for (int32_t j=0; j<nt; ++j) {
1979 UnicodeString target;
1980 Transliterator::getAvailableTarget(j, source, target);
1981 logln((UnicodeString)" " + j + ": " + target);
1982 if (target.length() == 0) {
1983 errln("FAIL: empty target");
1984 continue;
1985 }
1986 int32_t nv = Transliterator::countAvailableVariants(source, target);
1987 if (nv < 0 || nv > 255) {
1988 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1989 continue;
1990 }
1991 for (int32_t k=0; k<nv; ++k) {
1992 UnicodeString variant;
1993 Transliterator::getAvailableVariant(k, source, target, variant);
1994 if (variant.length() == 0) {
1995 logln((UnicodeString)" " + k + ": <empty>");
1996 } else {
1997 logln((UnicodeString)" " + k + ": " + variant);
1998 }
1999 }
2000 }
2001 }
2002
2003 // Test registration
2004 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2005 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2006 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2007 for (i=0; i<3; ++i) {
2008 Transliterator *t = new TestTrans(IDS[i]);
2009 if (t == 0) {
2010 errln("FAIL: out of memory");
2011 return;
2012 }
2013 if (t->getID() != IDS[i]) {
2014 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2015 delete t;
2016 return;
2017 }
2018 Transliterator::registerInstance(t);
2019 UErrorCode status = U_ZERO_ERROR;
2020 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2021 if (t == NULL) {
2022 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2023 IDS[i]);
2024 } else {
2025 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2026 IDS[i]);
2027 delete t;
2028 }
2029 Transliterator::unregister(IDS[i]);
2030 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2031 if (t != NULL) {
2032 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2033 IDS[i]);
2034 delete t;
2035 }
2036 }
2037
2038 // Make sure getAvailable API reflects removal
2039 int32_t n = Transliterator::countAvailableIDs();
2040 for (i=0; i<n; ++i) {
2041 UnicodeString id = Transliterator::getAvailableID(i);
2042 for (j=0; j<3; ++j) {
2043 if (id.caseCompare(FULL_IDS[j],0)==0) {
2044 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2045 }
2046 }
2047 }
2048 n = Transliterator::countAvailableTargets("Any");
2049 for (i=0; i<n; ++i) {
2050 UnicodeString t;
2051 Transliterator::getAvailableTarget(i, "Any", t);
2052 if (t.caseCompare(IDS[0],0)==0) {
2053 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2054 }
2055 }
2056 n = Transliterator::countAvailableSources();
2057 for (i=0; i<n; ++i) {
2058 UnicodeString s;
2059 Transliterator::getAvailableSource(i, s);
2060 for (j=0; j<3; ++j) {
2061 if (SOURCES[j] == NULL) continue;
2062 if (s.caseCompare(SOURCES[j],0)==0) {
2063 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2064 }
2065 }
2066 }
2067 }
2068
2069 /**
2070 * Test inverse of Greek-Latin; Title()
2071 */
TestCompoundInverse(void)2072 void TransliteratorTest::TestCompoundInverse(void) {
2073 UParseError parseError;
2074 UErrorCode status = U_ZERO_ERROR;
2075 Transliterator *t = Transliterator::createInstance
2076 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2077 if (t == 0) {
2078 errln("FAIL: createInstance");
2079 return;
2080 }
2081 UnicodeString exp("(Title);Latin-Greek");
2082 if (t->getID() == exp) {
2083 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2084 t->getID());
2085 } else {
2086 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2087 t->getID() + "\", expected \"" + exp + "\"");
2088 }
2089 delete t;
2090 }
2091
2092 /**
2093 * Test NFD chaining with RBT
2094 */
TestNFDChainRBT()2095 void TransliteratorTest::TestNFDChainRBT() {
2096 UParseError pe;
2097 UErrorCode ec = U_ZERO_ERROR;
2098 Transliterator* t = Transliterator::createFromRules(
2099 "TEST", "::NFD; aa > Q; a > q;",
2100 UTRANS_FORWARD, pe, ec);
2101 if (t == NULL || U_FAILURE(ec)) {
2102 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2103 return;
2104 }
2105 expect(*t, "aa", "Q");
2106 delete t;
2107
2108 // TEMPORARY TESTS -- BEING DEBUGGED
2109 //=- UnicodeString s, s2;
2110 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2111 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2112 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2113 //=- expect(*t, s, s2);
2114 //=- delete t;
2115 //=-
2116 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2117 //=- expect(*t, s2, s);
2118 //=- delete t;
2119 //=-
2120 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2121 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2122 //=- expect(*t, s, s);
2123 //=- delete t;
2124
2125 // const char* source[] = {
2126 // /*
2127 // "\\u015Br\\u012Bmad",
2128 // "bhagavadg\\u012Bt\\u0101",
2129 // "adhy\\u0101ya",
2130 // "arjuna",
2131 // "vi\\u1E63\\u0101da",
2132 // "y\\u014Dga",
2133 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2134 // "uv\\u0101cr\\u0325",
2135 // */
2136 // "rmk\\u1E63\\u0113t",
2137 // //"dharmak\\u1E63\\u0113tr\\u0113",
2138 // /*
2139 // "kuruk\\u1E63\\u0113tr\\u0113",
2140 // "samav\\u0113t\\u0101",
2141 // "yuyutsava-\\u1E25",
2142 // "m\\u0101mak\\u0101-\\u1E25",
2143 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2144 // "kimakurvata",
2145 // "san\\u0304java",
2146 // */
2147 //
2148 // 0
2149 // };
2150 // const char* expected[] = {
2151 // /*
2152 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2153 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2154 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2155 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2156 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2157 // "\\u092f\\u094b\\u0917",
2158 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2159 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2160 // */
2161 // "\\u0927",
2162 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2163 // /*
2164 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2165 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2166 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2167 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2168 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2169 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2170 // "\\u0938\\u0902\\u091c\\u0935",
2171 // */
2172 // 0
2173 // };
2174 // UErrorCode status = U_ZERO_ERROR;
2175 // UParseError parseError;
2176 // UnicodeString message;
2177 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2178 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2179 // if(U_FAILURE(status)){
2180 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2181 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2182 // delete latinToDevToLatin;
2183 // delete devToLatinToDev;
2184 // return;
2185 // }
2186 // UnicodeString gotResult;
2187 // for(int i= 0; source[i] != 0; i++){
2188 // gotResult = source[i];
2189 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2190 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2191 // }
2192 // delete latinToDevToLatin;
2193 // delete devToLatinToDev;
2194 }
2195
2196 /**
2197 * Inverse of "Null" should be "Null". (J21)
2198 */
TestNullInverse()2199 void TransliteratorTest::TestNullInverse() {
2200 UParseError pe;
2201 UErrorCode ec = U_ZERO_ERROR;
2202 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2203 if (t == 0 || U_FAILURE(ec)) {
2204 errln("FAIL: createInstance");
2205 return;
2206 }
2207 Transliterator *u = t->createInverse(ec);
2208 if (u == 0 || U_FAILURE(ec)) {
2209 errln("FAIL: createInverse");
2210 delete t;
2211 return;
2212 }
2213 if (u->getID() != "Null") {
2214 errln("FAIL: Inverse of Null should be Null");
2215 }
2216 delete t;
2217 delete u;
2218 }
2219
2220 /**
2221 * Check ID of inverse of alias. (J22)
2222 */
TestAliasInverseID()2223 void TransliteratorTest::TestAliasInverseID() {
2224 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2225 UParseError pe;
2226 UErrorCode ec = U_ZERO_ERROR;
2227 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2228 if (t == 0 || U_FAILURE(ec)) {
2229 errln("FAIL: createInstance");
2230 return;
2231 }
2232 Transliterator *u = t->createInverse(ec);
2233 if (u == 0 || U_FAILURE(ec)) {
2234 errln("FAIL: createInverse");
2235 delete t;
2236 return;
2237 }
2238 UnicodeString exp = "Hangul-Latin";
2239 UnicodeString got = u->getID();
2240 if (got != exp) {
2241 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2242 ", expected " + exp);
2243 }
2244 delete t;
2245 delete u;
2246 }
2247
2248 /**
2249 * Test IDs of inverses of compound transliterators. (J20)
2250 */
TestCompoundInverseID()2251 void TransliteratorTest::TestCompoundInverseID() {
2252 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2253 UParseError pe;
2254 UErrorCode ec = U_ZERO_ERROR;
2255 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2256 if (t == 0 || U_FAILURE(ec)) {
2257 errln("FAIL: createInstance");
2258 return;
2259 }
2260 Transliterator *u = t->createInverse(ec);
2261 if (u == 0 || U_FAILURE(ec)) {
2262 errln("FAIL: createInverse");
2263 delete t;
2264 return;
2265 }
2266 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2267 UnicodeString got = u->getID();
2268 if (got != exp) {
2269 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2270 ", expected " + exp);
2271 }
2272 delete t;
2273 delete u;
2274 }
2275
2276 /**
2277 * Test undefined variable.
2278
2279 */
TestUndefinedVariable()2280 void TransliteratorTest::TestUndefinedVariable() {
2281 UnicodeString rule = "$initial } a <> \\u1161;";
2282 UParseError pe;
2283 UErrorCode ec = U_ZERO_ERROR;
2284 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2285 delete t;
2286 if (U_FAILURE(ec)) {
2287 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2288 u_errorName(ec));
2289 return;
2290 }
2291 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2292 u_errorName(ec));
2293 }
2294
2295 /**
2296 * Test empty context.
2297 */
TestEmptyContext()2298 void TransliteratorTest::TestEmptyContext() {
2299 expect(" { a } > b;", "xay a ", "xby b ");
2300 }
2301
2302 /**
2303 * Test compound filter ID syntax
2304 */
TestCompoundFilterID(void)2305 void TransliteratorTest::TestCompoundFilterID(void) {
2306 static const char* DATA[] = {
2307 // Col. 1 = ID or rule set (latter must start with #)
2308
2309 // = columns > 1 are null if expect col. 1 to be illegal =
2310
2311 // Col. 2 = direction, "F..." or "R..."
2312 // Col. 3 = source string
2313 // Col. 4 = exp result
2314
2315 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2316 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2317 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2318 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2319 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2320 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2321 NULL,
2322 };
2323
2324 for (int32_t i=0; DATA[i]; i+=4) {
2325 UnicodeString id = CharsToUnicodeString(DATA[i]);
2326 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2327 UTRANS_REVERSE : UTRANS_FORWARD;
2328 UnicodeString source;
2329 UnicodeString exp;
2330 if (DATA[i+2] != NULL) {
2331 source = CharsToUnicodeString(DATA[i+2]);
2332 exp = CharsToUnicodeString(DATA[i+3]);
2333 }
2334 UBool expOk = (DATA[i+1] != NULL);
2335 Transliterator* t = NULL;
2336 UParseError pe;
2337 UErrorCode ec = U_ZERO_ERROR;
2338 if (id.charAt(0) == 0x23/*#*/) {
2339 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2340 } else {
2341 t = Transliterator::createInstance(id, direction, pe, ec);
2342 }
2343 UBool ok = (t != NULL && U_SUCCESS(ec));
2344 UnicodeString transID;
2345 if (t!=0) {
2346 transID = t->getID();
2347 }
2348 else {
2349 transID = UnicodeString("NULL", "");
2350 }
2351 if (ok == expOk) {
2352 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2353 u_errorName(ec));
2354 if (source.length() != 0) {
2355 expect(*t, source, exp);
2356 }
2357 delete t;
2358 } else {
2359 errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2360 u_errorName(ec));
2361 }
2362 }
2363 }
2364
2365 /**
2366 * Test new property set syntax
2367 */
TestPropertySet()2368 void TransliteratorTest::TestPropertySet() {
2369 expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2370 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2371 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2372 }
2373
2374 /**
2375 * Test various failure points of the new 2.0 engine.
2376 */
TestNewEngine()2377 void TransliteratorTest::TestNewEngine() {
2378 UParseError pe;
2379 UErrorCode ec = U_ZERO_ERROR;
2380 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2381 if (t == 0 || U_FAILURE(ec)) {
2382 errln("FAIL: createInstance Latin-Hiragana");
2383 return;
2384 }
2385 // Katakana should be untouched
2386 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2387 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2388
2389 delete t;
2390
2391 #if 1
2392 // This test will only work if Transliterator.ROLLBACK is
2393 // true. Otherwise, this test will fail, revealing a
2394 // limitation of global filters in incremental mode.
2395 Transliterator *a =
2396 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2397 Transliterator *A =
2398 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2399 if (U_FAILURE(ec)) {
2400 delete a;
2401 delete A;
2402 return;
2403 }
2404
2405 Transliterator* array[3];
2406 array[0] = a;
2407 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2408 array[2] = A;
2409 if (U_FAILURE(ec)) {
2410 errln("FAIL: createInstance NFD");
2411 delete a;
2412 delete A;
2413 delete array[1];
2414 return;
2415 }
2416
2417 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2418 if (U_FAILURE(ec)) {
2419 errln("FAIL: UnicodeSet constructor");
2420 delete a;
2421 delete A;
2422 delete array[1];
2423 delete t;
2424 return;
2425 }
2426
2427 expect(*t, "aAaA", "bAbA");
2428
2429 assertTrue("countElements", t->countElements() == 3);
2430 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2431 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2432 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2433 assertSuccess("getElement", ec);
2434
2435 delete a;
2436 delete A;
2437 delete array[1];
2438 delete t;
2439 #endif
2440
2441 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2442 "a",
2443 "ax");
2444
2445 UnicodeString gr = CharsToUnicodeString(
2446 "$ddot = \\u0308 ;"
2447 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2448 "$rough = \\u0314 ;"
2449 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2450 "\\u03b1 <> a ;"
2451 "$rough <> h ;");
2452
2453 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2454 }
2455
2456 /**
2457 * Test quantified segment behavior. We want:
2458 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2459 */
TestQuantifiedSegment(void)2460 void TransliteratorTest::TestQuantifiedSegment(void) {
2461 // The normal case
2462 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2463
2464 // The tricky case; the quantifier is around the segment
2465 expect("([abc])+ > x $1 x;", "cba", "xax");
2466
2467 // Tricky case in reverse direction
2468 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2469
2470 // Check post-context segment
2471 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2472
2473 // Test toRule/toPattern for non-quantified segment.
2474 // Careful with spacing here.
2475 UnicodeString r("([a-c]){q} > x $1 x;");
2476 UParseError pe;
2477 UErrorCode ec = U_ZERO_ERROR;
2478 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2479 if (U_FAILURE(ec)) {
2480 errln("FAIL: createFromRules");
2481 delete t;
2482 return;
2483 }
2484 UnicodeString rr;
2485 t->toRules(rr, TRUE);
2486 if (r != rr) {
2487 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2488 } else {
2489 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2490 }
2491 delete t;
2492
2493 // Test toRule/toPattern for quantified segment.
2494 // Careful with spacing here.
2495 r = "([a-c])+{q} > x $1 x;";
2496 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2497 if (U_FAILURE(ec)) {
2498 errln("FAIL: createFromRules");
2499 delete t;
2500 return;
2501 }
2502 t->toRules(rr, TRUE);
2503 if (r != rr) {
2504 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2505 } else {
2506 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2507 }
2508 delete t;
2509 }
2510
2511 //======================================================================
2512 // Ram's tests
2513 //======================================================================
TestDevanagariLatinRT()2514 void TransliteratorTest::TestDevanagariLatinRT(){
2515 const int MAX_LEN= 52;
2516 const char* const source[MAX_LEN] = {
2517 "bh\\u0101rata",
2518 "kra",
2519 "k\\u1E63a",
2520 "khra",
2521 "gra",
2522 "\\u1E45ra",
2523 "cra",
2524 "chra",
2525 "j\\u00F1a",
2526 "jhra",
2527 "\\u00F1ra",
2528 "\\u1E6Dya",
2529 "\\u1E6Dhra",
2530 "\\u1E0Dya",
2531 //"r\\u0323ya", // \u095c is not valid in Devanagari
2532 "\\u1E0Dhya",
2533 "\\u1E5Bhra",
2534 "\\u1E47ra",
2535 "tta",
2536 "thra",
2537 "dda",
2538 "dhra",
2539 "nna",
2540 "pra",
2541 "phra",
2542 "bra",
2543 "bhra",
2544 "mra",
2545 "\\u1E49ra",
2546 //"l\\u0331ra",
2547 "yra",
2548 "\\u1E8Fra",
2549 //"l-",
2550 "vra",
2551 "\\u015Bra",
2552 "\\u1E63ra",
2553 "sra",
2554 "hma",
2555 "\\u1E6D\\u1E6Da",
2556 "\\u1E6D\\u1E6Dha",
2557 "\\u1E6Dh\\u1E6Dha",
2558 "\\u1E0D\\u1E0Da",
2559 "\\u1E0D\\u1E0Dha",
2560 "\\u1E6Dya",
2561 "\\u1E6Dhya",
2562 "\\u1E0Dya",
2563 "\\u1E0Dhya",
2564 // Not roundtrippable --
2565 // \\u0939\\u094d\\u094d\\u092E - hma
2566 // \\u0939\\u094d\\u092E - hma
2567 // CharsToUnicodeString("hma"),
2568 "hya",
2569 "\\u015Br\\u0325",
2570 "\\u015Bca",
2571 "\\u0115",
2572 "san\\u0304j\\u012Bb s\\u0113nagupta",
2573 "\\u0101nand vaddir\\u0101ju",
2574 "\\u0101",
2575 "a"
2576 };
2577 const char* const expected[MAX_LEN] = {
2578 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2579 "\\u0915\\u094D\\u0930", /* kra */
2580 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2581 "\\u0916\\u094D\\u0930", /* khra */
2582 "\\u0917\\u094D\\u0930", /* gra */
2583 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2584 "\\u091A\\u094D\\u0930", /* cra */
2585 "\\u091B\\u094D\\u0930", /* chra */
2586 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2587 "\\u091D\\u094D\\u0930", /* jhra */
2588 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2589 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2590 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2591 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2592 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2593 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2594 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2595 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2596 "\\u0924\\u094D\\u0924", /* tta */
2597 "\\u0925\\u094D\\u0930", /* thra */
2598 "\\u0926\\u094D\\u0926", /* dda */
2599 "\\u0927\\u094D\\u0930", /* dhra */
2600 "\\u0928\\u094D\\u0928", /* nna */
2601 "\\u092A\\u094D\\u0930", /* pra */
2602 "\\u092B\\u094D\\u0930", /* phra */
2603 "\\u092C\\u094D\\u0930", /* bra */
2604 "\\u092D\\u094D\\u0930", /* bhra */
2605 "\\u092E\\u094D\\u0930", /* mra */
2606 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2607 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2608 "\\u092F\\u094D\\u0930", /* yra */
2609 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2610 //"l-",
2611 "\\u0935\\u094D\\u0930", /* vra */
2612 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2613 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2614 "\\u0938\\u094D\\u0930", /* sra */
2615 "\\u0939\\u094d\\u092E", /* hma */
2616 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2617 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2618 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2619 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2620 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2621 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2622 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2623 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2624 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2625 // "hma", /* hma */
2626 "\\u0939\\u094D\\u092F", /* hya */
2627 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2628 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2629 "\\u090d", /* e\\u0306 */
2630 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2631 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2632 "\\u0906",
2633 "\\u0905",
2634 };
2635 UErrorCode status = U_ZERO_ERROR;
2636 UParseError parseError;
2637 UnicodeString message;
2638 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2639 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2640 if(U_FAILURE(status)){
2641 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2642 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2643 return;
2644 }
2645 UnicodeString gotResult;
2646 for(int i= 0; i<MAX_LEN; i++){
2647 gotResult = source[i];
2648 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2649 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2650 }
2651 delete latinToDev;
2652 delete devToLatin;
2653 }
2654
TestTeluguLatinRT()2655 void TransliteratorTest::TestTeluguLatinRT(){
2656 const int MAX_LEN=10;
2657 const char* const source[MAX_LEN] = {
2658 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2659 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2660 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2661 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2662 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2663 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2664 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2665 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2666 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2667 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2668 };
2669
2670 const char* const expected[MAX_LEN] = {
2671 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2672 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2673 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2674 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2675 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2676 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2677 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2679 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2680 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2681 };
2682
2683 UErrorCode status = U_ZERO_ERROR;
2684 UParseError parseError;
2685 UnicodeString message;
2686 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2687 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2688 if(U_FAILURE(status)){
2689 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2690 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2691 return;
2692 }
2693 UnicodeString gotResult;
2694 for(int i= 0; i<MAX_LEN; i++){
2695 gotResult = source[i];
2696 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2697 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2698 }
2699 delete latinToDev;
2700 delete devToLatin;
2701 }
2702
TestSanskritLatinRT()2703 void TransliteratorTest::TestSanskritLatinRT(){
2704 const int MAX_LEN =16;
2705 const char* const source[MAX_LEN] = {
2706 "rmk\\u1E63\\u0113t",
2707 "\\u015Br\\u012Bmad",
2708 "bhagavadg\\u012Bt\\u0101",
2709 "adhy\\u0101ya",
2710 "arjuna",
2711 "vi\\u1E63\\u0101da",
2712 "y\\u014Dga",
2713 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2714 "uv\\u0101cr\\u0325",
2715 "dharmak\\u1E63\\u0113tr\\u0113",
2716 "kuruk\\u1E63\\u0113tr\\u0113",
2717 "samav\\u0113t\\u0101",
2718 "yuyutsava\\u1E25",
2719 "m\\u0101mak\\u0101\\u1E25",
2720 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2721 "kimakurvata",
2722 "san\\u0304java",
2723 };
2724 const char* const expected[MAX_LEN] = {
2725 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2726 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2727 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2728 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2729 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2730 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2731 "\\u092f\\u094b\\u0917",
2732 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2733 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2734 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2735 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2736 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2737 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2738 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2739 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2740 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2741 "\\u0938\\u0902\\u091c\\u0935",
2742 };
2743 UErrorCode status = U_ZERO_ERROR;
2744 UParseError parseError;
2745 UnicodeString message;
2746 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2747 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2748 if(U_FAILURE(status)){
2749 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2750 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2751 return;
2752 }
2753 UnicodeString gotResult;
2754 for(int i= 0; i<MAX_LEN; i++){
2755 gotResult = source[i];
2756 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2757 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2758 }
2759 delete latinToDev;
2760 delete devToLatin;
2761 }
2762
2763
TestCompoundLatinRT()2764 void TransliteratorTest::TestCompoundLatinRT(){
2765 const char* const source[] = {
2766 "rmk\\u1E63\\u0113t",
2767 "\\u015Br\\u012Bmad",
2768 "bhagavadg\\u012Bt\\u0101",
2769 "adhy\\u0101ya",
2770 "arjuna",
2771 "vi\\u1E63\\u0101da",
2772 "y\\u014Dga",
2773 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2774 "uv\\u0101cr\\u0325",
2775 "dharmak\\u1E63\\u0113tr\\u0113",
2776 "kuruk\\u1E63\\u0113tr\\u0113",
2777 "samav\\u0113t\\u0101",
2778 "yuyutsava\\u1E25",
2779 "m\\u0101mak\\u0101\\u1E25",
2780 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2781 "kimakurvata",
2782 "san\\u0304java"
2783 };
2784 const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2785 const char* const expected[MAX_LEN] = {
2786 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2787 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2788 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2789 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2790 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2791 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2792 "\\u092f\\u094b\\u0917",
2793 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2794 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2795 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2796 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2797 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2798 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2799 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2800 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2801 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2802 "\\u0938\\u0902\\u091c\\u0935"
2803 };
2804 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2805 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2806 return;
2807 }
2808
2809 UErrorCode status = U_ZERO_ERROR;
2810 UParseError parseError;
2811 UnicodeString message;
2812 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2813 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2814 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2815 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2816
2817 if(U_FAILURE(status)){
2818 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2819 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2820 return;
2821 }
2822 UnicodeString gotResult;
2823 for(int i= 0; i<MAX_LEN; i++){
2824 gotResult = source[i];
2825 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2826 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2827 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2828
2829 }
2830 delete(latinToDevToLatin);
2831 delete(devToLatinToDev);
2832 delete(devToTelToDev);
2833 delete(latinToTelToLatin);
2834 }
2835
2836 /**
2837 * Test Gurmukhi-Devanagari Tippi and Bindi
2838 */
TestGurmukhiDevanagari()2839 void TransliteratorTest::TestGurmukhiDevanagari(){
2840 // the rule says:
2841 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2842 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2843 UErrorCode status = U_ZERO_ERROR;
2844 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
2845 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
2846 UParseError parseError;
2847
2848 UnicodeSetIterator vIter(vowel);
2849 UnicodeSetIterator nvIter(non_vowel);
2850 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2851 if(U_FAILURE(status)) {
2852 errln("Error creating transliterator %s", u_errorName(status));
2853 delete trans;
2854 return;
2855 }
2856 UnicodeString src (" \\u0902");
2857 UnicodeString expected(" \\u0A02");
2858 src = src.unescape();
2859 expected= expected.unescape();
2860
2861 while(vIter.next()){
2862 src.setCharAt(0,(UChar) vIter.getCodepoint());
2863 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2864 expect(*trans,src,expected);
2865 }
2866
2867 expected.setCharAt(1,0x0A70);
2868 while(nvIter.next()){
2869 //src.setCharAt(0,(char) nvIter.codepoint);
2870 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2871 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2872 expect(*trans,src,expected);
2873 }
2874 delete trans;
2875 }
2876 /**
2877 * Test instantiation from a locale.
2878 */
TestLocaleInstantiation(void)2879 void TransliteratorTest::TestLocaleInstantiation(void) {
2880 UParseError pe;
2881 UErrorCode ec = U_ZERO_ERROR;
2882 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2883 if (U_FAILURE(ec)) {
2884 errln("FAIL: createInstance(ru_RU-Latin)");
2885 delete t;
2886 return;
2887 }
2888 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2889 delete t;
2890
2891 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2892 if (U_FAILURE(ec)) {
2893 errln("FAIL: createInstance(en-el)");
2894 delete t;
2895 return;
2896 }
2897 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2898 delete t;
2899 }
2900
2901 /**
2902 * Test title case handling of accent (should ignore accents)
2903 */
TestTitleAccents(void)2904 void TransliteratorTest::TestTitleAccents(void) {
2905 UParseError pe;
2906 UErrorCode ec = U_ZERO_ERROR;
2907 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2908 if (U_FAILURE(ec)) {
2909 errln("FAIL: createInstance(Title)");
2910 delete t;
2911 return;
2912 }
2913 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2914 delete t;
2915 }
2916
2917 /**
2918 * Basic test of a locale resource based rule.
2919 */
TestLocaleResource()2920 void TransliteratorTest::TestLocaleResource() {
2921 const char* DATA[] = {
2922 // id from to
2923 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2924 "Latin-el", "b", "\\u03bc\\u03c0",
2925 "Latin-Greek", "b", "\\u03B2",
2926 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2927 "el-Latin", "\\u03B2", "v",
2928 "Greek-Latin", "\\u03B2", "b",
2929 };
2930 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2931 for (int32_t i=0; i<DATA_length; i+=3) {
2932 UParseError pe;
2933 UErrorCode ec = U_ZERO_ERROR;
2934 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2935 if (U_FAILURE(ec)) {
2936 errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2937 delete t;
2938 continue;
2939 }
2940 expect(*t, CharsToUnicodeString(DATA[i+1]),
2941 CharsToUnicodeString(DATA[i+2]));
2942 delete t;
2943 }
2944 }
2945
2946 /**
2947 * Make sure parse errors reference the right line.
2948 */
TestParseError()2949 void TransliteratorTest::TestParseError() {
2950 static const char* rule =
2951 "a > b;\n"
2952 "# more stuff\n"
2953 "d << b;";
2954 UErrorCode ec = U_ZERO_ERROR;
2955 UParseError pe;
2956 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2957 delete t;
2958 if (U_FAILURE(ec)) {
2959 UnicodeString err(pe.preContext);
2960 err.append((UChar)124/*|*/).append(pe.postContext);
2961 if (err.indexOf("d << b") >= 0) {
2962 logln("Ok: " + err);
2963 } else {
2964 errln("FAIL: " + err);
2965 }
2966 }
2967 else {
2968 errln("FAIL: no syntax error");
2969 }
2970 static const char* maskingRule =
2971 "a>x;\n"
2972 "# more stuff\n"
2973 "ab>y;";
2974 ec = U_ZERO_ERROR;
2975 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2976 if (ec != U_RULE_MASK_ERROR) {
2977 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2978 }
2979 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2980 errln("FAIL: did not get expected precontext");
2981 }
2982 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2983 errln("FAIL: did not get expected postcontext");
2984 }
2985 }
2986
2987 /**
2988 * Make sure sets on output are disallowed.
2989 */
TestOutputSet()2990 void TransliteratorTest::TestOutputSet() {
2991 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2992 UErrorCode ec = U_ZERO_ERROR;
2993 UParseError pe;
2994 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2995 delete t;
2996 if (U_FAILURE(ec)) {
2997 UnicodeString err(pe.preContext);
2998 err.append((UChar)124/*|*/).append(pe.postContext);
2999 logln("Ok: " + err);
3000 return;
3001 }
3002 errln("FAIL: No syntax error");
3003 }
3004
3005 /**
3006 * Test the use variable range pragma, making sure that use of
3007 * variable range characters is detected and flagged as an error.
3008 */
TestVariableRange()3009 void TransliteratorTest::TestVariableRange() {
3010 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3011 UErrorCode ec = U_ZERO_ERROR;
3012 UParseError pe;
3013 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3014 delete t;
3015 if (U_FAILURE(ec)) {
3016 UnicodeString err(pe.preContext);
3017 err.append((UChar)124/*|*/).append(pe.postContext);
3018 logln("Ok: " + err);
3019 return;
3020 }
3021 errln("FAIL: No syntax error");
3022 }
3023
3024 /**
3025 * Test invalid post context error handling
3026 */
TestInvalidPostContext()3027 void TransliteratorTest::TestInvalidPostContext() {
3028 UnicodeString rule = "a}b{c>d;";
3029 UErrorCode ec = U_ZERO_ERROR;
3030 UParseError pe;
3031 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3032 delete t;
3033 if (U_FAILURE(ec)) {
3034 UnicodeString err(pe.preContext);
3035 err.append((UChar)124/*|*/).append(pe.postContext);
3036 if (err.indexOf("a}b{c") >= 0) {
3037 logln("Ok: " + err);
3038 } else {
3039 errln("FAIL: " + err);
3040 }
3041 return;
3042 }
3043 errln("FAIL: No syntax error");
3044 }
3045
3046 /**
3047 * Test ID form variants
3048 */
TestIDForms()3049 void TransliteratorTest::TestIDForms() {
3050 const char* DATA[] = {
3051 "NFC", NULL, "NFD",
3052 "nfd", NULL, "NFC", // make sure case is ignored
3053 "Any-NFKD", NULL, "Any-NFKC",
3054 "Null", NULL, "Null",
3055 "-nfkc", "nfkc", "NFKD",
3056 "-nfkc/", "nfkc", "NFKD",
3057 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3058 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3059 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3060 "Source-", NULL, NULL,
3061 "Source/Variant-", NULL, NULL,
3062 "Source-/Variant", NULL, NULL,
3063 "/Variant", NULL, NULL,
3064 "/Variant-", NULL, NULL,
3065 "-/Variant", NULL, NULL,
3066 "-/", NULL, NULL,
3067 "-", NULL, NULL,
3068 "/", NULL, NULL,
3069 };
3070 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3071
3072 for (int32_t i=0; i<DATA_length; i+=3) {
3073 const char* ID = DATA[i];
3074 const char* expID = DATA[i+1];
3075 const char* expInvID = DATA[i+2];
3076 UBool expValid = (expInvID != NULL);
3077 if (expID == NULL) {
3078 expID = ID;
3079 }
3080 UParseError pe;
3081 UErrorCode ec = U_ZERO_ERROR;
3082 Transliterator *t =
3083 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3084 if (U_FAILURE(ec)) {
3085 if (!expValid) {
3086 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3087 } else {
3088 errln((UnicodeString)"FAIL: Couldn't create " + ID);
3089 }
3090 delete t;
3091 continue;
3092 }
3093 Transliterator *u = t->createInverse(ec);
3094 if (U_FAILURE(ec)) {
3095 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3096 delete t;
3097 delete u;
3098 continue;
3099 }
3100 if (t->getID() == expID &&
3101 u->getID() == expInvID) {
3102 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3103 } else {
3104 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3105 t->getID() + " x getInverse() => " + u->getID() +
3106 ", expected " + expInvID);
3107 }
3108 delete t;
3109 delete u;
3110 }
3111 }
3112
// NUL-terminated UChar strings passed to findAndReplace() by checkRules()
// when stripping whitespace out of rule text.
static const UChar SPACE[] = {32,0};   // " "  (code unit 32)
static const UChar NEWLINE[] = {10,0}; // "\n" (code unit 10)
static const UChar RETURN[] = {13,0};  // "\r" (code unit 13)
static const UChar EMPTY[] = {0};      // empty string, used as the replacement
3117
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3118 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3119 const UnicodeString& testRulesForward) {
3120 UnicodeString rules2; t2.toRules(rules2, TRUE);
3121 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3122 rules2.findAndReplace(SPACE, EMPTY);
3123 rules2.findAndReplace(NEWLINE, EMPTY);
3124 rules2.findAndReplace(RETURN, EMPTY);
3125
3126 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3127
3128 if (rules2 != testRules) {
3129 errln(label);
3130 logln((UnicodeString)"GENERATED RULES: " + rules2);
3131 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3132 }
3133 }
3134
3135 /**
3136 * Mark's toRules test.
3137 */
TestToRulesMark()3138 void TransliteratorTest::TestToRulesMark() {
3139 const char* testRules =
3140 "::[[:Latin:][:Mark:]];"
3141 "::NFKD (NFC);"
3142 "::Lower (Lower);"
3143 "a <> \\u03B1;" // alpha
3144 "::NFKC (NFD);"
3145 "::Upper (Lower);"
3146 "::Lower ();"
3147 "::([[:Greek:][:Mark:]]);"
3148 ;
3149 const char* testRulesForward =
3150 "::[[:Latin:][:Mark:]];"
3151 "::NFKD(NFC);"
3152 "::Lower(Lower);"
3153 "a > \\u03B1;"
3154 "::NFKC(NFD);"
3155 "::Upper (Lower);"
3156 "::Lower ();"
3157 ;
3158 const char* testRulesBackward =
3159 "::[[:Greek:][:Mark:]];"
3160 "::Lower (Upper);"
3161 "::NFD(NFKC);"
3162 "\\u03B1 > a;"
3163 "::Lower(Lower);"
3164 "::NFC(NFKD);"
3165 ;
3166 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3167 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3168
3169 UParseError pe;
3170 UErrorCode ec = U_ZERO_ERROR;
3171 Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
3172 Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
3173
3174 if (U_FAILURE(ec)) {
3175 delete t2;
3176 delete t3;
3177 errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3178 return;
3179 }
3180
3181 expect(*t2, source, target);
3182 expect(*t3, target, source);
3183
3184 checkRules("Failed toRules FORWARD", *t2, testRulesForward);
3185 checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
3186
3187 delete t2;
3188 delete t3;
3189 }
3190
3191 /**
3192 * Test Escape and Unescape transliterators.
3193 */
TestEscape()3194 void TransliteratorTest::TestEscape() {
3195 UParseError pe;
3196 UErrorCode ec;
3197 Transliterator *t;
3198
3199 ec = U_ZERO_ERROR;
3200 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3201 if (U_FAILURE(ec)) {
3202 errln((UnicodeString)"FAIL: createInstance");
3203 } else {
3204 expect(*t,
3205 "\\x{40}\\U000000312Q",
3206 "@12Q");
3207 }
3208 delete t;
3209
3210 ec = U_ZERO_ERROR;
3211 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3212 if (U_FAILURE(ec)) {
3213 errln((UnicodeString)"FAIL: createInstance");
3214 } else {
3215 expect(*t,
3216 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3217 "\\u0041\\U0010BEEF\\uFEED");
3218 }
3219 delete t;
3220
3221 ec = U_ZERO_ERROR;
3222 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3223 if (U_FAILURE(ec)) {
3224 errln((UnicodeString)"FAIL: createInstance");
3225 } else {
3226 expect(*t,
3227 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3228 "\\u0041\\uDBEF\\uDEEF\\uFEED");
3229 }
3230 delete t;
3231
3232 ec = U_ZERO_ERROR;
3233 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3234 if (U_FAILURE(ec)) {
3235 errln((UnicodeString)"FAIL: createInstance");
3236 } else {
3237 expect(*t,
3238 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3239 "\\x{41}\\x{10BEEF}\\x{FEED}");
3240 }
3241 delete t;
3242 }
3243
3244
TestAnchorMasking()3245 void TransliteratorTest::TestAnchorMasking(){
3246 UnicodeString rule ("^a > Q; a > q;");
3247 UErrorCode status= U_ZERO_ERROR;
3248 UParseError parseError;
3249
3250 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3251 if(U_FAILURE(status)){
3252 errln(UnicodeString("FAIL: ") + "ID" +
3253 ".createFromRules() => bad rules" +
3254 /*", parse error " + parseError.code +*/
3255 ", line " + parseError.line +
3256 ", offset " + parseError.offset +
3257 ", context " + prettify(parseError.preContext, TRUE) +
3258 ", rules: " + prettify(rule, TRUE));
3259 }
3260 delete t;
3261 }
3262
3263 /**
3264 * Make sure display names of variants look reasonable.
3265 */
TestDisplayName()3266 void TransliteratorTest::TestDisplayName() {
3267 #if UCONFIG_NO_FORMATTING
3268 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3269 return;
3270 #else
3271 static const char* DATA[] = {
3272 // ID, forward name, reverse name
3273 // Update the text as necessary -- the important thing is
3274 // not the text itself, but how various cases are handled.
3275
3276 // Basic test
3277 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3278
3279 // Variants
3280 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3281
3282 // Target-only IDs
3283 "NFC", "Any to NFC", "Any to NFD",
3284 };
3285
3286 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3287
3288 Locale US("en", "US");
3289
3290 for (int32_t i=0; i<DATA_length; i+=3) {
3291 UnicodeString name;
3292 Transliterator::getDisplayName(DATA[i], US, name);
3293 if (name != DATA[i+1]) {
3294 errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3295 name + ", expected " + DATA[i+1]);
3296 } else {
3297 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3298 }
3299 UErrorCode ec = U_ZERO_ERROR;
3300 UParseError pe;
3301 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3302 if (U_FAILURE(ec)) {
3303 delete t;
3304 errln("FAIL: createInstance failed");
3305 continue;
3306 }
3307 name = Transliterator::getDisplayName(t->getID(), US, name);
3308 if (name != DATA[i+2]) {
3309 errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3310 name + ", expected " + DATA[i+2]);
3311 } else {
3312 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3313 }
3314 delete t;
3315 }
3316 #endif
3317 }
3318
TestSpecialCases(void)3319 void TransliteratorTest::TestSpecialCases(void) {
3320 const UnicodeString registerRules[] = {
3321 "Any-Dev1", "x > X; y > Y;",
3322 "Any-Dev2", "XY > Z",
3323 "Greek-Latin/FAKE",
3324 CharsToUnicodeString
3325 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3326 "" // END MARKER
3327 };
3328
3329 const UnicodeString testCases[] = {
3330 // NORMALIZATION
3331 // should add more test cases
3332 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3334 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3335 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3336
3337 // mp -> b BUG
3338 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3339 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3340
3341 // check for devanagari bug
3342 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3343
3344 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3345 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3346 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3347
3348 //TODO: enable this test once Titlecase works right
3349 /*
3350 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352 */
3353 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3355 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3357
3358 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3359 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3360
3361 // FORMS OF S
3362 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3363 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3364 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3365 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3366 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3367 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3368 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3369 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3370 // Tatiana bug
3371 // Upper: TAT\\u02B9\\u00C2NA
3372 // Lower: tat\\u02B9\\u00E2na
3373 // Title: Tat\\u02B9\\u00E2na
3374 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3375 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3376 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3377 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3378 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3379 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3380
3381 "" // END MARKER
3382 };
3383
3384 UParseError pos;
3385 int32_t i;
3386 for (i = 0; registerRules[i].length()!=0; i+=2) {
3387 UErrorCode status = U_ZERO_ERROR;
3388
3389 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3390 registerRules[i+1], UTRANS_FORWARD, pos, status);
3391 if (U_FAILURE(status)) {
3392 errln("Fails: Unable to create the transliterator from rules.");
3393 } else {
3394 Transliterator::registerInstance(t);
3395 }
3396 }
3397 for (i = 0; testCases[i].length()!=0; i+=3) {
3398 UErrorCode ec = U_ZERO_ERROR;
3399 UParseError pe;
3400 const UnicodeString& name = testCases[i];
3401 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3402 if (U_FAILURE(ec)) {
3403 errln((UnicodeString)"FAIL: Couldn't create " + name);
3404 delete t;
3405 continue;
3406 }
3407 const UnicodeString& id = t->getID();
3408 const UnicodeString& source = testCases[i+1];
3409 UnicodeString target;
3410
3411 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3412
3413 if (testCases[i+2].length() > 0) {
3414 target = testCases[i+2];
3415 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3416 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3417 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3418 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3419 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3420 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3421 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3422 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3423 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3424 target = source;
3425 target.toLower(Locale::getUS());
3426 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3427 target = source;
3428 target.toUpper(Locale::getUS());
3429 }
3430 if (U_FAILURE(ec)) {
3431 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3432 continue;
3433 }
3434
3435 expect(*t, source, target);
3436 delete t;
3437 }
3438 for (i = 0; registerRules[i].length()!=0; i+=2) {
3439 Transliterator::unregister(registerRules[i]);
3440 }
3441 }
3442
Char32ToEscapedChars(UChar32 ch,char * buffer)3443 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3444 if (ch <= 0xFFFF) {
3445 sprintf(buffer, "\\u%04x", (int)ch);
3446 } else {
3447 sprintf(buffer, "\\U%08x", (int)ch);
3448 }
3449 return buffer;
3450 }
3451
TestSurrogateCasing(void)3452 void TransliteratorTest::TestSurrogateCasing (void) {
3453 // check that casing handles surrogates
3454 // titlecase is currently defective
3455 char buffer[20];
3456 UChar buffer2[20];
3457 UChar32 dee;
3458 UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3459 UnicodeString DEE(u_totitle(dee));
3460 if (DEE != DESERET_DEE) {
3461 err("Fails titlecase of surrogates");
3462 err(Char32ToEscapedChars(dee, buffer));
3463 err(", ");
3464 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3465 }
3466
3467 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3468 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3469 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3470 UErrorCode status= U_ZERO_ERROR;
3471
3472 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3473 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3474 errln("Fails: Can't uppercase surrogates.");
3475 }
3476
3477 status= U_ZERO_ERROR;
3478 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3480 errln("Fails: Can't lowercase surrogates.");
3481 }
3482 }
3483
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3484 static void _trans(Transliterator& t, const UnicodeString& src,
3485 UnicodeString& result) {
3486 result = src;
3487 t.transliterate(result);
3488 }
3489
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3490 static void _trans(const UnicodeString& id, const UnicodeString& src,
3491 UnicodeString& result, UErrorCode ec) {
3492 UParseError pe;
3493 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3494 if (U_SUCCESS(ec)) {
3495 _trans(*t, src, result);
3496 }
3497 delete t;
3498 }
3499
/**
 * Look up source in a flat (key, value) table, comparing keys
 * case-insensitively with default case folding.
 *
 * @param source key to look for
 * @param pairs  array of key/value pairs, terminated by an empty key
 * @return the value paired with the first matching key, or an empty
 *         string when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                const UnicodeString* pairs) {
    for (const UnicodeString* entry = pairs; entry->length() > 0; entry += 2) {
        if (source.caseCompare(*entry, U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
    }
    return UnicodeString();
}
3510
3511 // Check to see that incremental gets at least part way through a reasonable string.
3512
TestIncrementalProgress(void)3513 void TransliteratorTest::TestIncrementalProgress(void) {
3514 UErrorCode ec = U_ZERO_ERROR;
3515 UnicodeString latinTest = "The Quick Brown Fox.";
3516 UnicodeString devaTest;
3517 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3518 UnicodeString kataTest;
3519 _trans("Latin-Katakana", latinTest, kataTest, ec);
3520 if (U_FAILURE(ec)) {
3521 errln("FAIL: Internal error");
3522 return;
3523 }
3524 const UnicodeString tests[] = {
3525 "Any", latinTest,
3526 "Latin", latinTest,
3527 "Halfwidth", latinTest,
3528 "Devanagari", devaTest,
3529 "Katakana", kataTest,
3530 "" // END MARKER
3531 };
3532
3533 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3534 int32_t i = 0, j=0, k=0;
3535 int32_t sources = Transliterator::countAvailableSources();
3536 for (i = 0; i < sources; i++) {
3537 UnicodeString source;
3538 Transliterator::getAvailableSource(i, source);
3539 UnicodeString test = _findMatch(source, tests);
3540 if (test.length() == 0) {
3541 logln((UnicodeString)"Skipping " + source + "-X");
3542 continue;
3543 }
3544 int32_t targets = Transliterator::countAvailableTargets(source);
3545 for (j = 0; j < targets; j++) {
3546 UnicodeString target;
3547 Transliterator::getAvailableTarget(j, source, target);
3548 int32_t variants = Transliterator::countAvailableVariants(source, target);
3549 for (k =0; k< variants; k++) {
3550 UnicodeString variant;
3551 UParseError err;
3552 UErrorCode status = U_ZERO_ERROR;
3553
3554 Transliterator::getAvailableVariant(k, source, target, variant);
3555 UnicodeString id = source + "-" + target + "/" + variant;
3556
3557 if(id.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_39)){
3558 /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
3559 continue;
3560 }
3561 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3562 if (U_FAILURE(status)) {
3563 errln((UnicodeString)"FAIL: Could not create " + id);
3564 delete t;
3565 continue;
3566 }
3567 status = U_ZERO_ERROR;
3568 CheckIncrementalAux(t, test);
3569
3570 UnicodeString rev;
3571 _trans(*t, test, rev);
3572 Transliterator *inv = t->createInverse(status);
3573 if (U_FAILURE(status)) {
3574 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3575 delete t;
3576 delete inv;
3577 continue;
3578 }
3579 CheckIncrementalAux(inv, rev);
3580 delete t;
3581 delete inv;
3582 }
3583 }
3584 }
3585 }
3586
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3587 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3588 const UnicodeString& input) {
3589 UErrorCode ec = U_ZERO_ERROR;
3590 UTransPosition pos;
3591 UnicodeString test = input;
3592
3593 pos.contextStart = 0;
3594 pos.contextLimit = input.length();
3595 pos.start = 0;
3596 pos.limit = input.length();
3597
3598 t->transliterate(test, pos, ec);
3599 if (U_FAILURE(ec)) {
3600 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3601 return;
3602 }
3603 UBool gotError = FALSE;
3604
3605 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3606
3607 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3608 errln((UnicodeString)"No Progress, " +
3609 t->getID() + ": " + formatInput(test, input, pos));
3610 gotError = TRUE;
3611 } else {
3612 logln((UnicodeString)"PASS Progress, " +
3613 t->getID() + ": " + formatInput(test, input, pos));
3614 }
3615 t->finishTransliteration(test, pos);
3616 if (pos.start != pos.limit) {
3617 errln((UnicodeString)"Incomplete, " +
3618 t->getID() + ": " + formatInput(test, input, pos));
3619 gotError = TRUE;
3620 }
3621 }
3622
TestFunction()3623 void TransliteratorTest::TestFunction() {
3624 // Careful with spacing and ';' here: Phrase this exactly
3625 // as toRules() is going to return it. If toRules() changes
3626 // with regard to spacing or ';', then adjust this string.
3627 UnicodeString rule =
3628 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3629
3630 UParseError pe;
3631 UErrorCode ec = U_ZERO_ERROR;
3632 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3633 if (t == NULL) {
3634 errln("FAIL: createFromRules failed");
3635 return;
3636 }
3637
3638 UnicodeString r;
3639 t->toRules(r, TRUE);
3640 if (r == rule) {
3641 logln((UnicodeString)"OK: toRules() => " + r);
3642 } else {
3643 errln((UnicodeString)"FAIL: toRules() => " + r +
3644 ", expected " + rule);
3645 }
3646
3647 expect(*t, "The Quick Brown Fox",
3648 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3649
3650 delete t;
3651 }
3652
TestInvalidBackRef(void)3653 void TransliteratorTest::TestInvalidBackRef(void) {
3654 UnicodeString rule = ". > $1;";
3655 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3656 UParseError pe;
3657 UErrorCode ec = U_ZERO_ERROR;
3658 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3659 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3660
3661 if (t != NULL) {
3662 errln("FAIL: createFromRules should have returned NULL");
3663 delete t;
3664 }
3665
3666 if (t2 != NULL) {
3667 errln("FAIL: createFromRules should have returned NULL");
3668 delete t2;
3669 }
3670
3671 if (U_SUCCESS(ec)) {
3672 errln("FAIL: Ok: . > $1; => no error");
3673 } else {
3674 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3675 }
3676 }
3677
TestMulticharStringSet()3678 void TransliteratorTest::TestMulticharStringSet() {
3679 // Basic testing
3680 const char* rule =
3681 " [{aa}] > x;"
3682 " a > y;"
3683 " [b{bc}] > z;"
3684 "[{gd}] { e > q;"
3685 " e } [{fg}] > r;" ;
3686
3687 UParseError pe;
3688 UErrorCode ec = U_ZERO_ERROR;
3689 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3690 if (t == NULL || U_FAILURE(ec)) {
3691 delete t;
3692 errln("FAIL: createFromRules failed");
3693 return;
3694 }
3695
3696 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3697 "y x yz z d gd de gdq gdqfg ddrfg");
3698 delete t;
3699
3700 // Overlapped string test. Make sure that when multiple
3701 // strings can match that the longest one is matched.
3702 rule =
3703 " [a {ab} {abc}] > x;"
3704 " b > y;"
3705 " c > z;"
3706 " q [t {st} {rst}] { e > p;" ;
3707
3708 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3709 if (t == NULL || U_FAILURE(ec)) {
3710 delete t;
3711 errln("FAIL: createFromRules failed");
3712 return;
3713 }
3714
3715 expect(*t, "a ab abc qte qste qrste",
3716 "x x x qtp qstp qrstp");
3717 delete t;
3718 }
3719
// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
// BEGIN TestUserFunction support factory

// Transliterators stored by _TUFReg(), one per slot; _TUFFactory() returns
// a clone of the slot selected by its token, and _TUFUnreg() deletes them.
Transliterator* _TUFF[4];
// IDs under which the slots were registered, allocated by _TUFReg() and
// used by _TUFUnreg() to unregister; indexed in parallel with _TUFF.
UnicodeString* _TUFID[4];
3725
_TUFFactory(const UnicodeString &,Transliterator::Token context)3726 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3727 Transliterator::Token context) {
3728 return _TUFF[context.integer]->clone();
3729 }
3730
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3731 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3732 _TUFF[n] = t;
3733 _TUFID[n] = new UnicodeString(ID);
3734 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3735 }
3736
_TUFUnreg(int32_t n)3737 static void _TUFUnreg(int32_t n) {
3738 if (_TUFF[n] != NULL) {
3739 Transliterator::unregister(*_TUFID[n]);
3740 delete _TUFF[n];
3741 delete _TUFID[n];
3742 }
3743 }
3744
3745 // END TestUserFunction support factory
3746 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3747
3748 /**
3749 * Test that user-registered transliterators can be used under function
3750 * syntax.
3751 */
TestUserFunction()3752 void TransliteratorTest::TestUserFunction() {
3753
3754 Transliterator* t;
3755 UParseError pe;
3756 UErrorCode ec = U_ZERO_ERROR;
3757
3758 // Setup our factory
3759 int32_t i;
3760 for (i=0; i<4; ++i) {
3761 _TUFF[i] = NULL;
3762 }
3763
3764 // There's no need to register inverses if we don't use them
3765 t = Transliterator::createFromRules("gif",
3766 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
3767 UTRANS_FORWARD, pe, ec);
3768 if (t == NULL || U_FAILURE(ec)) {
3769 errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3770 return;
3771 }
3772 _TUFReg("Any-gif", t, 0);
3773
3774 t = Transliterator::createFromRules("RemoveCurly",
3775 "[\\{\\}] > ; '\\N' > ;",
3776 UTRANS_FORWARD, pe, ec);
3777 if (t == NULL || U_FAILURE(ec)) {
3778 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3779 goto FAIL;
3780 }
3781 expect(*t, "\\N{name}", "name");
3782 _TUFReg("Any-RemoveCurly", t, 1);
3783
3784 logln("Trying &hex");
3785 t = Transliterator::createFromRules("hex2",
3786 "(.) > &hex($1);",
3787 UTRANS_FORWARD, pe, ec);
3788 if (t == NULL || U_FAILURE(ec)) {
3789 errln("FAIL: createFromRules");
3790 goto FAIL;
3791 }
3792 logln("Registering");
3793 _TUFReg("Any-hex2", t, 2);
3794 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3795 if (t == NULL || U_FAILURE(ec)) {
3796 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3797 goto FAIL;
3798 }
3799 expect(*t, "abc", "\\u0061\\u0062\\u0063");
3800 delete t;
3801
3802 logln("Trying &gif");
3803 t = Transliterator::createFromRules("gif2",
3804 "(.) > &Gif(&Hex2($1));",
3805 UTRANS_FORWARD, pe, ec);
3806 if (t == NULL || U_FAILURE(ec)) {
3807 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3808 goto FAIL;
3809 }
3810 logln("Registering");
3811 _TUFReg("Any-gif2", t, 3);
3812 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3813 if (t == NULL || U_FAILURE(ec)) {
3814 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3815 goto FAIL;
3816 }
3817 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3818 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3819 delete t;
3820
3821 // Test that filters are allowed after &
3822 t = Transliterator::createFromRules("test",
3823 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3824 UTRANS_FORWARD, pe, ec);
3825 if (t == NULL || U_FAILURE(ec)) {
3826 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3827 goto FAIL;
3828 }
3829 expect(*t, "abc",
3830 "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
3831 delete t;
3832
3833 FAIL:
3834 for (i=0; i<4; ++i) {
3835 _TUFUnreg(i);
3836 }
3837 }
3838
3839 /**
3840 * Test the Any-X transliterators.
3841 */
TestAnyX(void)3842 void TransliteratorTest::TestAnyX(void) {
3843 UParseError parseError;
3844 UErrorCode status = U_ZERO_ERROR;
3845 Transliterator* anyLatin =
3846 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3847 if (anyLatin==0) {
3848 errln("FAIL: createInstance returned NULL");
3849 delete anyLatin;
3850 return;
3851 }
3852
3853 expect(*anyLatin,
3854 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3855 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3856
3857 delete anyLatin;
3858 }
3859
3860 /**
3861 * Test the source and target set API. These are only implemented
3862 * for RBT and CompoundTransliterator at this time.
3863 */
TestSourceTargetSet()3864 void TransliteratorTest::TestSourceTargetSet() {
3865 UErrorCode ec = U_ZERO_ERROR;
3866
3867 // Rules
3868 const char* r =
3869 "a > b; "
3870 "r [x{lu}] > q;";
3871
3872 // Expected source
3873 UnicodeSet expSrc("[arx{lu}]", ec);
3874
3875 // Expected target
3876 UnicodeSet expTrg("[bq]", ec);
3877
3878 UParseError pe;
3879 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3880
3881 if (U_FAILURE(ec)) {
3882 delete t;
3883 errln("FAIL: Couldn't set up test");
3884 return;
3885 }
3886
3887 UnicodeSet src; t->getSourceSet(src);
3888 UnicodeSet trg; t->getTargetSet(trg);
3889
3890 if (src == expSrc && trg == expTrg) {
3891 UnicodeString a, b;
3892 logln((UnicodeString)"Ok: " +
3893 r + " => source = " + src.toPattern(a, TRUE) +
3894 ", target = " + trg.toPattern(b, TRUE));
3895 } else {
3896 UnicodeString a, b, c, d;
3897 errln((UnicodeString)"FAIL: " +
3898 r + " => source = " + src.toPattern(a, TRUE) +
3899 ", expected " + expSrc.toPattern(b, TRUE) +
3900 "; target = " + trg.toPattern(c, TRUE) +
3901 ", expected " + expTrg.toPattern(d, TRUE));
3902 }
3903
3904 delete t;
3905 }
3906
3907 /**
3908 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3909 */
TestRuleWhitespace()3910 void TransliteratorTest::TestRuleWhitespace() {
3911 // Rules
3912 const char* r = "a > \\u200E b;";
3913
3914 UErrorCode ec = U_ZERO_ERROR;
3915 UParseError pe;
3916 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3917
3918 if (U_FAILURE(ec)) {
3919 errln("FAIL: Couldn't set up test");
3920 } else {
3921 expect(*t, "a", "b");
3922 }
3923 delete t;
3924
3925 // UnicodeSet
3926 ec = U_ZERO_ERROR;
3927 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3928
3929 if (U_FAILURE(ec)) {
3930 errln("FAIL: Couldn't set up test");
3931 } else {
3932 if (set.contains(0x200E)) {
3933 errln("FAIL: U+200E not being ignored by UnicodeSet");
3934 }
3935 }
3936 }
3937 //======================================================================
3938 // this method is in TestUScript.java
3939 //======================================================================
TestAllCodepoints()3940 void TransliteratorTest::TestAllCodepoints(){
3941 UScriptCode code= USCRIPT_INVALID_CODE;
3942 char id[256]={'\0'};
3943 char abbr[256]={'\0'};
3944 char newId[256]={'\0'};
3945 char newAbbrId[256]={'\0'};
3946 char oldId[256]={'\0'};
3947 char oldAbbrId[256]={'\0'};
3948
3949 UErrorCode status =U_ZERO_ERROR;
3950 UParseError pe;
3951
3952 for(uint32_t i = 0; i<=0x10ffff; i++){
3953 code = uscript_getScript(i,&status);
3954 if(code == USCRIPT_INVALID_CODE){
3955 errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
3956 }
3957 const char* myId = uscript_getName(code);
3958 if(!myId) {
3959 errln("Valid script code returned NULL name. Check your data!");
3960 return;
3961 }
3962 uprv_strcpy(id,myId);
3963 uprv_strcpy(abbr,uscript_getShortName(code));
3964
3965 uprv_strcpy(newId,"[:");
3966 uprv_strcat(newId,id);
3967 uprv_strcat(newId,":];NFD");
3968
3969 uprv_strcpy(newAbbrId,"[:");
3970 uprv_strcat(newAbbrId,abbr);
3971 uprv_strcat(newAbbrId,":];NFD");
3972
3973 if(uprv_strcmp(newId,oldId)!=0){
3974 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
3975 if(t==NULL || U_FAILURE(status)){
3976 errln((UnicodeString)"FAIL: Could not create " + id);
3977 }
3978 delete t;
3979 }
3980 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
3981 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
3982 if(t==NULL || U_FAILURE(status)){
3983 errln((UnicodeString)"FAIL: Could not create " + id);
3984 }
3985 delete t;
3986 }
3987 uprv_strcpy(oldId,newId);
3988 uprv_strcpy(oldAbbrId, newAbbrId);
3989
3990 }
3991
3992 }
3993
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates a forward transliterator for `id` with
 * Transliterator::createInstance.  If creation fails an error is logged;
 * otherwise the instance's getDynamicClassID() is compared with
 * cls::getStaticClassID() and a mismatch is logged as an error.  The
 * instance is deleted in either case.
 *
 * `id` must be a string literal: it is pasted next to the message literal
 * in the errln() call.  `cls` is stringized for the mismatch message.
 * The macro expands to a braced block and uses the enclosing test's errln().
 */
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode ec = U_ZERO_ERROR; \
    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
    if (U_FAILURE(ec)) { \
        errln("FAIL: Couldn't create " id); \
    } else { \
        if (t->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    } \
    delete t; \
}
4007
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *
 * Same check as TEST_TRANSLIT_ID, but the transliterator is built from
 * rule text with Transliterator::createFromRules (ID "_", forward
 * direction).  If creation fails an error is logged; otherwise the
 * instance's getDynamicClassID() is compared with cls::getStaticClassID().
 * The instance is deleted in either case.
 *
 * `rule` must be a string literal: it is pasted next to the message
 * literal in the errln() call.  `cls` is stringized for the mismatch
 * message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode ec = U_ZERO_ERROR; \
    UParseError pe; \
    Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
    if (U_FAILURE(ec)) { \
        errln("FAIL: Couldn't create " rule); \
    } else { \
        if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    } \
    delete t; \
}
4022
/**
 * Class-ID boilerplate check.  For each ID (or rule) below, an instance is
 * created and its dynamic class ID is compared with the static class ID of
 * the class named alongside it; see TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE.
 */
void TransliteratorTest::TestBoilerplate() {
    // Instances created by registered ID
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Instance created from rule text
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4038
TestAlternateSyntax()4039 void TransliteratorTest::TestAlternateSyntax() {
4040 // U+2206 == &
4041 // U+2190 == <
4042 // U+2192 == >
4043 // U+2194 == <>
4044 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4045 "abc",
4046 "xbz");
4047 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4048 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4049 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
4050 }
4051
/*
 * Rule sets used by TestBeginEnd() and TestBeginEndToRules().
 *
 * Entries are referred to by index (from BEGIN_END_TEST_CASES and directly
 * as BEGIN_END_RULES[17]), so the positions must not shift.  The entries
 * that use ::BEGIN / ::END are commented out and replaced by an empty
 * string placeholder to keep the remaining indexes stable.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
// Number of entries in BEGIN_END_RULES, placeholders included.
static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4226
4227 /*
4228 (This entire test is commented out below and will need some heavy revision when we re-add
4229 the ::BEGIN/::END stuff)
4230 static const char* BOGUS_BEGIN_END_RULES[] = {
4231 // [7]
4232 "::BEGIN;"
4233 "abc > xy;"
4234 "::BEGIN;"
4235 "aba > z;"
4236 "::END;"
4237 "::END;",
4238
4239 // [8]
4240 "abc > xy;"
4241 " aba > z;"
4242 "::END;",
4243
4244 // [9]
4245 "::BEGIN;"
4246 "::Upper;"
4247 "::END;"
4248 };
4249 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4250 */
4251
/*
 * Test cases for TestBeginEnd() and TestBeginEndToRules(), stored as a
 * flat array of triples: rule set, input text, expected output.  Both
 * tests step through the array three entries at a time.  Rows for the
 * disabled ::BEGIN/::END rule sets are commented out.
 */
static const char* BEGIN_END_TEST_CASES[] = {
    // rules input expected output
    BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
//    BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
//    BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
//    BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

    BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
//    BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
//    BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
//    BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
//    BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
    BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of array entries (three per test case).
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4280
TestBeginEnd()4281 void TransliteratorTest::TestBeginEnd() {
4282 // run through the list of test cases above
4283 int32_t i = 0;
4284 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4285 expect((UnicodeString)"Test case #" + (i / 3),
4286 UnicodeString(BEGIN_END_TEST_CASES[i]),
4287 UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
4288 UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
4289 }
4290
4291 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4292 UParseError parseError;
4293 UErrorCode status = U_ZERO_ERROR;
4294 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4295 UTRANS_REVERSE, parseError, status);
4296 if (reversed == 0 || U_FAILURE(status)) {
4297 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4298 } else {
4299 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4300 }
4301 delete reversed;
4302
4303 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4304 // that all of them cause errors
4305 /*
4306 (commented out until we have the real ::BEGIN/::END stuff in place
4307 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4308 UParseError parseError;
4309 UErrorCode status = U_ZERO_ERROR;
4310 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4311 UTRANS_FORWARD, parseError, status);
4312 if (!U_FAILURE(status)) {
4313 delete t;
4314 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4315 }
4316 }
4317 */
4318 }
4319
TestBeginEndToRules()4320 void TransliteratorTest::TestBeginEndToRules() {
4321 // run through the same list of test cases we used above, but this time, instead of just
4322 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4323 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4324 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4325 // to (i.e., does the same thing as) the original rule set
4326 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4327 UParseError parseError;
4328 UErrorCode status = U_ZERO_ERROR;
4329 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]),
4330 UTRANS_FORWARD, parseError, status);
4331 if (U_FAILURE(status)) {
4332 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4333 } else {
4334 UnicodeString rules;
4335 t->toRules(rules, TRUE);
4336 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4337 UTRANS_FORWARD, parseError, status);
4338 if (U_FAILURE(status)) {
4339 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4340 parseError, status);
4341 delete t;
4342 } else {
4343 expect(*t2,
4344 UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
4345 UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
4346 delete t;
4347 delete t2;
4348 }
4349 }
4350 }
4351
4352 // do the same thing for the reversible test case
4353 UParseError parseError;
4354 UErrorCode status = U_ZERO_ERROR;
4355 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4356 UTRANS_REVERSE, parseError, status);
4357 if (U_FAILURE(status)) {
4358 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4359 } else {
4360 UnicodeString rules;
4361 reversed->toRules(rules, FALSE);
4362 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4363 parseError, status);
4364 if (U_FAILURE(status)) {
4365 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4366 parseError, status);
4367 delete reversed;
4368 } else {
4369 expect(*reversed2,
4370 UnicodeString("xy XY XYZ yz YZ"),
4371 UnicodeString("xy abc xaba yz aba"));
4372 delete reversed;
4373 delete reversed2;
4374 }
4375 }
4376 }
4377
TestRegisterAlias()4378 void TransliteratorTest::TestRegisterAlias() {
4379 UnicodeString longID("Lower;[aeiou]Upper");
4380 UnicodeString shortID("Any-CapVowels");
4381 UnicodeString reallyShortID("CapVowels");
4382
4383 Transliterator::registerAlias(shortID, longID);
4384
4385 UErrorCode err = U_ZERO_ERROR;
4386 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4387 if (U_FAILURE(err)) {
4388 errln("Failed to instantiate transliterator with long ID");
4389 Transliterator::unregister(shortID);
4390 return;
4391 }
4392 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4393 if (U_FAILURE(err)) {
4394 errln("Failed to instantiate transliterator with short ID");
4395 delete t1;
4396 Transliterator::unregister(shortID);
4397 return;
4398 }
4399
4400 if (t1->getID() != longID)
4401 errln("Transliterator instantiated with long ID doesn't have long ID");
4402 if (t2->getID() != reallyShortID)
4403 errln("Transliterator instantiated with short ID doesn't have short ID");
4404
4405 UnicodeString rules1;
4406 UnicodeString rules2;
4407
4408 t1->toRules(rules1, TRUE);
4409 t2->toRules(rules2, TRUE);
4410 if (rules1 != rules2)
4411 errln("Alias transliterators aren't the same");
4412
4413 delete t1;
4414 delete t2;
4415 Transliterator::unregister(shortID);
4416
4417 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4418 if (U_SUCCESS(err)) {
4419 errln("Instantiation with short ID succeeded after short ID was unregistered");
4420 delete t1;
4421 }
4422
4423 // try the same thing again, but this time with something other than
4424 // an instance of CompoundTransliterator
4425 UnicodeString realID("Latin-Greek");
4426 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4427 Transliterator::registerAlias(fakeID, realID);
4428
4429 err = U_ZERO_ERROR;
4430 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4431 if (U_FAILURE(err)) {
4432 errln("Failed to instantiate transliterator with real ID");
4433 Transliterator::unregister(realID);
4434 return;
4435 }
4436 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4437 if (U_FAILURE(err)) {
4438 errln("Failed to instantiate transliterator with fake ID");
4439 delete t1;
4440 Transliterator::unregister(realID);
4441 return;
4442 }
4443
4444 t1->toRules(rules1, TRUE);
4445 t2->toRules(rules2, TRUE);
4446 if (rules1 != rules2)
4447 errln("Alias transliterators aren't the same");
4448
4449 delete t1;
4450 delete t2;
4451 Transliterator::unregister(fakeID);
4452 }
4453
TestRuleStripping()4454 void TransliteratorTest::TestRuleStripping() {
4455 /*
4456 #
4457 \uE001>\u0C01; # SIGN
4458 */
4459 static const UChar rule[] = {
4460 0x0023,0x0020,0x000D,0x000A,
4461 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4462 };
4463 static const UChar expectedRule[] = {
4464 0xE001,0x003E,0x0C01,0x003B,0
4465 };
4466 UChar result[sizeof(rule)/sizeof(rule[0])];
4467 UErrorCode status = U_ZERO_ERROR;
4468 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4469 if (len != u_strlen(expectedRule)) {
4470 errln("utrans_stripRules return len = %d", len);
4471 }
4472 if (u_strncmp(expectedRule, result, len) != 0) {
4473 errln("utrans_stripRules did not return expected string");
4474 }
4475 }
4476
4477 //======================================================================
4478 // Support methods
4479 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4480 void TransliteratorTest::expectT(const UnicodeString& id,
4481 const UnicodeString& source,
4482 const UnicodeString& expectedResult) {
4483 UErrorCode ec = U_ZERO_ERROR;
4484 UParseError pe;
4485 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4486 if (U_FAILURE(ec)) {
4487 errln((UnicodeString)"FAIL: Could not create " + id);
4488 delete t;
4489 return;
4490 }
4491 expect(*t, source, expectedResult);
4492 delete t;
4493 }
4494
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4495 void TransliteratorTest::reportParseError(const UnicodeString& message,
4496 const UParseError& parseError,
4497 const UErrorCode& status) {
4498 errln(message +
4499 /*", parse error " + parseError.code +*/
4500 ", line " + parseError.line +
4501 ", offset " + parseError.offset +
4502 ", pre-context " + prettify(parseError.preContext, TRUE) +
4503 ", post-context " + prettify(parseError.postContext,TRUE) +
4504 ", Error: " + u_errorName(status));
4505 }
4506
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4507 void TransliteratorTest::expect(const UnicodeString& rules,
4508 const UnicodeString& source,
4509 const UnicodeString& expectedResult,
4510 UTransPosition *pos) {
4511 expect("<ID>", rules, source, expectedResult, pos);
4512 }
4513
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4514 void TransliteratorTest::expect(const UnicodeString& id,
4515 const UnicodeString& rules,
4516 const UnicodeString& source,
4517 const UnicodeString& expectedResult,
4518 UTransPosition *pos) {
4519 UErrorCode status = U_ZERO_ERROR;
4520 UParseError parseError;
4521 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4522 if (U_FAILURE(status)) {
4523 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4524 } else {
4525 expect(*t, source, expectedResult, pos);
4526 }
4527 delete t;
4528 }
4529
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4530 void TransliteratorTest::expect(const Transliterator& t,
4531 const UnicodeString& source,
4532 const UnicodeString& expectedResult,
4533 const Transliterator& reverseTransliterator) {
4534 expect(t, source, expectedResult);
4535 expect(reverseTransliterator, expectedResult, source);
4536 }
4537
/**
 * Core check used by the other expect() overloads.  Runs t over source in
 * up to three ways and reports each through expectAux(), tagged with the
 * transliterator ID plus ":String", ":Replaceable" or ":Keyboard".
 *
 * @param t               the transliterator under test
 * @param source          the input text
 * @param expectedResult  the text every pass is expected to produce
 * @param pos             optional position; when it is 0 the whole string
 *                        is transliterated, otherwise the position is used
 *                        for the ":Replaceable" and ":Keyboard" passes and
 *                        the ":String" pass is skipped
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                UTransPosition *pos) {
    // Pass 1 (only without a position): transliterate a copy in one call.
    if (pos == 0) {
        UnicodeString result(source);
        t.transliterate(result);
        expectAux(t.getID() + ":String", source, result, expectedResult);
    }
    // Take a private copy of the caller's position NOW: *pos itself is
    // handed to finishTransliteration() below, and the incremental pass
    // further down works from this copy.
    UTransPosition index={0, 0, 0, 0};
    if (pos != 0) {
        index = *pos;
    }

    // Pass 2: whole-string transliteration, or finishTransliteration() on
    // the caller's position when one was supplied.
    UnicodeString rsource(source);
    if (pos == 0) {
        t.transliterate(rsource);
    } else {
        // Do it all at once -- below we do it incrementally
        t.finishTransliteration(rsource, *pos);
    }
    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);

    // Test keyboard (incremental) transliteration -- this result
    // must be the same after we finalize (see below).
    UnicodeString log;
    rsource.remove();
    if (pos != 0) {
        // With a position: one incremental call over the whole source,
        // logging the position before and after.
        rsource = source;
        formatInput(log, rsource, index);
        log.append(" -> ");
        UErrorCode status = U_ZERO_ERROR;
        t.transliterate(rsource, index, status);
        formatInput(log, rsource, index);
    } else {
        // Without a position: feed the source one character at a time,
        // logging the state after each character.  The status of each
        // call is not inspected.
        for (int32_t i=0; i<source.length(); ++i) {
            if (i != 0) {
                log.append(" + ");
            }
            log.append(source.charAt(i)).append(" -> ");
            UErrorCode status = U_ZERO_ERROR;
            t.transliterate(rsource, index, source.charAt(i), status);
            formatInput(log, rsource, index);
        }
    }

    // As a final step in keyboard transliteration, we must call
    // transliterate to finish off any pending partial matches that
    // were waiting for more input.
    t.finishTransliteration(rsource, index);
    log.append(" => ").append(rsource);

    // Pass 3 result: the accumulated log serves as the summary text.
    expectAux(t.getID() + ":Keyboard", log,
              rsource == expectedResult,
              expectedResult);
}
4594
4595
4596 /**
4597 * @param appendTo result is appended to this param.
4598 * @param input the string being transliterated
4599 * @param pos the index struct
4600 */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4601 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4602 const UnicodeString& input,
4603 const UTransPosition& pos) {
4604 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4605 // the {} indicate the context start and limit, and the ||
4606 // indicate the start and limit.
4607 if (0 <= pos.contextStart &&
4608 pos.contextStart <= pos.start &&
4609 pos.start <= pos.limit &&
4610 pos.limit <= pos.contextLimit &&
4611 pos.contextLimit <= input.length()) {
4612
4613 UnicodeString a, b, c, d, e;
4614 input.extractBetween(0, pos.contextStart, a);
4615 input.extractBetween(pos.contextStart, pos.start, b);
4616 input.extractBetween(pos.start, pos.limit, c);
4617 input.extractBetween(pos.limit, pos.contextLimit, d);
4618 input.extractBetween(pos.contextLimit, input.length(), e);
4619 appendTo.append(a).append((UChar)123/*{*/).append(b).
4620 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4621 append((UChar)125/*}*/).append(e);
4622 } else {
4623 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4624 pos.contextStart + ", s=" + pos.start + ", l=" +
4625 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4626 input);
4627 }
4628 return appendTo;
4629 }
4630
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4631 void TransliteratorTest::expectAux(const UnicodeString& tag,
4632 const UnicodeString& source,
4633 const UnicodeString& result,
4634 const UnicodeString& expectedResult) {
4635 expectAux(tag, source + " -> " + result,
4636 result == expectedResult,
4637 expectedResult);
4638 }
4639
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4640 void TransliteratorTest::expectAux(const UnicodeString& tag,
4641 const UnicodeString& summary, UBool pass,
4642 const UnicodeString& expectedResult) {
4643 if (pass) {
4644 logln(UnicodeString("(")+tag+") " + prettify(summary));
4645 } else {
4646 errln(UnicodeString("FAIL: (")+tag+") "
4647 + prettify(summary)
4648 + ", expected " + prettify(expectedResult));
4649 }
4650 }
4651
4652 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4653