1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2009, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "rbt_pars.h"
30 #include "anytrans.h"
31 #include "esctrn.h"
32 #include "name2uni.h"
33 #include "nortrans.h"
34 #include "remtrans.h"
35 #include "titletrn.h"
36 #include "tolowtrn.h"
37 #include "toupptrn.h"
38 #include "unesctrn.h"
39 #include "uni2name.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include <stdio.h>
43
44 /***********************************************************************
45
46 HOW TO USE THIS TEST FILE
47 -or-
48 How I developed on two platforms
49 without losing (too much of) my mind
50
51
52 1. Add new tests by copying/pasting/changing existing tests. On Java,
53 any public void method named Test...() taking no parameters becomes
54 a test. On C++, you need to modify the header and add a line to
55 the runIndexedTest() dispatch method.
56
57 2. Make liberal use of the expect() method; it is your friend.
58
59 3. The tests in this file exactly match those in a sister file on the
60 other side. The two files are:
61
62 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
63 icu4c: source/test/intltest/transtst.cpp
64
65 ==> THIS IS THE IMPORTANT PART <==
66
67 When you add a test in this file, add it in TransliteratorTest.java
68 too. Give it the same name and put it in the same relative place.
69 This makes maintenance a lot simpler for any poor soul who ends up
70 trying to synchronize the tests between icu4j and icu4c.
71
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73 then add it in the special non-mirrored section. These are
74 labeled
75
76 "icu4j ONLY"
77
78 or
79
80 "icu4c ONLY"
81
82 Make sure you document the reason the test is here and not there.
83
84
85 Thank you.
86 The Management
87 ***********************************************************************/
88
// Define character constants thusly to be EBCDIC-friendly: each value is
// written as an explicit Unicode code point (with the glyph shown in the
// trailing comment) instead of as a char literal.
enum {
    LEFT_BRACE=((UChar)0x007B), /*{*/
    PIPE =((UChar)0x007C), /*|*/
    ZERO =((UChar)0x0030), /*0*/
    UPPER_A =((UChar)0x0041) /*A*/
};
96
TransliteratorTest()97 TransliteratorTest::TransliteratorTest()
98 : DESERET_DEE((UChar32)0x10414),
99 DESERET_dee((UChar32)0x1043C)
100 {
101 }
102
// Destructor: intentionally empty.
TransliteratorTest::~TransliteratorTest() {}
104
/**
 * Test dispatcher: selects a test by its position in the list below.
 *
 * Every entry goes through the TESTCASE macro, which is defined outside
 * this file section (presumably it reports the test name through `name`
 * and runs the method when `exec` is true -- confirm against the macro
 * definition).  An index past the end of the list sets `name` to the
 * empty string.
 *
 * To add a test, append a TESTCASE line using the next unused index.
 *
 * @param index  position of the requested test
 * @param exec   passed through to TESTCASE
 * @param name   receives the test name ("" for an unknown index)
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        TESTCASE(65,TestIncrementalProgress);
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestRuleWhitespace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        TESTCASE(84,TestAny);
        default: name = ""; break;
    }
}
197
// Version tuple {3,9,4,0}.  NOTE(review): no use of this constant is visible
// in this part of the file -- confirm it is referenced further down before
// touching it.
static const UVersionInfo ICU_39 = {3,9,4,0};
199 /**
200 * Make sure every system transliterator can be instantiated.
201 *
202 * ALSO test that the result of toRules() for each rule is a valid
203 * rule. Do this here so we don't have to have another test that
204 * instantiates everything as well.
205 */
TestInstantiation()206 void TransliteratorTest::TestInstantiation() {
207 UErrorCode ec = U_ZERO_ERROR;
208 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
209 assertSuccess("getAvailableIDs()", ec);
210 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
211 int32_t n = Transliterator::countAvailableIDs();
212 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
213 avail->count(ec) == n);
214 assertSuccess("count()", ec);
215 UnicodeString name;
216 for (int32_t i=0; i<n; ++i) {
217 const UnicodeString& id = *avail->snext(ec);
218 if (!assertSuccess("snext()", ec) ||
219 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
220 break;
221 }
222 UnicodeString id2 = Transliterator::getAvailableID(i);
223 if (id.length() < 1) {
224 errln(UnicodeString("FAIL: getAvailableID(") +
225 i + ") returned empty string");
226 continue;
227 }
228 if (id != id2) {
229 errln(UnicodeString("FAIL: getAvailableID(") +
230 i + ") != getAvailableIDs().snext()");
231 continue;
232 }
233 UParseError parseError;
234 UErrorCode status = U_ZERO_ERROR;
235 Transliterator* t = Transliterator::createInstance(id,
236 UTRANS_FORWARD, parseError,status);
237 name.truncate(0);
238 Transliterator::getDisplayName(id, name);
239 if (t == 0) {
240 errln(UnicodeString("FAIL: Couldn't create ") + id +
241 /*", parse error " + parseError.code +*/
242 ", line " + parseError.line +
243 ", offset " + parseError.offset +
244 ", pre-context " + prettify(parseError.preContext, TRUE) +
245 ", post-context " +prettify(parseError.postContext,TRUE) +
246 ", Error: " + u_errorName(status));
247 // When createInstance fails, it deletes the failing
248 // entry from the available ID list. We detect this
249 // here by looking for a change in countAvailableIDs.
250 int32_t nn = Transliterator::countAvailableIDs();
251 if (nn == (n - 1)) {
252 n = nn;
253 --i; // Compensate for deleted entry
254 }
255 } else {
256 logln(UnicodeString("OK: ") + name + " (" + id + ")");
257
258 // Now test toRules
259 UnicodeString rules;
260 t->toRules(rules, TRUE);
261 Transliterator *u = Transliterator::createFromRules("x",
262 rules, UTRANS_FORWARD, parseError,status);
263 if (u == 0) {
264 errln(UnicodeString("FAIL: ") + id +
265 ".createFromRules() => bad rules" +
266 /*", parse error " + parseError.code +*/
267 ", line " + parseError.line +
268 ", offset " + parseError.offset +
269 ", context " + prettify(parseError.preContext, TRUE) +
270 ", rules: " + prettify(rules, TRUE));
271 } else {
272 delete u;
273 }
274 delete t;
275 }
276 }
277 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
278 assertSuccess("snext()", ec);
279 delete avail;
280
281 // Now test the failure path
282 UParseError parseError;
283 UErrorCode status = U_ZERO_ERROR;
284 UnicodeString id("<Not a valid Transliterator ID>");
285 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
286 if (t != 0) {
287 errln("FAIL: " + id + " returned a transliterator");
288 delete t;
289 } else {
290 logln("OK: Bogus ID handled properly");
291 }
292 }
293
TestSimpleRules(void)294 void TransliteratorTest::TestSimpleRules(void) {
295 /* Example: rules 1. ab>x|y
296 * 2. yc>z
297 *
298 * []|eabcd start - no match, copy e to tranlated buffer
299 * [e]|abcd match rule 1 - copy output & adjust cursor
300 * [ex|y]cd match rule 2 - copy output & adjust cursor
301 * [exz]|d no match, copy d to transliterated buffer
302 * [exzd]| done
303 */
304 expect(UnicodeString("ab>x|y;", "") +
305 "yc>z",
306 "eabcd", "exzd");
307
308 /* Another set of rules:
309 * 1. ab>x|yzacw
310 * 2. za>q
311 * 3. qc>r
312 * 4. cw>n
313 *
314 * []|ab Rule 1
315 * [x|yzacw] No match
316 * [xy|zacw] Rule 2
317 * [xyq|cw] Rule 4
318 * [xyqn]| Done
319 */
320 expect(UnicodeString("ab>x|yzacw;") +
321 "za>q;" +
322 "qc>r;" +
323 "cw>n",
324 "ab", "xyqn");
325
326 /* Test categories
327 */
328 UErrorCode status = U_ZERO_ERROR;
329 UParseError parseError;
330 Transliterator *t = Transliterator::createFromRules(
331 "<ID>",
332 UnicodeString("$dummy=").append((UChar)0xE100) +
333 UnicodeString(";"
334 "$vowel=[aeiouAEIOU];"
335 "$lu=[:Lu:];"
336 "$vowel } $lu > '!';"
337 "$vowel > '&';"
338 "'!' { $lu > '^';"
339 "$lu > '*';"
340 "a > ERROR", ""),
341 UTRANS_FORWARD, parseError,
342 status);
343 if (U_FAILURE(status)) {
344 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
345 return;
346 }
347 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
348 delete t;
349 }
350
351 /**
352 * Test inline set syntax and set variable syntax.
353 */
TestInlineSet(void)354 void TransliteratorTest::TestInlineSet(void) {
355 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
356 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
357
358 expect(UnicodeString(
359 "$digit = [0-9];"
360 "$alpha = [a-zA-Z];"
361 "$alphanumeric = [$digit $alpha];" // ***
362 "$special = [^$alphanumeric];" // ***
363 "$alphanumeric > '-';"
364 "$special > '*';", ""),
365
366 "thx-1138", "---*----");
367 }
368
369 /**
370 * Create some inverses and confirm that they work. We have to be
371 * careful how we do this, since the inverses will not be true
372 * inverses -- we can't throw any random string at the composition
373 * of the transliterators and expect the identity function. F x
374 * F' != I. However, if we are careful about the input, we will
375 * get the expected results.
376 */
TestRuleBasedInverse(void)377 void TransliteratorTest::TestRuleBasedInverse(void) {
378 UnicodeString RULES =
379 UnicodeString("abc>zyx;") +
380 "ab>yz;" +
381 "bc>zx;" +
382 "ca>xy;" +
383 "a>x;" +
384 "b>y;" +
385 "c>z;" +
386
387 "abc<zyx;" +
388 "ab<yz;" +
389 "bc<zx;" +
390 "ca<xy;" +
391 "a<x;" +
392 "b<y;" +
393 "c<z;" +
394
395 "";
396
397 const char* DATA[] = {
398 // Careful here -- random strings will not work. If we keep
399 // the left side to the domain and the right side to the range
400 // we will be okay though (left, abc; right xyz).
401 "a", "x",
402 "abcacab", "zyxxxyy",
403 "caccb", "xyzzy",
404 };
405
406 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
407
408 UErrorCode status = U_ZERO_ERROR;
409 UParseError parseError;
410 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
411 UTRANS_FORWARD, parseError, status);
412 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
413 UTRANS_REVERSE, parseError, status);
414 if (U_FAILURE(status)) {
415 errln("FAIL: RBT constructor failed");
416 return;
417 }
418 for (int32_t i=0; i<DATA_length; i+=2) {
419 expect(*fwd, DATA[i], DATA[i+1]);
420 expect(*rev, DATA[i+1], DATA[i]);
421 }
422 delete fwd;
423 delete rev;
424 }
425
426 /**
427 * Basic test of keyboard.
428 */
TestKeyboard(void)429 void TransliteratorTest::TestKeyboard(void) {
430 UParseError parseError;
431 UErrorCode status = U_ZERO_ERROR;
432 Transliterator *t = Transliterator::createFromRules("<ID>",
433 UnicodeString("psch>Y;")
434 +"ps>y;"
435 +"ch>x;"
436 +"a>A;",
437 UTRANS_FORWARD, parseError,
438 status);
439 if (U_FAILURE(status)) {
440 errln("FAIL: RBT constructor failed");
441 return;
442 }
443 const char* DATA[] = {
444 // insertion, buffer
445 "a", "A",
446 "p", "Ap",
447 "s", "Aps",
448 "c", "Apsc",
449 "a", "AycA",
450 "psch", "AycAY",
451 0, "AycAY", // null means finishKeyboardTransliteration
452 };
453
454 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
455 delete t;
456 }
457
458 /**
459 * Basic test of keyboard with cursor.
460 */
TestKeyboard2(void)461 void TransliteratorTest::TestKeyboard2(void) {
462 UParseError parseError;
463 UErrorCode status = U_ZERO_ERROR;
464 Transliterator *t = Transliterator::createFromRules("<ID>",
465 UnicodeString("ych>Y;")
466 +"ps>|y;"
467 +"ch>x;"
468 +"a>A;",
469 UTRANS_FORWARD, parseError,
470 status);
471 if (U_FAILURE(status)) {
472 errln("FAIL: RBT constructor failed");
473 return;
474 }
475 const char* DATA[] = {
476 // insertion, buffer
477 "a", "A",
478 "p", "Ap",
479 "s", "Aps", // modified for rollback - "Ay",
480 "c", "Apsc", // modified for rollback - "Ayc",
481 "a", "AycA",
482 "p", "AycAp",
483 "s", "AycAps", // modified for rollback - "AycAy",
484 "c", "AycApsc", // modified for rollback - "AycAyc",
485 "h", "AycAY",
486 0, "AycAY", // null means finishKeyboardTransliteration
487 };
488
489 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
490 delete t;
491 }
492
493 /**
494 * Test keyboard transliteration with back-replacement.
495 */
TestKeyboard3(void)496 void TransliteratorTest::TestKeyboard3(void) {
497 // We want th>z but t>y. Furthermore, during keyboard
498 // transliteration we want t>y then yh>z if t, then h are
499 // typed.
500 UnicodeString RULES("t>|y;"
501 "yh>z;");
502
503 const char* DATA[] = {
504 // Column 1: characters to add to buffer (as if typed)
505 // Column 2: expected appearance of buffer after
506 // keyboard xliteration.
507 "a", "a",
508 "b", "ab",
509 "t", "abt", // modified for rollback - "aby",
510 "c", "abyc",
511 "t", "abyct", // modified for rollback - "abycy",
512 "h", "abycz",
513 0, "abycz", // null means finishKeyboardTransliteration
514 };
515
516 UParseError parseError;
517 UErrorCode status = U_ZERO_ERROR;
518 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
519 if (U_FAILURE(status)) {
520 errln("FAIL: RBT constructor failed");
521 return;
522 }
523 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
524 delete t;
525 }
526
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)527 void TransliteratorTest::keyboardAux(const Transliterator& t,
528 const char* DATA[], int32_t DATA_length) {
529 UErrorCode status = U_ZERO_ERROR;
530 UTransPosition index={0, 0, 0, 0};
531 UnicodeString s;
532 for (int32_t i=0; i<DATA_length; i+=2) {
533 UnicodeString log;
534 if (DATA[i] != 0) {
535 log = s + " + "
536 + DATA[i]
537 + " -> ";
538 t.transliterate(s, index, DATA[i], status);
539 } else {
540 log = s + " => ";
541 t.finishTransliteration(s, index);
542 }
543 // Show the start index '{' and the cursor '|'
544 UnicodeString a, b, c;
545 s.extractBetween(0, index.contextStart, a);
546 s.extractBetween(index.contextStart, index.start, b);
547 s.extractBetween(index.start, s.length(), c);
548 log.append(a).
549 append((UChar)LEFT_BRACE).
550 append(b).
551 append((UChar)PIPE).
552 append(c);
553 if (s == DATA[i+1] && U_SUCCESS(status)) {
554 logln(log);
555 } else {
556 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
557 }
558 }
559 }
560
/**
 * Placeholder for the Latin-Arabic test.  The body consists only of
 * commented-out code, so this test currently does nothing; the method is
 * still listed in runIndexedTest().
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
593
594 /**
595 * Compose the Kana transliterator forward and reverse and try
596 * some strings that should come out unchanged.
597 */
TestCompoundKana(void)598 void TransliteratorTest::TestCompoundKana(void) {
599 UParseError parseError;
600 UErrorCode status = U_ZERO_ERROR;
601 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
602 if (t == 0) {
603 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
604 } else {
605 expect(*t, "aaaaa", "aaaaa");
606 delete t;
607 }
608 }
609
610 /**
611 * Compose the hex transliterators forward and reverse.
612 */
TestCompoundHex(void)613 void TransliteratorTest::TestCompoundHex(void) {
614 UParseError parseError;
615 UErrorCode status = U_ZERO_ERROR;
616 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
617 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
618 Transliterator* transab[] = { a, b };
619 Transliterator* transba[] = { b, a };
620 if (a == 0 || b == 0) {
621 errln("FAIL: construction failed");
622 delete a;
623 delete b;
624 return;
625 }
626 // Do some basic tests of a
627 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
628 // Do some basic tests of b
629 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
630
631 Transliterator* ab = new CompoundTransliterator(transab, 2);
632 UnicodeString s("abcde", "");
633 expect(*ab, s, s);
634
635 UnicodeString str(s);
636 a->transliterate(str);
637 Transliterator* ba = new CompoundTransliterator(transba, 2);
638 expect(*ba, str, str);
639
640 delete ab;
641 delete ba;
642 delete a;
643 delete b;
644 }
645
646 int gTestFilterClassID = 0;
647 /**
648 * Used by TestFiltering().
649 */
650 class TestFilter : public UnicodeFilter {
clone() const651 virtual UnicodeFunctor* clone() const {
652 return new TestFilter(*this);
653 }
contains(UChar32 c) const654 virtual UBool contains(UChar32 c) const {
655 return c != (UChar)0x0063 /*c*/;
656 }
657 // Stubs
toPattern(UnicodeString & result,UBool) const658 virtual UnicodeString& toPattern(UnicodeString& result,
659 UBool /*escapeUnprintable*/) const {
660 return result;
661 }
matchesIndexValue(uint8_t) const662 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
663 return FALSE;
664 }
addMatchSetTo(UnicodeSet &) const665 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
666 public:
getDynamicClassID() const667 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
668 };
669
670 /**
671 * Do some basic tests of filtering.
672 */
TestFiltering(void)673 void TransliteratorTest::TestFiltering(void) {
674 UParseError parseError;
675 UErrorCode status = U_ZERO_ERROR;
676 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
677 if (hex == 0) {
678 errln("FAIL: createInstance(Any-Hex) failed");
679 return;
680 }
681 hex->adoptFilter(new TestFilter());
682 UnicodeString s("abcde");
683 hex->transliterate(s);
684 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
685 if (s == exp) {
686 logln(UnicodeString("Ok: \"") + exp + "\"");
687 } else {
688 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
689 }
690
691 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
692 UnicodeFilter *f = hex->orphanFilter();
693 if (f == NULL){
694 errln("FAIL: orphanFilter() should get a UnicodeFilter");
695 } else {
696 delete f;
697 }
698 delete hex;
699 }
700
701 /**
702 * Test anchors
703 */
TestAnchors(void)704 void TransliteratorTest::TestAnchors(void) {
705 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
706 "aaa",
707 "012");
708 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
709 "aaa",
710 "012");
711 expect(UnicodeString("^ab > 01 ;"
712 " ab > |8 ;"
713 " b > k ;"
714 " 8x$ > 45 ;"
715 " 8x > 77 ;", ""),
716
717 "ababbabxabx",
718 "018k7745");
719 expect(UnicodeString("$s = [z$] ;"
720 "$s{ab > 01 ;"
721 " ab > |8 ;"
722 " b > k ;"
723 " 8x}$s > 45 ;"
724 " 8x > 77 ;", ""),
725
726 "abzababbabxzabxabx",
727 "01z018k45z01x45");
728 }
729
730 /**
731 * Test pattern quoting and escape mechanisms.
732 */
TestPatternQuoting(void)733 void TransliteratorTest::TestPatternQuoting(void) {
734 // Array of 3n items
735 // Each item is <rules>, <input>, <expected output>
736 const UnicodeString DATA[] = {
737 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
738 UnicodeString(UChar(0x4E01)),
739 "[male adult]"
740 };
741
742 for (int32_t i=0; i<3; i+=3) {
743 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
744 UParseError parseError;
745 UErrorCode status = U_ZERO_ERROR;
746 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
747 if (U_FAILURE(status)) {
748 errln("RBT constructor failed");
749 } else {
750 expect(*t, DATA[i+1], DATA[i+2]);
751 }
752 delete t;
753 }
754 }
755
756 /**
757 * Regression test for bugs found in Greek transliteration.
758 */
TestJ277(void)759 void TransliteratorTest::TestJ277(void) {
760 UErrorCode status = U_ZERO_ERROR;
761 UParseError parseError;
762 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
763 if (gl == NULL) {
764 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
765 return;
766 }
767
768 UChar sigma = 0x3C3;
769 UChar upsilon = 0x3C5;
770 UChar nu = 0x3BD;
771 // UChar PHI = 0x3A6;
772 UChar alpha = 0x3B1;
773 // UChar omega = 0x3C9;
774 // UChar omicron = 0x3BF;
775 // UChar epsilon = 0x3B5;
776
777 // sigma upsilon nu -> syn
778 UnicodeString syn;
779 syn.append(sigma).append(upsilon).append(nu);
780 expect(*gl, syn, "syn");
781
782 // sigma alpha upsilon nu -> saun
783 UnicodeString sayn;
784 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
785 expect(*gl, sayn, "saun");
786
787 // Again, using a smaller rule set
788 UnicodeString rules(
789 "$alpha = \\u03B1;"
790 "$nu = \\u03BD;"
791 "$sigma = \\u03C3;"
792 "$ypsilon = \\u03C5;"
793 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
794 "s <> $sigma;"
795 "a <> $alpha;"
796 "u <> $vowel { $ypsilon;"
797 "y <> $ypsilon;"
798 "n <> $nu;",
799 "");
800 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
801 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
802 expect(*mini, syn, "syn");
803 expect(*mini, sayn, "saun");
804 delete mini;
805 mini = NULL;
806
807 #if !UCONFIG_NO_FORMATTING
808 // Transliterate the Greek locale data
809 Locale el("el");
810 DateFormatSymbols syms(el, status);
811 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
812 int32_t i, count;
813 const UnicodeString* data = syms.getMonths(count);
814 for (i=0; i<count; ++i) {
815 if (data[i].length() == 0) {
816 continue;
817 }
818 UnicodeString out(data[i]);
819 gl->transliterate(out);
820 UBool ok = TRUE;
821 if (data[i].length() >= 2 && out.length() >= 2 &&
822 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
823 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
824 ok = FALSE;
825 }
826 }
827 if (ok) {
828 logln(prettify(data[i] + " -> " + out));
829 } else {
830 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
831 }
832 }
833 #endif
834
835 delete gl;
836 }
837
838 /**
839 * Prefix, suffix support in hex transliterators
840 */
TestJ243(void)841 void TransliteratorTest::TestJ243(void) {
842 UErrorCode ec = U_ZERO_ERROR;
843
844 // Test default Hex-Any, which should handle
845 // \u, \U, u+, and U+
846 Transliterator *hex =
847 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
848 if (assertSuccess("getInstance", ec)) {
849 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
850 }
851 delete hex;
852
853 // // Try a custom Hex-Unicode
854 // // \uXXXX and &#xXXXX;
855 // ec = U_ZERO_ERROR;
856 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
857 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
858 // "abcd5fx0123");
859 // // Try custom Any-Hex (default is tested elsewhere)
860 // ec = U_ZERO_ERROR;
861 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
862 // expect(hex3, "012", "012");
863 }
864
865 /**
866 * Parsers need better syntax error messages.
867 */
TestJ329(void)868 void TransliteratorTest::TestJ329(void) {
869
870 struct { UBool containsErrors; const char* rule; } DATA[] = {
871 { FALSE, "a > b; c > d" },
872 { TRUE, "a > b; no operator; c > d" },
873 };
874 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
875
876 for (int32_t i=0; i<DATA_length; ++i) {
877 UErrorCode status = U_ZERO_ERROR;
878 UParseError parseError;
879 Transliterator *rbt = Transliterator::createFromRules("<ID>",
880 DATA[i].rule,
881 UTRANS_FORWARD,
882 parseError,
883 status);
884 UBool gotError = U_FAILURE(status);
885 UnicodeString desc(DATA[i].rule);
886 desc.append(gotError ? " -> error" : " -> no error");
887 if (gotError) {
888 desc = desc + ", ParseError code=" + u_errorName(status) +
889 " line=" + parseError.line +
890 " offset=" + parseError.offset +
891 " context=" + parseError.preContext;
892 }
893 if (gotError == DATA[i].containsErrors) {
894 logln(UnicodeString("Ok: ") + desc);
895 } else {
896 errln(UnicodeString("FAIL: ") + desc);
897 }
898 delete rbt;
899 }
900 }
901
902 /**
903 * Test segments and segment references.
904 */
TestSegments(void)905 void TransliteratorTest::TestSegments(void) {
906 // Array of 3n items
907 // Each item is <rules>, <input>, <expected output>
908 UnicodeString DATA[] = {
909 "([a-z]) '.' ([0-9]) > $2 '-' $1",
910 "abc.123.xyz.456",
911 "ab1-c23.xy4-z56",
912
913 // nested
914 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
915 "a1 b2",
916 "a1.a.1 b2.b.2",
917 };
918 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
919
920 for (int32_t i=0; i<DATA_length; i+=3) {
921 logln("Pattern: " + prettify(DATA[i]));
922 UParseError parseError;
923 UErrorCode status = U_ZERO_ERROR;
924 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
925 if (U_FAILURE(status)) {
926 errln("FAIL: RBT constructor");
927 } else {
928 expect(*t, DATA[i+1], DATA[i+2]);
929 }
930 delete t;
931 }
932 }
933
934 /**
935 * Test cursor positioning outside of the key
936 */
TestCursorOffset(void)937 void TransliteratorTest::TestCursorOffset(void) {
938 // Array of 3n items
939 // Each item is <rules>, <input>, <expected output>
940 UnicodeString DATA[] = {
941 "pre {alpha} post > | @ ALPHA ;"
942 "eALPHA > beta ;"
943 "pre {beta} post > BETA @@ | ;"
944 "post > xyz",
945
946 "prealphapost prebetapost",
947
948 "prbetaxyz preBETApost",
949 };
950 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
951
952 for (int32_t i=0; i<DATA_length; i+=3) {
953 logln("Pattern: " + prettify(DATA[i]));
954 UParseError parseError;
955 UErrorCode status = U_ZERO_ERROR;
956 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
957 if (U_FAILURE(status)) {
958 errln("FAIL: RBT constructor");
959 } else {
960 expect(*t, DATA[i+1], DATA[i+2]);
961 }
962 delete t;
963 }
964 }
965
966 /**
967 * Test zero length and > 1 char length variable values. Test
968 * use of variable refs in UnicodeSets.
969 */
TestArbitraryVariableValues(void)970 void TransliteratorTest::TestArbitraryVariableValues(void) {
971 // Array of 3n items
972 // Each item is <rules>, <input>, <expected output>
973 UnicodeString DATA[] = {
974 "$abe = ab;"
975 "$pat = x[yY]z;"
976 "$ll = 'a-z';"
977 "$llZ = [$ll];"
978 "$llY = [$ll$pat];"
979 "$emp = ;"
980
981 "$abe > ABE;"
982 "$pat > END;"
983 "$llZ > 1;"
984 "$llY > 2;"
985 "7$emp 8 > 9;"
986 "",
987
988 "ab xYzxyz stY78",
989 "ABE ENDEND 1129",
990 };
991 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
992
993 for (int32_t i=0; i<DATA_length; i+=3) {
994 logln("Pattern: " + prettify(DATA[i]));
995 UParseError parseError;
996 UErrorCode status = U_ZERO_ERROR;
997 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
998 if (U_FAILURE(status)) {
999 errln("FAIL: RBT constructor");
1000 } else {
1001 expect(*t, DATA[i+1], DATA[i+2]);
1002 }
1003 delete t;
1004 }
1005 }
1006
1007 /**
1008 * Confirm that the contextStart, contextLimit, start, and limit
1009 * behave correctly. J474.
1010 */
TestPositionHandling(void)1011 void TransliteratorTest::TestPositionHandling(void) {
1012 // Array of 3n items
1013 // Each item is <rules>, <input>, <expected output>
1014 const char* DATA[] = {
1015 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1016 "xtat txtb", // pos 0,9,0,9
1017 "xTTaSS TTxUUb",
1018
1019 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1020 "xtat txtb", // pos 2,9,3,8
1021 "xtaSS TTxUUb",
1022
1023 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1024 "xtat txtb", // pos 3,8,3,8
1025 "xtaTT TTxTTb",
1026 };
1027
1028 // Array of 4n positions -- these go with the DATA array
1029 // They are: contextStart, contextLimit, start, limit
1030 int32_t POS[] = {
1031 0, 9, 0, 9,
1032 2, 9, 3, 8,
1033 3, 8, 3, 8,
1034 };
1035
1036 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1037 for (int32_t i=0; i<n; i++) {
1038 UErrorCode status = U_ZERO_ERROR;
1039 UParseError parseError;
1040 Transliterator *t = Transliterator::createFromRules("<ID>",
1041 DATA[3*i], UTRANS_FORWARD, parseError, status);
1042 if (U_FAILURE(status)) {
1043 delete t;
1044 errln("FAIL: RBT constructor");
1045 return;
1046 }
1047 UTransPosition pos;
1048 pos.contextStart= POS[4*i];
1049 pos.contextLimit = POS[4*i+1];
1050 pos.start = POS[4*i+2];
1051 pos.limit = POS[4*i+3];
1052 UnicodeString rsource(DATA[3*i+1]);
1053 t->transliterate(rsource, pos, status);
1054 if (U_FAILURE(status)) {
1055 delete t;
1056 errln("FAIL: transliterate");
1057 return;
1058 }
1059 t->finishTransliteration(rsource, pos);
1060 expectAux(DATA[3*i],
1061 DATA[3*i+1],
1062 rsource,
1063 DATA[3*i+2]);
1064 delete t;
1065 }
1066 }
1067
1068 /**
1069 * Test the Hiragana-Katakana transliterator.
1070 */
TestHiraganaKatakana(void)1071 void TransliteratorTest::TestHiraganaKatakana(void) {
1072 UParseError parseError;
1073 UErrorCode status = U_ZERO_ERROR;
1074 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1075 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1076 if (hk == 0 || kh == 0) {
1077 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1078 delete hk;
1079 delete kh;
1080 return;
1081 }
1082
1083 // Array of 3n items
1084 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1085 const char* DATA[] = {
1086 "both",
1087 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1088 "\\u30A2\\u30F8\\u30F2\\u30B0",
1089
1090 "kh",
1091 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1092 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1093 };
1094 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1095
1096 for (int32_t i=0; i<DATA_length; i+=3) {
1097 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1098 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1099 switch (*DATA[i]) {
1100 case 0x68: //'h': // Hiragana-Katakana
1101 expect(*hk, h, k);
1102 break;
1103 case 0x6B: //'k': // Katakana-Hiragana
1104 expect(*kh, k, h);
1105 break;
1106 case 0x62: //'b': // both
1107 expect(*hk, h, k);
1108 expect(*kh, k, h);
1109 break;
1110 }
1111 }
1112 delete hk;
1113 delete kh;
1114 }
1115
1116 /**
1117 * Test cloning / copy constructor of RBT.
1118 */
TestCopyJ476(void)1119 void TransliteratorTest::TestCopyJ476(void) {
1120 // The real test here is what happens when the destructors are
1121 // called. So we let one object get destructed, and check to
1122 // see that its copy still works.
1123 Transliterator *t2 = 0;
1124 {
1125 UParseError parseError;
1126 UErrorCode status = U_ZERO_ERROR;
1127 Transliterator *t1 = Transliterator::createFromRules("t1",
1128 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1129 if (U_FAILURE(status)) {
1130 errln("FAIL: RBT constructor");
1131 return;
1132 }
1133 t2 = t1->clone(); // Call copy constructor under the covers.
1134 expect(*t1, "abcfoofoo", "ABcbar");
1135 delete t1;
1136 }
1137 expect(*t2, "abcfoofoo", "ABcbar");
1138 delete t2;
1139 }
1140
1141 /**
1142 * Test inter-Indic transliterators. These are composed.
1143 * ICU4C Jitterbug 483.
1144 */
TestInterIndic(void)1145 void TransliteratorTest::TestInterIndic(void) {
1146 UnicodeString ID("Devanagari-Gujarati", "");
1147 UErrorCode status = U_ZERO_ERROR;
1148 UParseError parseError;
1149 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1150 if (dg == 0) {
1151 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1152 return;
1153 }
1154 UnicodeString id = dg->getID();
1155 if (id != ID) {
1156 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1157 }
1158 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1159 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1160 expect(*dg, dev, guj);
1161 delete dg;
1162 }
1163
1164 /**
1165 * Test filter syntax in IDs. (J918)
1166 */
TestFilterIDs(void)1167 void TransliteratorTest::TestFilterIDs(void) {
1168 // Array of 3n strings:
1169 // <id>, <inverse id>, <input>, <expected output>
1170 const char* DATA[] = {
1171 "[aeiou]Any-Hex", // ID
1172 "[aeiou]Hex-Any", // expected inverse ID
1173 "quizzical", // src
1174 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1175
1176 "[aeiou]Any-Hex;[^5]Hex-Any",
1177 "[^5]Any-Hex;[aeiou]Hex-Any",
1178 "quizzical",
1179 "q\\u0075izzical",
1180
1181 "[abc]Null",
1182 "[abc]Null",
1183 "xyz",
1184 "xyz",
1185 };
1186 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1187
1188 for (int i=0; i<DATA_length; i+=4) {
1189 UnicodeString ID(DATA[i], "");
1190 UnicodeString uID(DATA[i+1], "");
1191 UnicodeString data2(DATA[i+2], "");
1192 UnicodeString data3(DATA[i+3], "");
1193 UParseError parseError;
1194 UErrorCode status = U_ZERO_ERROR;
1195 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1196 if (t == 0) {
1197 errln("FAIL: createInstance(" + ID + ") returned NULL");
1198 return;
1199 }
1200 expect(*t, data2, data3);
1201
1202 // Check the ID
1203 if (ID != t->getID()) {
1204 errln("FAIL: createInstance(" + ID + ").getID() => " +
1205 t->getID());
1206 }
1207
1208 // Check the inverse
1209 Transliterator *u = t->createInverse(status);
1210 if (u == 0) {
1211 errln("FAIL: " + ID + ".createInverse() returned NULL");
1212 } else if (u->getID() != uID) {
1213 errln("FAIL: " + ID + ".createInverse().getID() => " +
1214 u->getID() + ", expected " + uID);
1215 }
1216
1217 delete t;
1218 delete u;
1219 }
1220 }
1221
1222 /**
1223 * Test the case mapping transliterators.
1224 */
TestCaseMap(void)1225 void TransliteratorTest::TestCaseMap(void) {
1226 UParseError parseError;
1227 UErrorCode status = U_ZERO_ERROR;
1228 Transliterator* toUpper =
1229 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1230 Transliterator* toLower =
1231 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1232 Transliterator* toTitle =
1233 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1234 if (toUpper==0 || toLower==0 || toTitle==0) {
1235 errln("FAIL: createInstance returned NULL");
1236 delete toUpper;
1237 delete toLower;
1238 delete toTitle;
1239 return;
1240 }
1241
1242 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1243 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1244 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1245 "the quick brown foX jumped over the lazY dogs.");
1246 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1247 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1248
1249 delete toUpper;
1250 delete toLower;
1251 delete toTitle;
1252 }
1253
1254 /**
1255 * Test the name mapping transliterators.
1256 */
TestNameMap(void)1257 void TransliteratorTest::TestNameMap(void) {
1258 UParseError parseError;
1259 UErrorCode status = U_ZERO_ERROR;
1260 Transliterator* uni2name =
1261 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1262 Transliterator* name2uni =
1263 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1264 if (uni2name==0 || name2uni==0) {
1265 errln("FAIL: createInstance returned NULL");
1266 delete uni2name;
1267 delete name2uni;
1268 return;
1269 }
1270
1271 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1272 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1273 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1274 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1275 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1276
1277 delete uni2name;
1278 delete name2uni;
1279
1280 // round trip
1281 Transliterator* t =
1282 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1283 if (t==0) {
1284 errln("FAIL: createInstance returned NULL");
1285 delete t;
1286 return;
1287 }
1288
1289 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1290 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1291 expect(*t, s, s);
1292 delete t;
1293 }
1294
1295 /**
1296 * Test liberalized ID syntax. 1006c
1297 */
TestLiberalizedID(void)1298 void TransliteratorTest::TestLiberalizedID(void) {
1299 // Some test cases have an expected getID() value of NULL. This
1300 // means I have disabled the test case for now. This stuff is
1301 // still under development, and I haven't decided whether to make
1302 // getID() return canonical case yet. It will all get rewritten
1303 // with the move to Source-Target/Variant IDs anyway. [aliu]
1304 const char* DATA[] = {
1305 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1306 " Null ", "Null", "whitespace",
1307 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1308 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1309 };
1310 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1311 UParseError parseError;
1312 UErrorCode status= U_ZERO_ERROR;
1313 for (int32_t i=0; i<DATA_length; i+=3) {
1314 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1315 if (t == 0) {
1316 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1317 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1318 } else {
1319 UnicodeString exp;
1320 if (DATA[i+1]) {
1321 exp = UnicodeString(DATA[i+1], "");
1322 }
1323 // Don't worry about getID() if the expected char*
1324 // is NULL -- see above.
1325 if (exp.length() == 0 || exp == t->getID()) {
1326 logln(UnicodeString("Ok: ") + DATA[i+2] +
1327 " create ID \"" + DATA[i] + "\" => \"" +
1328 exp + "\"");
1329 } else {
1330 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1331 " create ID \"" + DATA[i] + "\" => \"" +
1332 t->getID() + "\", exp \"" + exp + "\"");
1333 }
1334 delete t;
1335 }
1336 }
1337 }
1338
1339 /* test for Jitterbug 912 */
TestCreateInstance()1340 void TransliteratorTest::TestCreateInstance(){
1341 const char* FORWARD = "F";
1342 const char* REVERSE = "R";
1343 const char* DATA[] = {
1344 // Column 1: id
1345 // Column 2: direction
1346 // Column 3: expected ID, or "" if expect failure
1347 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1348
1349 // JB#2689: bad compound causes crash
1350 "InvalidSource-InvalidTarget", FORWARD, "",
1351 "InvalidSource-InvalidTarget", REVERSE, "",
1352 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1353 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1354 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1355 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1356
1357 NULL
1358 };
1359
1360 for (int32_t i=0; DATA[i]; i+=3) {
1361 UParseError err;
1362 UErrorCode ec = U_ZERO_ERROR;
1363 UnicodeString id(DATA[i]);
1364 UTransDirection dir = (DATA[i+1]==FORWARD)?
1365 UTRANS_FORWARD:UTRANS_REVERSE;
1366 UnicodeString expID(DATA[i+2]);
1367 Transliterator* t =
1368 Transliterator::createInstance(id,dir,err,ec);
1369 UnicodeString newID;
1370 if (t) {
1371 newID = t->getID();
1372 }
1373 UBool ok = (newID == expID);
1374 if (!t) {
1375 newID = u_errorName(ec);
1376 }
1377 if (ok) {
1378 logln((UnicodeString)"Ok: createInstance(" +
1379 id + "," + DATA[i+1] + ") => " + newID);
1380 } else {
1381 dataerrln((UnicodeString)"FAIL: createInstance(" +
1382 id + "," + DATA[i+1] + ") => " + newID +
1383 ", expected " + expID);
1384 }
1385 delete t;
1386 }
1387 }
1388
1389 /**
1390 * Test the normalization transliterator.
1391 */
TestNormalizationTransliterator()1392 void TransliteratorTest::TestNormalizationTransliterator() {
1393 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1394 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1395 const char* CANON[] = {
1396 // Input Decomposed Composed
1397 "cat", "cat", "cat" ,
1398 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1399
1400 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1401 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1402
1403 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1404 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1405 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1406
1407 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1408 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1409
1410 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1411 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1412 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1413
1414 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1415 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1416
1417 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1418 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1419
1420 "Henry IV", "Henry IV", "Henry IV" ,
1421 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1422
1423 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1424 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1425 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1426 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1427 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1428
1429 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1430 0 // end
1431 };
1432
1433 const char* COMPAT[] = {
1434 // Input Decomposed Composed
1435 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1436
1437 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1438 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1439
1440 "Henry IV", "Henry IV", "Henry IV" ,
1441 "Henry \\u2163", "Henry IV", "Henry IV" ,
1442
1443 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1444 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1445
1446 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1447 0 // end
1448 };
1449
1450 int32_t i;
1451 UParseError parseError;
1452 UErrorCode status = U_ZERO_ERROR;
1453 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1454 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1455 if (!NFD || !NFC) {
1456 errln("FAIL: createInstance failed");
1457 delete NFD;
1458 delete NFC;
1459 return;
1460 }
1461 for (i=0; CANON[i]; i+=3) {
1462 UnicodeString in = CharsToUnicodeString(CANON[i]);
1463 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1464 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1465 expect(*NFD, in, expd);
1466 expect(*NFC, in, expc);
1467 }
1468 delete NFD;
1469 delete NFC;
1470
1471 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1472 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1473 if (!NFKD || !NFKC) {
1474 errln("FAIL: createInstance failed");
1475 delete NFKD;
1476 delete NFKC;
1477 return;
1478 }
1479 for (i=0; COMPAT[i]; i+=3) {
1480 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1481 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1482 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1483 expect(*NFKD, in, expkd);
1484 expect(*NFKC, in, expkc);
1485 }
1486 delete NFKD;
1487 delete NFKC;
1488
1489 UParseError pe;
1490 status = U_ZERO_ERROR;
1491 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1492 UTRANS_FORWARD,
1493 pe, status);
1494 if (t == 0) {
1495 errln("FAIL: createInstance failed");
1496 }
1497 expect(*t, CharsToUnicodeString("\\u010dx"),
1498 CharsToUnicodeString("c\\u030C"));
1499 delete t;
1500 }
1501
1502 /**
1503 * Test compound RBT rules.
1504 */
TestCompoundRBT(void)1505 void TransliteratorTest::TestCompoundRBT(void) {
1506 // Careful with spacing and ';' here: Phrase this exactly
1507 // as toRules() is going to return it. If toRules() changes
1508 // with regard to spacing or ';', then adjust this string.
1509 UnicodeString rule("::Hex-Any;\n"
1510 "::Any-Lower;\n"
1511 "a > '.A.';\n"
1512 "b > '.B.';\n"
1513 "::[^t]Any-Upper;", "");
1514 UParseError parseError;
1515 UErrorCode status = U_ZERO_ERROR;
1516 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1517 if (t == 0) {
1518 errln("FAIL: createFromRules failed");
1519 return;
1520 }
1521 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1522 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1523 UnicodeString r;
1524 t->toRules(r, TRUE);
1525 if (r == rule) {
1526 logln((UnicodeString)"OK: toRules() => " + r);
1527 } else {
1528 errln((UnicodeString)"FAIL: toRules() => " + r +
1529 ", expected " + rule);
1530 }
1531 delete t;
1532
1533 // Now test toRules
1534 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1535 if (t == 0) {
1536 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1537 return;
1538 }
1539 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1540 t->toRules(r, TRUE);
1541 if (r != exp) {
1542 errln((UnicodeString)"FAIL: toRules() => " + r +
1543 ", expected " + exp);
1544 } else {
1545 logln((UnicodeString)"OK: toRules() => " + r);
1546 }
1547 delete t;
1548
1549 // Round trip the result of toRules
1550 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1551 if (t == 0) {
1552 errln("FAIL: createFromRules #2 failed");
1553 return;
1554 } else {
1555 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1556 }
1557
1558 // Test toRules again
1559 t->toRules(r, TRUE);
1560 if (r != exp) {
1561 errln((UnicodeString)"FAIL: toRules() => " + r +
1562 ", expected " + exp);
1563 } else {
1564 logln((UnicodeString)"OK: toRules() => " + r);
1565 }
1566
1567 delete t;
1568
1569 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1570 // to what the regenerated ID will look like.
1571 UnicodeString id("Upper(Lower);(NFKC)", "");
1572 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1573 if (t == 0) {
1574 errln("FAIL: createInstance #2 failed");
1575 return;
1576 }
1577 if (t->getID() == id) {
1578 logln((UnicodeString)"OK: created " + id);
1579 } else {
1580 errln((UnicodeString)"FAIL: createInstance(" + id +
1581 ").getID() => " + t->getID());
1582 }
1583
1584 Transliterator *u = t->createInverse(status);
1585 if (u == 0) {
1586 errln("FAIL: createInverse failed");
1587 delete t;
1588 return;
1589 }
1590 exp = "NFKC();Lower(Upper)";
1591 if (u->getID() == exp) {
1592 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1593 u->getID());
1594 } else {
1595 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1596 u->getID());
1597 }
1598 delete t;
1599 delete u;
1600 }
1601
1602 /**
 * Compound filter semantics were originally not implemented
1604 * correctly. Originally, each component filter f(i) is replaced by
1605 * f'(i) = f(i) && g, where g is the filter for the compound
1606 * transliterator.
1607 *
1608 * From Mark:
1609 *
 * Suppose I have a transliterator X. Internally X is
1611 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1612 *
1613 * The compound should convert all greek characters (through latin) to
1614 * cyrillic, then lowercase the result. The filter should say "don't
1615 * touch 'A' in the original". But because an intermediate result
1616 * happens to go through "A", the Greek Alpha gets hung up.
1617 */
TestCompoundFilter(void)1618 void TransliteratorTest::TestCompoundFilter(void) {
1619 UParseError parseError;
1620 UErrorCode status = U_ZERO_ERROR;
1621 Transliterator *t = Transliterator::createInstance
1622 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1623 if (t == 0) {
1624 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1625 return;
1626 }
1627 t->adoptFilter(new UnicodeSet("[^A]", status));
1628 if (U_FAILURE(status)) {
1629 errln("FAIL: UnicodeSet ct failed");
1630 delete t;
1631 return;
1632 }
1633
1634 // Only the 'A' at index 1 should remain unchanged
1635 expect(*t,
1636 CharsToUnicodeString("BA\\u039A\\u0391"),
1637 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1638 delete t;
1639 }
1640
TestRemove(void)1641 void TransliteratorTest::TestRemove(void) {
1642 UParseError parseError;
1643 UErrorCode status = U_ZERO_ERROR;
1644 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1645 if (t == 0) {
1646 errln("FAIL: createInstance failed");
1647 return;
1648 }
1649
1650 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1651
1652 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1653 // duplicating the filter
1654 Transliterator* t2 = t->clone();
1655 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1656
1657 delete t;
1658 delete t2;
1659 }
1660
TestToRules(void)1661 void TransliteratorTest::TestToRules(void) {
1662 const char* RBT = "rbt";
1663 const char* SET = "set";
1664 static const char* DATA[] = {
1665 RBT,
1666 "$a=\\u4E61; [$a] > A;",
1667 "[\\u4E61] > A;",
1668
1669 RBT,
1670 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1671 "[[:Zs:][:Zl:]]{a} > A;",
1672
1673 SET,
1674 "[[:Zs:][:Zl:]]",
1675 "[[:Zs:][:Zl:]]",
1676
1677 SET,
1678 "[:Ps:]",
1679 "[:Ps:]",
1680
1681 SET,
1682 "[:L:]",
1683 "[:L:]",
1684
1685 SET,
1686 "[[:L:]-[A]]",
1687 "[[:L:]-[A]]",
1688
1689 SET,
1690 "[~[:Lu:][:Ll:]]",
1691 "[~[:Lu:][:Ll:]]",
1692
1693 SET,
1694 "[~[a-z]]",
1695 "[~[a-z]]",
1696
1697 RBT,
1698 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1699 "[^[:Zs:]]{a} > A;",
1700
1701 RBT,
1702 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1703 "[[a-z]-[:Zs:]]{a} > A;",
1704
1705 RBT,
1706 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1707 "[[:Zs:]&[a-z]]{a} > A;",
1708
1709 RBT,
1710 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1711 "[x[:Zs:]]{a} > A;",
1712
1713 RBT,
1714 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1715 "$macron = \\u0304 ;"
1716 "$evowel = [aeiouyAEIOUY] ;"
1717 "$iotasub = \\u0345 ;"
1718 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1719 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1720
1721 RBT,
1722 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1723 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1724 };
1725 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1726
1727 for (int32_t d=0; d < DATA_length; d+=3) {
1728 if (DATA[d] == RBT) {
1729 // Transliterator test
1730 UParseError parseError;
1731 UErrorCode status = U_ZERO_ERROR;
1732 Transliterator *t = Transliterator::createFromRules("ID",
1733 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1734 if (t == 0) {
1735 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1736 return;
1737 }
1738 UnicodeString rules, escapedRules;
1739 t->toRules(rules, FALSE);
1740 t->toRules(escapedRules, TRUE);
1741 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1742 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1743 if (rules == expRules) {
1744 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1745 " => " + rules);
1746 } else {
1747 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1748 " => " + rules + ", exp " + expRules);
1749 }
1750 if (escapedRules == expEscapedRules) {
1751 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752 " => " + escapedRules);
1753 } else {
1754 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755 " => " + escapedRules + ", exp " + expEscapedRules);
1756 }
1757 delete t;
1758
1759 } else {
1760 // UnicodeSet test
1761 UErrorCode status = U_ZERO_ERROR;
1762 UnicodeString pat(DATA[d+1], -1, US_INV);
1763 UnicodeString expToPat(DATA[d+2], -1, US_INV);
1764 UnicodeSet set(pat, status);
1765 if (U_FAILURE(status)) {
1766 errln("FAIL: UnicodeSet ct failed");
1767 return;
1768 }
1769 // Adjust spacing etc. as necessary.
1770 UnicodeString toPat;
1771 set.toPattern(toPat);
1772 if (expToPat == toPat) {
1773 logln((UnicodeString)"Ok: " + pat +
1774 " => " + toPat);
1775 } else {
1776 errln((UnicodeString)"FAIL: " + pat +
1777 " => " + prettify(toPat, TRUE) +
1778 ", exp " + prettify(pat, TRUE));
1779 }
1780 }
1781 }
1782 }
1783
TestContext()1784 void TransliteratorTest::TestContext() {
1785 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1786 expect("de > x; {d}e > y;",
1787 "de",
1788 "ye",
1789 &pos);
1790
1791 expect("ab{c} > z;",
1792 "xadabdabcy",
1793 "xadabdabzy");
1794 }
1795
TestSupplemental()1796 void TransliteratorTest::TestSupplemental() {
1797
1798 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1799 "a > $a; $s > i;"),
1800 CharsToUnicodeString("ab\\U0001030Fx"),
1801 CharsToUnicodeString("\\U00010300bix"));
1802
1803 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1804 "$b=[A-Z\\U00010400-\\U0001044D];"
1805 "($a)($b) > $2 $1;"),
1806 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1807 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1808
1809 // k|ax\\U00010300xm
1810
1811 // k|a\\U00010400\\U00010300xm
1812 // ky|\\U00010400\\U00010300xm
1813 // ky\\U00010400|\\U00010300xm
1814
1815 // ky\\U00010400|\\U00010300\\U00010400m
1816 // ky\\U00010400y|\\U00010400m
1817 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1818 "$a {x} > | @ \\U00010400;"
1819 "{$a} [^\\u0000-\\uFFFF] > y;"),
1820 CharsToUnicodeString("kax\\U00010300xm"),
1821 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1822
1823 expectT("Any-Name",
1824 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1825 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1826
1827 expectT("Any-Hex/Unicode",
1828 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1829 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1830
1831 expectT("Any-Hex/C",
1832 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1833 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1834
1835 expectT("Any-Hex/Perl",
1836 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1837 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1838
1839 expectT("Any-Hex/Java",
1840 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1841 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1842
1843 expectT("Any-Hex/XML",
1844 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1845 "𐌰􏼀󠁡 ");
1846
1847 expectT("Any-Hex/XML10",
1848 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1849 "𐌰􏼀󠁡 ");
1850
1851 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1852 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1853 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1854 }
1855
TestQuantifier()1856 void TransliteratorTest::TestQuantifier() {
1857
1858 // Make sure @ in a quantified anteContext works
1859 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1860 "AAAAAb",
1861 "aaa(aac)");
1862
1863 // Make sure @ in a quantified postContext works
1864 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1865 "baaaaa",
1866 "caa(aaa)");
1867
1868 // Make sure @ in a quantified postContext with seg ref works
1869 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1870 "baaaaa",
1871 "baa(aaa)");
1872
1873 // Make sure @ past ante context doesn't enter ante context
1874 UTransPosition pos = {0, 5, 3, 5};
1875 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1876 "xxxab",
1877 "xxx(ac)",
1878 &pos);
1879
1880 // Make sure @ past post context doesn't pass limit
1881 UTransPosition pos2 = {0, 4, 0, 2};
1882 expect("{b} a+ > c @@ |; x > y; a > A;",
1883 "baxx",
1884 "caxx",
1885 &pos2);
1886
1887 // Make sure @ past post context doesn't enter post context
1888 expect("{b} a+ > c @@ |; x > y; a > A;",
1889 "baxx",
1890 "cayy");
1891
1892 expect("(ab)? c > d;",
1893 "c abc ababc",
1894 "d d abd");
1895
1896 // NOTE: The (ab)+ when referenced just yields a single "ab",
1897 // not the full sequence of them. This accords with perl behavior.
1898 expect("(ab)+ {x} > '(' $1 ')';",
1899 "x abx ababxy",
1900 "x ab(ab) abab(ab)y");
1901
1902 expect("b+ > x;",
1903 "ac abc abbc abbbc",
1904 "ac axc axc axc");
1905
1906 expect("[abc]+ > x;",
1907 "qac abrc abbcs abtbbc",
1908 "qx xrx xs xtx");
1909
1910 expect("q{(ab)+} > x;",
1911 "qa qab qaba qababc qaba",
1912 "qa qx qxa qxc qxa");
1913
1914 expect("q(ab)* > x;",
1915 "qa qab qaba qababc",
1916 "xa x xa xc");
1917
1918 // NOTE: The (ab)+ when referenced just yields a single "ab",
1919 // not the full sequence of them. This accords with perl behavior.
1920 expect("q(ab)* > '(' $1 ')';",
1921 "qa qab qaba qababc",
1922 "()a (ab) (ab)a (ab)c");
1923
1924 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1925 // quoted string
1926 expect("'ab'+ > x;",
1927 "bb ab ababb",
1928 "bb x xb");
1929
1930 // $foo+ and $foo* -- the quantifier should apply to the entire
1931 // variable reference
1932 expect("$var = ab; $var+ > x;",
1933 "bb ab ababb",
1934 "bb x xb");
1935 }
1936
1937 class TestTrans : public Transliterator {
1938 public:
TestTrans(const UnicodeString & id)1939 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1940 }
clone(void) const1941 virtual Transliterator* clone(void) const {
1942 return new TestTrans(getID());
1943 }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1944 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1945 UBool /*isIncremental*/) const
1946 {
1947 offsets.start = offsets.limit;
1948 }
1949 virtual UClassID getDynamicClassID() const;
1950 static UClassID U_EXPORT2 getStaticClassID();
1951 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1952 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1953
1954 /**
1955 * Test Source-Target/Variant.
1956 */
1957 void TransliteratorTest::TestSTV(void) {
1958 int32_t ns = Transliterator::countAvailableSources();
1959 if (ns < 0 || ns > 255) {
1960 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1961 return;
1962 }
1963 int32_t i, j;
1964 for (i=0; i<ns; ++i) {
1965 UnicodeString source;
1966 Transliterator::getAvailableSource(i, source);
1967 logln((UnicodeString)"" + i + ": " + source);
1968 if (source.length() == 0) {
1969 errln("FAIL: empty source");
1970 continue;
1971 }
1972 int32_t nt = Transliterator::countAvailableTargets(source);
1973 if (nt < 0 || nt > 255) {
1974 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1975 continue;
1976 }
1977 for (int32_t j=0; j<nt; ++j) {
1978 UnicodeString target;
1979 Transliterator::getAvailableTarget(j, source, target);
1980 logln((UnicodeString)" " + j + ": " + target);
1981 if (target.length() == 0) {
1982 errln("FAIL: empty target");
1983 continue;
1984 }
1985 int32_t nv = Transliterator::countAvailableVariants(source, target);
1986 if (nv < 0 || nv > 255) {
1987 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1988 continue;
1989 }
1990 for (int32_t k=0; k<nv; ++k) {
1991 UnicodeString variant;
1992 Transliterator::getAvailableVariant(k, source, target, variant);
1993 if (variant.length() == 0) {
1994 logln((UnicodeString)" " + k + ": <empty>");
1995 } else {
1996 logln((UnicodeString)" " + k + ": " + variant);
1997 }
1998 }
1999 }
2000 }
2001
2002 // Test registration
2003 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2004 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2005 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2006 for (i=0; i<3; ++i) {
2007 Transliterator *t = new TestTrans(IDS[i]);
2008 if (t == 0) {
2009 errln("FAIL: out of memory");
2010 return;
2011 }
2012 if (t->getID() != IDS[i]) {
2013 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2014 delete t;
2015 return;
2016 }
2017 Transliterator::registerInstance(t);
2018 UErrorCode status = U_ZERO_ERROR;
2019 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2020 if (t == NULL) {
2021 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2022 IDS[i]);
2023 } else {
2024 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2025 IDS[i]);
2026 delete t;
2027 }
2028 Transliterator::unregister(IDS[i]);
2029 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2030 if (t != NULL) {
2031 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2032 IDS[i]);
2033 delete t;
2034 }
2035 }
2036
2037 // Make sure getAvailable API reflects removal
2038 int32_t n = Transliterator::countAvailableIDs();
2039 for (i=0; i<n; ++i) {
2040 UnicodeString id = Transliterator::getAvailableID(i);
2041 for (j=0; j<3; ++j) {
2042 if (id.caseCompare(FULL_IDS[j],0)==0) {
2043 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2044 }
2045 }
2046 }
2047 n = Transliterator::countAvailableTargets("Any");
2048 for (i=0; i<n; ++i) {
2049 UnicodeString t;
2050 Transliterator::getAvailableTarget(i, "Any", t);
2051 if (t.caseCompare(IDS[0],0)==0) {
2052 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2053 }
2054 }
2055 n = Transliterator::countAvailableSources();
2056 for (i=0; i<n; ++i) {
2057 UnicodeString s;
2058 Transliterator::getAvailableSource(i, s);
2059 for (j=0; j<3; ++j) {
2060 if (SOURCES[j] == NULL) continue;
2061 if (s.caseCompare(SOURCES[j],0)==0) {
2062 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2063 }
2064 }
2065 }
2066 }
2067
2068 /**
2069 * Test inverse of Greek-Latin; Title()
2070 */
TestCompoundInverse(void)2071 void TransliteratorTest::TestCompoundInverse(void) {
2072 UParseError parseError;
2073 UErrorCode status = U_ZERO_ERROR;
2074 Transliterator *t = Transliterator::createInstance
2075 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2076 if (t == 0) {
2077 dataerrln("FAIL: createInstance - %s", u_errorName(status));
2078 return;
2079 }
2080 UnicodeString exp("(Title);Latin-Greek");
2081 if (t->getID() == exp) {
2082 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2083 t->getID());
2084 } else {
2085 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2086 t->getID() + "\", expected \"" + exp + "\"");
2087 }
2088 delete t;
2089 }
2090
2091 /**
2092 * Test NFD chaining with RBT
2093 */
TestNFDChainRBT()2094 void TransliteratorTest::TestNFDChainRBT() {
2095 UParseError pe;
2096 UErrorCode ec = U_ZERO_ERROR;
2097 Transliterator* t = Transliterator::createFromRules(
2098 "TEST", "::NFD; aa > Q; a > q;",
2099 UTRANS_FORWARD, pe, ec);
2100 if (t == NULL || U_FAILURE(ec)) {
2101 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2102 return;
2103 }
2104 expect(*t, "aa", "Q");
2105 delete t;
2106
2107 // TEMPORARY TESTS -- BEING DEBUGGED
2108 //=- UnicodeString s, s2;
2109 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2110 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2111 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2112 //=- expect(*t, s, s2);
2113 //=- delete t;
2114 //=-
2115 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2116 //=- expect(*t, s2, s);
2117 //=- delete t;
2118 //=-
2119 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2120 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2121 //=- expect(*t, s, s);
2122 //=- delete t;
2123
2124 // const char* source[] = {
2125 // /*
2126 // "\\u015Br\\u012Bmad",
2127 // "bhagavadg\\u012Bt\\u0101",
2128 // "adhy\\u0101ya",
2129 // "arjuna",
2130 // "vi\\u1E63\\u0101da",
2131 // "y\\u014Dga",
2132 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2133 // "uv\\u0101cr\\u0325",
2134 // */
2135 // "rmk\\u1E63\\u0113t",
2136 // //"dharmak\\u1E63\\u0113tr\\u0113",
2137 // /*
2138 // "kuruk\\u1E63\\u0113tr\\u0113",
2139 // "samav\\u0113t\\u0101",
2140 // "yuyutsava-\\u1E25",
2141 // "m\\u0101mak\\u0101-\\u1E25",
2142 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2143 // "kimakurvata",
2144 // "san\\u0304java",
2145 // */
2146 //
2147 // 0
2148 // };
2149 // const char* expected[] = {
2150 // /*
2151 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2152 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2153 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2154 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2155 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2156 // "\\u092f\\u094b\\u0917",
2157 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2158 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2159 // */
2160 // "\\u0927",
2161 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2162 // /*
2163 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2164 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2165 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2166 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2167 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2168 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2169 // "\\u0938\\u0902\\u091c\\u0935",
2170 // */
2171 // 0
2172 // };
2173 // UErrorCode status = U_ZERO_ERROR;
2174 // UParseError parseError;
2175 // UnicodeString message;
2176 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2177 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2178 // if(U_FAILURE(status)){
2179 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2180 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2181 // delete latinToDevToLatin;
2182 // delete devToLatinToDev;
2183 // return;
2184 // }
2185 // UnicodeString gotResult;
2186 // for(int i= 0; source[i] != 0; i++){
2187 // gotResult = source[i];
2188 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2189 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2190 // }
2191 // delete latinToDevToLatin;
2192 // delete devToLatinToDev;
2193 }
2194
2195 /**
2196 * Inverse of "Null" should be "Null". (J21)
2197 */
TestNullInverse()2198 void TransliteratorTest::TestNullInverse() {
2199 UParseError pe;
2200 UErrorCode ec = U_ZERO_ERROR;
2201 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2202 if (t == 0 || U_FAILURE(ec)) {
2203 errln("FAIL: createInstance");
2204 return;
2205 }
2206 Transliterator *u = t->createInverse(ec);
2207 if (u == 0 || U_FAILURE(ec)) {
2208 errln("FAIL: createInverse");
2209 delete t;
2210 return;
2211 }
2212 if (u->getID() != "Null") {
2213 errln("FAIL: Inverse of Null should be Null");
2214 }
2215 delete t;
2216 delete u;
2217 }
2218
2219 /**
2220 * Check ID of inverse of alias. (J22)
2221 */
TestAliasInverseID()2222 void TransliteratorTest::TestAliasInverseID() {
2223 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2224 UParseError pe;
2225 UErrorCode ec = U_ZERO_ERROR;
2226 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2227 if (t == 0 || U_FAILURE(ec)) {
2228 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2229 return;
2230 }
2231 Transliterator *u = t->createInverse(ec);
2232 if (u == 0 || U_FAILURE(ec)) {
2233 errln("FAIL: createInverse");
2234 delete t;
2235 return;
2236 }
2237 UnicodeString exp = "Hangul-Latin";
2238 UnicodeString got = u->getID();
2239 if (got != exp) {
2240 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2241 ", expected " + exp);
2242 }
2243 delete t;
2244 delete u;
2245 }
2246
2247 /**
2248 * Test IDs of inverses of compound transliterators. (J20)
2249 */
TestCompoundInverseID()2250 void TransliteratorTest::TestCompoundInverseID() {
2251 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2252 UParseError pe;
2253 UErrorCode ec = U_ZERO_ERROR;
2254 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2255 if (t == 0 || U_FAILURE(ec)) {
2256 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2257 return;
2258 }
2259 Transliterator *u = t->createInverse(ec);
2260 if (u == 0 || U_FAILURE(ec)) {
2261 errln("FAIL: createInverse");
2262 delete t;
2263 return;
2264 }
2265 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2266 UnicodeString got = u->getID();
2267 if (got != exp) {
2268 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2269 ", expected " + exp);
2270 }
2271 delete t;
2272 delete u;
2273 }
2274
2275 /**
2276 * Test undefined variable.
2277
2278 */
TestUndefinedVariable()2279 void TransliteratorTest::TestUndefinedVariable() {
2280 UnicodeString rule = "$initial } a <> \\u1161;";
2281 UParseError pe;
2282 UErrorCode ec = U_ZERO_ERROR;
2283 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2284 delete t;
2285 if (U_FAILURE(ec)) {
2286 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2287 u_errorName(ec));
2288 return;
2289 }
2290 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2291 u_errorName(ec));
2292 }
2293
2294 /**
2295 * Test empty context.
2296 */
TestEmptyContext()2297 void TransliteratorTest::TestEmptyContext() {
2298 expect(" { a } > b;", "xay a ", "xby b ");
2299 }
2300
2301 /**
2302 * Test compound filter ID syntax
2303 */
TestCompoundFilterID(void)2304 void TransliteratorTest::TestCompoundFilterID(void) {
2305 static const char* DATA[] = {
2306 // Col. 1 = ID or rule set (latter must start with #)
2307
2308 // = columns > 1 are null if expect col. 1 to be illegal =
2309
2310 // Col. 2 = direction, "F..." or "R..."
2311 // Col. 3 = source string
2312 // Col. 4 = exp result
2313
2314 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2315 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2316 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2317 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2318 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2319 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2320 NULL,
2321 };
2322
2323 for (int32_t i=0; DATA[i]; i+=4) {
2324 UnicodeString id = CharsToUnicodeString(DATA[i]);
2325 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2326 UTRANS_REVERSE : UTRANS_FORWARD;
2327 UnicodeString source;
2328 UnicodeString exp;
2329 if (DATA[i+2] != NULL) {
2330 source = CharsToUnicodeString(DATA[i+2]);
2331 exp = CharsToUnicodeString(DATA[i+3]);
2332 }
2333 UBool expOk = (DATA[i+1] != NULL);
2334 Transliterator* t = NULL;
2335 UParseError pe;
2336 UErrorCode ec = U_ZERO_ERROR;
2337 if (id.charAt(0) == 0x23/*#*/) {
2338 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2339 } else {
2340 t = Transliterator::createInstance(id, direction, pe, ec);
2341 }
2342 UBool ok = (t != NULL && U_SUCCESS(ec));
2343 UnicodeString transID;
2344 if (t!=0) {
2345 transID = t->getID();
2346 }
2347 else {
2348 transID = UnicodeString("NULL", "");
2349 }
2350 if (ok == expOk) {
2351 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2352 u_errorName(ec));
2353 if (source.length() != 0) {
2354 expect(*t, source, exp);
2355 }
2356 delete t;
2357 } else {
2358 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2359 u_errorName(ec));
2360 }
2361 }
2362 }
2363
2364 /**
2365 * Test new property set syntax
2366 */
TestPropertySet()2367 void TransliteratorTest::TestPropertySet() {
2368 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2369 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2370 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2371 }
2372
2373 /**
2374 * Test various failure points of the new 2.0 engine.
2375 */
TestNewEngine()2376 void TransliteratorTest::TestNewEngine() {
2377 UParseError pe;
2378 UErrorCode ec = U_ZERO_ERROR;
2379 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2380 if (t == 0 || U_FAILURE(ec)) {
2381 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2382 return;
2383 }
2384 // Katakana should be untouched
2385 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2386 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2387
2388 delete t;
2389
2390 #if 1
2391 // This test will only work if Transliterator.ROLLBACK is
2392 // true. Otherwise, this test will fail, revealing a
2393 // limitation of global filters in incremental mode.
2394 Transliterator *a =
2395 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2396 Transliterator *A =
2397 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2398 if (U_FAILURE(ec)) {
2399 delete a;
2400 delete A;
2401 return;
2402 }
2403
2404 Transliterator* array[3];
2405 array[0] = a;
2406 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2407 array[2] = A;
2408 if (U_FAILURE(ec)) {
2409 errln("FAIL: createInstance NFD");
2410 delete a;
2411 delete A;
2412 delete array[1];
2413 return;
2414 }
2415
2416 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2417 if (U_FAILURE(ec)) {
2418 errln("FAIL: UnicodeSet constructor");
2419 delete a;
2420 delete A;
2421 delete array[1];
2422 delete t;
2423 return;
2424 }
2425
2426 expect(*t, "aAaA", "bAbA");
2427
2428 assertTrue("countElements", t->countElements() == 3);
2429 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2430 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2431 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2432 assertSuccess("getElement", ec);
2433
2434 delete a;
2435 delete A;
2436 delete array[1];
2437 delete t;
2438 #endif
2439
2440 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2441 "a",
2442 "ax");
2443
2444 UnicodeString gr = CharsToUnicodeString(
2445 "$ddot = \\u0308 ;"
2446 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2447 "$rough = \\u0314 ;"
2448 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2449 "\\u03b1 <> a ;"
2450 "$rough <> h ;");
2451
2452 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2453 }
2454
2455 /**
2456 * Test quantified segment behavior. We want:
2457 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2458 */
TestQuantifiedSegment(void)2459 void TransliteratorTest::TestQuantifiedSegment(void) {
2460 // The normal case
2461 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2462
2463 // The tricky case; the quantifier is around the segment
2464 expect("([abc])+ > x $1 x;", "cba", "xax");
2465
2466 // Tricky case in reverse direction
2467 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2468
2469 // Check post-context segment
2470 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2471
2472 // Test toRule/toPattern for non-quantified segment.
2473 // Careful with spacing here.
2474 UnicodeString r("([a-c]){q} > x $1 x;");
2475 UParseError pe;
2476 UErrorCode ec = U_ZERO_ERROR;
2477 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2478 if (U_FAILURE(ec)) {
2479 errln("FAIL: createFromRules");
2480 delete t;
2481 return;
2482 }
2483 UnicodeString rr;
2484 t->toRules(rr, TRUE);
2485 if (r != rr) {
2486 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2487 } else {
2488 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2489 }
2490 delete t;
2491
2492 // Test toRule/toPattern for quantified segment.
2493 // Careful with spacing here.
2494 r = "([a-c])+{q} > x $1 x;";
2495 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2496 if (U_FAILURE(ec)) {
2497 errln("FAIL: createFromRules");
2498 delete t;
2499 return;
2500 }
2501 t->toRules(rr, TRUE);
2502 if (r != rr) {
2503 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2504 } else {
2505 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2506 }
2507 delete t;
2508 }
2509
2510 //======================================================================
2511 // Ram's tests
2512 //======================================================================
TestDevanagariLatinRT()2513 void TransliteratorTest::TestDevanagariLatinRT(){
2514 const int MAX_LEN= 52;
2515 const char* const source[MAX_LEN] = {
2516 "bh\\u0101rata",
2517 "kra",
2518 "k\\u1E63a",
2519 "khra",
2520 "gra",
2521 "\\u1E45ra",
2522 "cra",
2523 "chra",
2524 "j\\u00F1a",
2525 "jhra",
2526 "\\u00F1ra",
2527 "\\u1E6Dya",
2528 "\\u1E6Dhra",
2529 "\\u1E0Dya",
2530 //"r\\u0323ya", // \u095c is not valid in Devanagari
2531 "\\u1E0Dhya",
2532 "\\u1E5Bhra",
2533 "\\u1E47ra",
2534 "tta",
2535 "thra",
2536 "dda",
2537 "dhra",
2538 "nna",
2539 "pra",
2540 "phra",
2541 "bra",
2542 "bhra",
2543 "mra",
2544 "\\u1E49ra",
2545 //"l\\u0331ra",
2546 "yra",
2547 "\\u1E8Fra",
2548 //"l-",
2549 "vra",
2550 "\\u015Bra",
2551 "\\u1E63ra",
2552 "sra",
2553 "hma",
2554 "\\u1E6D\\u1E6Da",
2555 "\\u1E6D\\u1E6Dha",
2556 "\\u1E6Dh\\u1E6Dha",
2557 "\\u1E0D\\u1E0Da",
2558 "\\u1E0D\\u1E0Dha",
2559 "\\u1E6Dya",
2560 "\\u1E6Dhya",
2561 "\\u1E0Dya",
2562 "\\u1E0Dhya",
2563 // Not roundtrippable --
2564 // \\u0939\\u094d\\u094d\\u092E - hma
2565 // \\u0939\\u094d\\u092E - hma
2566 // CharsToUnicodeString("hma"),
2567 "hya",
2568 "\\u015Br\\u0325",
2569 "\\u015Bca",
2570 "\\u0115",
2571 "san\\u0304j\\u012Bb s\\u0113nagupta",
2572 "\\u0101nand vaddir\\u0101ju",
2573 "\\u0101",
2574 "a"
2575 };
2576 const char* const expected[MAX_LEN] = {
2577 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2578 "\\u0915\\u094D\\u0930", /* kra */
2579 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2580 "\\u0916\\u094D\\u0930", /* khra */
2581 "\\u0917\\u094D\\u0930", /* gra */
2582 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2583 "\\u091A\\u094D\\u0930", /* cra */
2584 "\\u091B\\u094D\\u0930", /* chra */
2585 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2586 "\\u091D\\u094D\\u0930", /* jhra */
2587 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2588 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2589 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2590 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2591 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2592 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2593 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2594 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2595 "\\u0924\\u094D\\u0924", /* tta */
2596 "\\u0925\\u094D\\u0930", /* thra */
2597 "\\u0926\\u094D\\u0926", /* dda */
2598 "\\u0927\\u094D\\u0930", /* dhra */
2599 "\\u0928\\u094D\\u0928", /* nna */
2600 "\\u092A\\u094D\\u0930", /* pra */
2601 "\\u092B\\u094D\\u0930", /* phra */
2602 "\\u092C\\u094D\\u0930", /* bra */
2603 "\\u092D\\u094D\\u0930", /* bhra */
2604 "\\u092E\\u094D\\u0930", /* mra */
2605 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2606 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2607 "\\u092F\\u094D\\u0930", /* yra */
2608 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2609 //"l-",
2610 "\\u0935\\u094D\\u0930", /* vra */
2611 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2612 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2613 "\\u0938\\u094D\\u0930", /* sra */
2614 "\\u0939\\u094d\\u092E", /* hma */
2615 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2616 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2617 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2618 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2619 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2620 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2621 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2622 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2623 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2624 // "hma", /* hma */
2625 "\\u0939\\u094D\\u092F", /* hya */
2626 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2627 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2628 "\\u090d", /* e\\u0306 */
2629 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2630 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2631 "\\u0906",
2632 "\\u0905",
2633 };
2634 UErrorCode status = U_ZERO_ERROR;
2635 UParseError parseError;
2636 UnicodeString message;
2637 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2638 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2639 if(U_FAILURE(status)){
2640 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2641 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2642 return;
2643 }
2644 UnicodeString gotResult;
2645 for(int i= 0; i<MAX_LEN; i++){
2646 gotResult = source[i];
2647 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2648 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2649 }
2650 delete latinToDev;
2651 delete devToLatin;
2652 }
2653
TestTeluguLatinRT()2654 void TransliteratorTest::TestTeluguLatinRT(){
2655 const int MAX_LEN=10;
2656 const char* const source[MAX_LEN] = {
2657 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2658 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2659 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2660 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2661 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2662 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2663 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2664 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2665 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2666 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2667 };
2668
2669 const char* const expected[MAX_LEN] = {
2670 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2671 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2672 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2673 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2674 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2675 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2676 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2677 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2678 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2679 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2680 };
2681
2682 UErrorCode status = U_ZERO_ERROR;
2683 UParseError parseError;
2684 UnicodeString message;
2685 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2686 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2687 if(U_FAILURE(status)){
2688 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2689 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2690 return;
2691 }
2692 UnicodeString gotResult;
2693 for(int i= 0; i<MAX_LEN; i++){
2694 gotResult = source[i];
2695 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2696 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2697 }
2698 delete latinToDev;
2699 delete devToLatin;
2700 }
2701
TestSanskritLatinRT()2702 void TransliteratorTest::TestSanskritLatinRT(){
2703 const int MAX_LEN =16;
2704 const char* const source[MAX_LEN] = {
2705 "rmk\\u1E63\\u0113t",
2706 "\\u015Br\\u012Bmad",
2707 "bhagavadg\\u012Bt\\u0101",
2708 "adhy\\u0101ya",
2709 "arjuna",
2710 "vi\\u1E63\\u0101da",
2711 "y\\u014Dga",
2712 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2713 "uv\\u0101cr\\u0325",
2714 "dharmak\\u1E63\\u0113tr\\u0113",
2715 "kuruk\\u1E63\\u0113tr\\u0113",
2716 "samav\\u0113t\\u0101",
2717 "yuyutsava\\u1E25",
2718 "m\\u0101mak\\u0101\\u1E25",
2719 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2720 "kimakurvata",
2721 "san\\u0304java",
2722 };
2723 const char* const expected[MAX_LEN] = {
2724 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2725 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2726 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2727 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2728 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2729 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2730 "\\u092f\\u094b\\u0917",
2731 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2732 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2733 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2734 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2735 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2736 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2737 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2738 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2739 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2740 "\\u0938\\u0902\\u091c\\u0935",
2741 };
2742 UErrorCode status = U_ZERO_ERROR;
2743 UParseError parseError;
2744 UnicodeString message;
2745 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2746 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2747 if(U_FAILURE(status)){
2748 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2749 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2750 return;
2751 }
2752 UnicodeString gotResult;
2753 for(int i= 0; i<MAX_LEN; i++){
2754 gotResult = source[i];
2755 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2756 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2757 }
2758 delete latinToDev;
2759 delete devToLatin;
2760 }
2761
2762
TestCompoundLatinRT()2763 void TransliteratorTest::TestCompoundLatinRT(){
2764 const char* const source[] = {
2765 "rmk\\u1E63\\u0113t",
2766 "\\u015Br\\u012Bmad",
2767 "bhagavadg\\u012Bt\\u0101",
2768 "adhy\\u0101ya",
2769 "arjuna",
2770 "vi\\u1E63\\u0101da",
2771 "y\\u014Dga",
2772 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2773 "uv\\u0101cr\\u0325",
2774 "dharmak\\u1E63\\u0113tr\\u0113",
2775 "kuruk\\u1E63\\u0113tr\\u0113",
2776 "samav\\u0113t\\u0101",
2777 "yuyutsava\\u1E25",
2778 "m\\u0101mak\\u0101\\u1E25",
2779 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2780 "kimakurvata",
2781 "san\\u0304java"
2782 };
2783 const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2784 const char* const expected[MAX_LEN] = {
2785 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2786 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2787 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2788 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2789 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2790 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2791 "\\u092f\\u094b\\u0917",
2792 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2793 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2794 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2795 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2796 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2797 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2798 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2799 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2800 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2801 "\\u0938\\u0902\\u091c\\u0935"
2802 };
2803 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2804 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2805 return;
2806 }
2807
2808 UErrorCode status = U_ZERO_ERROR;
2809 UParseError parseError;
2810 UnicodeString message;
2811 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2812 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2813 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2814 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2815
2816 if(U_FAILURE(status)){
2817 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2818 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2819 return;
2820 }
2821 UnicodeString gotResult;
2822 for(int i= 0; i<MAX_LEN; i++){
2823 gotResult = source[i];
2824 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2825 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2826 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2827
2828 }
2829 delete(latinToDevToLatin);
2830 delete(devToLatinToDev);
2831 delete(devToTelToDev);
2832 delete(latinToTelToLatin);
2833 }
2834
2835 /**
2836 * Test Gurmukhi-Devanagari Tippi and Bindi
2837 */
TestGurmukhiDevanagari()2838 void TransliteratorTest::TestGurmukhiDevanagari(){
2839 // the rule says:
2840 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2841 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2842 UErrorCode status = U_ZERO_ERROR;
2843 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2844 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2845 UParseError parseError;
2846
2847 UnicodeSetIterator vIter(vowel);
2848 UnicodeSetIterator nvIter(non_vowel);
2849 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2850 if(U_FAILURE(status)) {
2851 dataerrln("Error creating transliterator %s", u_errorName(status));
2852 delete trans;
2853 return;
2854 }
2855 UnicodeString src (" \\u0902", -1, US_INV);
2856 UnicodeString expected(" \\u0A02", -1, US_INV);
2857 src = src.unescape();
2858 expected= expected.unescape();
2859
2860 while(vIter.next()){
2861 src.setCharAt(0,(UChar) vIter.getCodepoint());
2862 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2863 expect(*trans,src,expected);
2864 }
2865
2866 expected.setCharAt(1,0x0A70);
2867 while(nvIter.next()){
2868 //src.setCharAt(0,(char) nvIter.codepoint);
2869 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2870 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2871 expect(*trans,src,expected);
2872 }
2873 delete trans;
2874 }
2875 /**
2876 * Test instantiation from a locale.
2877 */
TestLocaleInstantiation(void)2878 void TransliteratorTest::TestLocaleInstantiation(void) {
2879 UParseError pe;
2880 UErrorCode ec = U_ZERO_ERROR;
2881 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2882 if (U_FAILURE(ec)) {
2883 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2884 delete t;
2885 return;
2886 }
2887 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2888 delete t;
2889
2890 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2891 if (U_FAILURE(ec)) {
2892 errln("FAIL: createInstance(en-el)");
2893 delete t;
2894 return;
2895 }
2896 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2897 delete t;
2898 }
2899
2900 /**
2901 * Test title case handling of accent (should ignore accents)
2902 */
TestTitleAccents(void)2903 void TransliteratorTest::TestTitleAccents(void) {
2904 UParseError pe;
2905 UErrorCode ec = U_ZERO_ERROR;
2906 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2907 if (U_FAILURE(ec)) {
2908 errln("FAIL: createInstance(Title)");
2909 delete t;
2910 return;
2911 }
2912 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2913 delete t;
2914 }
2915
2916 /**
2917 * Basic test of a locale resource based rule.
2918 */
TestLocaleResource()2919 void TransliteratorTest::TestLocaleResource() {
2920 const char* DATA[] = {
2921 // id from to
2922 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2923 "Latin-el", "b", "\\u03bc\\u03c0",
2924 "Latin-Greek", "b", "\\u03B2",
2925 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2926 "el-Latin", "\\u03B2", "v",
2927 "Greek-Latin", "\\u03B2", "b",
2928 };
2929 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2930 for (int32_t i=0; i<DATA_length; i+=3) {
2931 UParseError pe;
2932 UErrorCode ec = U_ZERO_ERROR;
2933 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2934 if (U_FAILURE(ec)) {
2935 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2936 delete t;
2937 continue;
2938 }
2939 expect(*t, CharsToUnicodeString(DATA[i+1]),
2940 CharsToUnicodeString(DATA[i+2]));
2941 delete t;
2942 }
2943 }
2944
2945 /**
2946 * Make sure parse errors reference the right line.
2947 */
TestParseError()2948 void TransliteratorTest::TestParseError() {
2949 static const char* rule =
2950 "a > b;\n"
2951 "# more stuff\n"
2952 "d << b;";
2953 UErrorCode ec = U_ZERO_ERROR;
2954 UParseError pe;
2955 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2956 delete t;
2957 if (U_FAILURE(ec)) {
2958 UnicodeString err(pe.preContext);
2959 err.append((UChar)124/*|*/).append(pe.postContext);
2960 if (err.indexOf("d << b") >= 0) {
2961 logln("Ok: " + err);
2962 } else {
2963 errln("FAIL: " + err);
2964 }
2965 }
2966 else {
2967 errln("FAIL: no syntax error");
2968 }
2969 static const char* maskingRule =
2970 "a>x;\n"
2971 "# more stuff\n"
2972 "ab>y;";
2973 ec = U_ZERO_ERROR;
2974 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2975 if (ec != U_RULE_MASK_ERROR) {
2976 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2977 }
2978 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2979 errln("FAIL: did not get expected precontext");
2980 }
2981 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2982 errln("FAIL: did not get expected postcontext");
2983 }
2984 }
2985
2986 /**
2987 * Make sure sets on output are disallowed.
2988 */
TestOutputSet()2989 void TransliteratorTest::TestOutputSet() {
2990 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2991 UErrorCode ec = U_ZERO_ERROR;
2992 UParseError pe;
2993 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2994 delete t;
2995 if (U_FAILURE(ec)) {
2996 UnicodeString err(pe.preContext);
2997 err.append((UChar)124/*|*/).append(pe.postContext);
2998 logln("Ok: " + err);
2999 return;
3000 }
3001 errln("FAIL: No syntax error");
3002 }
3003
3004 /**
3005 * Test the use variable range pragma, making sure that use of
3006 * variable range characters is detected and flagged as an error.
3007 */
TestVariableRange()3008 void TransliteratorTest::TestVariableRange() {
3009 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3010 UErrorCode ec = U_ZERO_ERROR;
3011 UParseError pe;
3012 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3013 delete t;
3014 if (U_FAILURE(ec)) {
3015 UnicodeString err(pe.preContext);
3016 err.append((UChar)124/*|*/).append(pe.postContext);
3017 logln("Ok: " + err);
3018 return;
3019 }
3020 errln("FAIL: No syntax error");
3021 }
3022
3023 /**
3024 * Test invalid post context error handling
3025 */
TestInvalidPostContext()3026 void TransliteratorTest::TestInvalidPostContext() {
3027 UnicodeString rule = "a}b{c>d;";
3028 UErrorCode ec = U_ZERO_ERROR;
3029 UParseError pe;
3030 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3031 delete t;
3032 if (U_FAILURE(ec)) {
3033 UnicodeString err(pe.preContext);
3034 err.append((UChar)124/*|*/).append(pe.postContext);
3035 if (err.indexOf("a}b{c") >= 0) {
3036 logln("Ok: " + err);
3037 } else {
3038 errln("FAIL: " + err);
3039 }
3040 return;
3041 }
3042 errln("FAIL: No syntax error");
3043 }
3044
3045 /**
3046 * Test ID form variants
3047 */
TestIDForms()3048 void TransliteratorTest::TestIDForms() {
3049 const char* DATA[] = {
3050 "NFC", NULL, "NFD",
3051 "nfd", NULL, "NFC", // make sure case is ignored
3052 "Any-NFKD", NULL, "Any-NFKC",
3053 "Null", NULL, "Null",
3054 "-nfkc", "nfkc", "NFKD",
3055 "-nfkc/", "nfkc", "NFKD",
3056 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3057 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3058 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3059 "Source-", NULL, NULL,
3060 "Source/Variant-", NULL, NULL,
3061 "Source-/Variant", NULL, NULL,
3062 "/Variant", NULL, NULL,
3063 "/Variant-", NULL, NULL,
3064 "-/Variant", NULL, NULL,
3065 "-/", NULL, NULL,
3066 "-", NULL, NULL,
3067 "/", NULL, NULL,
3068 };
3069 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3070
3071 for (int32_t i=0; i<DATA_length; i+=3) {
3072 const char* ID = DATA[i];
3073 const char* expID = DATA[i+1];
3074 const char* expInvID = DATA[i+2];
3075 UBool expValid = (expInvID != NULL);
3076 if (expID == NULL) {
3077 expID = ID;
3078 }
3079 UParseError pe;
3080 UErrorCode ec = U_ZERO_ERROR;
3081 Transliterator *t =
3082 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3083 if (U_FAILURE(ec)) {
3084 if (!expValid) {
3085 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3086 } else {
3087 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3088 }
3089 delete t;
3090 continue;
3091 }
3092 Transliterator *u = t->createInverse(ec);
3093 if (U_FAILURE(ec)) {
3094 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3095 delete t;
3096 delete u;
3097 continue;
3098 }
3099 if (t->getID() == expID &&
3100 u->getID() == expInvID) {
3101 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3102 } else {
3103 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3104 t->getID() + " x getInverse() => " + u->getID() +
3105 ", expected " + expInvID);
3106 }
3107 delete t;
3108 delete u;
3109 }
3110 }
3111
3112 static const UChar SPACE[] = {32,0};
3113 static const UChar NEWLINE[] = {10,0};
3114 static const UChar RETURN[] = {13,0};
3115 static const UChar EMPTY[] = {0};
3116
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3117 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3118 const UnicodeString& testRulesForward) {
3119 UnicodeString rules2; t2.toRules(rules2, TRUE);
3120 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3121 rules2.findAndReplace(SPACE, EMPTY);
3122 rules2.findAndReplace(NEWLINE, EMPTY);
3123 rules2.findAndReplace(RETURN, EMPTY);
3124
3125 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3126
3127 if (rules2 != testRules) {
3128 errln(label);
3129 logln((UnicodeString)"GENERATED RULES: " + rules2);
3130 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3131 }
3132 }
3133
3134 /**
3135 * Mark's toRules test.
3136 */
TestToRulesMark()3137 void TransliteratorTest::TestToRulesMark() {
3138 const char* testRules =
3139 "::[[:Latin:][:Mark:]];"
3140 "::NFKD (NFC);"
3141 "::Lower (Lower);"
3142 "a <> \\u03B1;" // alpha
3143 "::NFKC (NFD);"
3144 "::Upper (Lower);"
3145 "::Lower ();"
3146 "::([[:Greek:][:Mark:]]);"
3147 ;
3148 const char* testRulesForward =
3149 "::[[:Latin:][:Mark:]];"
3150 "::NFKD(NFC);"
3151 "::Lower(Lower);"
3152 "a > \\u03B1;"
3153 "::NFKC(NFD);"
3154 "::Upper (Lower);"
3155 "::Lower ();"
3156 ;
3157 const char* testRulesBackward =
3158 "::[[:Greek:][:Mark:]];"
3159 "::Lower (Upper);"
3160 "::NFD(NFKC);"
3161 "\\u03B1 > a;"
3162 "::Lower(Lower);"
3163 "::NFC(NFKD);"
3164 ;
3165 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3166 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3167
3168 UParseError pe;
3169 UErrorCode ec = U_ZERO_ERROR;
3170 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3171 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3172
3173 if (U_FAILURE(ec)) {
3174 delete t2;
3175 delete t3;
3176 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3177 return;
3178 }
3179
3180 expect(*t2, source, target);
3181 expect(*t3, target, source);
3182
3183 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3184 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3185
3186 delete t2;
3187 delete t3;
3188 }
3189
3190 /**
3191 * Test Escape and Unescape transliterators.
3192 */
TestEscape()3193 void TransliteratorTest::TestEscape() {
3194 UParseError pe;
3195 UErrorCode ec;
3196 Transliterator *t;
3197
3198 ec = U_ZERO_ERROR;
3199 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3200 if (U_FAILURE(ec)) {
3201 errln((UnicodeString)"FAIL: createInstance");
3202 } else {
3203 expect(*t,
3204 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3205 "@12Q");
3206 }
3207 delete t;
3208
3209 ec = U_ZERO_ERROR;
3210 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3211 if (U_FAILURE(ec)) {
3212 errln((UnicodeString)"FAIL: createInstance");
3213 } else {
3214 expect(*t,
3215 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3216 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3217 }
3218 delete t;
3219
3220 ec = U_ZERO_ERROR;
3221 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3222 if (U_FAILURE(ec)) {
3223 errln((UnicodeString)"FAIL: createInstance");
3224 } else {
3225 expect(*t,
3226 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3227 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3228 }
3229 delete t;
3230
3231 ec = U_ZERO_ERROR;
3232 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3233 if (U_FAILURE(ec)) {
3234 errln((UnicodeString)"FAIL: createInstance");
3235 } else {
3236 expect(*t,
3237 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3238 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3239 }
3240 delete t;
3241 }
3242
3243
TestAnchorMasking()3244 void TransliteratorTest::TestAnchorMasking(){
3245 UnicodeString rule ("^a > Q; a > q;");
3246 UErrorCode status= U_ZERO_ERROR;
3247 UParseError parseError;
3248
3249 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3250 if(U_FAILURE(status)){
3251 errln(UnicodeString("FAIL: ") + "ID" +
3252 ".createFromRules() => bad rules" +
3253 /*", parse error " + parseError.code +*/
3254 ", line " + parseError.line +
3255 ", offset " + parseError.offset +
3256 ", context " + prettify(parseError.preContext, TRUE) +
3257 ", rules: " + prettify(rule, TRUE));
3258 }
3259 delete t;
3260 }
3261
3262 /**
3263 * Make sure display names of variants look reasonable.
3264 */
TestDisplayName()3265 void TransliteratorTest::TestDisplayName() {
3266 #if UCONFIG_NO_FORMATTING
3267 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3268 return;
3269 #else
3270 static const char* DATA[] = {
3271 // ID, forward name, reverse name
3272 // Update the text as necessary -- the important thing is
3273 // not the text itself, but how various cases are handled.
3274
3275 // Basic test
3276 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3277
3278 // Variants
3279 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3280
3281 // Target-only IDs
3282 "NFC", "Any to NFC", "Any to NFD",
3283 };
3284
3285 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3286
3287 Locale US("en", "US");
3288
3289 for (int32_t i=0; i<DATA_length; i+=3) {
3290 UnicodeString name;
3291 Transliterator::getDisplayName(DATA[i], US, name);
3292 if (name != DATA[i+1]) {
3293 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3294 name + ", expected " + DATA[i+1]);
3295 } else {
3296 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3297 }
3298 UErrorCode ec = U_ZERO_ERROR;
3299 UParseError pe;
3300 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3301 if (U_FAILURE(ec)) {
3302 delete t;
3303 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3304 continue;
3305 }
3306 name = Transliterator::getDisplayName(t->getID(), US, name);
3307 if (name != DATA[i+2]) {
3308 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3309 name + ", expected " + DATA[i+2]);
3310 } else {
3311 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3312 }
3313 delete t;
3314 }
3315 #endif
3316 }
3317
TestSpecialCases(void)3318 void TransliteratorTest::TestSpecialCases(void) {
3319 const UnicodeString registerRules[] = {
3320 "Any-Dev1", "x > X; y > Y;",
3321 "Any-Dev2", "XY > Z",
3322 "Greek-Latin/FAKE",
3323 CharsToUnicodeString
3324 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3325 "" // END MARKER
3326 };
3327
3328 const UnicodeString testCases[] = {
3329 // NORMALIZATION
3330 // should add more test cases
3331 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3332 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3334 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3335
3336 // mp -> b BUG
3337 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3338 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3339
3340 // check for devanagari bug
3341 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3342
3343 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3344 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3345 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3346
3347 //TODO: enable this test once Titlecase works right
3348 /*
3349 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3350 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3351 */
3352 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3353 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3354 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3355 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3356
3357 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3358 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3359
3360 // FORMS OF S
3361 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3362 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3363 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3364 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3365 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3366 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3367 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3368 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369 // Tatiana bug
3370 // Upper: TAT\\u02B9\\u00C2NA
3371 // Lower: tat\\u02B9\\u00E2na
3372 // Title: Tat\\u02B9\\u00E2na
3373 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3374 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3375 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3376 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3377 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3378 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3379
3380 "" // END MARKER
3381 };
3382
3383 UParseError pos;
3384 int32_t i;
3385 for (i = 0; registerRules[i].length()!=0; i+=2) {
3386 UErrorCode status = U_ZERO_ERROR;
3387
3388 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3389 registerRules[i+1], UTRANS_FORWARD, pos, status);
3390 if (U_FAILURE(status)) {
3391 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3392 } else {
3393 Transliterator::registerInstance(t);
3394 }
3395 }
3396 for (i = 0; testCases[i].length()!=0; i+=3) {
3397 UErrorCode ec = U_ZERO_ERROR;
3398 UParseError pe;
3399 const UnicodeString& name = testCases[i];
3400 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3401 if (U_FAILURE(ec)) {
3402 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3403 delete t;
3404 continue;
3405 }
3406 const UnicodeString& id = t->getID();
3407 const UnicodeString& source = testCases[i+1];
3408 UnicodeString target;
3409
3410 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3411
3412 if (testCases[i+2].length() > 0) {
3413 target = testCases[i+2];
3414 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3415 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3416 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3417 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3418 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3419 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3420 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3421 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3422 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3423 target = source;
3424 target.toLower(Locale::getUS());
3425 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3426 target = source;
3427 target.toUpper(Locale::getUS());
3428 }
3429 if (U_FAILURE(ec)) {
3430 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3431 continue;
3432 }
3433
3434 expect(*t, source, target);
3435 delete t;
3436 }
3437 for (i = 0; registerRules[i].length()!=0; i+=2) {
3438 Transliterator::unregister(registerRules[i]);
3439 }
3440 }
3441
Char32ToEscapedChars(UChar32 ch,char * buffer)3442 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3443 if (ch <= 0xFFFF) {
3444 sprintf(buffer, "\\u%04x", (int)ch);
3445 } else {
3446 sprintf(buffer, "\\U%08x", (int)ch);
3447 }
3448 return buffer;
3449 }
3450
TestSurrogateCasing(void)3451 void TransliteratorTest::TestSurrogateCasing (void) {
3452 // check that casing handles surrogates
3453 // titlecase is currently defective
3454 char buffer[20];
3455 UChar buffer2[20];
3456 UChar32 dee;
3457 UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3458 UnicodeString DEE(u_totitle(dee));
3459 if (DEE != DESERET_DEE) {
3460 err("Fails titlecase of surrogates");
3461 err(Char32ToEscapedChars(dee, buffer));
3462 err(", ");
3463 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3464 }
3465
3466 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3467 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3468 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3469 UErrorCode status= U_ZERO_ERROR;
3470
3471 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3472 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3473 errln("Fails: Can't uppercase surrogates.");
3474 }
3475
3476 status= U_ZERO_ERROR;
3477 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3479 errln("Fails: Can't lowercase surrogates.");
3480 }
3481 }
3482
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3483 static void _trans(Transliterator& t, const UnicodeString& src,
3484 UnicodeString& result) {
3485 result = src;
3486 t.transliterate(result);
3487 }
3488
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3489 static void _trans(const UnicodeString& id, const UnicodeString& src,
3490 UnicodeString& result, UErrorCode ec) {
3491 UParseError pe;
3492 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3493 if (U_SUCCESS(ec)) {
3494 _trans(*t, src, result);
3495 }
3496 delete t;
3497 }
3498
/**
 * Look up source in a flat (key, value) table, comparing keys
 * case-insensitively.
 *
 * @param source key to look for
 * @param pairs  array of key/value pairs terminated by an empty key
 * @return the value paired with the first matching key, or an empty
 *         string when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                const UnicodeString* pairs) {
    for (const UnicodeString* entry = pairs; entry->length() > 0; entry += 2) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
    }
    return UnicodeString();
}
3509
3510 // Check to see that incremental gets at least part way through a reasonable string.
3511
TestIncrementalProgress(void)3512 void TransliteratorTest::TestIncrementalProgress(void) {
3513 UErrorCode ec = U_ZERO_ERROR;
3514 UnicodeString latinTest = "The Quick Brown Fox.";
3515 UnicodeString devaTest;
3516 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3517 UnicodeString kataTest;
3518 _trans("Latin-Katakana", latinTest, kataTest, ec);
3519 if (U_FAILURE(ec)) {
3520 errln("FAIL: Internal error");
3521 return;
3522 }
3523 const UnicodeString tests[] = {
3524 "Any", latinTest,
3525 "Latin", latinTest,
3526 "Halfwidth", latinTest,
3527 "Devanagari", devaTest,
3528 "Katakana", kataTest,
3529 "" // END MARKER
3530 };
3531
3532 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3533 int32_t i = 0, j=0, k=0;
3534 int32_t sources = Transliterator::countAvailableSources();
3535 for (i = 0; i < sources; i++) {
3536 UnicodeString source;
3537 Transliterator::getAvailableSource(i, source);
3538 UnicodeString test = _findMatch(source, tests);
3539 if (test.length() == 0) {
3540 logln((UnicodeString)"Skipping " + source + "-X");
3541 continue;
3542 }
3543 int32_t targets = Transliterator::countAvailableTargets(source);
3544 for (j = 0; j < targets; j++) {
3545 UnicodeString target;
3546 Transliterator::getAvailableTarget(j, source, target);
3547 int32_t variants = Transliterator::countAvailableVariants(source, target);
3548 for (k =0; k< variants; k++) {
3549 UnicodeString variant;
3550 UParseError err;
3551 UErrorCode status = U_ZERO_ERROR;
3552
3553 Transliterator::getAvailableVariant(k, source, target, variant);
3554 UnicodeString id = source + "-" + target + "/" + variant;
3555
3556 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3557 if (U_FAILURE(status)) {
3558 errln((UnicodeString)"FAIL: Could not create " + id);
3559 delete t;
3560 continue;
3561 }
3562 status = U_ZERO_ERROR;
3563 CheckIncrementalAux(t, test);
3564
3565 UnicodeString rev;
3566 _trans(*t, test, rev);
3567 Transliterator *inv = t->createInverse(status);
3568 if (U_FAILURE(status)) {
3569 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3570 delete t;
3571 delete inv;
3572 continue;
3573 }
3574 CheckIncrementalAux(inv, rev);
3575 delete t;
3576 delete inv;
3577 }
3578 }
3579 }
3580 }
3581
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3582 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3583 const UnicodeString& input) {
3584 UErrorCode ec = U_ZERO_ERROR;
3585 UTransPosition pos;
3586 UnicodeString test = input;
3587
3588 pos.contextStart = 0;
3589 pos.contextLimit = input.length();
3590 pos.start = 0;
3591 pos.limit = input.length();
3592
3593 t->transliterate(test, pos, ec);
3594 if (U_FAILURE(ec)) {
3595 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3596 return;
3597 }
3598 UBool gotError = FALSE;
3599
3600 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3601
3602 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3603 errln((UnicodeString)"No Progress, " +
3604 t->getID() + ": " + formatInput(test, input, pos));
3605 gotError = TRUE;
3606 } else {
3607 logln((UnicodeString)"PASS Progress, " +
3608 t->getID() + ": " + formatInput(test, input, pos));
3609 }
3610 t->finishTransliteration(test, pos);
3611 if (pos.start != pos.limit) {
3612 errln((UnicodeString)"Incomplete, " +
3613 t->getID() + ": " + formatInput(test, input, pos));
3614 gotError = TRUE;
3615 }
3616 }
3617
TestFunction()3618 void TransliteratorTest::TestFunction() {
3619 // Careful with spacing and ';' here: Phrase this exactly
3620 // as toRules() is going to return it. If toRules() changes
3621 // with regard to spacing or ';', then adjust this string.
3622 UnicodeString rule =
3623 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3624
3625 UParseError pe;
3626 UErrorCode ec = U_ZERO_ERROR;
3627 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3628 if (t == NULL) {
3629 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3630 return;
3631 }
3632
3633 UnicodeString r;
3634 t->toRules(r, TRUE);
3635 if (r == rule) {
3636 logln((UnicodeString)"OK: toRules() => " + r);
3637 } else {
3638 errln((UnicodeString)"FAIL: toRules() => " + r +
3639 ", expected " + rule);
3640 }
3641
3642 expect(*t, "The Quick Brown Fox",
3643 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3644
3645 delete t;
3646 }
3647
TestInvalidBackRef(void)3648 void TransliteratorTest::TestInvalidBackRef(void) {
3649 UnicodeString rule = ". > $1;";
3650 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3651 UParseError pe;
3652 UErrorCode ec = U_ZERO_ERROR;
3653 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3654 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3655
3656 if (t != NULL) {
3657 errln("FAIL: createFromRules should have returned NULL");
3658 delete t;
3659 }
3660
3661 if (t2 != NULL) {
3662 errln("FAIL: createFromRules should have returned NULL");
3663 delete t2;
3664 }
3665
3666 if (U_SUCCESS(ec)) {
3667 errln("FAIL: Ok: . > $1; => no error");
3668 } else {
3669 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3670 }
3671 }
3672
TestMulticharStringSet()3673 void TransliteratorTest::TestMulticharStringSet() {
3674 // Basic testing
3675 const char* rule =
3676 " [{aa}] > x;"
3677 " a > y;"
3678 " [b{bc}] > z;"
3679 "[{gd}] { e > q;"
3680 " e } [{fg}] > r;" ;
3681
3682 UParseError pe;
3683 UErrorCode ec = U_ZERO_ERROR;
3684 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3685 if (t == NULL || U_FAILURE(ec)) {
3686 delete t;
3687 errln("FAIL: createFromRules failed");
3688 return;
3689 }
3690
3691 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3692 "y x yz z d gd de gdq gdqfg ddrfg");
3693 delete t;
3694
3695 // Overlapped string test. Make sure that when multiple
3696 // strings can match that the longest one is matched.
3697 rule =
3698 " [a {ab} {abc}] > x;"
3699 " b > y;"
3700 " c > z;"
3701 " q [t {st} {rst}] { e > p;" ;
3702
3703 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3704 if (t == NULL || U_FAILURE(ec)) {
3705 delete t;
3706 errln("FAIL: createFromRules failed");
3707 return;
3708 }
3709
3710 expect(*t, "a ab abc qte qste qrste",
3711 "x x x qtp qstp qrstp");
3712 delete t;
3713 }
3714
3715 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3716 // BEGIN TestUserFunction support factory
3717
// Slot tables shared by _TUFFactory, _TUFReg and _TUFUnreg.
// _TUFF[n] is the prototype transliterator that _TUFFactory clones for
// token n; _TUFID[n] is a heap copy of the ID it was registered under.
// A NULL _TUFF[n] marks slot n as unused.
Transliterator* _TUFF[4];
UnicodeString* _TUFID[4];
3720
_TUFFactory(const UnicodeString &,Transliterator::Token context)3721 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3722 Transliterator::Token context) {
3723 return _TUFF[context.integer]->clone();
3724 }
3725
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3726 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3727 _TUFF[n] = t;
3728 _TUFID[n] = new UnicodeString(ID);
3729 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3730 }
3731
_TUFUnreg(int32_t n)3732 static void _TUFUnreg(int32_t n) {
3733 if (_TUFF[n] != NULL) {
3734 Transliterator::unregister(*_TUFID[n]);
3735 delete _TUFF[n];
3736 delete _TUFID[n];
3737 }
3738 }
3739
3740 // END TestUserFunction support factory
3741 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3742
3743 /**
3744 * Test that user-registered transliterators can be used under function
3745 * syntax.
3746 */
TestUserFunction()3747 void TransliteratorTest::TestUserFunction() {
3748
3749 Transliterator* t;
3750 UParseError pe;
3751 UErrorCode ec = U_ZERO_ERROR;
3752
3753 // Setup our factory
3754 int32_t i;
3755 for (i=0; i<4; ++i) {
3756 _TUFF[i] = NULL;
3757 }
3758
3759 // There's no need to register inverses if we don't use them
3760 t = Transliterator::createFromRules("gif",
3761 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3762 UTRANS_FORWARD, pe, ec);
3763 if (t == NULL || U_FAILURE(ec)) {
3764 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3765 return;
3766 }
3767 _TUFReg("Any-gif", t, 0);
3768
3769 t = Transliterator::createFromRules("RemoveCurly",
3770 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3771 UTRANS_FORWARD, pe, ec);
3772 if (t == NULL || U_FAILURE(ec)) {
3773 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3774 goto FAIL;
3775 }
3776 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3777 _TUFReg("Any-RemoveCurly", t, 1);
3778
3779 logln("Trying &hex");
3780 t = Transliterator::createFromRules("hex2",
3781 "(.) > &hex($1);",
3782 UTRANS_FORWARD, pe, ec);
3783 if (t == NULL || U_FAILURE(ec)) {
3784 errln("FAIL: createFromRules");
3785 goto FAIL;
3786 }
3787 logln("Registering");
3788 _TUFReg("Any-hex2", t, 2);
3789 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3790 if (t == NULL || U_FAILURE(ec)) {
3791 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3792 goto FAIL;
3793 }
3794 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3795 delete t;
3796
3797 logln("Trying &gif");
3798 t = Transliterator::createFromRules("gif2",
3799 "(.) > &Gif(&Hex2($1));",
3800 UTRANS_FORWARD, pe, ec);
3801 if (t == NULL || U_FAILURE(ec)) {
3802 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3803 goto FAIL;
3804 }
3805 logln("Registering");
3806 _TUFReg("Any-gif2", t, 3);
3807 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3808 if (t == NULL || U_FAILURE(ec)) {
3809 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3810 goto FAIL;
3811 }
3812 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3813 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3814 delete t;
3815
3816 // Test that filters are allowed after &
3817 t = Transliterator::createFromRules("test",
3818 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3819 UTRANS_FORWARD, pe, ec);
3820 if (t == NULL || U_FAILURE(ec)) {
3821 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3822 goto FAIL;
3823 }
3824 expect(*t, "abc",
3825 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3826 delete t;
3827
3828 FAIL:
3829 for (i=0; i<4; ++i) {
3830 _TUFUnreg(i);
3831 }
3832 }
3833
3834 /**
3835 * Test the Any-X transliterators.
3836 */
TestAnyX(void)3837 void TransliteratorTest::TestAnyX(void) {
3838 UParseError parseError;
3839 UErrorCode status = U_ZERO_ERROR;
3840 Transliterator* anyLatin =
3841 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3842 if (anyLatin==0) {
3843 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3844 delete anyLatin;
3845 return;
3846 }
3847
3848 expect(*anyLatin,
3849 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3850 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3851
3852 delete anyLatin;
3853 }
3854
3855 /**
3856 * Test Any-X transliterators with sample letters from all scripts.
3857 */
TestAny(void)3858 void TransliteratorTest::TestAny(void) {
3859 UErrorCode status = U_ZERO_ERROR;
3860 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3861 // function call parameters going on in this test.
3862 UnicodeSet alphabetic("[:alphabetic:]", status);
3863 if (U_FAILURE(status)) {
3864 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3865 return;
3866 }
3867 alphabetic.freeze();
3868
3869 UnicodeString testString;
3870 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3871 const char *scriptName = uscript_getShortName((UScriptCode)i);
3872 if (scriptName == NULL) {
3873 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3874 return;
3875 }
3876
3877 UnicodeSet sample;
3878 sample.applyPropertyAlias("script", scriptName, status);
3879 if (U_FAILURE(status)) {
3880 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3881 return;
3882 }
3883 sample.retainAll(alphabetic);
3884 for (int32_t count=0; count<5; count++) {
3885 UChar32 c = sample.charAt(count);
3886 if (c == -1) {
3887 break;
3888 }
3889 testString.append(c);
3890 }
3891 }
3892
3893 UParseError parseError;
3894 Transliterator* anyLatin =
3895 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3896 if (U_FAILURE(status)) {
3897 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3898 return;
3899 }
3900
3901 logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3902 anyLatin->transliterate(testString);
3903 logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3904 delete anyLatin;
3905 }
3906
3907
3908 /**
3909 * Test the source and target set API. These are only implemented
3910 * for RBT and CompoundTransliterator at this time.
3911 */
TestSourceTargetSet()3912 void TransliteratorTest::TestSourceTargetSet() {
3913 UErrorCode ec = U_ZERO_ERROR;
3914
3915 // Rules
3916 const char* r =
3917 "a > b; "
3918 "r [x{lu}] > q;";
3919
3920 // Expected source
3921 UnicodeSet expSrc("[arx{lu}]", ec);
3922
3923 // Expected target
3924 UnicodeSet expTrg("[bq]", ec);
3925
3926 UParseError pe;
3927 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3928
3929 if (U_FAILURE(ec)) {
3930 delete t;
3931 errln("FAIL: Couldn't set up test");
3932 return;
3933 }
3934
3935 UnicodeSet src; t->getSourceSet(src);
3936 UnicodeSet trg; t->getTargetSet(trg);
3937
3938 if (src == expSrc && trg == expTrg) {
3939 UnicodeString a, b;
3940 logln((UnicodeString)"Ok: " +
3941 r + " => source = " + src.toPattern(a, TRUE) +
3942 ", target = " + trg.toPattern(b, TRUE));
3943 } else {
3944 UnicodeString a, b, c, d;
3945 errln((UnicodeString)"FAIL: " +
3946 r + " => source = " + src.toPattern(a, TRUE) +
3947 ", expected " + expSrc.toPattern(b, TRUE) +
3948 "; target = " + trg.toPattern(c, TRUE) +
3949 ", expected " + expTrg.toPattern(d, TRUE));
3950 }
3951
3952 delete t;
3953 }
3954
3955 /**
3956 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3957 */
TestRuleWhitespace()3958 void TransliteratorTest::TestRuleWhitespace() {
3959 // Rules
3960 const char* r = "a > \\u200E b;";
3961
3962 UErrorCode ec = U_ZERO_ERROR;
3963 UParseError pe;
3964 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3965
3966 if (U_FAILURE(ec)) {
3967 errln("FAIL: Couldn't set up test");
3968 } else {
3969 expect(*t, "a", "b");
3970 }
3971 delete t;
3972
3973 // UnicodeSet
3974 ec = U_ZERO_ERROR;
3975 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3976
3977 if (U_FAILURE(ec)) {
3978 errln("FAIL: Couldn't set up test");
3979 } else {
3980 if (set.contains(0x200E)) {
3981 errln("FAIL: U+200E not being ignored by UnicodeSet");
3982 }
3983 }
3984 }
3985 //======================================================================
3986 // this method is in TestUScript.java
3987 //======================================================================
TestAllCodepoints()3988 void TransliteratorTest::TestAllCodepoints(){
3989 UScriptCode code= USCRIPT_INVALID_CODE;
3990 char id[256]={'\0'};
3991 char abbr[256]={'\0'};
3992 char newId[256]={'\0'};
3993 char newAbbrId[256]={'\0'};
3994 char oldId[256]={'\0'};
3995 char oldAbbrId[256]={'\0'};
3996
3997 UErrorCode status =U_ZERO_ERROR;
3998 UParseError pe;
3999
4000 for(uint32_t i = 0; i<=0x10ffff; i++){
4001 code = uscript_getScript(i,&status);
4002 if(code == USCRIPT_INVALID_CODE){
4003 errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
4004 }
4005 const char* myId = uscript_getName(code);
4006 if(!myId) {
4007 dataerrln("Valid script code returned NULL name. Check your data!");
4008 return;
4009 }
4010 uprv_strcpy(id,myId);
4011 uprv_strcpy(abbr,uscript_getShortName(code));
4012
4013 uprv_strcpy(newId,"[:");
4014 uprv_strcat(newId,id);
4015 uprv_strcat(newId,":];NFD");
4016
4017 uprv_strcpy(newAbbrId,"[:");
4018 uprv_strcat(newAbbrId,abbr);
4019 uprv_strcat(newAbbrId,":];NFD");
4020
4021 if(uprv_strcmp(newId,oldId)!=0){
4022 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4023 if(t==NULL || U_FAILURE(status)){
4024 errln((UnicodeString)"FAIL: Could not create " + id);
4025 }
4026 delete t;
4027 }
4028 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4029 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4030 if(t==NULL || U_FAILURE(status)){
4031 errln((UnicodeString)"FAIL: Could not create " + id);
4032 }
4033 delete t;
4034 }
4035 uprv_strcpy(oldId,newId);
4036 uprv_strcpy(oldAbbrId, newAbbrId);
4037
4038 }
4039
4040 }
4041
/*
 * TEST_TRANSLIT_ID(id, cls): creates a forward transliterator from `id` and
 * reports an error if its dynamic class ID differs from cls's static class
 * ID. A creation failure is reported through dataerrln(). The object is
 * deleted before the macro's block ends.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode idStatus = U_ZERO_ERROR; \
    Transliterator* byId = Transliterator::createInstance(id, UTRANS_FORWARD, idStatus); \
    if (U_SUCCESS(idStatus)) { \
        /* *byId = *byId; */ /*can't do this: coverage test for assignment op*/ \
        if (byId->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
    } else { \
        dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(idStatus)); \
    } \
    delete byId; \
}
4055
/*
 * TEST_TRANSLIT_RULE(rule, cls): builds a forward transliterator named "_"
 * from the string literal `rule` and reports an error if its dynamic class
 * ID differs from cls's static class ID. `rule` must be a string literal
 * because it is concatenated into the failure message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode ruleStatus = U_ZERO_ERROR; \
    UParseError ruleParseError; \
    Transliterator* fromRule = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, ruleParseError, ruleStatus); \
    if (U_SUCCESS(ruleStatus)) { \
        /* *fromRule = *fromRule; */ /*can't do this: coverage test for assignment op*/ \
        if (fromRule->getDynamicClassID() != cls ::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
    } else { \
        errln("FAIL: Couldn't create " rule); \
    } \
    delete fromRule; \
}
4070
TestBoilerplate()4071 void TransliteratorTest::TestBoilerplate() {
4072 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4073 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4074 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4075 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4076 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4077 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4078 TEST_TRANSLIT_ID("Null", NullTransliterator);
4079 TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4080 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4081 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4082 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4083 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4084 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4085 }
4086
TestAlternateSyntax()4087 void TransliteratorTest::TestAlternateSyntax() {
4088 // U+2206 == &
4089 // U+2190 == <
4090 // U+2192 == >
4091 // U+2194 == <>
4092 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4093 "abc",
4094 "xbz");
4095 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4096 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4097 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4098 }
4099
4100 static const char* BEGIN_END_RULES[] = {
4101 // [0]
4102 "abc > xy;"
4103 "aba > z;",
4104
4105 // [1]
4106 /*
4107 "::BEGIN;"
4108 "abc > xy;"
4109 "::END;"
4110 "::BEGIN;"
4111 "aba > z;"
4112 "::END;",
4113 */
4114 "", // test case commented out below, this is here to keep from messing up the indexes
4115
4116 // [2]
4117 /*
4118 "abc > xy;"
4119 "::BEGIN;"
4120 "aba > z;"
4121 "::END;",
4122 */
4123 "", // test case commented out below, this is here to keep from messing up the indexes
4124
4125 // [3]
4126 /*
4127 "::BEGIN;"
4128 "abc > xy;"
4129 "::END;"
4130 "aba > z;",
4131 */
4132 "", // test case commented out below, this is here to keep from messing up the indexes
4133
4134 // [4]
4135 "abc > xy;"
4136 "::Null;"
4137 "aba > z;",
4138
4139 // [5]
4140 "::Upper;"
4141 "ABC > xy;"
4142 "AB > x;"
4143 "C > z;"
4144 "::Upper;"
4145 "XYZ > p;"
4146 "XY > q;"
4147 "Z > r;"
4148 "::Upper;",
4149
4150 // [6]
4151 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4152 "$delim = [\\-$ws];"
4153 "$ws $delim* > ' ';"
4154 "'-' $delim* > '-';",
4155
4156 // [7]
4157 "::Null;"
4158 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4159 "$delim = [\\-$ws];"
4160 "$ws $delim* > ' ';"
4161 "'-' $delim* > '-';",
4162
4163 // [8]
4164 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4165 "$delim = [\\-$ws];"
4166 "$ws $delim* > ' ';"
4167 "'-' $delim* > '-';"
4168 "::Null;",
4169
4170 // [9]
4171 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4172 "$delim = [\\-$ws];"
4173 "::Null;"
4174 "$ws $delim* > ' ';"
4175 "'-' $delim* > '-';",
4176
4177 // [10]
4178 /*
4179 "::BEGIN;"
4180 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4181 "$delim = [\\-$ws];"
4182 "::END;"
4183 "$ws $delim* > ' ';"
4184 "'-' $delim* > '-';",
4185 */
4186 "", // test case commented out below, this is here to keep from messing up the indexes
4187
4188 // [11]
4189 /*
4190 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4191 "$delim = [\\-$ws];"
4192 "::BEGIN;"
4193 "$ws $delim* > ' ';"
4194 "'-' $delim* > '-';"
4195 "::END;",
4196 */
4197 "", // test case commented out below, this is here to keep from messing up the indexes
4198
4199 // [12]
4200 /*
4201 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4202 "$delim = [\\-$ws];"
4203 "$ab = [ab];"
4204 "::BEGIN;"
4205 "$ws $delim* > ' ';"
4206 "'-' $delim* > '-';"
4207 "::END;"
4208 "::BEGIN;"
4209 "$ab { ' ' } $ab > '-';"
4210 "c { ' ' > ;"
4211 "::END;"
4212 "::BEGIN;"
4213 "'a-a' > a\\%|a;"
4214 "::END;",
4215 */
4216 "", // test case commented out below, this is here to keep from messing up the indexes
4217
4218 // [13]
4219 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4220 "$delim = [\\-$ws];"
4221 "$ab = [ab];"
4222 "::Null;"
4223 "$ws $delim* > ' ';"
4224 "'-' $delim* > '-';"
4225 "::Null;"
4226 "$ab { ' ' } $ab > '-';"
4227 "c { ' ' > ;"
4228 "::Null;"
4229 "'a-a' > a\\%|a;",
4230
4231 // [14]
4232 /*
4233 "::[abc];"
4234 "::BEGIN;"
4235 "abc > xy;"
4236 "::END;"
4237 "::BEGIN;"
4238 "aba > yz;"
4239 "::END;"
4240 "::Upper;",
4241 */
4242 "", // test case commented out below, this is here to keep from messing up the indexes
4243
4244 // [15]
4245 "::[abc];"
4246 "abc > xy;"
4247 "::Null;"
4248 "aba > yz;"
4249 "::Upper;",
4250
4251 // [16]
4252 /*
4253 "::[abc];"
4254 "::BEGIN;"
4255 "abc <> xy;"
4256 "::END;"
4257 "::BEGIN;"
4258 "aba <> yz;"
4259 "::END;"
4260 "::Upper(Lower);"
4261 "::([XYZ]);"
4262 */
4263 "", // test case commented out below, this is here to keep from messing up the indexes
4264
4265 // [17]
4266 "::[abc];"
4267 "abc <> xy;"
4268 "::Null;"
4269 "aba <> yz;"
4270 "::Upper(Lower);"
4271 "::([XYZ]);"
4272 };
4273 static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4274
4275 /*
4276 (This entire test is commented out below and will need some heavy revision when we re-add
4277 the ::BEGIN/::END stuff)
4278 static const char* BOGUS_BEGIN_END_RULES[] = {
4279 // [7]
4280 "::BEGIN;"
4281 "abc > xy;"
4282 "::BEGIN;"
4283 "aba > z;"
4284 "::END;"
4285 "::END;",
4286
4287 // [8]
4288 "abc > xy;"
4289 " aba > z;"
4290 "::END;",
4291
4292 // [9]
4293 "::BEGIN;"
4294 "::Upper;"
4295 "::END;"
4296 };
4297 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4298 */
4299
4300 static const char* BEGIN_END_TEST_CASES[] = {
4301 // rules input expected output
4302 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
4303 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4304 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4305 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4306 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
4307 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
4308
4309 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
4310 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
4311 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
4312 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
4313 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4314 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4315 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4316 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4317 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4318 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
4319 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
4320 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
4321
4322 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4323 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4324 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4325 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4326 };
4327 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4328
TestBeginEnd()4329 void TransliteratorTest::TestBeginEnd() {
4330 // run through the list of test cases above
4331 int32_t i = 0;
4332 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4333 expect((UnicodeString)"Test case #" + (i / 3),
4334 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4335 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4336 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4337 }
4338
4339 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4340 UParseError parseError;
4341 UErrorCode status = U_ZERO_ERROR;
4342 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4343 UTRANS_REVERSE, parseError, status);
4344 if (reversed == 0 || U_FAILURE(status)) {
4345 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4346 } else {
4347 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4348 }
4349 delete reversed;
4350
4351 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4352 // that all of them cause errors
4353 /*
4354 (commented out until we have the real ::BEGIN/::END stuff in place
4355 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4356 UParseError parseError;
4357 UErrorCode status = U_ZERO_ERROR;
4358 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4359 UTRANS_FORWARD, parseError, status);
4360 if (!U_FAILURE(status)) {
4361 delete t;
4362 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4363 }
4364 }
4365 */
4366 }
4367
TestBeginEndToRules()4368 void TransliteratorTest::TestBeginEndToRules() {
4369 // run through the same list of test cases we used above, but this time, instead of just
4370 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4371 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4372 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4373 // to (i.e., does the same thing as) the original rule set
4374 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4375 UParseError parseError;
4376 UErrorCode status = U_ZERO_ERROR;
4377 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4378 UTRANS_FORWARD, parseError, status);
4379 if (U_FAILURE(status)) {
4380 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4381 } else {
4382 UnicodeString rules;
4383 t->toRules(rules, TRUE);
4384 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4385 UTRANS_FORWARD, parseError, status);
4386 if (U_FAILURE(status)) {
4387 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4388 parseError, status);
4389 delete t;
4390 } else {
4391 expect(*t2,
4392 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4393 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4394 delete t;
4395 delete t2;
4396 }
4397 }
4398 }
4399
4400 // do the same thing for the reversible test case
4401 UParseError parseError;
4402 UErrorCode status = U_ZERO_ERROR;
4403 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4404 UTRANS_REVERSE, parseError, status);
4405 if (U_FAILURE(status)) {
4406 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4407 } else {
4408 UnicodeString rules;
4409 reversed->toRules(rules, FALSE);
4410 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4411 parseError, status);
4412 if (U_FAILURE(status)) {
4413 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4414 parseError, status);
4415 delete reversed;
4416 } else {
4417 expect(*reversed2,
4418 UnicodeString("xy XY XYZ yz YZ"),
4419 UnicodeString("xy abc xaba yz aba"));
4420 delete reversed;
4421 delete reversed2;
4422 }
4423 }
4424 }
4425
TestRegisterAlias()4426 void TransliteratorTest::TestRegisterAlias() {
4427 UnicodeString longID("Lower;[aeiou]Upper");
4428 UnicodeString shortID("Any-CapVowels");
4429 UnicodeString reallyShortID("CapVowels");
4430
4431 Transliterator::registerAlias(shortID, longID);
4432
4433 UErrorCode err = U_ZERO_ERROR;
4434 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4435 if (U_FAILURE(err)) {
4436 errln("Failed to instantiate transliterator with long ID");
4437 Transliterator::unregister(shortID);
4438 return;
4439 }
4440 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4441 if (U_FAILURE(err)) {
4442 errln("Failed to instantiate transliterator with short ID");
4443 delete t1;
4444 Transliterator::unregister(shortID);
4445 return;
4446 }
4447
4448 if (t1->getID() != longID)
4449 errln("Transliterator instantiated with long ID doesn't have long ID");
4450 if (t2->getID() != reallyShortID)
4451 errln("Transliterator instantiated with short ID doesn't have short ID");
4452
4453 UnicodeString rules1;
4454 UnicodeString rules2;
4455
4456 t1->toRules(rules1, TRUE);
4457 t2->toRules(rules2, TRUE);
4458 if (rules1 != rules2)
4459 errln("Alias transliterators aren't the same");
4460
4461 delete t1;
4462 delete t2;
4463 Transliterator::unregister(shortID);
4464
4465 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4466 if (U_SUCCESS(err)) {
4467 errln("Instantiation with short ID succeeded after short ID was unregistered");
4468 delete t1;
4469 }
4470
4471 // try the same thing again, but this time with something other than
4472 // an instance of CompoundTransliterator
4473 UnicodeString realID("Latin-Greek");
4474 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4475 Transliterator::registerAlias(fakeID, realID);
4476
4477 err = U_ZERO_ERROR;
4478 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4479 if (U_FAILURE(err)) {
4480 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4481 Transliterator::unregister(realID);
4482 return;
4483 }
4484 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4485 if (U_FAILURE(err)) {
4486 errln("Failed to instantiate transliterator with fake ID");
4487 delete t1;
4488 Transliterator::unregister(realID);
4489 return;
4490 }
4491
4492 t1->toRules(rules1, TRUE);
4493 t2->toRules(rules2, TRUE);
4494 if (rules1 != rules2)
4495 errln("Alias transliterators aren't the same");
4496
4497 delete t1;
4498 delete t2;
4499 Transliterator::unregister(fakeID);
4500 }
4501
TestRuleStripping()4502 void TransliteratorTest::TestRuleStripping() {
4503 /*
4504 #
4505 \uE001>\u0C01; # SIGN
4506 */
4507 static const UChar rule[] = {
4508 0x0023,0x0020,0x000D,0x000A,
4509 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4510 };
4511 static const UChar expectedRule[] = {
4512 0xE001,0x003E,0x0C01,0x003B,0
4513 };
4514 UChar result[sizeof(rule)/sizeof(rule[0])];
4515 UErrorCode status = U_ZERO_ERROR;
4516 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4517 if (len != u_strlen(expectedRule)) {
4518 errln("utrans_stripRules return len = %d", len);
4519 }
4520 if (u_strncmp(expectedRule, result, len) != 0) {
4521 errln("utrans_stripRules did not return expected string");
4522 }
4523 }
4524
4525 /**
4526 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4527 */
TestHalfwidthFullwidth(void)4528 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4529 UParseError parseError;
4530 UErrorCode status = U_ZERO_ERROR;
4531 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4532 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4533 if (hf == 0 || fh == 0) {
4534 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4535 delete hf;
4536 delete fh;
4537 return;
4538 }
4539
4540 // Array of 2n items
4541 // Each item is
4542 // "hf"|"fh"|"both",
4543 // <Halfwidth>,
4544 // <Fullwidth>
4545 const char* DATA[] = {
4546 "both",
4547 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4548 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4549 };
4550 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4551
4552 for (int32_t i=0; i<DATA_length; i+=3) {
4553 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4554 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4555 switch (*DATA[i]) {
4556 case 0x68: //'h': // Halfwidth-Fullwidth only
4557 expect(*hf, h, f);
4558 break;
4559 case 0x66: //'f': // Fullwidth-Halfwidth only
4560 expect(*fh, f, h);
4561 break;
4562 case 0x62: //'b': // both directions
4563 expect(*hf, h, f);
4564 expect(*fh, f, h);
4565 break;
4566 }
4567 }
4568 delete hf;
4569 delete fh;
4570 }
4571
4572
4573 /**
4574 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4575 * TODO: confirm that the expected results are correct.
4576 * For now, test just confirms that C++ and Java give identical results.
4577 */
TestThai(void)4578 void TransliteratorTest::TestThai(void) {
4579 UParseError parseError;
4580 UErrorCode status = U_ZERO_ERROR;
4581 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4582 if (tr == 0) {
4583 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4584 return;
4585 }
4586 if (U_FAILURE(status)) {
4587 errln("FAIL: createInstance failed with %s", u_errorName(status));
4588 return;
4589 }
4590 const char *thaiText =
4591 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4592 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4593 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4594 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4595 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4596 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4597 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4598 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4599 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4600 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4601 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4602 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4603 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4604 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4605 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4606 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4607 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4608 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4609 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4610 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4611 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4612 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4613 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4614 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4615 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4616 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4617 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4618 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4619 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4620 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4621
4622 const char *latinText =
4623 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4624 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4625 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4626 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4627 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4628 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4629 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4630 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4631 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4632 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4633 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4634 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4635 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4636 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4637 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4638 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4639 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4640 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4641
4642
4643 UnicodeString xlitText(thaiText);
4644 xlitText = xlitText.unescape();
4645 tr->transliterate(xlitText);
4646
4647 UnicodeString expectedText(latinText);
4648 expectedText = expectedText.unescape();
4649 expect(*tr, xlitText, expectedText);
4650
4651 delete tr;
4652 }
4653
4654
4655 //======================================================================
4656 // Support methods
4657 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4658 void TransliteratorTest::expectT(const UnicodeString& id,
4659 const UnicodeString& source,
4660 const UnicodeString& expectedResult) {
4661 UErrorCode ec = U_ZERO_ERROR;
4662 UParseError pe;
4663 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4664 if (U_FAILURE(ec)) {
4665 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
4666 delete t;
4667 return;
4668 }
4669 expect(*t, source, expectedResult);
4670 delete t;
4671 }
4672
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4673 void TransliteratorTest::reportParseError(const UnicodeString& message,
4674 const UParseError& parseError,
4675 const UErrorCode& status) {
4676 dataerrln(message +
4677 /*", parse error " + parseError.code +*/
4678 ", line " + parseError.line +
4679 ", offset " + parseError.offset +
4680 ", pre-context " + prettify(parseError.preContext, TRUE) +
4681 ", post-context " + prettify(parseError.postContext,TRUE) +
4682 ", Error: " + u_errorName(status));
4683 }
4684
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4685 void TransliteratorTest::expect(const UnicodeString& rules,
4686 const UnicodeString& source,
4687 const UnicodeString& expectedResult,
4688 UTransPosition *pos) {
4689 expect("<ID>", rules, source, expectedResult, pos);
4690 }
4691
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4692 void TransliteratorTest::expect(const UnicodeString& id,
4693 const UnicodeString& rules,
4694 const UnicodeString& source,
4695 const UnicodeString& expectedResult,
4696 UTransPosition *pos) {
4697 UErrorCode status = U_ZERO_ERROR;
4698 UParseError parseError;
4699 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4700 if (U_FAILURE(status)) {
4701 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4702 } else {
4703 expect(*t, source, expectedResult, pos);
4704 }
4705 delete t;
4706 }
4707
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4708 void TransliteratorTest::expect(const Transliterator& t,
4709 const UnicodeString& source,
4710 const UnicodeString& expectedResult,
4711 const Transliterator& reverseTransliterator) {
4712 expect(t, source, expectedResult);
4713 expect(reverseTransliterator, expectedResult, source);
4714 }
4715
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4716 void TransliteratorTest::expect(const Transliterator& t,
4717 const UnicodeString& source,
4718 const UnicodeString& expectedResult,
4719 UTransPosition *pos) {
4720 if (pos == 0) {
4721 UnicodeString result(source);
4722 t.transliterate(result);
4723 expectAux(t.getID() + ":String", source, result, expectedResult);
4724 }
4725 UTransPosition index={0, 0, 0, 0};
4726 if (pos != 0) {
4727 index = *pos;
4728 }
4729
4730 UnicodeString rsource(source);
4731 if (pos == 0) {
4732 t.transliterate(rsource);
4733 } else {
4734 // Do it all at once -- below we do it incrementally
4735 t.finishTransliteration(rsource, *pos);
4736 }
4737 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4738
4739 // Test keyboard (incremental) transliteration -- this result
4740 // must be the same after we finalize (see below).
4741 UnicodeString log;
4742 rsource.remove();
4743 if (pos != 0) {
4744 rsource = source;
4745 formatInput(log, rsource, index);
4746 log.append(" -> ");
4747 UErrorCode status = U_ZERO_ERROR;
4748 t.transliterate(rsource, index, status);
4749 formatInput(log, rsource, index);
4750 } else {
4751 for (int32_t i=0; i<source.length(); ++i) {
4752 if (i != 0) {
4753 log.append(" + ");
4754 }
4755 log.append(source.charAt(i)).append(" -> ");
4756 UErrorCode status = U_ZERO_ERROR;
4757 t.transliterate(rsource, index, source.charAt(i), status);
4758 formatInput(log, rsource, index);
4759 }
4760 }
4761
4762 // As a final step in keyboard transliteration, we must call
4763 // transliterate to finish off any pending partial matches that
4764 // were waiting for more input.
4765 t.finishTransliteration(rsource, index);
4766 log.append(" => ").append(rsource);
4767
4768 expectAux(t.getID() + ":Keyboard", log,
4769 rsource == expectedResult,
4770 expectedResult);
4771 }
4772
4773
4774 /**
4775 * @param appendTo result is appended to this param.
4776 * @param input the string being transliterated
4777 * @param pos the index struct
4778 */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4779 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4780 const UnicodeString& input,
4781 const UTransPosition& pos) {
4782 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4783 // the {} indicate the context start and limit, and the ||
4784 // indicate the start and limit.
4785 if (0 <= pos.contextStart &&
4786 pos.contextStart <= pos.start &&
4787 pos.start <= pos.limit &&
4788 pos.limit <= pos.contextLimit &&
4789 pos.contextLimit <= input.length()) {
4790
4791 UnicodeString a, b, c, d, e;
4792 input.extractBetween(0, pos.contextStart, a);
4793 input.extractBetween(pos.contextStart, pos.start, b);
4794 input.extractBetween(pos.start, pos.limit, c);
4795 input.extractBetween(pos.limit, pos.contextLimit, d);
4796 input.extractBetween(pos.contextLimit, input.length(), e);
4797 appendTo.append(a).append((UChar)123/*{*/).append(b).
4798 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4799 append((UChar)125/*}*/).append(e);
4800 } else {
4801 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4802 pos.contextStart + ", s=" + pos.start + ", l=" +
4803 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4804 input);
4805 }
4806 return appendTo;
4807 }
4808
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4809 void TransliteratorTest::expectAux(const UnicodeString& tag,
4810 const UnicodeString& source,
4811 const UnicodeString& result,
4812 const UnicodeString& expectedResult) {
4813 expectAux(tag, source + " -> " + result,
4814 result == expectedResult,
4815 expectedResult);
4816 }
4817
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4818 void TransliteratorTest::expectAux(const UnicodeString& tag,
4819 const UnicodeString& summary, UBool pass,
4820 const UnicodeString& expectedResult) {
4821 if (pass) {
4822 logln(UnicodeString("(")+tag+") " + prettify(summary));
4823 } else {
4824 dataerrln(UnicodeString("FAIL: (")+tag+") "
4825 + prettify(summary)
4826 + ", expected " + prettify(expectedResult));
4827 }
4828 }
4829
4830 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4831