1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/10/99 aliu Creation.
10 **********************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_TRANSLITERATION
16
17 #include "transtst.h"
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
29 #include "cpdtrans.h"
30 #include "nultrans.h"
31 #include "rbt.h"
32 #include "rbt_pars.h"
33 #include "anytrans.h"
34 #include "esctrn.h"
35 #include "name2uni.h"
36 #include "nortrans.h"
37 #include "remtrans.h"
38 #include "titletrn.h"
39 #include "tolowtrn.h"
40 #include "toupptrn.h"
41 #include "unesctrn.h"
42 #include "uni2name.h"
43 #include "cstring.h"
44 #include "cmemory.h"
45 #include <stdio.h>
46
47 /***********************************************************************
48
49 HOW TO USE THIS TEST FILE
50 -or-
51 How I developed on two platforms
52 without losing (too much of) my mind
53
54
55 1. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
59
60 2. Make liberal use of the expect() method; it is your friend.
61
62 3. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
64
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
67
68 ==> THIS IS THE IMPORTANT PART <==
69
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
74
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are
77 labeled
78
79 "icu4j ONLY"
80
81 or
82
83 "icu4c ONLY"
84
85 Make sure you document the reason the test is here and not there.
86
87
88 Thank you.
89 The Management
90 ***********************************************************************/
91
92 // Define character constants thusly to be EBCDIC-friendly
93 enum {
94 LEFT_BRACE=((UChar)0x007B), /*{*/
95 PIPE =((UChar)0x007C), /*|*/
96 ZERO =((UChar)0x0030), /*0*/
97 UPPER_A =((UChar)0x0041) /*A*/
98 };
99
TransliteratorTest()100 TransliteratorTest::TransliteratorTest()
101 : DESERET_DEE((UChar32)0x10414),
102 DESERET_dee((UChar32)0x1043C)
103 {
104 }
105
~TransliteratorTest()106 TransliteratorTest::~TransliteratorTest() {}
107
108 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
110 const char* &name, char* /*par*/) {
111 switch (index) {
112 TESTCASE(0,TestInstantiation);
113 TESTCASE(1,TestSimpleRules);
114 TESTCASE(2,TestRuleBasedInverse);
115 TESTCASE(3,TestKeyboard);
116 TESTCASE(4,TestKeyboard2);
117 TESTCASE(5,TestKeyboard3);
118 TESTCASE(6,TestArabic);
119 TESTCASE(7,TestCompoundKana);
120 TESTCASE(8,TestCompoundHex);
121 TESTCASE(9,TestFiltering);
122 TESTCASE(10,TestInlineSet);
123 TESTCASE(11,TestPatternQuoting);
124 TESTCASE(12,TestJ277);
125 TESTCASE(13,TestJ243);
126 TESTCASE(14,TestJ329);
127 TESTCASE(15,TestSegments);
128 TESTCASE(16,TestCursorOffset);
129 TESTCASE(17,TestArbitraryVariableValues);
130 TESTCASE(18,TestPositionHandling);
131 TESTCASE(19,TestHiraganaKatakana);
132 TESTCASE(20,TestCopyJ476);
133 TESTCASE(21,TestAnchors);
134 TESTCASE(22,TestInterIndic);
135 TESTCASE(23,TestFilterIDs);
136 TESTCASE(24,TestCaseMap);
137 TESTCASE(25,TestNameMap);
138 TESTCASE(26,TestLiberalizedID);
139 TESTCASE(27,TestCreateInstance);
140 TESTCASE(28,TestNormalizationTransliterator);
141 TESTCASE(29,TestCompoundRBT);
142 TESTCASE(30,TestCompoundFilter);
143 TESTCASE(31,TestRemove);
144 TESTCASE(32,TestToRules);
145 TESTCASE(33,TestContext);
146 TESTCASE(34,TestSupplemental);
147 TESTCASE(35,TestQuantifier);
148 TESTCASE(36,TestSTV);
149 TESTCASE(37,TestCompoundInverse);
150 TESTCASE(38,TestNFDChainRBT);
151 TESTCASE(39,TestNullInverse);
152 TESTCASE(40,TestAliasInverseID);
153 TESTCASE(41,TestCompoundInverseID);
154 TESTCASE(42,TestUndefinedVariable);
155 TESTCASE(43,TestEmptyContext);
156 TESTCASE(44,TestCompoundFilterID);
157 TESTCASE(45,TestPropertySet);
158 TESTCASE(46,TestNewEngine);
159 TESTCASE(47,TestQuantifiedSegment);
160 TESTCASE(48,TestDevanagariLatinRT);
161 TESTCASE(49,TestTeluguLatinRT);
162 TESTCASE(50,TestCompoundLatinRT);
163 TESTCASE(51,TestSanskritLatinRT);
164 TESTCASE(52,TestLocaleInstantiation);
165 TESTCASE(53,TestTitleAccents);
166 TESTCASE(54,TestLocaleResource);
167 TESTCASE(55,TestParseError);
168 TESTCASE(56,TestOutputSet);
169 TESTCASE(57,TestVariableRange);
170 TESTCASE(58,TestInvalidPostContext);
171 TESTCASE(59,TestIDForms);
172 TESTCASE(60,TestToRulesMark);
173 TESTCASE(61,TestEscape);
174 TESTCASE(62,TestAnchorMasking);
175 TESTCASE(63,TestDisplayName);
176 TESTCASE(64,TestSpecialCases);
177 #if !UCONFIG_NO_FILE_IO
178 TESTCASE(65,TestIncrementalProgress);
179 #endif
180 TESTCASE(66,TestSurrogateCasing);
181 TESTCASE(67,TestFunction);
182 TESTCASE(68,TestInvalidBackRef);
183 TESTCASE(69,TestMulticharStringSet);
184 TESTCASE(70,TestUserFunction);
185 TESTCASE(71,TestAnyX);
186 TESTCASE(72,TestSourceTargetSet);
187 TESTCASE(73,TestGurmukhiDevanagari);
188 TESTCASE(74,TestPatternWhiteSpace);
189 TESTCASE(75,TestAllCodepoints);
190 TESTCASE(76,TestBoilerplate);
191 TESTCASE(77,TestAlternateSyntax);
192 TESTCASE(78,TestBeginEnd);
193 TESTCASE(79,TestBeginEndToRules);
194 TESTCASE(80,TestRegisterAlias);
195 TESTCASE(81,TestRuleStripping);
196 TESTCASE(82,TestHalfwidthFullwidth);
197 TESTCASE(83,TestThai);
198 TESTCASE(84,TestAny);
199 default: name = ""; break;
200 }
201 }
202
203 /**
204 * Make sure every system transliterator can be instantiated.
205 *
206 * ALSO test that the result of toRules() for each rule is a valid
207 * rule. Do this here so we don't have to have another test that
208 * instantiates everything as well.
209 */
TestInstantiation()210 void TransliteratorTest::TestInstantiation() {
211 UErrorCode ec = U_ZERO_ERROR;
212 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
213 assertSuccess("getAvailableIDs()", ec);
214 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
215 int32_t n = Transliterator::countAvailableIDs();
216 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217 avail->count(ec) == n);
218 assertSuccess("count()", ec);
219 UnicodeString name;
220 for (int32_t i=0; i<n; ++i) {
221 const UnicodeString& id = *avail->snext(ec);
222 if (!assertSuccess("snext()", ec) ||
223 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
224 break;
225 }
226 UnicodeString id2 = Transliterator::getAvailableID(i);
227 if (id.length() < 1) {
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i + ") returned empty string");
230 continue;
231 }
232 if (id != id2) {
233 errln(UnicodeString("FAIL: getAvailableID(") +
234 i + ") != getAvailableIDs().snext()");
235 continue;
236 }
237 UParseError parseError;
238 UErrorCode status = U_ZERO_ERROR;
239 Transliterator* t = Transliterator::createInstance(id,
240 UTRANS_FORWARD, parseError,status);
241 name.truncate(0);
242 Transliterator::getDisplayName(id, name);
243 if (t == 0) {
244 #if UCONFIG_NO_BREAK_ITERATION
245 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246 if (id.compare((UnicodeString)"Thai-Latin") != 0)
247 #endif
248 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
249 /*", parse error " + parseError.code +*/
250 ", line " + parseError.line +
251 ", offset " + parseError.offset +
252 ", pre-context " + prettify(parseError.preContext, TRUE) +
253 ", post-context " +prettify(parseError.postContext,TRUE) +
254 ", Error: " + u_errorName(status));
255 // When createInstance fails, it deletes the failing
256 // entry from the available ID list. We detect this
257 // here by looking for a change in countAvailableIDs.
258 int32_t nn = Transliterator::countAvailableIDs();
259 if (nn == (n - 1)) {
260 n = nn;
261 --i; // Compensate for deleted entry
262 }
263 } else {
264 logln(UnicodeString("OK: ") + name + " (" + id + ")");
265
266 // Now test toRules
267 UnicodeString rules;
268 t->toRules(rules, TRUE);
269 Transliterator *u = Transliterator::createFromRules("x",
270 rules, UTRANS_FORWARD, parseError,status);
271 if (u == 0) {
272 errln(UnicodeString("FAIL: ") + id +
273 ".createFromRules() => bad rules" +
274 /*", parse error " + parseError.code +*/
275 ", line " + parseError.line +
276 ", offset " + parseError.offset +
277 ", context " + prettify(parseError.preContext, TRUE) +
278 ", rules: " + prettify(rules, TRUE));
279 } else {
280 delete u;
281 }
282 delete t;
283 }
284 }
285 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
286 assertSuccess("snext()", ec);
287 delete avail;
288
289 // Now test the failure path
290 UParseError parseError;
291 UErrorCode status = U_ZERO_ERROR;
292 UnicodeString id("<Not a valid Transliterator ID>");
293 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
294 if (t != 0) {
295 errln("FAIL: " + id + " returned a transliterator");
296 delete t;
297 } else {
298 logln("OK: Bogus ID handled properly");
299 }
300 }
301
TestSimpleRules(void)302 void TransliteratorTest::TestSimpleRules(void) {
303 /* Example: rules 1. ab>x|y
304 * 2. yc>z
305 *
306 * []|eabcd start - no match, copy e to tranlated buffer
307 * [e]|abcd match rule 1 - copy output & adjust cursor
308 * [ex|y]cd match rule 2 - copy output & adjust cursor
309 * [exz]|d no match, copy d to transliterated buffer
310 * [exzd]| done
311 */
312 expect(UnicodeString("ab>x|y;", "") +
313 "yc>z",
314 "eabcd", "exzd");
315
316 /* Another set of rules:
317 * 1. ab>x|yzacw
318 * 2. za>q
319 * 3. qc>r
320 * 4. cw>n
321 *
322 * []|ab Rule 1
323 * [x|yzacw] No match
324 * [xy|zacw] Rule 2
325 * [xyq|cw] Rule 4
326 * [xyqn]| Done
327 */
328 expect(UnicodeString("ab>x|yzacw;") +
329 "za>q;" +
330 "qc>r;" +
331 "cw>n",
332 "ab", "xyqn");
333
334 /* Test categories
335 */
336 UErrorCode status = U_ZERO_ERROR;
337 UParseError parseError;
338 Transliterator *t = Transliterator::createFromRules(
339 "<ID>",
340 UnicodeString("$dummy=").append((UChar)0xE100) +
341 UnicodeString(";"
342 "$vowel=[aeiouAEIOU];"
343 "$lu=[:Lu:];"
344 "$vowel } $lu > '!';"
345 "$vowel > '&';"
346 "'!' { $lu > '^';"
347 "$lu > '*';"
348 "a > ERROR", ""),
349 UTRANS_FORWARD, parseError,
350 status);
351 if (U_FAILURE(status)) {
352 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
353 return;
354 }
355 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
356 delete t;
357 }
358
359 /**
360 * Test inline set syntax and set variable syntax.
361 */
TestInlineSet(void)362 void TransliteratorTest::TestInlineSet(void) {
363 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
364 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
365
366 expect(UnicodeString(
367 "$digit = [0-9];"
368 "$alpha = [a-zA-Z];"
369 "$alphanumeric = [$digit $alpha];" // ***
370 "$special = [^$alphanumeric];" // ***
371 "$alphanumeric > '-';"
372 "$special > '*';", ""),
373
374 "thx-1138", "---*----");
375 }
376
377 /**
378 * Create some inverses and confirm that they work. We have to be
379 * careful how we do this, since the inverses will not be true
380 * inverses -- we can't throw any random string at the composition
381 * of the transliterators and expect the identity function. F x
382 * F' != I. However, if we are careful about the input, we will
383 * get the expected results.
384 */
TestRuleBasedInverse(void)385 void TransliteratorTest::TestRuleBasedInverse(void) {
386 UnicodeString RULES =
387 UnicodeString("abc>zyx;") +
388 "ab>yz;" +
389 "bc>zx;" +
390 "ca>xy;" +
391 "a>x;" +
392 "b>y;" +
393 "c>z;" +
394
395 "abc<zyx;" +
396 "ab<yz;" +
397 "bc<zx;" +
398 "ca<xy;" +
399 "a<x;" +
400 "b<y;" +
401 "c<z;" +
402
403 "";
404
405 const char* DATA[] = {
406 // Careful here -- random strings will not work. If we keep
407 // the left side to the domain and the right side to the range
408 // we will be okay though (left, abc; right xyz).
409 "a", "x",
410 "abcacab", "zyxxxyy",
411 "caccb", "xyzzy",
412 };
413
414 int32_t DATA_length = UPRV_LENGTHOF(DATA);
415
416 UErrorCode status = U_ZERO_ERROR;
417 UParseError parseError;
418 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
419 UTRANS_FORWARD, parseError, status);
420 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
421 UTRANS_REVERSE, parseError, status);
422 if (U_FAILURE(status)) {
423 errln("FAIL: RBT constructor failed");
424 return;
425 }
426 for (int32_t i=0; i<DATA_length; i+=2) {
427 expect(*fwd, DATA[i], DATA[i+1]);
428 expect(*rev, DATA[i+1], DATA[i]);
429 }
430 delete fwd;
431 delete rev;
432 }
433
434 /**
435 * Basic test of keyboard.
436 */
TestKeyboard(void)437 void TransliteratorTest::TestKeyboard(void) {
438 UParseError parseError;
439 UErrorCode status = U_ZERO_ERROR;
440 Transliterator *t = Transliterator::createFromRules("<ID>",
441 UnicodeString("psch>Y;")
442 +"ps>y;"
443 +"ch>x;"
444 +"a>A;",
445 UTRANS_FORWARD, parseError,
446 status);
447 if (U_FAILURE(status)) {
448 errln("FAIL: RBT constructor failed");
449 return;
450 }
451 const char* DATA[] = {
452 // insertion, buffer
453 "a", "A",
454 "p", "Ap",
455 "s", "Aps",
456 "c", "Apsc",
457 "a", "AycA",
458 "psch", "AycAY",
459 0, "AycAY", // null means finishKeyboardTransliteration
460 };
461
462 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
463 delete t;
464 }
465
466 /**
467 * Basic test of keyboard with cursor.
468 */
TestKeyboard2(void)469 void TransliteratorTest::TestKeyboard2(void) {
470 UParseError parseError;
471 UErrorCode status = U_ZERO_ERROR;
472 Transliterator *t = Transliterator::createFromRules("<ID>",
473 UnicodeString("ych>Y;")
474 +"ps>|y;"
475 +"ch>x;"
476 +"a>A;",
477 UTRANS_FORWARD, parseError,
478 status);
479 if (U_FAILURE(status)) {
480 errln("FAIL: RBT constructor failed");
481 return;
482 }
483 const char* DATA[] = {
484 // insertion, buffer
485 "a", "A",
486 "p", "Ap",
487 "s", "Aps", // modified for rollback - "Ay",
488 "c", "Apsc", // modified for rollback - "Ayc",
489 "a", "AycA",
490 "p", "AycAp",
491 "s", "AycAps", // modified for rollback - "AycAy",
492 "c", "AycApsc", // modified for rollback - "AycAyc",
493 "h", "AycAY",
494 0, "AycAY", // null means finishKeyboardTransliteration
495 };
496
497 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
498 delete t;
499 }
500
501 /**
502 * Test keyboard transliteration with back-replacement.
503 */
TestKeyboard3(void)504 void TransliteratorTest::TestKeyboard3(void) {
505 // We want th>z but t>y. Furthermore, during keyboard
506 // transliteration we want t>y then yh>z if t, then h are
507 // typed.
508 UnicodeString RULES("t>|y;"
509 "yh>z;");
510
511 const char* DATA[] = {
512 // Column 1: characters to add to buffer (as if typed)
513 // Column 2: expected appearance of buffer after
514 // keyboard xliteration.
515 "a", "a",
516 "b", "ab",
517 "t", "abt", // modified for rollback - "aby",
518 "c", "abyc",
519 "t", "abyct", // modified for rollback - "abycy",
520 "h", "abycz",
521 0, "abycz", // null means finishKeyboardTransliteration
522 };
523
524 UParseError parseError;
525 UErrorCode status = U_ZERO_ERROR;
526 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
527 if (U_FAILURE(status)) {
528 errln("FAIL: RBT constructor failed");
529 return;
530 }
531 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
532 delete t;
533 }
534
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)535 void TransliteratorTest::keyboardAux(const Transliterator& t,
536 const char* DATA[], int32_t DATA_length) {
537 UErrorCode status = U_ZERO_ERROR;
538 UTransPosition index={0, 0, 0, 0};
539 UnicodeString s;
540 for (int32_t i=0; i<DATA_length; i+=2) {
541 UnicodeString log;
542 if (DATA[i] != 0) {
543 log = s + " + "
544 + DATA[i]
545 + " -> ";
546 t.transliterate(s, index, DATA[i], status);
547 } else {
548 log = s + " => ";
549 t.finishTransliteration(s, index);
550 }
551 // Show the start index '{' and the cursor '|'
552 UnicodeString a, b, c;
553 s.extractBetween(0, index.contextStart, a);
554 s.extractBetween(index.contextStart, index.start, b);
555 s.extractBetween(index.start, s.length(), c);
556 log.append(a).
557 append((UChar)LEFT_BRACE).
558 append(b).
559 append((UChar)PIPE).
560 append(c);
561 if (s == DATA[i+1] && U_SUCCESS(status)) {
562 logln(log);
563 } else {
564 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
565 }
566 }
567 }
568
TestArabic(void)569 void TransliteratorTest::TestArabic(void) {
570 // Test disabled for 2.0 until new Arabic transliterator can be written.
571 // /*
572 // const char* DATA[] = {
573 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
574 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
575 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
576 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
577 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
578 // "\u062c\u0645\u064a\u0644\u0629",
579 // };
580 // */
581 //
582 // UChar ar_raw[] = {
583 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
584 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
585 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
586 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
587 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
588 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
589 // };
590 // UnicodeString ar(ar_raw);
591 // UErrorCode status=U_ZERO_ERROR;
592 // UParseError parseError;
593 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
594 // if (t == 0) {
595 // errln("FAIL: createInstance failed");
596 // return;
597 // }
598 // expect(*t, "Arabic", ar);
599 // delete t;
600 }
601
602 /**
603 * Compose the Kana transliterator forward and reverse and try
604 * some strings that should come out unchanged.
605 */
TestCompoundKana(void)606 void TransliteratorTest::TestCompoundKana(void) {
607 UParseError parseError;
608 UErrorCode status = U_ZERO_ERROR;
609 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
610 if (t == 0) {
611 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
612 } else {
613 expect(*t, "aaaaa", "aaaaa");
614 delete t;
615 }
616 }
617
618 /**
619 * Compose the hex transliterators forward and reverse.
620 */
TestCompoundHex(void)621 void TransliteratorTest::TestCompoundHex(void) {
622 UParseError parseError;
623 UErrorCode status = U_ZERO_ERROR;
624 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
625 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
626 Transliterator* transab[] = { a, b };
627 Transliterator* transba[] = { b, a };
628 if (a == 0 || b == 0) {
629 errln("FAIL: construction failed");
630 delete a;
631 delete b;
632 return;
633 }
634 // Do some basic tests of a
635 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
636 // Do some basic tests of b
637 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
638
639 Transliterator* ab = new CompoundTransliterator(transab, 2);
640 UnicodeString s("abcde", "");
641 expect(*ab, s, s);
642
643 UnicodeString str(s);
644 a->transliterate(str);
645 Transliterator* ba = new CompoundTransliterator(transba, 2);
646 expect(*ba, str, str);
647
648 delete ab;
649 delete ba;
650 delete a;
651 delete b;
652 }
653
654 int gTestFilterClassID = 0;
655 /**
656 * Used by TestFiltering().
657 */
658 class TestFilter : public UnicodeFilter {
clone() const659 virtual UnicodeFunctor* clone() const {
660 return new TestFilter(*this);
661 }
contains(UChar32 c) const662 virtual UBool contains(UChar32 c) const {
663 return c != (UChar)0x0063 /*c*/;
664 }
665 // Stubs
toPattern(UnicodeString & result,UBool) const666 virtual UnicodeString& toPattern(UnicodeString& result,
667 UBool /*escapeUnprintable*/) const {
668 return result;
669 }
matchesIndexValue(uint8_t) const670 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
671 return FALSE;
672 }
addMatchSetTo(UnicodeSet &) const673 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
674 public:
getDynamicClassID() const675 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
676 };
677
678 /**
679 * Do some basic tests of filtering.
680 */
TestFiltering(void)681 void TransliteratorTest::TestFiltering(void) {
682 UParseError parseError;
683 UErrorCode status = U_ZERO_ERROR;
684 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
685 if (hex == 0) {
686 errln("FAIL: createInstance(Any-Hex) failed");
687 return;
688 }
689 hex->adoptFilter(new TestFilter());
690 UnicodeString s("abcde");
691 hex->transliterate(s);
692 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
693 if (s == exp) {
694 logln(UnicodeString("Ok: \"") + exp + "\"");
695 } else {
696 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
697 }
698
699 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
700 UnicodeFilter *f = hex->orphanFilter();
701 if (f == NULL){
702 errln("FAIL: orphanFilter() should get a UnicodeFilter");
703 } else {
704 delete f;
705 }
706 delete hex;
707 }
708
709 /**
710 * Test anchors
711 */
TestAnchors(void)712 void TransliteratorTest::TestAnchors(void) {
713 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
714 "aaa",
715 "012");
716 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
717 "aaa",
718 "012");
719 expect(UnicodeString("^ab > 01 ;"
720 " ab > |8 ;"
721 " b > k ;"
722 " 8x$ > 45 ;"
723 " 8x > 77 ;", ""),
724
725 "ababbabxabx",
726 "018k7745");
727 expect(UnicodeString("$s = [z$] ;"
728 "$s{ab > 01 ;"
729 " ab > |8 ;"
730 " b > k ;"
731 " 8x}$s > 45 ;"
732 " 8x > 77 ;", ""),
733
734 "abzababbabxzabxabx",
735 "01z018k45z01x45");
736 }
737
738 /**
739 * Test pattern quoting and escape mechanisms.
740 */
TestPatternQuoting(void)741 void TransliteratorTest::TestPatternQuoting(void) {
742 // Array of 3n items
743 // Each item is <rules>, <input>, <expected output>
744 const UnicodeString DATA[] = {
745 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
746 UnicodeString(UChar(0x4E01)),
747 "[male adult]"
748 };
749
750 for (int32_t i=0; i<3; i+=3) {
751 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
752 UParseError parseError;
753 UErrorCode status = U_ZERO_ERROR;
754 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
755 if (U_FAILURE(status)) {
756 errln("RBT constructor failed");
757 } else {
758 expect(*t, DATA[i+1], DATA[i+2]);
759 }
760 delete t;
761 }
762 }
763
764 /**
765 * Regression test for bugs found in Greek transliteration.
766 */
TestJ277(void)767 void TransliteratorTest::TestJ277(void) {
768 UErrorCode status = U_ZERO_ERROR;
769 UParseError parseError;
770 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
771 if (gl == NULL) {
772 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
773 return;
774 }
775
776 UChar sigma = 0x3C3;
777 UChar upsilon = 0x3C5;
778 UChar nu = 0x3BD;
779 // UChar PHI = 0x3A6;
780 UChar alpha = 0x3B1;
781 // UChar omega = 0x3C9;
782 // UChar omicron = 0x3BF;
783 // UChar epsilon = 0x3B5;
784
785 // sigma upsilon nu -> syn
786 UnicodeString syn;
787 syn.append(sigma).append(upsilon).append(nu);
788 expect(*gl, syn, "syn");
789
790 // sigma alpha upsilon nu -> saun
791 UnicodeString sayn;
792 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
793 expect(*gl, sayn, "saun");
794
795 // Again, using a smaller rule set
796 UnicodeString rules(
797 "$alpha = \\u03B1;"
798 "$nu = \\u03BD;"
799 "$sigma = \\u03C3;"
800 "$ypsilon = \\u03C5;"
801 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
802 "s <> $sigma;"
803 "a <> $alpha;"
804 "u <> $vowel { $ypsilon;"
805 "y <> $ypsilon;"
806 "n <> $nu;",
807 "");
808 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
809 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
810 expect(*mini, syn, "syn");
811 expect(*mini, sayn, "saun");
812 delete mini;
813 mini = NULL;
814
815 #if !UCONFIG_NO_FORMATTING
816 // Transliterate the Greek locale data
817 Locale el("el");
818 DateFormatSymbols syms(el, status);
819 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
820 int32_t i, count;
821 const UnicodeString* data = syms.getMonths(count);
822 for (i=0; i<count; ++i) {
823 if (data[i].length() == 0) {
824 continue;
825 }
826 UnicodeString out(data[i]);
827 gl->transliterate(out);
828 UBool ok = TRUE;
829 if (data[i].length() >= 2 && out.length() >= 2 &&
830 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
831 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
832 ok = FALSE;
833 }
834 }
835 if (ok) {
836 logln(prettify(data[i] + " -> " + out));
837 } else {
838 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
839 }
840 }
841 #endif
842
843 delete gl;
844 }
845
846 /**
847 * Prefix, suffix support in hex transliterators
848 */
TestJ243(void)849 void TransliteratorTest::TestJ243(void) {
850 UErrorCode ec = U_ZERO_ERROR;
851
852 // Test default Hex-Any, which should handle
853 // \u, \U, u+, and U+
854 Transliterator *hex =
855 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
856 if (assertSuccess("getInstance", ec)) {
857 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
858 }
859 delete hex;
860
861 // // Try a custom Hex-Unicode
862 // // \uXXXX and &#xXXXX;
863 // ec = U_ZERO_ERROR;
864 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
865 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
866 // "abcd5fx0123");
867 // // Try custom Any-Hex (default is tested elsewhere)
868 // ec = U_ZERO_ERROR;
869 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
870 // expect(hex3, "012", "012");
871 }
872
873 /**
874 * Parsers need better syntax error messages.
875 */
TestJ329(void)876 void TransliteratorTest::TestJ329(void) {
877
878 struct { UBool containsErrors; const char* rule; } DATA[] = {
879 { FALSE, "a > b; c > d" },
880 { TRUE, "a > b; no operator; c > d" },
881 };
882 int32_t DATA_length = UPRV_LENGTHOF(DATA);
883
884 for (int32_t i=0; i<DATA_length; ++i) {
885 UErrorCode status = U_ZERO_ERROR;
886 UParseError parseError;
887 Transliterator *rbt = Transliterator::createFromRules("<ID>",
888 DATA[i].rule,
889 UTRANS_FORWARD,
890 parseError,
891 status);
892 UBool gotError = U_FAILURE(status);
893 UnicodeString desc(DATA[i].rule);
894 desc.append(gotError ? " -> error" : " -> no error");
895 if (gotError) {
896 desc = desc + ", ParseError code=" + u_errorName(status) +
897 " line=" + parseError.line +
898 " offset=" + parseError.offset +
899 " context=" + parseError.preContext;
900 }
901 if (gotError == DATA[i].containsErrors) {
902 logln(UnicodeString("Ok: ") + desc);
903 } else {
904 errln(UnicodeString("FAIL: ") + desc);
905 }
906 delete rbt;
907 }
908 }
909
910 /**
911 * Test segments and segment references.
912 */
TestSegments(void)913 void TransliteratorTest::TestSegments(void) {
914 // Array of 3n items
915 // Each item is <rules>, <input>, <expected output>
916 UnicodeString DATA[] = {
917 "([a-z]) '.' ([0-9]) > $2 '-' $1",
918 "abc.123.xyz.456",
919 "ab1-c23.xy4-z56",
920
921 // nested
922 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
923 "a1 b2",
924 "a1.a.1 b2.b.2",
925 };
926 int32_t DATA_length = UPRV_LENGTHOF(DATA);
927
928 for (int32_t i=0; i<DATA_length; i+=3) {
929 logln("Pattern: " + prettify(DATA[i]));
930 UParseError parseError;
931 UErrorCode status = U_ZERO_ERROR;
932 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
933 if (U_FAILURE(status)) {
934 errln("FAIL: RBT constructor");
935 } else {
936 expect(*t, DATA[i+1], DATA[i+2]);
937 }
938 delete t;
939 }
940 }
941
942 /**
943 * Test cursor positioning outside of the key
944 */
TestCursorOffset(void)945 void TransliteratorTest::TestCursorOffset(void) {
946 // Array of 3n items
947 // Each item is <rules>, <input>, <expected output>
948 UnicodeString DATA[] = {
949 "pre {alpha} post > | @ ALPHA ;"
950 "eALPHA > beta ;"
951 "pre {beta} post > BETA @@ | ;"
952 "post > xyz",
953
954 "prealphapost prebetapost",
955
956 "prbetaxyz preBETApost",
957 };
958 int32_t DATA_length = UPRV_LENGTHOF(DATA);
959
960 for (int32_t i=0; i<DATA_length; i+=3) {
961 logln("Pattern: " + prettify(DATA[i]));
962 UParseError parseError;
963 UErrorCode status = U_ZERO_ERROR;
964 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
965 if (U_FAILURE(status)) {
966 errln("FAIL: RBT constructor");
967 } else {
968 expect(*t, DATA[i+1], DATA[i+2]);
969 }
970 delete t;
971 }
972 }
973
974 /**
975 * Test zero length and > 1 char length variable values. Test
976 * use of variable refs in UnicodeSets.
977 */
TestArbitraryVariableValues(void)978 void TransliteratorTest::TestArbitraryVariableValues(void) {
979 // Array of 3n items
980 // Each item is <rules>, <input>, <expected output>
981 UnicodeString DATA[] = {
982 "$abe = ab;"
983 "$pat = x[yY]z;"
984 "$ll = 'a-z';"
985 "$llZ = [$ll];"
986 "$llY = [$ll$pat];"
987 "$emp = ;"
988
989 "$abe > ABE;"
990 "$pat > END;"
991 "$llZ > 1;"
992 "$llY > 2;"
993 "7$emp 8 > 9;"
994 "",
995
996 "ab xYzxyz stY78",
997 "ABE ENDEND 1129",
998 };
999 int32_t DATA_length = UPRV_LENGTHOF(DATA);
1000
1001 for (int32_t i=0; i<DATA_length; i+=3) {
1002 logln("Pattern: " + prettify(DATA[i]));
1003 UParseError parseError;
1004 UErrorCode status = U_ZERO_ERROR;
1005 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1006 if (U_FAILURE(status)) {
1007 errln("FAIL: RBT constructor");
1008 } else {
1009 expect(*t, DATA[i+1], DATA[i+2]);
1010 }
1011 delete t;
1012 }
1013 }
1014
1015 /**
1016 * Confirm that the contextStart, contextLimit, start, and limit
1017 * behave correctly. J474.
1018 */
TestPositionHandling(void)1019 void TransliteratorTest::TestPositionHandling(void) {
1020 // Array of 3n items
1021 // Each item is <rules>, <input>, <expected output>
1022 const char* DATA[] = {
1023 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1024 "xtat txtb", // pos 0,9,0,9
1025 "xTTaSS TTxUUb",
1026
1027 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1028 "xtat txtb", // pos 2,9,3,8
1029 "xtaSS TTxUUb",
1030
1031 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1032 "xtat txtb", // pos 3,8,3,8
1033 "xtaTT TTxTTb",
1034 };
1035
1036 // Array of 4n positions -- these go with the DATA array
1037 // They are: contextStart, contextLimit, start, limit
1038 int32_t POS[] = {
1039 0, 9, 0, 9,
1040 2, 9, 3, 8,
1041 3, 8, 3, 8,
1042 };
1043
1044 int32_t n = UPRV_LENGTHOF(DATA) / 3;
1045 for (int32_t i=0; i<n; i++) {
1046 UErrorCode status = U_ZERO_ERROR;
1047 UParseError parseError;
1048 Transliterator *t = Transliterator::createFromRules("<ID>",
1049 DATA[3*i], UTRANS_FORWARD, parseError, status);
1050 if (U_FAILURE(status)) {
1051 delete t;
1052 errln("FAIL: RBT constructor");
1053 return;
1054 }
1055 UTransPosition pos;
1056 pos.contextStart= POS[4*i];
1057 pos.contextLimit = POS[4*i+1];
1058 pos.start = POS[4*i+2];
1059 pos.limit = POS[4*i+3];
1060 UnicodeString rsource(DATA[3*i+1]);
1061 t->transliterate(rsource, pos, status);
1062 if (U_FAILURE(status)) {
1063 delete t;
1064 errln("FAIL: transliterate");
1065 return;
1066 }
1067 t->finishTransliteration(rsource, pos);
1068 expectAux(DATA[3*i],
1069 DATA[3*i+1],
1070 rsource,
1071 DATA[3*i+2]);
1072 delete t;
1073 }
1074 }
1075
1076 /**
1077 * Test the Hiragana-Katakana transliterator.
1078 */
TestHiraganaKatakana(void)1079 void TransliteratorTest::TestHiraganaKatakana(void) {
1080 UParseError parseError;
1081 UErrorCode status = U_ZERO_ERROR;
1082 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1083 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1084 if (hk == 0 || kh == 0) {
1085 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1086 delete hk;
1087 delete kh;
1088 return;
1089 }
1090
1091 // Array of 3n items
1092 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1093 const char* DATA[] = {
1094 "both",
1095 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1096 "\\u30A2\\u30F8\\u30F2\\u30B0",
1097
1098 "kh",
1099 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1100 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1101 };
1102 int32_t DATA_length = UPRV_LENGTHOF(DATA);
1103
1104 for (int32_t i=0; i<DATA_length; i+=3) {
1105 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1106 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1107 switch (*DATA[i]) {
1108 case 0x68: //'h': // Hiragana-Katakana
1109 expect(*hk, h, k);
1110 break;
1111 case 0x6B: //'k': // Katakana-Hiragana
1112 expect(*kh, k, h);
1113 break;
1114 case 0x62: //'b': // both
1115 expect(*hk, h, k);
1116 expect(*kh, k, h);
1117 break;
1118 }
1119 }
1120 delete hk;
1121 delete kh;
1122 }
1123
1124 /**
1125 * Test cloning / copy constructor of RBT.
1126 */
TestCopyJ476(void)1127 void TransliteratorTest::TestCopyJ476(void) {
1128 // The real test here is what happens when the destructors are
1129 // called. So we let one object get destructed, and check to
1130 // see that its copy still works.
1131 Transliterator *t2 = 0;
1132 {
1133 UParseError parseError;
1134 UErrorCode status = U_ZERO_ERROR;
1135 Transliterator *t1 = Transliterator::createFromRules("t1",
1136 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1137 if (U_FAILURE(status)) {
1138 errln("FAIL: RBT constructor");
1139 return;
1140 }
1141 t2 = t1->clone(); // Call copy constructor under the covers.
1142 expect(*t1, "abcfoofoo", "ABcbar");
1143 delete t1;
1144 }
1145 expect(*t2, "abcfoofoo", "ABcbar");
1146 delete t2;
1147 }
1148
1149 /**
1150 * Test inter-Indic transliterators. These are composed.
1151 * ICU4C Jitterbug 483.
1152 */
TestInterIndic(void)1153 void TransliteratorTest::TestInterIndic(void) {
1154 UnicodeString ID("Devanagari-Gujarati", "");
1155 UErrorCode status = U_ZERO_ERROR;
1156 UParseError parseError;
1157 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1158 if (dg == 0) {
1159 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1160 return;
1161 }
1162 UnicodeString id = dg->getID();
1163 if (id != ID) {
1164 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1165 }
1166 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1167 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1168 expect(*dg, dev, guj);
1169 delete dg;
1170 }
1171
1172 /**
1173 * Test filter syntax in IDs. (J918)
1174 */
TestFilterIDs(void)1175 void TransliteratorTest::TestFilterIDs(void) {
1176 // Array of 3n strings:
1177 // <id>, <inverse id>, <input>, <expected output>
1178 const char* DATA[] = {
1179 "[aeiou]Any-Hex", // ID
1180 "[aeiou]Hex-Any", // expected inverse ID
1181 "quizzical", // src
1182 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1183
1184 "[aeiou]Any-Hex;[^5]Hex-Any",
1185 "[^5]Any-Hex;[aeiou]Hex-Any",
1186 "quizzical",
1187 "q\\u0075izzical",
1188
1189 "[abc]Null",
1190 "[abc]Null",
1191 "xyz",
1192 "xyz",
1193 };
1194 enum { DATA_length = UPRV_LENGTHOF(DATA) };
1195
1196 for (int i=0; i<DATA_length; i+=4) {
1197 UnicodeString ID(DATA[i], "");
1198 UnicodeString uID(DATA[i+1], "");
1199 UnicodeString data2(DATA[i+2], "");
1200 UnicodeString data3(DATA[i+3], "");
1201 UParseError parseError;
1202 UErrorCode status = U_ZERO_ERROR;
1203 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1204 if (t == 0) {
1205 errln("FAIL: createInstance(" + ID + ") returned NULL");
1206 return;
1207 }
1208 expect(*t, data2, data3);
1209
1210 // Check the ID
1211 if (ID != t->getID()) {
1212 errln("FAIL: createInstance(" + ID + ").getID() => " +
1213 t->getID());
1214 }
1215
1216 // Check the inverse
1217 Transliterator *u = t->createInverse(status);
1218 if (u == 0) {
1219 errln("FAIL: " + ID + ".createInverse() returned NULL");
1220 } else if (u->getID() != uID) {
1221 errln("FAIL: " + ID + ".createInverse().getID() => " +
1222 u->getID() + ", expected " + uID);
1223 }
1224
1225 delete t;
1226 delete u;
1227 }
1228 }
1229
1230 /**
1231 * Test the case mapping transliterators.
1232 */
TestCaseMap(void)1233 void TransliteratorTest::TestCaseMap(void) {
1234 UParseError parseError;
1235 UErrorCode status = U_ZERO_ERROR;
1236 Transliterator* toUpper =
1237 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1238 Transliterator* toLower =
1239 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240 Transliterator* toTitle =
1241 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1242 if (toUpper==0 || toLower==0 || toTitle==0) {
1243 errln("FAIL: createInstance returned NULL");
1244 delete toUpper;
1245 delete toLower;
1246 delete toTitle;
1247 return;
1248 }
1249
1250 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1251 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1252 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1253 "the quick brown foX jumped over the lazY dogs.");
1254 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1255 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1256
1257 delete toUpper;
1258 delete toLower;
1259 delete toTitle;
1260 }
1261
1262 /**
1263 * Test the name mapping transliterators.
1264 */
TestNameMap(void)1265 void TransliteratorTest::TestNameMap(void) {
1266 UParseError parseError;
1267 UErrorCode status = U_ZERO_ERROR;
1268 Transliterator* uni2name =
1269 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1270 Transliterator* name2uni =
1271 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1272 if (uni2name==0 || name2uni==0) {
1273 errln("FAIL: createInstance returned NULL");
1274 delete uni2name;
1275 delete name2uni;
1276 return;
1277 }
1278
1279 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1280 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1281 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1282 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1283 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1284
1285 delete uni2name;
1286 delete name2uni;
1287
1288 // round trip
1289 Transliterator* t =
1290 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1291 if (t==0) {
1292 errln("FAIL: createInstance returned NULL");
1293 delete t;
1294 return;
1295 }
1296
1297 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1298 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1299 expect(*t, s, s);
1300 delete t;
1301 }
1302
1303 /**
1304 * Test liberalized ID syntax. 1006c
1305 */
TestLiberalizedID(void)1306 void TransliteratorTest::TestLiberalizedID(void) {
1307 // Some test cases have an expected getID() value of NULL. This
1308 // means I have disabled the test case for now. This stuff is
1309 // still under development, and I haven't decided whether to make
1310 // getID() return canonical case yet. It will all get rewritten
1311 // with the move to Source-Target/Variant IDs anyway. [aliu]
1312 const char* DATA[] = {
1313 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1314 " Null ", "Null", "whitespace",
1315 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1316 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1317 };
1318 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1319 UParseError parseError;
1320 UErrorCode status= U_ZERO_ERROR;
1321 for (int32_t i=0; i<DATA_length; i+=3) {
1322 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1323 if (t == 0) {
1324 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1325 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1326 } else {
1327 UnicodeString exp;
1328 if (DATA[i+1]) {
1329 exp = UnicodeString(DATA[i+1], "");
1330 }
1331 // Don't worry about getID() if the expected char*
1332 // is NULL -- see above.
1333 if (exp.length() == 0 || exp == t->getID()) {
1334 logln(UnicodeString("Ok: ") + DATA[i+2] +
1335 " create ID \"" + DATA[i] + "\" => \"" +
1336 exp + "\"");
1337 } else {
1338 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1339 " create ID \"" + DATA[i] + "\" => \"" +
1340 t->getID() + "\", exp \"" + exp + "\"");
1341 }
1342 delete t;
1343 }
1344 }
1345 }
1346
1347 /* test for Jitterbug 912 */
TestCreateInstance()1348 void TransliteratorTest::TestCreateInstance(){
1349 const char* FORWARD = "F";
1350 const char* REVERSE = "R";
1351 const char* DATA[] = {
1352 // Column 1: id
1353 // Column 2: direction
1354 // Column 3: expected ID, or "" if expect failure
1355 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1356
1357 // JB#2689: bad compound causes crash
1358 "InvalidSource-InvalidTarget", FORWARD, "",
1359 "InvalidSource-InvalidTarget", REVERSE, "",
1360 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1361 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1362 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1363 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1364
1365 NULL
1366 };
1367
1368 for (int32_t i=0; DATA[i]; i+=3) {
1369 UParseError err;
1370 UErrorCode ec = U_ZERO_ERROR;
1371 UnicodeString id(DATA[i]);
1372 UTransDirection dir = (DATA[i+1]==FORWARD)?
1373 UTRANS_FORWARD:UTRANS_REVERSE;
1374 UnicodeString expID(DATA[i+2]);
1375 Transliterator* t =
1376 Transliterator::createInstance(id,dir,err,ec);
1377 UnicodeString newID;
1378 if (t) {
1379 newID = t->getID();
1380 }
1381 UBool ok = (newID == expID);
1382 if (!t) {
1383 newID = u_errorName(ec);
1384 }
1385 if (ok) {
1386 logln((UnicodeString)"Ok: createInstance(" +
1387 id + "," + DATA[i+1] + ") => " + newID);
1388 } else {
1389 dataerrln((UnicodeString)"FAIL: createInstance(" +
1390 id + "," + DATA[i+1] + ") => " + newID +
1391 ", expected " + expID);
1392 }
1393 delete t;
1394 }
1395 }
1396
1397 /**
1398 * Test the normalization transliterator.
1399 */
TestNormalizationTransliterator()1400 void TransliteratorTest::TestNormalizationTransliterator() {
1401 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1402 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1403 const char* CANON[] = {
1404 // Input Decomposed Composed
1405 "cat", "cat", "cat" ,
1406 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1407
1408 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1409 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1410
1411 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1412 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1413 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1414
1415 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1416 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1417
1418 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1419 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1420 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1421
1422 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1423 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1424
1425 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1426 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1427
1428 "Henry IV", "Henry IV", "Henry IV" ,
1429 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1430
1431 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1432 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1433 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1434 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1435 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1436
1437 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1438 0 // end
1439 };
1440
1441 const char* COMPAT[] = {
1442 // Input Decomposed Composed
1443 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1444
1445 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1446 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1447
1448 "Henry IV", "Henry IV", "Henry IV" ,
1449 "Henry \\u2163", "Henry IV", "Henry IV" ,
1450
1451 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1452 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1453
1454 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1455 0 // end
1456 };
1457
1458 int32_t i;
1459 UParseError parseError;
1460 UErrorCode status = U_ZERO_ERROR;
1461 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1462 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1463 if (!NFD || !NFC) {
1464 dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1465 delete NFD;
1466 delete NFC;
1467 return;
1468 }
1469 for (i=0; CANON[i]; i+=3) {
1470 UnicodeString in = CharsToUnicodeString(CANON[i]);
1471 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1472 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1473 expect(*NFD, in, expd);
1474 expect(*NFC, in, expc);
1475 }
1476 delete NFD;
1477 delete NFC;
1478
1479 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1480 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1481 if (!NFKD || !NFKC) {
1482 dataerrln("FAIL: createInstance failed");
1483 delete NFKD;
1484 delete NFKC;
1485 return;
1486 }
1487 for (i=0; COMPAT[i]; i+=3) {
1488 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1489 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1490 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1491 expect(*NFKD, in, expkd);
1492 expect(*NFKC, in, expkc);
1493 }
1494 delete NFKD;
1495 delete NFKC;
1496
1497 UParseError pe;
1498 status = U_ZERO_ERROR;
1499 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1500 UTRANS_FORWARD,
1501 pe, status);
1502 if (t == 0) {
1503 errln("FAIL: createInstance failed");
1504 }
1505 expect(*t, CharsToUnicodeString("\\u010dx"),
1506 CharsToUnicodeString("c\\u030C"));
1507 delete t;
1508 }
1509
1510 /**
1511 * Test compound RBT rules.
1512 */
TestCompoundRBT(void)1513 void TransliteratorTest::TestCompoundRBT(void) {
1514 // Careful with spacing and ';' here: Phrase this exactly
1515 // as toRules() is going to return it. If toRules() changes
1516 // with regard to spacing or ';', then adjust this string.
1517 UnicodeString rule("::Hex-Any;\n"
1518 "::Any-Lower;\n"
1519 "a > '.A.';\n"
1520 "b > '.B.';\n"
1521 "::[^t]Any-Upper;", "");
1522 UParseError parseError;
1523 UErrorCode status = U_ZERO_ERROR;
1524 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1525 if (t == 0) {
1526 errln("FAIL: createFromRules failed");
1527 return;
1528 }
1529 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1530 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1531 UnicodeString r;
1532 t->toRules(r, TRUE);
1533 if (r == rule) {
1534 logln((UnicodeString)"OK: toRules() => " + r);
1535 } else {
1536 errln((UnicodeString)"FAIL: toRules() => " + r +
1537 ", expected " + rule);
1538 }
1539 delete t;
1540
1541 // Now test toRules
1542 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1543 if (t == 0) {
1544 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1545 return;
1546 }
1547 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1548 t->toRules(r, TRUE);
1549 if (r != exp) {
1550 errln((UnicodeString)"FAIL: toRules() => " + r +
1551 ", expected " + exp);
1552 } else {
1553 logln((UnicodeString)"OK: toRules() => " + r);
1554 }
1555 delete t;
1556
1557 // Round trip the result of toRules
1558 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1559 if (t == 0) {
1560 errln("FAIL: createFromRules #2 failed");
1561 return;
1562 } else {
1563 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1564 }
1565
1566 // Test toRules again
1567 t->toRules(r, TRUE);
1568 if (r != exp) {
1569 errln((UnicodeString)"FAIL: toRules() => " + r +
1570 ", expected " + exp);
1571 } else {
1572 logln((UnicodeString)"OK: toRules() => " + r);
1573 }
1574
1575 delete t;
1576
1577 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1578 // to what the regenerated ID will look like.
1579 UnicodeString id("Upper(Lower);(NFKC)", "");
1580 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1581 if (t == 0) {
1582 errln("FAIL: createInstance #2 failed");
1583 return;
1584 }
1585 if (t->getID() == id) {
1586 logln((UnicodeString)"OK: created " + id);
1587 } else {
1588 errln((UnicodeString)"FAIL: createInstance(" + id +
1589 ").getID() => " + t->getID());
1590 }
1591
1592 Transliterator *u = t->createInverse(status);
1593 if (u == 0) {
1594 errln("FAIL: createInverse failed");
1595 delete t;
1596 return;
1597 }
1598 exp = "NFKC();Lower(Upper)";
1599 if (u->getID() == exp) {
1600 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1601 u->getID());
1602 } else {
1603 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1604 u->getID());
1605 }
1606 delete t;
1607 delete u;
1608 }
1609
1610 /**
 * Compound filter semantics were originally not implemented
1612 * correctly. Originally, each component filter f(i) is replaced by
1613 * f'(i) = f(i) && g, where g is the filter for the compound
1614 * transliterator.
1615 *
1616 * From Mark:
1617 *
 * Suppose I have a transliterator X.  Internally X is
1619 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1620 *
1621 * The compound should convert all greek characters (through latin) to
1622 * cyrillic, then lowercase the result. The filter should say "don't
1623 * touch 'A' in the original". But because an intermediate result
1624 * happens to go through "A", the Greek Alpha gets hung up.
1625 */
TestCompoundFilter(void)1626 void TransliteratorTest::TestCompoundFilter(void) {
1627 UParseError parseError;
1628 UErrorCode status = U_ZERO_ERROR;
1629 Transliterator *t = Transliterator::createInstance
1630 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1631 if (t == 0) {
1632 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1633 return;
1634 }
1635 t->adoptFilter(new UnicodeSet("[^A]", status));
1636 if (U_FAILURE(status)) {
1637 errln("FAIL: UnicodeSet ct failed");
1638 delete t;
1639 return;
1640 }
1641
1642 // Only the 'A' at index 1 should remain unchanged
1643 expect(*t,
1644 CharsToUnicodeString("BA\\u039A\\u0391"),
1645 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1646 delete t;
1647 }
1648
TestRemove(void)1649 void TransliteratorTest::TestRemove(void) {
1650 UParseError parseError;
1651 UErrorCode status = U_ZERO_ERROR;
1652 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1653 if (t == 0) {
1654 errln("FAIL: createInstance failed");
1655 return;
1656 }
1657
1658 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1659
1660 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1661 // duplicating the filter
1662 Transliterator* t2 = t->clone();
1663 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1664
1665 delete t;
1666 delete t2;
1667 }
1668
TestToRules(void)1669 void TransliteratorTest::TestToRules(void) {
1670 const char* RBT = "rbt";
1671 const char* SET = "set";
1672 static const char* DATA[] = {
1673 RBT,
1674 "$a=\\u4E61; [$a] > A;",
1675 "[\\u4E61] > A;",
1676
1677 RBT,
1678 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1679 "[[:Zs:][:Zl:]]{a} > A;",
1680
1681 SET,
1682 "[[:Zs:][:Zl:]]",
1683 "[[:Zs:][:Zl:]]",
1684
1685 SET,
1686 "[:Ps:]",
1687 "[:Ps:]",
1688
1689 SET,
1690 "[:L:]",
1691 "[:L:]",
1692
1693 SET,
1694 "[[:L:]-[A]]",
1695 "[[:L:]-[A]]",
1696
1697 SET,
1698 "[~[:Lu:][:Ll:]]",
1699 "[~[:Lu:][:Ll:]]",
1700
1701 SET,
1702 "[~[a-z]]",
1703 "[~[a-z]]",
1704
1705 RBT,
1706 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1707 "[^[:Zs:]]{a} > A;",
1708
1709 RBT,
1710 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1711 "[[a-z]-[:Zs:]]{a} > A;",
1712
1713 RBT,
1714 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1715 "[[:Zs:]&[a-z]]{a} > A;",
1716
1717 RBT,
1718 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1719 "[x[:Zs:]]{a} > A;",
1720
1721 RBT,
1722 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1723 "$macron = \\u0304 ;"
1724 "$evowel = [aeiouyAEIOUY] ;"
1725 "$iotasub = \\u0345 ;"
1726 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1727 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1728
1729 RBT,
1730 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732 };
1733 static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1734
1735 for (int32_t d=0; d < DATA_length; d+=3) {
1736 if (DATA[d] == RBT) {
1737 // Transliterator test
1738 UParseError parseError;
1739 UErrorCode status = U_ZERO_ERROR;
1740 Transliterator *t = Transliterator::createFromRules("ID",
1741 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1742 if (t == 0) {
1743 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1744 return;
1745 }
1746 UnicodeString rules, escapedRules;
1747 t->toRules(rules, FALSE);
1748 t->toRules(escapedRules, TRUE);
1749 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1750 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1751 if (rules == expRules) {
1752 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1753 " => " + rules);
1754 } else {
1755 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1756 " => " + rules + ", exp " + expRules);
1757 }
1758 if (escapedRules == expEscapedRules) {
1759 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1760 " => " + escapedRules);
1761 } else {
1762 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1763 " => " + escapedRules + ", exp " + expEscapedRules);
1764 }
1765 delete t;
1766
1767 } else {
1768 // UnicodeSet test
1769 UErrorCode status = U_ZERO_ERROR;
1770 UnicodeString pat(DATA[d+1], -1, US_INV);
1771 UnicodeString expToPat(DATA[d+2], -1, US_INV);
1772 UnicodeSet set(pat, status);
1773 if (U_FAILURE(status)) {
1774 errln("FAIL: UnicodeSet ct failed");
1775 return;
1776 }
1777 // Adjust spacing etc. as necessary.
1778 UnicodeString toPat;
1779 set.toPattern(toPat);
1780 if (expToPat == toPat) {
1781 logln((UnicodeString)"Ok: " + pat +
1782 " => " + toPat);
1783 } else {
1784 errln((UnicodeString)"FAIL: " + pat +
1785 " => " + prettify(toPat, TRUE) +
1786 ", exp " + prettify(pat, TRUE));
1787 }
1788 }
1789 }
1790 }
1791
TestContext()1792 void TransliteratorTest::TestContext() {
1793 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1794 expect("de > x; {d}e > y;",
1795 "de",
1796 "ye",
1797 &pos);
1798
1799 expect("ab{c} > z;",
1800 "xadabdabcy",
1801 "xadabdabzy");
1802 }
1803
TestSupplemental()1804 void TransliteratorTest::TestSupplemental() {
1805
1806 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1807 "a > $a; $s > i;"),
1808 CharsToUnicodeString("ab\\U0001030Fx"),
1809 CharsToUnicodeString("\\U00010300bix"));
1810
1811 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1812 "$b=[A-Z\\U00010400-\\U0001044D];"
1813 "($a)($b) > $2 $1;"),
1814 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1815 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1816
1817 // k|ax\\U00010300xm
1818
1819 // k|a\\U00010400\\U00010300xm
1820 // ky|\\U00010400\\U00010300xm
1821 // ky\\U00010400|\\U00010300xm
1822
1823 // ky\\U00010400|\\U00010300\\U00010400m
1824 // ky\\U00010400y|\\U00010400m
1825 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1826 "$a {x} > | @ \\U00010400;"
1827 "{$a} [^\\u0000-\\uFFFF] > y;"),
1828 CharsToUnicodeString("kax\\U00010300xm"),
1829 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1830
1831 expectT("Any-Name",
1832 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1833 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1834
1835 expectT("Any-Hex/Unicode",
1836 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1837 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1838
1839 expectT("Any-Hex/C",
1840 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1841 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1842
1843 expectT("Any-Hex/Perl",
1844 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1845 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1846
1847 expectT("Any-Hex/Java",
1848 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1849 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1850
1851 expectT("Any-Hex/XML",
1852 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1853 "𐌰􏼀󠁡 ");
1854
1855 expectT("Any-Hex/XML10",
1856 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1857 "𐌰􏼀󠁡 ");
1858
1859 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1860 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1861 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1862 }
1863
TestQuantifier()1864 void TransliteratorTest::TestQuantifier() {
1865
1866 // Make sure @ in a quantified anteContext works
1867 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1868 "AAAAAb",
1869 "aaa(aac)");
1870
1871 // Make sure @ in a quantified postContext works
1872 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1873 "baaaaa",
1874 "caa(aaa)");
1875
1876 // Make sure @ in a quantified postContext with seg ref works
1877 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1878 "baaaaa",
1879 "baa(aaa)");
1880
1881 // Make sure @ past ante context doesn't enter ante context
1882 UTransPosition pos = {0, 5, 3, 5};
1883 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1884 "xxxab",
1885 "xxx(ac)",
1886 &pos);
1887
1888 // Make sure @ past post context doesn't pass limit
1889 UTransPosition pos2 = {0, 4, 0, 2};
1890 expect("{b} a+ > c @@ |; x > y; a > A;",
1891 "baxx",
1892 "caxx",
1893 &pos2);
1894
1895 // Make sure @ past post context doesn't enter post context
1896 expect("{b} a+ > c @@ |; x > y; a > A;",
1897 "baxx",
1898 "cayy");
1899
1900 expect("(ab)? c > d;",
1901 "c abc ababc",
1902 "d d abd");
1903
1904 // NOTE: The (ab)+ when referenced just yields a single "ab",
1905 // not the full sequence of them. This accords with perl behavior.
1906 expect("(ab)+ {x} > '(' $1 ')';",
1907 "x abx ababxy",
1908 "x ab(ab) abab(ab)y");
1909
1910 expect("b+ > x;",
1911 "ac abc abbc abbbc",
1912 "ac axc axc axc");
1913
1914 expect("[abc]+ > x;",
1915 "qac abrc abbcs abtbbc",
1916 "qx xrx xs xtx");
1917
1918 expect("q{(ab)+} > x;",
1919 "qa qab qaba qababc qaba",
1920 "qa qx qxa qxc qxa");
1921
1922 expect("q(ab)* > x;",
1923 "qa qab qaba qababc",
1924 "xa x xa xc");
1925
1926 // NOTE: The (ab)+ when referenced just yields a single "ab",
1927 // not the full sequence of them. This accords with perl behavior.
1928 expect("q(ab)* > '(' $1 ')';",
1929 "qa qab qaba qababc",
1930 "()a (ab) (ab)a (ab)c");
1931
1932 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1933 // quoted string
1934 expect("'ab'+ > x;",
1935 "bb ab ababb",
1936 "bb x xb");
1937
1938 // $foo+ and $foo* -- the quantifier should apply to the entire
1939 // variable reference
1940 expect("$var = ab; $var+ > x;",
1941 "bb ab ababb",
1942 "bb x xb");
1943 }
1944
1945 class TestTrans : public Transliterator {
1946 public:
TestTrans(const UnicodeString & id)1947 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1948 }
clone(void) const1949 virtual Transliterator* clone(void) const {
1950 return new TestTrans(getID());
1951 }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1952 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1953 UBool /*isIncremental*/) const
1954 {
1955 offsets.start = offsets.limit;
1956 }
1957 virtual UClassID getDynamicClassID() const;
1958 static UClassID U_EXPORT2 getStaticClassID();
1959 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1960 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1961
1962 /**
1963 * Test Source-Target/Variant.
1964 */
1965 void TransliteratorTest::TestSTV(void) {
1966 int32_t ns = Transliterator::countAvailableSources();
1967 if (ns < 0 || ns > 255) {
1968 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1969 return;
1970 }
1971 int32_t i, j;
1972 for (i=0; i<ns; ++i) {
1973 UnicodeString source;
1974 Transliterator::getAvailableSource(i, source);
1975 logln((UnicodeString)"" + i + ": " + source);
1976 if (source.length() == 0) {
1977 errln("FAIL: empty source");
1978 continue;
1979 }
1980 int32_t nt = Transliterator::countAvailableTargets(source);
1981 if (nt < 0 || nt > 255) {
1982 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1983 continue;
1984 }
1985 for (int32_t j=0; j<nt; ++j) {
1986 UnicodeString target;
1987 Transliterator::getAvailableTarget(j, source, target);
1988 logln((UnicodeString)" " + j + ": " + target);
1989 if (target.length() == 0) {
1990 errln("FAIL: empty target");
1991 continue;
1992 }
1993 int32_t nv = Transliterator::countAvailableVariants(source, target);
1994 if (nv < 0 || nv > 255) {
1995 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1996 continue;
1997 }
1998 for (int32_t k=0; k<nv; ++k) {
1999 UnicodeString variant;
2000 Transliterator::getAvailableVariant(k, source, target, variant);
2001 if (variant.length() == 0) {
2002 logln((UnicodeString)" " + k + ": <empty>");
2003 } else {
2004 logln((UnicodeString)" " + k + ": " + variant);
2005 }
2006 }
2007 }
2008 }
2009
2010 // Test registration
2011 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2014 for (i=0; i<3; ++i) {
2015 Transliterator *t = new TestTrans(IDS[i]);
2016 if (t == 0) {
2017 errln("FAIL: out of memory");
2018 return;
2019 }
2020 if (t->getID() != IDS[i]) {
2021 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2022 delete t;
2023 return;
2024 }
2025 Transliterator::registerInstance(t);
2026 UErrorCode status = U_ZERO_ERROR;
2027 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2028 if (t == NULL) {
2029 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2030 IDS[i]);
2031 } else {
2032 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2033 IDS[i]);
2034 delete t;
2035 }
2036 Transliterator::unregister(IDS[i]);
2037 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2038 if (t != NULL) {
2039 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2040 IDS[i]);
2041 delete t;
2042 }
2043 }
2044
2045 // Make sure getAvailable API reflects removal
2046 int32_t n = Transliterator::countAvailableIDs();
2047 for (i=0; i<n; ++i) {
2048 UnicodeString id = Transliterator::getAvailableID(i);
2049 for (j=0; j<3; ++j) {
2050 if (id.caseCompare(FULL_IDS[j],0)==0) {
2051 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2052 }
2053 }
2054 }
2055 n = Transliterator::countAvailableTargets("Any");
2056 for (i=0; i<n; ++i) {
2057 UnicodeString t;
2058 Transliterator::getAvailableTarget(i, "Any", t);
2059 if (t.caseCompare(IDS[0],0)==0) {
2060 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2061 }
2062 }
2063 n = Transliterator::countAvailableSources();
2064 for (i=0; i<n; ++i) {
2065 UnicodeString s;
2066 Transliterator::getAvailableSource(i, s);
2067 for (j=0; j<3; ++j) {
2068 if (SOURCES[j] == NULL) continue;
2069 if (s.caseCompare(SOURCES[j],0)==0) {
2070 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2071 }
2072 }
2073 }
2074 }
2075
2076 /**
2077 * Test inverse of Greek-Latin; Title()
2078 */
TestCompoundInverse(void)2079 void TransliteratorTest::TestCompoundInverse(void) {
2080 UParseError parseError;
2081 UErrorCode status = U_ZERO_ERROR;
2082 Transliterator *t = Transliterator::createInstance
2083 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2084 if (t == 0) {
2085 dataerrln("FAIL: createInstance - %s", u_errorName(status));
2086 return;
2087 }
2088 UnicodeString exp("(Title);Latin-Greek");
2089 if (t->getID() == exp) {
2090 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2091 t->getID());
2092 } else {
2093 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2094 t->getID() + "\", expected \"" + exp + "\"");
2095 }
2096 delete t;
2097 }
2098
2099 /**
2100 * Test NFD chaining with RBT
2101 */
TestNFDChainRBT()2102 void TransliteratorTest::TestNFDChainRBT() {
2103 UParseError pe;
2104 UErrorCode ec = U_ZERO_ERROR;
2105 Transliterator* t = Transliterator::createFromRules(
2106 "TEST", "::NFD; aa > Q; a > q;",
2107 UTRANS_FORWARD, pe, ec);
2108 if (t == NULL || U_FAILURE(ec)) {
2109 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2110 return;
2111 }
2112 expect(*t, "aa", "Q");
2113 delete t;
2114
2115 // TEMPORARY TESTS -- BEING DEBUGGED
2116 //=- UnicodeString s, s2;
2117 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2118 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2119 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2120 //=- expect(*t, s, s2);
2121 //=- delete t;
2122 //=-
2123 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2124 //=- expect(*t, s2, s);
2125 //=- delete t;
2126 //=-
2127 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2128 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2129 //=- expect(*t, s, s);
2130 //=- delete t;
2131
2132 // const char* source[] = {
2133 // /*
2134 // "\\u015Br\\u012Bmad",
2135 // "bhagavadg\\u012Bt\\u0101",
2136 // "adhy\\u0101ya",
2137 // "arjuna",
2138 // "vi\\u1E63\\u0101da",
2139 // "y\\u014Dga",
2140 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2141 // "uv\\u0101cr\\u0325",
2142 // */
2143 // "rmk\\u1E63\\u0113t",
2144 // //"dharmak\\u1E63\\u0113tr\\u0113",
2145 // /*
2146 // "kuruk\\u1E63\\u0113tr\\u0113",
2147 // "samav\\u0113t\\u0101",
2148 // "yuyutsava-\\u1E25",
2149 // "m\\u0101mak\\u0101-\\u1E25",
2150 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2151 // "kimakurvata",
2152 // "san\\u0304java",
2153 // */
2154 //
2155 // 0
2156 // };
2157 // const char* expected[] = {
2158 // /*
2159 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2160 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2161 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2162 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2163 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2164 // "\\u092f\\u094b\\u0917",
2165 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2166 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2167 // */
2168 // "\\u0927",
2169 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170 // /*
2171 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2172 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2173 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2174 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2175 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2176 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2177 // "\\u0938\\u0902\\u091c\\u0935",
2178 // */
2179 // 0
2180 // };
2181 // UErrorCode status = U_ZERO_ERROR;
2182 // UParseError parseError;
2183 // UnicodeString message;
2184 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2185 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2186 // if(U_FAILURE(status)){
2187 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2188 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2189 // delete latinToDevToLatin;
2190 // delete devToLatinToDev;
2191 // return;
2192 // }
2193 // UnicodeString gotResult;
2194 // for(int i= 0; source[i] != 0; i++){
2195 // gotResult = source[i];
2196 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2197 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2198 // }
2199 // delete latinToDevToLatin;
2200 // delete devToLatinToDev;
2201 }
2202
2203 /**
2204 * Inverse of "Null" should be "Null". (J21)
2205 */
TestNullInverse()2206 void TransliteratorTest::TestNullInverse() {
2207 UParseError pe;
2208 UErrorCode ec = U_ZERO_ERROR;
2209 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2210 if (t == 0 || U_FAILURE(ec)) {
2211 errln("FAIL: createInstance");
2212 return;
2213 }
2214 Transliterator *u = t->createInverse(ec);
2215 if (u == 0 || U_FAILURE(ec)) {
2216 errln("FAIL: createInverse");
2217 delete t;
2218 return;
2219 }
2220 if (u->getID() != "Null") {
2221 errln("FAIL: Inverse of Null should be Null");
2222 }
2223 delete t;
2224 delete u;
2225 }
2226
2227 /**
2228 * Check ID of inverse of alias. (J22)
2229 */
TestAliasInverseID()2230 void TransliteratorTest::TestAliasInverseID() {
2231 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2232 UParseError pe;
2233 UErrorCode ec = U_ZERO_ERROR;
2234 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2235 if (t == 0 || U_FAILURE(ec)) {
2236 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2237 return;
2238 }
2239 Transliterator *u = t->createInverse(ec);
2240 if (u == 0 || U_FAILURE(ec)) {
2241 errln("FAIL: createInverse");
2242 delete t;
2243 return;
2244 }
2245 UnicodeString exp = "Hangul-Latin";
2246 UnicodeString got = u->getID();
2247 if (got != exp) {
2248 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2249 ", expected " + exp);
2250 }
2251 delete t;
2252 delete u;
2253 }
2254
2255 /**
2256 * Test IDs of inverses of compound transliterators. (J20)
2257 */
TestCompoundInverseID()2258 void TransliteratorTest::TestCompoundInverseID() {
2259 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2260 UParseError pe;
2261 UErrorCode ec = U_ZERO_ERROR;
2262 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2263 if (t == 0 || U_FAILURE(ec)) {
2264 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2265 return;
2266 }
2267 Transliterator *u = t->createInverse(ec);
2268 if (u == 0 || U_FAILURE(ec)) {
2269 errln("FAIL: createInverse");
2270 delete t;
2271 return;
2272 }
2273 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2274 UnicodeString got = u->getID();
2275 if (got != exp) {
2276 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2277 ", expected " + exp);
2278 }
2279 delete t;
2280 delete u;
2281 }
2282
2283 /**
2284 * Test undefined variable.
2285
2286 */
TestUndefinedVariable()2287 void TransliteratorTest::TestUndefinedVariable() {
2288 UnicodeString rule = "$initial } a <> \\u1161;";
2289 UParseError pe;
2290 UErrorCode ec = U_ZERO_ERROR;
2291 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2292 delete t;
2293 if (U_FAILURE(ec)) {
2294 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2295 u_errorName(ec));
2296 return;
2297 }
2298 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2299 u_errorName(ec));
2300 }
2301
2302 /**
2303 * Test empty context.
2304 */
TestEmptyContext()2305 void TransliteratorTest::TestEmptyContext() {
2306 expect(" { a } > b;", "xay a ", "xby b ");
2307 }
2308
2309 /**
2310 * Test compound filter ID syntax
2311 */
TestCompoundFilterID(void)2312 void TransliteratorTest::TestCompoundFilterID(void) {
2313 static const char* DATA[] = {
2314 // Col. 1 = ID or rule set (latter must start with #)
2315
2316 // = columns > 1 are null if expect col. 1 to be illegal =
2317
2318 // Col. 2 = direction, "F..." or "R..."
2319 // Col. 3 = source string
2320 // Col. 4 = exp result
2321
2322 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2323 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2324 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2325 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2326 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2327 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2328 NULL,
2329 };
2330
2331 for (int32_t i=0; DATA[i]; i+=4) {
2332 UnicodeString id = CharsToUnicodeString(DATA[i]);
2333 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2334 UTRANS_REVERSE : UTRANS_FORWARD;
2335 UnicodeString source;
2336 UnicodeString exp;
2337 if (DATA[i+2] != NULL) {
2338 source = CharsToUnicodeString(DATA[i+2]);
2339 exp = CharsToUnicodeString(DATA[i+3]);
2340 }
2341 UBool expOk = (DATA[i+1] != NULL);
2342 Transliterator* t = NULL;
2343 UParseError pe;
2344 UErrorCode ec = U_ZERO_ERROR;
2345 if (id.charAt(0) == 0x23/*#*/) {
2346 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2347 } else {
2348 t = Transliterator::createInstance(id, direction, pe, ec);
2349 }
2350 UBool ok = (t != NULL && U_SUCCESS(ec));
2351 UnicodeString transID;
2352 if (t!=0) {
2353 transID = t->getID();
2354 }
2355 else {
2356 transID = UnicodeString("NULL", "");
2357 }
2358 if (ok == expOk) {
2359 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2360 u_errorName(ec));
2361 if (source.length() != 0) {
2362 expect(*t, source, exp);
2363 }
2364 delete t;
2365 } else {
2366 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2367 u_errorName(ec));
2368 }
2369 }
2370 }
2371
2372 /**
2373 * Test new property set syntax
2374 */
TestPropertySet()2375 void TransliteratorTest::TestPropertySet() {
2376 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2377 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2378 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2379 }
2380
2381 /**
2382 * Test various failure points of the new 2.0 engine.
2383 */
TestNewEngine()2384 void TransliteratorTest::TestNewEngine() {
2385 UParseError pe;
2386 UErrorCode ec = U_ZERO_ERROR;
2387 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2388 if (t == 0 || U_FAILURE(ec)) {
2389 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2390 return;
2391 }
2392 // Katakana should be untouched
2393 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2394 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2395
2396 delete t;
2397
2398 #if 1
2399 // This test will only work if Transliterator.ROLLBACK is
2400 // true. Otherwise, this test will fail, revealing a
2401 // limitation of global filters in incremental mode.
2402 Transliterator *a =
2403 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2404 Transliterator *A =
2405 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2406 if (U_FAILURE(ec)) {
2407 delete a;
2408 delete A;
2409 return;
2410 }
2411
2412 Transliterator* array[3];
2413 array[0] = a;
2414 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2415 array[2] = A;
2416 if (U_FAILURE(ec)) {
2417 errln("FAIL: createInstance NFD");
2418 delete a;
2419 delete A;
2420 delete array[1];
2421 return;
2422 }
2423
2424 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2425 if (U_FAILURE(ec)) {
2426 errln("FAIL: UnicodeSet constructor");
2427 delete a;
2428 delete A;
2429 delete array[1];
2430 delete t;
2431 return;
2432 }
2433
2434 expect(*t, "aAaA", "bAbA");
2435
2436 assertTrue("countElements", t->countElements() == 3);
2437 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2438 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2439 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2440 assertSuccess("getElement", ec);
2441
2442 delete a;
2443 delete A;
2444 delete array[1];
2445 delete t;
2446 #endif
2447
2448 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2449 "a",
2450 "ax");
2451
2452 UnicodeString gr = CharsToUnicodeString(
2453 "$ddot = \\u0308 ;"
2454 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2455 "$rough = \\u0314 ;"
2456 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2457 "\\u03b1 <> a ;"
2458 "$rough <> h ;");
2459
2460 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2461 }
2462
2463 /**
2464 * Test quantified segment behavior. We want:
2465 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2466 */
TestQuantifiedSegment(void)2467 void TransliteratorTest::TestQuantifiedSegment(void) {
2468 // The normal case
2469 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2470
2471 // The tricky case; the quantifier is around the segment
2472 expect("([abc])+ > x $1 x;", "cba", "xax");
2473
2474 // Tricky case in reverse direction
2475 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2476
2477 // Check post-context segment
2478 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2479
2480 // Test toRule/toPattern for non-quantified segment.
2481 // Careful with spacing here.
2482 UnicodeString r("([a-c]){q} > x $1 x;");
2483 UParseError pe;
2484 UErrorCode ec = U_ZERO_ERROR;
2485 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2486 if (U_FAILURE(ec)) {
2487 errln("FAIL: createFromRules");
2488 delete t;
2489 return;
2490 }
2491 UnicodeString rr;
2492 t->toRules(rr, TRUE);
2493 if (r != rr) {
2494 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2495 } else {
2496 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2497 }
2498 delete t;
2499
2500 // Test toRule/toPattern for quantified segment.
2501 // Careful with spacing here.
2502 r = "([a-c])+{q} > x $1 x;";
2503 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2504 if (U_FAILURE(ec)) {
2505 errln("FAIL: createFromRules");
2506 delete t;
2507 return;
2508 }
2509 t->toRules(rr, TRUE);
2510 if (r != rr) {
2511 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2512 } else {
2513 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2514 }
2515 delete t;
2516 }
2517
2518 //======================================================================
2519 // Ram's tests
2520 //======================================================================
TestDevanagariLatinRT()2521 void TransliteratorTest::TestDevanagariLatinRT(){
2522 const int MAX_LEN= 52;
2523 const char* const source[MAX_LEN] = {
2524 "bh\\u0101rata",
2525 "kra",
2526 "k\\u1E63a",
2527 "khra",
2528 "gra",
2529 "\\u1E45ra",
2530 "cra",
2531 "chra",
2532 "j\\u00F1a",
2533 "jhra",
2534 "\\u00F1ra",
2535 "\\u1E6Dya",
2536 "\\u1E6Dhra",
2537 "\\u1E0Dya",
2538 //"r\\u0323ya", // \u095c is not valid in Devanagari
2539 "\\u1E0Dhya",
2540 "\\u1E5Bhra",
2541 "\\u1E47ra",
2542 "tta",
2543 "thra",
2544 "dda",
2545 "dhra",
2546 "nna",
2547 "pra",
2548 "phra",
2549 "bra",
2550 "bhra",
2551 "mra",
2552 "\\u1E49ra",
2553 //"l\\u0331ra",
2554 "yra",
2555 "\\u1E8Fra",
2556 //"l-",
2557 "vra",
2558 "\\u015Bra",
2559 "\\u1E63ra",
2560 "sra",
2561 "hma",
2562 "\\u1E6D\\u1E6Da",
2563 "\\u1E6D\\u1E6Dha",
2564 "\\u1E6Dh\\u1E6Dha",
2565 "\\u1E0D\\u1E0Da",
2566 "\\u1E0D\\u1E0Dha",
2567 "\\u1E6Dya",
2568 "\\u1E6Dhya",
2569 "\\u1E0Dya",
2570 "\\u1E0Dhya",
2571 // Not roundtrippable --
2572 // \\u0939\\u094d\\u094d\\u092E - hma
2573 // \\u0939\\u094d\\u092E - hma
2574 // CharsToUnicodeString("hma"),
2575 "hya",
2576 "\\u015Br\\u0325",
2577 "\\u015Bca",
2578 "\\u0115",
2579 "san\\u0304j\\u012Bb s\\u0113nagupta",
2580 "\\u0101nand vaddir\\u0101ju",
2581 "\\u0101",
2582 "a"
2583 };
2584 const char* const expected[MAX_LEN] = {
2585 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2586 "\\u0915\\u094D\\u0930", /* kra */
2587 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2588 "\\u0916\\u094D\\u0930", /* khra */
2589 "\\u0917\\u094D\\u0930", /* gra */
2590 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2591 "\\u091A\\u094D\\u0930", /* cra */
2592 "\\u091B\\u094D\\u0930", /* chra */
2593 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2594 "\\u091D\\u094D\\u0930", /* jhra */
2595 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2596 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2597 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2598 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2599 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2600 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2601 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2602 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2603 "\\u0924\\u094D\\u0924", /* tta */
2604 "\\u0925\\u094D\\u0930", /* thra */
2605 "\\u0926\\u094D\\u0926", /* dda */
2606 "\\u0927\\u094D\\u0930", /* dhra */
2607 "\\u0928\\u094D\\u0928", /* nna */
2608 "\\u092A\\u094D\\u0930", /* pra */
2609 "\\u092B\\u094D\\u0930", /* phra */
2610 "\\u092C\\u094D\\u0930", /* bra */
2611 "\\u092D\\u094D\\u0930", /* bhra */
2612 "\\u092E\\u094D\\u0930", /* mra */
2613 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2614 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2615 "\\u092F\\u094D\\u0930", /* yra */
2616 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2617 //"l-",
2618 "\\u0935\\u094D\\u0930", /* vra */
2619 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2620 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2621 "\\u0938\\u094D\\u0930", /* sra */
2622 "\\u0939\\u094d\\u092E", /* hma */
2623 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2624 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2625 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2626 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2627 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2628 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2629 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2630 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2631 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2632 // "hma", /* hma */
2633 "\\u0939\\u094D\\u092F", /* hya */
2634 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2635 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2636 "\\u090d", /* e\\u0306 */
2637 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2638 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2639 "\\u0906",
2640 "\\u0905",
2641 };
2642 UErrorCode status = U_ZERO_ERROR;
2643 UParseError parseError;
2644 UnicodeString message;
2645 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2646 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2647 if(U_FAILURE(status)){
2648 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2649 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2650 return;
2651 }
2652 UnicodeString gotResult;
2653 for(int i= 0; i<MAX_LEN; i++){
2654 gotResult = source[i];
2655 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2656 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2657 }
2658 delete latinToDev;
2659 delete devToLatin;
2660 }
2661
TestTeluguLatinRT()2662 void TransliteratorTest::TestTeluguLatinRT(){
2663 const int MAX_LEN=10;
2664 const char* const source[MAX_LEN] = {
2665 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2666 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2667 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2668 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2669 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2670 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2671 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2672 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2673 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2674 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2675 };
2676
2677 const char* const expected[MAX_LEN] = {
2678 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2679 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2680 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2683 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2684 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2685 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2686 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 };
2689
2690 UErrorCode status = U_ZERO_ERROR;
2691 UParseError parseError;
2692 UnicodeString message;
2693 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2694 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2695 if(U_FAILURE(status)){
2696 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2697 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2698 return;
2699 }
2700 UnicodeString gotResult;
2701 for(int i= 0; i<MAX_LEN; i++){
2702 gotResult = source[i];
2703 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2704 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2705 }
2706 delete latinToDev;
2707 delete devToLatin;
2708 }
2709
TestSanskritLatinRT()2710 void TransliteratorTest::TestSanskritLatinRT(){
2711 const int MAX_LEN =16;
2712 const char* const source[MAX_LEN] = {
2713 "rmk\\u1E63\\u0113t",
2714 "\\u015Br\\u012Bmad",
2715 "bhagavadg\\u012Bt\\u0101",
2716 "adhy\\u0101ya",
2717 "arjuna",
2718 "vi\\u1E63\\u0101da",
2719 "y\\u014Dga",
2720 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2721 "uv\\u0101cr\\u0325",
2722 "dharmak\\u1E63\\u0113tr\\u0113",
2723 "kuruk\\u1E63\\u0113tr\\u0113",
2724 "samav\\u0113t\\u0101",
2725 "yuyutsava\\u1E25",
2726 "m\\u0101mak\\u0101\\u1E25",
2727 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2728 "kimakurvata",
2729 "san\\u0304java",
2730 };
2731 const char* const expected[MAX_LEN] = {
2732 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2733 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2734 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2735 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2736 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2737 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2738 "\\u092f\\u094b\\u0917",
2739 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2740 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2741 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2744 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2745 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2746 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2747 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2748 "\\u0938\\u0902\\u091c\\u0935",
2749 };
2750 UErrorCode status = U_ZERO_ERROR;
2751 UParseError parseError;
2752 UnicodeString message;
2753 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2754 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2755 if(U_FAILURE(status)){
2756 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2757 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2758 return;
2759 }
2760 UnicodeString gotResult;
2761 for(int i= 0; i<MAX_LEN; i++){
2762 gotResult = source[i];
2763 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2764 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2765 }
2766 delete latinToDev;
2767 delete devToLatin;
2768 }
2769
2770
TestCompoundLatinRT()2771 void TransliteratorTest::TestCompoundLatinRT(){
2772 const char* const source[] = {
2773 "rmk\\u1E63\\u0113t",
2774 "\\u015Br\\u012Bmad",
2775 "bhagavadg\\u012Bt\\u0101",
2776 "adhy\\u0101ya",
2777 "arjuna",
2778 "vi\\u1E63\\u0101da",
2779 "y\\u014Dga",
2780 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2781 "uv\\u0101cr\\u0325",
2782 "dharmak\\u1E63\\u0113tr\\u0113",
2783 "kuruk\\u1E63\\u0113tr\\u0113",
2784 "samav\\u0113t\\u0101",
2785 "yuyutsava\\u1E25",
2786 "m\\u0101mak\\u0101\\u1E25",
2787 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2788 "kimakurvata",
2789 "san\\u0304java"
2790 };
2791 const int MAX_LEN = UPRV_LENGTHOF(source);
2792 const char* const expected[MAX_LEN] = {
2793 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2794 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2795 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2796 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2797 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2798 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2799 "\\u092f\\u094b\\u0917",
2800 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2801 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2802 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2805 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2806 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2807 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2808 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2809 "\\u0938\\u0902\\u091c\\u0935"
2810 };
2811 if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2812 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2813 return;
2814 }
2815
2816 UErrorCode status = U_ZERO_ERROR;
2817 UParseError parseError;
2818 UnicodeString message;
2819 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2820 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2821 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2822 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2823
2824 if(U_FAILURE(status)){
2825 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2826 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2827 return;
2828 }
2829 UnicodeString gotResult;
2830 for(int i= 0; i<MAX_LEN; i++){
2831 gotResult = source[i];
2832 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2833 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835
2836 }
2837 delete(latinToDevToLatin);
2838 delete(devToLatinToDev);
2839 delete(devToTelToDev);
2840 delete(latinToTelToLatin);
2841 }
2842
2843 /**
2844 * Test Gurmukhi-Devanagari Tippi and Bindi
2845 */
TestGurmukhiDevanagari()2846 void TransliteratorTest::TestGurmukhiDevanagari(){
2847 // the rule says:
2848 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2849 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2850 UErrorCode status = U_ZERO_ERROR;
2851 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2852 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2853 UParseError parseError;
2854
2855 UnicodeSetIterator vIter(vowel);
2856 UnicodeSetIterator nvIter(non_vowel);
2857 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2858 if(U_FAILURE(status)) {
2859 dataerrln("Error creating transliterator %s", u_errorName(status));
2860 delete trans;
2861 return;
2862 }
2863 UnicodeString src (" \\u0902", -1, US_INV);
2864 UnicodeString expected(" \\u0A02", -1, US_INV);
2865 src = src.unescape();
2866 expected= expected.unescape();
2867
2868 while(vIter.next()){
2869 src.setCharAt(0,(UChar) vIter.getCodepoint());
2870 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2871 expect(*trans,src,expected);
2872 }
2873
2874 expected.setCharAt(1,0x0A70);
2875 while(nvIter.next()){
2876 //src.setCharAt(0,(char) nvIter.codepoint);
2877 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2878 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2879 expect(*trans,src,expected);
2880 }
2881 delete trans;
2882 }
2883 /**
2884 * Test instantiation from a locale.
2885 */
TestLocaleInstantiation(void)2886 void TransliteratorTest::TestLocaleInstantiation(void) {
2887 UParseError pe;
2888 UErrorCode ec = U_ZERO_ERROR;
2889 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2890 if (U_FAILURE(ec)) {
2891 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2892 delete t;
2893 return;
2894 }
2895 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2896 delete t;
2897
2898 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2899 if (U_FAILURE(ec)) {
2900 errln("FAIL: createInstance(en-el)");
2901 delete t;
2902 return;
2903 }
2904 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2905 delete t;
2906 }
2907
2908 /**
2909 * Test title case handling of accent (should ignore accents)
2910 */
TestTitleAccents(void)2911 void TransliteratorTest::TestTitleAccents(void) {
2912 UParseError pe;
2913 UErrorCode ec = U_ZERO_ERROR;
2914 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2915 if (U_FAILURE(ec)) {
2916 errln("FAIL: createInstance(Title)");
2917 delete t;
2918 return;
2919 }
2920 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2921 delete t;
2922 }
2923
2924 /**
2925 * Basic test of a locale resource based rule.
2926 */
TestLocaleResource()2927 void TransliteratorTest::TestLocaleResource() {
2928 const char* DATA[] = {
2929 // id from to
2930 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2931 "Latin-el", "b", "\\u03bc\\u03c0",
2932 "Latin-Greek", "b", "\\u03B2",
2933 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2934 "el-Latin", "\\u03B2", "v",
2935 "Greek-Latin", "\\u03B2", "b",
2936 };
2937 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2938 for (int32_t i=0; i<DATA_length; i+=3) {
2939 UParseError pe;
2940 UErrorCode ec = U_ZERO_ERROR;
2941 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2942 if (U_FAILURE(ec)) {
2943 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2944 delete t;
2945 continue;
2946 }
2947 expect(*t, CharsToUnicodeString(DATA[i+1]),
2948 CharsToUnicodeString(DATA[i+2]));
2949 delete t;
2950 }
2951 }
2952
2953 /**
2954 * Make sure parse errors reference the right line.
2955 */
TestParseError()2956 void TransliteratorTest::TestParseError() {
2957 static const char* rule =
2958 "a > b;\n"
2959 "# more stuff\n"
2960 "d << b;";
2961 UErrorCode ec = U_ZERO_ERROR;
2962 UParseError pe;
2963 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2964 delete t;
2965 if (U_FAILURE(ec)) {
2966 UnicodeString err(pe.preContext);
2967 err.append((UChar)124/*|*/).append(pe.postContext);
2968 if (err.indexOf("d << b") >= 0) {
2969 logln("Ok: " + err);
2970 } else {
2971 errln("FAIL: " + err);
2972 }
2973 }
2974 else {
2975 errln("FAIL: no syntax error");
2976 }
2977 static const char* maskingRule =
2978 "a>x;\n"
2979 "# more stuff\n"
2980 "ab>y;";
2981 ec = U_ZERO_ERROR;
2982 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2983 if (ec != U_RULE_MASK_ERROR) {
2984 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2985 }
2986 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2987 errln("FAIL: did not get expected precontext");
2988 }
2989 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2990 errln("FAIL: did not get expected postcontext");
2991 }
2992 }
2993
2994 /**
2995 * Make sure sets on output are disallowed.
2996 */
TestOutputSet()2997 void TransliteratorTest::TestOutputSet() {
2998 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2999 UErrorCode ec = U_ZERO_ERROR;
3000 UParseError pe;
3001 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3002 delete t;
3003 if (U_FAILURE(ec)) {
3004 UnicodeString err(pe.preContext);
3005 err.append((UChar)124/*|*/).append(pe.postContext);
3006 logln("Ok: " + err);
3007 return;
3008 }
3009 errln("FAIL: No syntax error");
3010 }
3011
3012 /**
3013 * Test the use variable range pragma, making sure that use of
3014 * variable range characters is detected and flagged as an error.
3015 */
TestVariableRange()3016 void TransliteratorTest::TestVariableRange() {
3017 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3018 UErrorCode ec = U_ZERO_ERROR;
3019 UParseError pe;
3020 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3021 delete t;
3022 if (U_FAILURE(ec)) {
3023 UnicodeString err(pe.preContext);
3024 err.append((UChar)124/*|*/).append(pe.postContext);
3025 logln("Ok: " + err);
3026 return;
3027 }
3028 errln("FAIL: No syntax error");
3029 }
3030
3031 /**
3032 * Test invalid post context error handling
3033 */
TestInvalidPostContext()3034 void TransliteratorTest::TestInvalidPostContext() {
3035 UnicodeString rule = "a}b{c>d;";
3036 UErrorCode ec = U_ZERO_ERROR;
3037 UParseError pe;
3038 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3039 delete t;
3040 if (U_FAILURE(ec)) {
3041 UnicodeString err(pe.preContext);
3042 err.append((UChar)124/*|*/).append(pe.postContext);
3043 if (err.indexOf("a}b{c") >= 0) {
3044 logln("Ok: " + err);
3045 } else {
3046 errln("FAIL: " + err);
3047 }
3048 return;
3049 }
3050 errln("FAIL: No syntax error");
3051 }
3052
3053 /**
3054 * Test ID form variants
3055 */
TestIDForms()3056 void TransliteratorTest::TestIDForms() {
3057 const char* DATA[] = {
3058 "NFC", NULL, "NFD",
3059 "nfd", NULL, "NFC", // make sure case is ignored
3060 "Any-NFKD", NULL, "Any-NFKC",
3061 "Null", NULL, "Null",
3062 "-nfkc", "nfkc", "NFKD",
3063 "-nfkc/", "nfkc", "NFKD",
3064 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3065 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3066 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3067 "Source-", NULL, NULL,
3068 "Source/Variant-", NULL, NULL,
3069 "Source-/Variant", NULL, NULL,
3070 "/Variant", NULL, NULL,
3071 "/Variant-", NULL, NULL,
3072 "-/Variant", NULL, NULL,
3073 "-/", NULL, NULL,
3074 "-", NULL, NULL,
3075 "/", NULL, NULL,
3076 };
3077 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3078
3079 for (int32_t i=0; i<DATA_length; i+=3) {
3080 const char* ID = DATA[i];
3081 const char* expID = DATA[i+1];
3082 const char* expInvID = DATA[i+2];
3083 UBool expValid = (expInvID != NULL);
3084 if (expID == NULL) {
3085 expID = ID;
3086 }
3087 UParseError pe;
3088 UErrorCode ec = U_ZERO_ERROR;
3089 Transliterator *t =
3090 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3091 if (U_FAILURE(ec)) {
3092 if (!expValid) {
3093 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3094 } else {
3095 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3096 }
3097 delete t;
3098 continue;
3099 }
3100 Transliterator *u = t->createInverse(ec);
3101 if (U_FAILURE(ec)) {
3102 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3103 delete t;
3104 delete u;
3105 continue;
3106 }
3107 if (t->getID() == expID &&
3108 u->getID() == expInvID) {
3109 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3110 } else {
3111 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3112 t->getID() + " x getInverse() => " + u->getID() +
3113 ", expected " + expInvID);
3114 }
3115 delete t;
3116 delete u;
3117 }
3118 }
3119
3120 static const UChar SPACE[] = {32,0};
3121 static const UChar NEWLINE[] = {10,0};
3122 static const UChar RETURN[] = {13,0};
3123 static const UChar EMPTY[] = {0};
3124
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3125 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3126 const UnicodeString& testRulesForward) {
3127 UnicodeString rules2; t2.toRules(rules2, TRUE);
3128 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3129 rules2.findAndReplace(SPACE, EMPTY);
3130 rules2.findAndReplace(NEWLINE, EMPTY);
3131 rules2.findAndReplace(RETURN, EMPTY);
3132
3133 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3134
3135 if (rules2 != testRules) {
3136 errln(label);
3137 logln((UnicodeString)"GENERATED RULES: " + rules2);
3138 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3139 }
3140 }
3141
3142 /**
3143 * Mark's toRules test.
3144 */
TestToRulesMark()3145 void TransliteratorTest::TestToRulesMark() {
3146 const char* testRules =
3147 "::[[:Latin:][:Mark:]];"
3148 "::NFKD (NFC);"
3149 "::Lower (Lower);"
3150 "a <> \\u03B1;" // alpha
3151 "::NFKC (NFD);"
3152 "::Upper (Lower);"
3153 "::Lower ();"
3154 "::([[:Greek:][:Mark:]]);"
3155 ;
3156 const char* testRulesForward =
3157 "::[[:Latin:][:Mark:]];"
3158 "::NFKD(NFC);"
3159 "::Lower(Lower);"
3160 "a > \\u03B1;"
3161 "::NFKC(NFD);"
3162 "::Upper (Lower);"
3163 "::Lower ();"
3164 ;
3165 const char* testRulesBackward =
3166 "::[[:Greek:][:Mark:]];"
3167 "::Lower (Upper);"
3168 "::NFD(NFKC);"
3169 "\\u03B1 > a;"
3170 "::Lower(Lower);"
3171 "::NFC(NFKD);"
3172 ;
3173 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3174 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3175
3176 UParseError pe;
3177 UErrorCode ec = U_ZERO_ERROR;
3178 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3179 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3180
3181 if (U_FAILURE(ec)) {
3182 delete t2;
3183 delete t3;
3184 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3185 return;
3186 }
3187
3188 expect(*t2, source, target);
3189 expect(*t3, target, source);
3190
3191 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3192 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3193
3194 delete t2;
3195 delete t3;
3196 }
3197
3198 /**
3199 * Test Escape and Unescape transliterators.
3200 */
TestEscape()3201 void TransliteratorTest::TestEscape() {
3202 UParseError pe;
3203 UErrorCode ec;
3204 Transliterator *t;
3205
3206 ec = U_ZERO_ERROR;
3207 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3208 if (U_FAILURE(ec)) {
3209 errln((UnicodeString)"FAIL: createInstance");
3210 } else {
3211 expect(*t,
3212 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3213 "@12Q");
3214 }
3215 delete t;
3216
3217 ec = U_ZERO_ERROR;
3218 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3219 if (U_FAILURE(ec)) {
3220 errln((UnicodeString)"FAIL: createInstance");
3221 } else {
3222 expect(*t,
3223 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3224 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3225 }
3226 delete t;
3227
3228 ec = U_ZERO_ERROR;
3229 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3230 if (U_FAILURE(ec)) {
3231 errln((UnicodeString)"FAIL: createInstance");
3232 } else {
3233 expect(*t,
3234 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3235 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3236 }
3237 delete t;
3238
3239 ec = U_ZERO_ERROR;
3240 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3241 if (U_FAILURE(ec)) {
3242 errln((UnicodeString)"FAIL: createInstance");
3243 } else {
3244 expect(*t,
3245 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3246 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3247 }
3248 delete t;
3249 }
3250
3251
TestAnchorMasking()3252 void TransliteratorTest::TestAnchorMasking(){
3253 UnicodeString rule ("^a > Q; a > q;");
3254 UErrorCode status= U_ZERO_ERROR;
3255 UParseError parseError;
3256
3257 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3258 if(U_FAILURE(status)){
3259 errln(UnicodeString("FAIL: ") + "ID" +
3260 ".createFromRules() => bad rules" +
3261 /*", parse error " + parseError.code +*/
3262 ", line " + parseError.line +
3263 ", offset " + parseError.offset +
3264 ", context " + prettify(parseError.preContext, TRUE) +
3265 ", rules: " + prettify(rule, TRUE));
3266 }
3267 delete t;
3268 }
3269
3270 /**
3271 * Make sure display names of variants look reasonable.
3272 */
TestDisplayName()3273 void TransliteratorTest::TestDisplayName() {
3274 #if UCONFIG_NO_FORMATTING
3275 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3276 return;
3277 #else
3278 static const char* DATA[] = {
3279 // ID, forward name, reverse name
3280 // Update the text as necessary -- the important thing is
3281 // not the text itself, but how various cases are handled.
3282
3283 // Basic test
3284 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3285
3286 // Variants
3287 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3288
3289 // Target-only IDs
3290 "NFC", "Any to NFC", "Any to NFD",
3291 };
3292
3293 int32_t DATA_length = UPRV_LENGTHOF(DATA);
3294
3295 Locale US("en", "US");
3296
3297 for (int32_t i=0; i<DATA_length; i+=3) {
3298 UnicodeString name;
3299 Transliterator::getDisplayName(DATA[i], US, name);
3300 if (name != DATA[i+1]) {
3301 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3302 name + ", expected " + DATA[i+1]);
3303 } else {
3304 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3305 }
3306 UErrorCode ec = U_ZERO_ERROR;
3307 UParseError pe;
3308 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3309 if (U_FAILURE(ec)) {
3310 delete t;
3311 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3312 continue;
3313 }
3314 name = Transliterator::getDisplayName(t->getID(), US, name);
3315 if (name != DATA[i+2]) {
3316 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3317 name + ", expected " + DATA[i+2]);
3318 } else {
3319 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3320 }
3321 delete t;
3322 }
3323 #endif
3324 }
3325
TestSpecialCases(void)3326 void TransliteratorTest::TestSpecialCases(void) {
3327 const UnicodeString registerRules[] = {
3328 "Any-Dev1", "x > X; y > Y;",
3329 "Any-Dev2", "XY > Z",
3330 "Greek-Latin/FAKE",
3331 CharsToUnicodeString
3332 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3333 "" // END MARKER
3334 };
3335
3336 const UnicodeString testCases[] = {
3337 // NORMALIZATION
3338 // should add more test cases
3339 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343
3344 // mp -> b BUG
3345 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347
3348 // check for devanagari bug
3349 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3350
3351 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3352 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3353 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3354
3355 //TODO: enable this test once Titlecase works right
3356 /*
3357 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3358 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3359 */
3360 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3362 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3363 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3364
3365 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367
3368 // FORMS OF S
3369 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3370 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3371 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3372 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3373 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3374 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3375 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3376 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3377 // Tatiana bug
3378 // Upper: TAT\\u02B9\\u00C2NA
3379 // Lower: tat\\u02B9\\u00E2na
3380 // Title: Tat\\u02B9\\u00E2na
3381 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3382 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3387
3388 "" // END MARKER
3389 };
3390
3391 UParseError pos;
3392 int32_t i;
3393 for (i = 0; registerRules[i].length()!=0; i+=2) {
3394 UErrorCode status = U_ZERO_ERROR;
3395
3396 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3397 registerRules[i+1], UTRANS_FORWARD, pos, status);
3398 if (U_FAILURE(status)) {
3399 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3400 } else {
3401 Transliterator::registerInstance(t);
3402 }
3403 }
3404 for (i = 0; testCases[i].length()!=0; i+=3) {
3405 UErrorCode ec = U_ZERO_ERROR;
3406 UParseError pe;
3407 const UnicodeString& name = testCases[i];
3408 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3409 if (U_FAILURE(ec)) {
3410 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3411 delete t;
3412 continue;
3413 }
3414 const UnicodeString& id = t->getID();
3415 const UnicodeString& source = testCases[i+1];
3416 UnicodeString target;
3417
3418 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3419
3420 if (testCases[i+2].length() > 0) {
3421 target = testCases[i+2];
3422 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3423 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3424 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3425 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3426 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3427 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3428 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3429 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3430 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3431 target = source;
3432 target.toLower(Locale::getUS());
3433 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3434 target = source;
3435 target.toUpper(Locale::getUS());
3436 }
3437 if (U_FAILURE(ec)) {
3438 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3439 continue;
3440 }
3441
3442 expect(*t, source, target);
3443 delete t;
3444 }
3445 for (i = 0; registerRules[i].length()!=0; i+=2) {
3446 Transliterator::unregister(registerRules[i]);
3447 }
3448 }
3449
/**
 * Format a code point as an escape sequence into a caller-supplied buffer.
 *
 * Values up to 0xFFFF are written as \uXXXX (at least four lowercase hex
 * digits); larger values as \UXXXXXXXX (eight lowercase hex digits).
 *
 * @param ch     code point to format
 * @param buffer destination; the caller must make it large enough for the
 *               escape plus the terminating NUL
 * @return buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    const int value = (int)ch;
    if (ch > 0xFFFF) {
        sprintf(buffer, "\\U%08x", value);
        return buffer;
    }
    sprintf(buffer, "\\u%04x", value);
    return buffer;
}
3458
TestSurrogateCasing(void)3459 void TransliteratorTest::TestSurrogateCasing (void) {
3460 // check that casing handles surrogates
3461 // titlecase is currently defective
3462 char buffer[20];
3463 UChar buffer2[20];
3464 UChar32 dee;
3465 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3466 UnicodeString DEE(u_totitle(dee));
3467 if (DEE != DESERET_DEE) {
3468 err("Fails titlecase of surrogates");
3469 err(Char32ToEscapedChars(dee, buffer));
3470 err(", ");
3471 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3472 }
3473
3474 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3475 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3476 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3477 UErrorCode status= U_ZERO_ERROR;
3478
3479 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3480 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3481 errln("Fails: Can't uppercase surrogates.");
3482 }
3483
3484 status= U_ZERO_ERROR;
3485 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3486 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3487 errln("Fails: Can't lowercase surrogates.");
3488 }
3489 }
3490
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3491 static void _trans(Transliterator& t, const UnicodeString& src,
3492 UnicodeString& result) {
3493 result = src;
3494 t.transliterate(result);
3495 }
3496
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3497 static void _trans(const UnicodeString& id, const UnicodeString& src,
3498 UnicodeString& result, UErrorCode ec) {
3499 UParseError pe;
3500 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3501 if (U_SUCCESS(ec)) {
3502 _trans(*t, src, result);
3503 }
3504 delete t;
3505 }
3506
/**
 * Look up source in a flat list of (key, value) pairs.
 *
 * @param source key to look for; compared case-insensitively
 * @param pairs  array laid out as key0, value0, key1, value1, ... and
 *               terminated by an empty key
 * @return the value paired with the first matching key, or an empty
 *         string when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                const UnicodeString* pairs) {
    for (const UnicodeString* entry = pairs; entry->length() > 0; entry += 2) {
        if (source.caseCompare(entry[0], U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
    }
    return UnicodeString();
}
3517
3518 // Check to see that incremental gets at least part way through a reasonable string.
3519
TestIncrementalProgress(void)3520 void TransliteratorTest::TestIncrementalProgress(void) {
3521 UErrorCode ec = U_ZERO_ERROR;
3522 UnicodeString latinTest = "The Quick Brown Fox.";
3523 UnicodeString devaTest;
3524 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3525 UnicodeString kataTest;
3526 _trans("Latin-Katakana", latinTest, kataTest, ec);
3527 if (U_FAILURE(ec)) {
3528 errln("FAIL: Internal error");
3529 return;
3530 }
3531 const UnicodeString tests[] = {
3532 "Any", latinTest,
3533 "Latin", latinTest,
3534 "Halfwidth", latinTest,
3535 "Devanagari", devaTest,
3536 "Katakana", kataTest,
3537 "" // END MARKER
3538 };
3539
3540 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3541 int32_t i = 0, j=0, k=0;
3542 int32_t sources = Transliterator::countAvailableSources();
3543 for (i = 0; i < sources; i++) {
3544 UnicodeString source;
3545 Transliterator::getAvailableSource(i, source);
3546 UnicodeString test = _findMatch(source, tests);
3547 if (test.length() == 0) {
3548 logln((UnicodeString)"Skipping " + source + "-X");
3549 continue;
3550 }
3551 int32_t targets = Transliterator::countAvailableTargets(source);
3552 for (j = 0; j < targets; j++) {
3553 UnicodeString target;
3554 Transliterator::getAvailableTarget(j, source, target);
3555 int32_t variants = Transliterator::countAvailableVariants(source, target);
3556 for (k =0; k< variants; k++) {
3557 UnicodeString variant;
3558 UParseError err;
3559 UErrorCode status = U_ZERO_ERROR;
3560
3561 Transliterator::getAvailableVariant(k, source, target, variant);
3562 UnicodeString id = source + "-" + target + "/" + variant;
3563
3564 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3565 if (U_FAILURE(status)) {
3566 dataerrln((UnicodeString)"FAIL: Could not create " + id);
3567 delete t;
3568 continue;
3569 }
3570 status = U_ZERO_ERROR;
3571 CheckIncrementalAux(t, test);
3572
3573 UnicodeString rev;
3574 _trans(*t, test, rev);
3575 Transliterator *inv = t->createInverse(status);
3576 if (U_FAILURE(status)) {
3577 // The following are forward-only, it is OK that creating an inverse will not work:
3578 // 1. Devanagari-Arabic
3579 // 2. Any-*/BGN
3580 // 3. Any-*/UNGEGN
3581 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3582 if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3583 && !(id.startsWith((UnicodeString)"Any-") &&
3584 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3585 )
3586 #if UCONFIG_NO_BREAK_ITERATION
3587 && id.compare((UnicodeString)"Latin-Thai/") != 0
3588 #endif
3589 )
3590 {
3591 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3592 }
3593 delete t;
3594 delete inv;
3595 continue;
3596 }
3597 CheckIncrementalAux(inv, rev);
3598 delete t;
3599 delete inv;
3600 }
3601 }
3602 }
3603 }
3604
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3605 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3606 const UnicodeString& input) {
3607 UErrorCode ec = U_ZERO_ERROR;
3608 UTransPosition pos;
3609 UnicodeString test = input;
3610
3611 pos.contextStart = 0;
3612 pos.contextLimit = input.length();
3613 pos.start = 0;
3614 pos.limit = input.length();
3615
3616 t->transliterate(test, pos, ec);
3617 if (U_FAILURE(ec)) {
3618 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3619 return;
3620 }
3621 UBool gotError = FALSE;
3622 (void)gotError; // Suppress set but not used warning.
3623
3624 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3625
3626 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3627 errln((UnicodeString)"No Progress, " +
3628 t->getID() + ": " + formatInput(test, input, pos));
3629 gotError = TRUE;
3630 } else {
3631 logln((UnicodeString)"PASS Progress, " +
3632 t->getID() + ": " + formatInput(test, input, pos));
3633 }
3634 t->finishTransliteration(test, pos);
3635 if (pos.start != pos.limit) {
3636 errln((UnicodeString)"Incomplete, " +
3637 t->getID() + ": " + formatInput(test, input, pos));
3638 gotError = TRUE;
3639 }
3640 }
3641
TestFunction()3642 void TransliteratorTest::TestFunction() {
3643 // Careful with spacing and ';' here: Phrase this exactly
3644 // as toRules() is going to return it. If toRules() changes
3645 // with regard to spacing or ';', then adjust this string.
3646 UnicodeString rule =
3647 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3648
3649 UParseError pe;
3650 UErrorCode ec = U_ZERO_ERROR;
3651 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3652 if (t == NULL) {
3653 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3654 return;
3655 }
3656
3657 UnicodeString r;
3658 t->toRules(r, TRUE);
3659 if (r == rule) {
3660 logln((UnicodeString)"OK: toRules() => " + r);
3661 } else {
3662 errln((UnicodeString)"FAIL: toRules() => " + r +
3663 ", expected " + rule);
3664 }
3665
3666 expect(*t, "The Quick Brown Fox",
3667 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3668
3669 delete t;
3670 }
3671
TestInvalidBackRef(void)3672 void TransliteratorTest::TestInvalidBackRef(void) {
3673 UnicodeString rule = ". > $1;";
3674 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3675 UParseError pe;
3676 UErrorCode ec = U_ZERO_ERROR;
3677 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3678 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3679
3680 if (t != NULL) {
3681 errln("FAIL: createFromRules should have returned NULL");
3682 delete t;
3683 }
3684
3685 if (t2 != NULL) {
3686 errln("FAIL: createFromRules should have returned NULL");
3687 delete t2;
3688 }
3689
3690 if (U_SUCCESS(ec)) {
3691 errln("FAIL: Ok: . > $1; => no error");
3692 } else {
3693 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3694 }
3695 }
3696
TestMulticharStringSet()3697 void TransliteratorTest::TestMulticharStringSet() {
3698 // Basic testing
3699 const char* rule =
3700 " [{aa}] > x;"
3701 " a > y;"
3702 " [b{bc}] > z;"
3703 "[{gd}] { e > q;"
3704 " e } [{fg}] > r;" ;
3705
3706 UParseError pe;
3707 UErrorCode ec = U_ZERO_ERROR;
3708 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3709 if (t == NULL || U_FAILURE(ec)) {
3710 delete t;
3711 errln("FAIL: createFromRules failed");
3712 return;
3713 }
3714
3715 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3716 "y x yz z d gd de gdq gdqfg ddrfg");
3717 delete t;
3718
3719 // Overlapped string test. Make sure that when multiple
3720 // strings can match that the longest one is matched.
3721 rule =
3722 " [a {ab} {abc}] > x;"
3723 " b > y;"
3724 " c > z;"
3725 " q [t {st} {rst}] { e > p;" ;
3726
3727 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3728 if (t == NULL || U_FAILURE(ec)) {
3729 delete t;
3730 errln("FAIL: createFromRules failed");
3731 return;
3732 }
3733
3734 expect(*t, "a ab abc qte qste qrste",
3735 "x x x qtp qstp qrstp");
3736 delete t;
3737 }
3738
3739 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3740 // BEGIN TestUserFunction support factory
3741
3742 Transliterator* _TUFF[4];
3743 UnicodeString* _TUFID[4];
3744
_TUFFactory(const UnicodeString &,Transliterator::Token context)3745 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3746 Transliterator::Token context) {
3747 return _TUFF[context.integer]->clone();
3748 }
3749
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3750 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3751 _TUFF[n] = t;
3752 _TUFID[n] = new UnicodeString(ID);
3753 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3754 }
3755
_TUFUnreg(int32_t n)3756 static void _TUFUnreg(int32_t n) {
3757 if (_TUFF[n] != NULL) {
3758 Transliterator::unregister(*_TUFID[n]);
3759 delete _TUFF[n];
3760 delete _TUFID[n];
3761 }
3762 }
3763
3764 // END TestUserFunction support factory
3765 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3766
3767 /**
3768 * Test that user-registered transliterators can be used under function
3769 * syntax.
3770 */
TestUserFunction()3771 void TransliteratorTest::TestUserFunction() {
3772
3773 Transliterator* t;
3774 UParseError pe;
3775 UErrorCode ec = U_ZERO_ERROR;
3776
3777 // Setup our factory
3778 int32_t i;
3779 for (i=0; i<4; ++i) {
3780 _TUFF[i] = NULL;
3781 }
3782
3783 // There's no need to register inverses if we don't use them
3784 t = Transliterator::createFromRules("gif",
3785 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3786 UTRANS_FORWARD, pe, ec);
3787 if (t == NULL || U_FAILURE(ec)) {
3788 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3789 return;
3790 }
3791 _TUFReg("Any-gif", t, 0);
3792
3793 t = Transliterator::createFromRules("RemoveCurly",
3794 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3795 UTRANS_FORWARD, pe, ec);
3796 if (t == NULL || U_FAILURE(ec)) {
3797 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3798 goto FAIL;
3799 }
3800 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3801 _TUFReg("Any-RemoveCurly", t, 1);
3802
3803 logln("Trying &hex");
3804 t = Transliterator::createFromRules("hex2",
3805 "(.) > &hex($1);",
3806 UTRANS_FORWARD, pe, ec);
3807 if (t == NULL || U_FAILURE(ec)) {
3808 errln("FAIL: createFromRules");
3809 goto FAIL;
3810 }
3811 logln("Registering");
3812 _TUFReg("Any-hex2", t, 2);
3813 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3814 if (t == NULL || U_FAILURE(ec)) {
3815 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3816 goto FAIL;
3817 }
3818 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3819 delete t;
3820
3821 logln("Trying &gif");
3822 t = Transliterator::createFromRules("gif2",
3823 "(.) > &Gif(&Hex2($1));",
3824 UTRANS_FORWARD, pe, ec);
3825 if (t == NULL || U_FAILURE(ec)) {
3826 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3827 goto FAIL;
3828 }
3829 logln("Registering");
3830 _TUFReg("Any-gif2", t, 3);
3831 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3832 if (t == NULL || U_FAILURE(ec)) {
3833 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3834 goto FAIL;
3835 }
3836 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3837 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3838 delete t;
3839
3840 // Test that filters are allowed after &
3841 t = Transliterator::createFromRules("test",
3842 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3843 UTRANS_FORWARD, pe, ec);
3844 if (t == NULL || U_FAILURE(ec)) {
3845 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3846 goto FAIL;
3847 }
3848 expect(*t, "abc",
3849 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3850 delete t;
3851
3852 FAIL:
3853 for (i=0; i<4; ++i) {
3854 _TUFUnreg(i);
3855 }
3856 }
3857
3858 /**
3859 * Test the Any-X transliterators.
3860 */
TestAnyX(void)3861 void TransliteratorTest::TestAnyX(void) {
3862 UParseError parseError;
3863 UErrorCode status = U_ZERO_ERROR;
3864 Transliterator* anyLatin =
3865 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3866 if (anyLatin==0) {
3867 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3868 delete anyLatin;
3869 return;
3870 }
3871
3872 expect(*anyLatin,
3873 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3874 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3875
3876 delete anyLatin;
3877 }
3878
3879 /**
3880 * Test Any-X transliterators with sample letters from all scripts.
3881 */
TestAny(void)3882 void TransliteratorTest::TestAny(void) {
3883 UErrorCode status = U_ZERO_ERROR;
3884 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3885 // function call parameters going on in this test.
3886 UnicodeSet alphabetic("[:alphabetic:]", status);
3887 if (U_FAILURE(status)) {
3888 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3889 return;
3890 }
3891 alphabetic.freeze();
3892
3893 UnicodeString testString;
3894 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3895 const char *scriptName = uscript_getShortName((UScriptCode)i);
3896 if (scriptName == NULL) {
3897 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3898 return;
3899 }
3900
3901 UnicodeSet sample;
3902 sample.applyPropertyAlias("script", scriptName, status);
3903 if (U_FAILURE(status)) {
3904 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3905 return;
3906 }
3907 sample.retainAll(alphabetic);
3908 for (int32_t count=0; count<5; count++) {
3909 UChar32 c = sample.charAt(count);
3910 if (c == -1) {
3911 break;
3912 }
3913 testString.append(c);
3914 }
3915 }
3916
3917 UParseError parseError;
3918 Transliterator* anyLatin =
3919 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3920 if (U_FAILURE(status)) {
3921 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3922 return;
3923 }
3924
3925 logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3926 anyLatin->transliterate(testString);
3927 logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3928 delete anyLatin;
3929 }
3930
3931
3932 /**
3933 * Test the source and target set API. These are only implemented
3934 * for RBT and CompoundTransliterator at this time.
3935 */
TestSourceTargetSet()3936 void TransliteratorTest::TestSourceTargetSet() {
3937 UErrorCode ec = U_ZERO_ERROR;
3938
3939 // Rules
3940 const char* r =
3941 "a > b; "
3942 "r [x{lu}] > q;";
3943
3944 // Expected source
3945 UnicodeSet expSrc("[arx{lu}]", ec);
3946
3947 // Expected target
3948 UnicodeSet expTrg("[bq]", ec);
3949
3950 UParseError pe;
3951 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3952
3953 if (U_FAILURE(ec)) {
3954 delete t;
3955 errln("FAIL: Couldn't set up test");
3956 return;
3957 }
3958
3959 UnicodeSet src; t->getSourceSet(src);
3960 UnicodeSet trg; t->getTargetSet(trg);
3961
3962 if (src == expSrc && trg == expTrg) {
3963 UnicodeString a, b;
3964 logln((UnicodeString)"Ok: " +
3965 r + " => source = " + src.toPattern(a, TRUE) +
3966 ", target = " + trg.toPattern(b, TRUE));
3967 } else {
3968 UnicodeString a, b, c, d;
3969 errln((UnicodeString)"FAIL: " +
3970 r + " => source = " + src.toPattern(a, TRUE) +
3971 ", expected " + expSrc.toPattern(b, TRUE) +
3972 "; target = " + trg.toPattern(c, TRUE) +
3973 ", expected " + expTrg.toPattern(d, TRUE));
3974 }
3975
3976 delete t;
3977 }
3978
3979 /**
3980 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3981 */
TestPatternWhiteSpace()3982 void TransliteratorTest::TestPatternWhiteSpace() {
3983 // Rules
3984 const char* r = "a > \\u200E b;";
3985
3986 UErrorCode ec = U_ZERO_ERROR;
3987 UParseError pe;
3988 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3989
3990 if (U_FAILURE(ec)) {
3991 errln("FAIL: Couldn't set up test");
3992 } else {
3993 expect(*t, "a", "b");
3994 }
3995 delete t;
3996
3997 // UnicodeSet
3998 ec = U_ZERO_ERROR;
3999 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4000
4001 if (U_FAILURE(ec)) {
4002 errln("FAIL: Couldn't set up test");
4003 } else {
4004 if (set.contains(0x200E)) {
4005 errln("FAIL: U+200E not being ignored by UnicodeSet");
4006 }
4007 }
4008 }
4009 //======================================================================
4010 // this method is in TestUScript.java
4011 //======================================================================
TestAllCodepoints()4012 void TransliteratorTest::TestAllCodepoints(){
4013 UScriptCode code= USCRIPT_INVALID_CODE;
4014 char id[256]={'\0'};
4015 char abbr[256]={'\0'};
4016 char newId[256]={'\0'};
4017 char newAbbrId[256]={'\0'};
4018 char oldId[256]={'\0'};
4019 char oldAbbrId[256]={'\0'};
4020
4021 UErrorCode status =U_ZERO_ERROR;
4022 UParseError pe;
4023
4024 for(uint32_t i = 0; i<=0x10ffff; i++){
4025 code = uscript_getScript(i,&status);
4026 if(code == USCRIPT_INVALID_CODE){
4027 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4028 }
4029 const char* myId = uscript_getName(code);
4030 if(!myId) {
4031 dataerrln("Valid script code returned NULL name. Check your data!");
4032 return;
4033 }
4034 uprv_strcpy(id,myId);
4035 uprv_strcpy(abbr,uscript_getShortName(code));
4036
4037 uprv_strcpy(newId,"[:");
4038 uprv_strcat(newId,id);
4039 uprv_strcat(newId,":];NFD");
4040
4041 uprv_strcpy(newAbbrId,"[:");
4042 uprv_strcat(newAbbrId,abbr);
4043 uprv_strcat(newAbbrId,":];NFD");
4044
4045 if(uprv_strcmp(newId,oldId)!=0){
4046 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4047 if(t==NULL || U_FAILURE(status)){
4048 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4049 }
4050 delete t;
4051 }
4052 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4053 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4054 if(t==NULL || U_FAILURE(status)){
4055 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4056 }
4057 delete t;
4058 }
4059 uprv_strcpy(oldId,newId);
4060 uprv_strcpy(oldAbbrId, newAbbrId);
4061
4062 }
4063
4064 }
4065
// Boilerplate check: instantiate the transliterator with the given ID and
// verify that its dynamic class ID equals the static class ID of cls.
// Must be used inside a TransliteratorTest member function, because it
// calls errln()/dataerrln().
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode errorCode = U_ZERO_ERROR; \
    Transliterator* instance = Transliterator::createInstance(id, UTRANS_FORWARD, errorCode); \
    if (U_SUCCESS(errorCode)) { \
        if (cls::getStaticClassID() != instance->getDynamicClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* A self-assignment (*instance = *instance) as coverage for the */ \
        /* assignment operator can't be done here.                       */ \
    } else { \
        dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(errorCode)); \
    } \
    delete instance; \
}
4079
// Boilerplate check: build a transliterator from the given rule (which must
// be a string literal, since it is pasted into the error message) and verify
// that its dynamic class ID equals the static class ID of cls.
// Must be used inside a TransliteratorTest member function, because it
// calls errln().
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode errorCode = U_ZERO_ERROR; \
    UParseError parseError; \
    Transliterator* instance = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseError, errorCode); \
    if (U_SUCCESS(errorCode)) { \
        if (cls ::getStaticClassID() != instance->getDynamicClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* A self-assignment (*instance = *instance) as coverage for the */ \
        /* assignment operator can't be done here.                       */ \
    } else { \
        errln("FAIL: Couldn't create " rule); \
    } \
    delete instance; \
}
4094
/**
 * Check the class-ID boilerplate of the transliterator implementations:
 * for each ID (or rule) below, the instance that is created must report
 * the class ID of the named class.
 */
void TransliteratorTest::TestBoilerplate() {
    // Transliterators obtained by ID
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Transliterator built from a rule string
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4110
TestAlternateSyntax()4111 void TransliteratorTest::TestAlternateSyntax() {
4112 // U+2206 == &
4113 // U+2190 == <
4114 // U+2192 == >
4115 // U+2194 == <>
4116 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4117 "abc",
4118 "xbz");
4119 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4120 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4121 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4122 }
4123
// Rule sets for TestBeginEnd() and TestBeginEndToRules(). Entries are
// referenced by index (from BEGIN_END_TEST_CASES, and directly as
// BEGIN_END_RULES[17]), so the rule sets that rely on ::BEGIN/::END are
// commented out and replaced by empty-string placeholders instead of being
// removed; this keeps the remaining indexes stable.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4297
4298 /*
4299 (This entire test is commented out below and will need some heavy revision when we re-add
4300 the ::BEGIN/::END stuff)
4301 static const char* BOGUS_BEGIN_END_RULES[] = {
4302 // [7]
4303 "::BEGIN;"
4304 "abc > xy;"
4305 "::BEGIN;"
4306 "aba > z;"
4307 "::END;"
4308 "::END;",
4309
4310 // [8]
4311 "abc > xy;"
4312 " aba > z;"
4313 "::END;",
4314
4315 // [9]
4316 "::BEGIN;"
4317 "::Upper;"
4318 "::END;"
4319 };
4320 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4321 */
4322
// Test data for TestBeginEnd() and TestBeginEndToRules(): a flat list of
// triples { rules, input, expected output } that the tests walk three
// entries at a time. Triples belonging to the placeholder (commented-out)
// rule sets are commented out here as well.
static const char* BEGIN_END_TEST_CASES[] = {
    // rules input expected output
    BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
//  BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
//  BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
//  BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

    BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
//  BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
    BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

//  BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//  BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of entries (not triples) in BEGIN_END_TEST_CASES.
static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4351
TestBeginEnd()4352 void TransliteratorTest::TestBeginEnd() {
4353 // run through the list of test cases above
4354 int32_t i = 0;
4355 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4356 expect((UnicodeString)"Test case #" + (i / 3),
4357 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4358 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4359 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4360 }
4361
4362 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4363 UParseError parseError;
4364 UErrorCode status = U_ZERO_ERROR;
4365 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4366 UTRANS_REVERSE, parseError, status);
4367 if (reversed == 0 || U_FAILURE(status)) {
4368 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4369 } else {
4370 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4371 }
4372 delete reversed;
4373
4374 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4375 // that all of them cause errors
4376 /*
4377 (commented out until we have the real ::BEGIN/::END stuff in place
4378 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4379 UParseError parseError;
4380 UErrorCode status = U_ZERO_ERROR;
4381 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4382 UTRANS_FORWARD, parseError, status);
4383 if (!U_FAILURE(status)) {
4384 delete t;
4385 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4386 }
4387 }
4388 */
4389 }
4390
TestBeginEndToRules()4391 void TransliteratorTest::TestBeginEndToRules() {
4392 // run through the same list of test cases we used above, but this time, instead of just
4393 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4394 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4395 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4396 // to (i.e., does the same thing as) the original rule set
4397 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4398 UParseError parseError;
4399 UErrorCode status = U_ZERO_ERROR;
4400 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4401 UTRANS_FORWARD, parseError, status);
4402 if (U_FAILURE(status)) {
4403 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4404 } else {
4405 UnicodeString rules;
4406 t->toRules(rules, TRUE);
4407 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4408 UTRANS_FORWARD, parseError, status);
4409 if (U_FAILURE(status)) {
4410 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4411 parseError, status);
4412 delete t;
4413 } else {
4414 expect(*t2,
4415 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4416 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4417 delete t;
4418 delete t2;
4419 }
4420 }
4421 }
4422
4423 // do the same thing for the reversible test case
4424 UParseError parseError;
4425 UErrorCode status = U_ZERO_ERROR;
4426 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4427 UTRANS_REVERSE, parseError, status);
4428 if (U_FAILURE(status)) {
4429 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4430 } else {
4431 UnicodeString rules;
4432 reversed->toRules(rules, FALSE);
4433 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4434 parseError, status);
4435 if (U_FAILURE(status)) {
4436 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4437 parseError, status);
4438 delete reversed;
4439 } else {
4440 expect(*reversed2,
4441 UnicodeString("xy XY XYZ yz YZ"),
4442 UnicodeString("xy abc xaba yz aba"));
4443 delete reversed;
4444 delete reversed2;
4445 }
4446 }
4447 }
4448
TestRegisterAlias()4449 void TransliteratorTest::TestRegisterAlias() {
4450 UnicodeString longID("Lower;[aeiou]Upper");
4451 UnicodeString shortID("Any-CapVowels");
4452 UnicodeString reallyShortID("CapVowels");
4453
4454 Transliterator::registerAlias(shortID, longID);
4455
4456 UErrorCode err = U_ZERO_ERROR;
4457 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4458 if (U_FAILURE(err)) {
4459 errln("Failed to instantiate transliterator with long ID");
4460 Transliterator::unregister(shortID);
4461 return;
4462 }
4463 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4464 if (U_FAILURE(err)) {
4465 errln("Failed to instantiate transliterator with short ID");
4466 delete t1;
4467 Transliterator::unregister(shortID);
4468 return;
4469 }
4470
4471 if (t1->getID() != longID)
4472 errln("Transliterator instantiated with long ID doesn't have long ID");
4473 if (t2->getID() != reallyShortID)
4474 errln("Transliterator instantiated with short ID doesn't have short ID");
4475
4476 UnicodeString rules1;
4477 UnicodeString rules2;
4478
4479 t1->toRules(rules1, TRUE);
4480 t2->toRules(rules2, TRUE);
4481 if (rules1 != rules2)
4482 errln("Alias transliterators aren't the same");
4483
4484 delete t1;
4485 delete t2;
4486 Transliterator::unregister(shortID);
4487
4488 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4489 if (U_SUCCESS(err)) {
4490 errln("Instantiation with short ID succeeded after short ID was unregistered");
4491 delete t1;
4492 }
4493
4494 // try the same thing again, but this time with something other than
4495 // an instance of CompoundTransliterator
4496 UnicodeString realID("Latin-Greek");
4497 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4498 Transliterator::registerAlias(fakeID, realID);
4499
4500 err = U_ZERO_ERROR;
4501 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4502 if (U_FAILURE(err)) {
4503 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4504 Transliterator::unregister(realID);
4505 return;
4506 }
4507 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4508 if (U_FAILURE(err)) {
4509 errln("Failed to instantiate transliterator with fake ID");
4510 delete t1;
4511 Transliterator::unregister(realID);
4512 return;
4513 }
4514
4515 t1->toRules(rules1, TRUE);
4516 t2->toRules(rules2, TRUE);
4517 if (rules1 != rules2)
4518 errln("Alias transliterators aren't the same");
4519
4520 delete t1;
4521 delete t2;
4522 Transliterator::unregister(fakeID);
4523 }
4524
TestRuleStripping()4525 void TransliteratorTest::TestRuleStripping() {
4526 /*
4527 #
4528 \uE001>\u0C01; # SIGN
4529 */
4530 static const UChar rule[] = {
4531 0x0023,0x0020,0x000D,0x000A,
4532 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4533 };
4534 static const UChar expectedRule[] = {
4535 0xE001,0x003E,0x0C01,0x003B,0
4536 };
4537 UChar result[UPRV_LENGTHOF(rule)];
4538 UErrorCode status = U_ZERO_ERROR;
4539 int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4540 if (len != u_strlen(expectedRule)) {
4541 errln("utrans_stripRules return len = %d", len);
4542 }
4543 if (u_strncmp(expectedRule, result, len) != 0) {
4544 errln("utrans_stripRules did not return expected string");
4545 }
4546 }
4547
4548 /**
4549 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4550 */
TestHalfwidthFullwidth(void)4551 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4552 UParseError parseError;
4553 UErrorCode status = U_ZERO_ERROR;
4554 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4555 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4556 if (hf == 0 || fh == 0) {
4557 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4558 delete hf;
4559 delete fh;
4560 return;
4561 }
4562
4563 // Array of 2n items
4564 // Each item is
4565 // "hf"|"fh"|"both",
4566 // <Halfwidth>,
4567 // <Fullwidth>
4568 const char* DATA[] = {
4569 "both",
4570 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4571 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4572 };
4573 int32_t DATA_length = UPRV_LENGTHOF(DATA);
4574
4575 for (int32_t i=0; i<DATA_length; i+=3) {
4576 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4577 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4578 switch (*DATA[i]) {
4579 case 0x68: //'h': // Halfwidth-Fullwidth only
4580 expect(*hf, h, f);
4581 break;
4582 case 0x66: //'f': // Fullwidth-Halfwidth only
4583 expect(*fh, f, h);
4584 break;
4585 case 0x62: //'b': // both directions
4586 expect(*hf, h, f);
4587 expect(*fh, f, h);
4588 break;
4589 }
4590 }
4591 delete hf;
4592 delete fh;
4593 }
4594
4595
4596 /**
4597 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4598 * TODO: confirm that the expected results are correct.
4599 * For now, test just confirms that C++ and Java give identical results.
4600 */
TestThai(void)4601 void TransliteratorTest::TestThai(void) {
4602 #if !UCONFIG_NO_BREAK_ITERATION
4603 UParseError parseError;
4604 UErrorCode status = U_ZERO_ERROR;
4605 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4606 if (tr == 0) {
4607 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4608 return;
4609 }
4610 if (U_FAILURE(status)) {
4611 errln("FAIL: createInstance failed with %s", u_errorName(status));
4612 return;
4613 }
4614 const char *thaiText =
4615 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4616 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4617 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4618 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4619 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4620 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4621 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4622 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4623 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4624 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4625 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4626 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4627 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4628 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4629 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4630 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4631 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4632 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4633 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4634 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4635 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4636 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4637 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4638 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4639 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4640 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4641 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4642 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4643 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4644 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4645
4646 const char *latinText =
4647 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4648 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4649 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4650 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4651 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4652 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4653 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4654 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4655 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4656 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4657 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4658 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4659 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4660 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4661 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4662 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4663 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4664 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4665
4666
4667 UnicodeString xlitText(thaiText);
4668 xlitText = xlitText.unescape();
4669 tr->transliterate(xlitText);
4670
4671 UnicodeString expectedText(latinText);
4672 expectedText = expectedText.unescape();
4673 expect(*tr, xlitText, expectedText);
4674
4675 delete tr;
4676 #endif
4677 }
4678
4679
4680 //======================================================================
4681 // Support methods
4682 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4683 void TransliteratorTest::expectT(const UnicodeString& id,
4684 const UnicodeString& source,
4685 const UnicodeString& expectedResult) {
4686 UErrorCode ec = U_ZERO_ERROR;
4687 UParseError pe;
4688 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4689 if (U_FAILURE(ec)) {
4690 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
4691 delete t;
4692 return;
4693 }
4694 expect(*t, source, expectedResult);
4695 delete t;
4696 }
4697
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4698 void TransliteratorTest::reportParseError(const UnicodeString& message,
4699 const UParseError& parseError,
4700 const UErrorCode& status) {
4701 dataerrln(message +
4702 /*", parse error " + parseError.code +*/
4703 ", line " + parseError.line +
4704 ", offset " + parseError.offset +
4705 ", pre-context " + prettify(parseError.preContext, TRUE) +
4706 ", post-context " + prettify(parseError.postContext,TRUE) +
4707 ", Error: " + u_errorName(status));
4708 }
4709
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4710 void TransliteratorTest::expect(const UnicodeString& rules,
4711 const UnicodeString& source,
4712 const UnicodeString& expectedResult,
4713 UTransPosition *pos) {
4714 expect("<ID>", rules, source, expectedResult, pos);
4715 }
4716
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4717 void TransliteratorTest::expect(const UnicodeString& id,
4718 const UnicodeString& rules,
4719 const UnicodeString& source,
4720 const UnicodeString& expectedResult,
4721 UTransPosition *pos) {
4722 UErrorCode status = U_ZERO_ERROR;
4723 UParseError parseError;
4724 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4725 if (U_FAILURE(status)) {
4726 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4727 } else {
4728 expect(*t, source, expectedResult, pos);
4729 }
4730 delete t;
4731 }
4732
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4733 void TransliteratorTest::expect(const Transliterator& t,
4734 const UnicodeString& source,
4735 const UnicodeString& expectedResult,
4736 const Transliterator& reverseTransliterator) {
4737 expect(t, source, expectedResult);
4738 expect(reverseTransliterator, expectedResult, source);
4739 }
4740
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4741 void TransliteratorTest::expect(const Transliterator& t,
4742 const UnicodeString& source,
4743 const UnicodeString& expectedResult,
4744 UTransPosition *pos) {
4745 if (pos == 0) {
4746 UnicodeString result(source);
4747 t.transliterate(result);
4748 expectAux(t.getID() + ":String", source, result, expectedResult);
4749 }
4750 UTransPosition index={0, 0, 0, 0};
4751 if (pos != 0) {
4752 index = *pos;
4753 }
4754
4755 UnicodeString rsource(source);
4756 if (pos == 0) {
4757 t.transliterate(rsource);
4758 } else {
4759 // Do it all at once -- below we do it incrementally
4760 t.finishTransliteration(rsource, *pos);
4761 }
4762 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4763
4764 // Test keyboard (incremental) transliteration -- this result
4765 // must be the same after we finalize (see below).
4766 UnicodeString log;
4767 rsource.remove();
4768 if (pos != 0) {
4769 rsource = source;
4770 formatInput(log, rsource, index);
4771 log.append(" -> ");
4772 UErrorCode status = U_ZERO_ERROR;
4773 t.transliterate(rsource, index, status);
4774 formatInput(log, rsource, index);
4775 } else {
4776 for (int32_t i=0; i<source.length(); ++i) {
4777 if (i != 0) {
4778 log.append(" + ");
4779 }
4780 log.append(source.charAt(i)).append(" -> ");
4781 UErrorCode status = U_ZERO_ERROR;
4782 t.transliterate(rsource, index, source.charAt(i), status);
4783 formatInput(log, rsource, index);
4784 }
4785 }
4786
4787 // As a final step in keyboard transliteration, we must call
4788 // transliterate to finish off any pending partial matches that
4789 // were waiting for more input.
4790 t.finishTransliteration(rsource, index);
4791 log.append(" => ").append(rsource);
4792
4793 expectAux(t.getID() + ":Keyboard", log,
4794 rsource == expectedResult,
4795 expectedResult);
4796 }
4797
4798
4799 /**
4800 * @param appendTo result is appended to this param.
4801 * @param input the string being transliterated
4802 * @param pos the index struct
4803 */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4804 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4805 const UnicodeString& input,
4806 const UTransPosition& pos) {
4807 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4808 // the {} indicate the context start and limit, and the ||
4809 // indicate the start and limit.
4810 if (0 <= pos.contextStart &&
4811 pos.contextStart <= pos.start &&
4812 pos.start <= pos.limit &&
4813 pos.limit <= pos.contextLimit &&
4814 pos.contextLimit <= input.length()) {
4815
4816 UnicodeString a, b, c, d, e;
4817 input.extractBetween(0, pos.contextStart, a);
4818 input.extractBetween(pos.contextStart, pos.start, b);
4819 input.extractBetween(pos.start, pos.limit, c);
4820 input.extractBetween(pos.limit, pos.contextLimit, d);
4821 input.extractBetween(pos.contextLimit, input.length(), e);
4822 appendTo.append(a).append((UChar)123/*{*/).append(b).
4823 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4824 append((UChar)125/*}*/).append(e);
4825 } else {
4826 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4827 pos.contextStart + ", s=" + pos.start + ", l=" +
4828 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4829 input);
4830 }
4831 return appendTo;
4832 }
4833
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4834 void TransliteratorTest::expectAux(const UnicodeString& tag,
4835 const UnicodeString& source,
4836 const UnicodeString& result,
4837 const UnicodeString& expectedResult) {
4838 expectAux(tag, source + " -> " + result,
4839 result == expectedResult,
4840 expectedResult);
4841 }
4842
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4843 void TransliteratorTest::expectAux(const UnicodeString& tag,
4844 const UnicodeString& summary, UBool pass,
4845 const UnicodeString& expectedResult) {
4846 if (pass) {
4847 logln(UnicodeString("(")+tag+") " + prettify(summary));
4848 } else {
4849 dataerrln(UnicodeString("FAIL: (")+tag+") "
4850 + prettify(summary)
4851 + ", expected " + prettify(expectedResult));
4852 }
4853 }
4854
4855 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4856