1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/10/99 aliu Creation.
10 **********************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_TRANSLITERATION
16
17 #include "transtst.h"
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
29 #include "cpdtrans.h"
30 #include "nultrans.h"
31 #include "rbt.h"
32 #include "rbt_pars.h"
33 #include "anytrans.h"
34 #include "esctrn.h"
35 #include "name2uni.h"
36 #include "nortrans.h"
37 #include "remtrans.h"
38 #include "titletrn.h"
39 #include "tolowtrn.h"
40 #include "toupptrn.h"
41 #include "unesctrn.h"
42 #include "uni2name.h"
43 #include "cstring.h"
44 #include "cmemory.h"
45 #include <stdio.h>
46
47 /***********************************************************************
48
49 HOW TO USE THIS TEST FILE
50 -or-
51 How I developed on two platforms
52 without losing (too much of) my mind
53
54
55 1. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
59
60 2. Make liberal use of the expect() method; it is your friend.
61
62 3. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
64
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
67
68 ==> THIS IS THE IMPORTANT PART <==
69
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
74
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are
77 labeled
78
79 "icu4j ONLY"
80
81 or
82
83 "icu4c ONLY"
84
85 Make sure you document the reason the test is here and not there.
86
87
88 Thank you.
89 The Management
90 ***********************************************************************/
91
92 // Define character constants thusly to be EBCDIC-friendly
93 enum {
94 LEFT_BRACE=((UChar)0x007B), /*{*/
95 PIPE =((UChar)0x007C), /*|*/
96 ZERO =((UChar)0x0030), /*0*/
97 UPPER_A =((UChar)0x0041) /*A*/
98 };
99
TransliteratorTest()100 TransliteratorTest::TransliteratorTest()
101 : DESERET_DEE((UChar32)0x10414),
102 DESERET_dee((UChar32)0x1043C)
103 {
104 }
105
~TransliteratorTest()106 TransliteratorTest::~TransliteratorTest() {}
107
108 void
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
110 const char* &name, char* /*par*/) {
111 switch (index) {
112 TESTCASE(0,TestInstantiation);
113 TESTCASE(1,TestSimpleRules);
114 TESTCASE(2,TestRuleBasedInverse);
115 TESTCASE(3,TestKeyboard);
116 TESTCASE(4,TestKeyboard2);
117 TESTCASE(5,TestKeyboard3);
118 TESTCASE(6,TestArabic);
119 TESTCASE(7,TestCompoundKana);
120 TESTCASE(8,TestCompoundHex);
121 TESTCASE(9,TestFiltering);
122 TESTCASE(10,TestInlineSet);
123 TESTCASE(11,TestPatternQuoting);
124 TESTCASE(12,TestJ277);
125 TESTCASE(13,TestJ243);
126 TESTCASE(14,TestJ329);
127 TESTCASE(15,TestSegments);
128 TESTCASE(16,TestCursorOffset);
129 TESTCASE(17,TestArbitraryVariableValues);
130 TESTCASE(18,TestPositionHandling);
131 TESTCASE(19,TestHiraganaKatakana);
132 TESTCASE(20,TestCopyJ476);
133 TESTCASE(21,TestAnchors);
134 TESTCASE(22,TestInterIndic);
135 TESTCASE(23,TestFilterIDs);
136 TESTCASE(24,TestCaseMap);
137 TESTCASE(25,TestNameMap);
138 TESTCASE(26,TestLiberalizedID);
139 TESTCASE(27,TestCreateInstance);
140 TESTCASE(28,TestNormalizationTransliterator);
141 TESTCASE(29,TestCompoundRBT);
142 TESTCASE(30,TestCompoundFilter);
143 TESTCASE(31,TestRemove);
144 TESTCASE(32,TestToRules);
145 TESTCASE(33,TestContext);
146 TESTCASE(34,TestSupplemental);
147 TESTCASE(35,TestQuantifier);
148 TESTCASE(36,TestSTV);
149 TESTCASE(37,TestCompoundInverse);
150 TESTCASE(38,TestNFDChainRBT);
151 TESTCASE(39,TestNullInverse);
152 TESTCASE(40,TestAliasInverseID);
153 TESTCASE(41,TestCompoundInverseID);
154 TESTCASE(42,TestUndefinedVariable);
155 TESTCASE(43,TestEmptyContext);
156 TESTCASE(44,TestCompoundFilterID);
157 TESTCASE(45,TestPropertySet);
158 TESTCASE(46,TestNewEngine);
159 TESTCASE(47,TestQuantifiedSegment);
160 TESTCASE(48,TestDevanagariLatinRT);
161 TESTCASE(49,TestTeluguLatinRT);
162 TESTCASE(50,TestCompoundLatinRT);
163 TESTCASE(51,TestSanskritLatinRT);
164 TESTCASE(52,TestLocaleInstantiation);
165 TESTCASE(53,TestTitleAccents);
166 TESTCASE(54,TestLocaleResource);
167 TESTCASE(55,TestParseError);
168 TESTCASE(56,TestOutputSet);
169 TESTCASE(57,TestVariableRange);
170 TESTCASE(58,TestInvalidPostContext);
171 TESTCASE(59,TestIDForms);
172 TESTCASE(60,TestToRulesMark);
173 TESTCASE(61,TestEscape);
174 TESTCASE(62,TestAnchorMasking);
175 TESTCASE(63,TestDisplayName);
176 TESTCASE(64,TestSpecialCases);
177 #if !UCONFIG_NO_FILE_IO
178 TESTCASE(65,TestIncrementalProgress);
179 #endif
180 TESTCASE(66,TestSurrogateCasing);
181 TESTCASE(67,TestFunction);
182 TESTCASE(68,TestInvalidBackRef);
183 TESTCASE(69,TestMulticharStringSet);
184 TESTCASE(70,TestUserFunction);
185 TESTCASE(71,TestAnyX);
186 TESTCASE(72,TestSourceTargetSet);
187 TESTCASE(73,TestGurmukhiDevanagari);
188 TESTCASE(74,TestPatternWhiteSpace);
189 TESTCASE(75,TestAllCodepoints);
190 TESTCASE(76,TestBoilerplate);
191 TESTCASE(77,TestAlternateSyntax);
192 TESTCASE(78,TestBeginEnd);
193 TESTCASE(79,TestBeginEndToRules);
194 TESTCASE(80,TestRegisterAlias);
195 TESTCASE(81,TestRuleStripping);
196 TESTCASE(82,TestHalfwidthFullwidth);
197 TESTCASE(83,TestThai);
198 TESTCASE(84,TestAny);
199 default: name = ""; break;
200 }
201 }
202
203 /**
204 * Make sure every system transliterator can be instantiated.
205 *
206 * ALSO test that the result of toRules() for each rule is a valid
207 * rule. Do this here so we don't have to have another test that
208 * instantiates everything as well.
209 */
TestInstantiation()210 void TransliteratorTest::TestInstantiation() {
211 UErrorCode ec = U_ZERO_ERROR;
212 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
213 assertSuccess("getAvailableIDs()", ec);
214 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
215 int32_t n = Transliterator::countAvailableIDs();
216 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217 avail->count(ec) == n);
218 assertSuccess("count()", ec);
219 UnicodeString name;
220 for (int32_t i=0; i<n; ++i) {
221 const UnicodeString& id = *avail->snext(ec);
222 if (!assertSuccess("snext()", ec) ||
223 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
224 break;
225 }
226 UnicodeString id2 = Transliterator::getAvailableID(i);
227 if (id.length() < 1) {
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i + ") returned empty string");
230 continue;
231 }
232 if (id != id2) {
233 errln(UnicodeString("FAIL: getAvailableID(") +
234 i + ") != getAvailableIDs().snext()");
235 continue;
236 }
237 UParseError parseError;
238 UErrorCode status = U_ZERO_ERROR;
239 Transliterator* t = Transliterator::createInstance(id,
240 UTRANS_FORWARD, parseError,status);
241 name.truncate(0);
242 Transliterator::getDisplayName(id, name);
243 if (t == 0) {
244 #if UCONFIG_NO_BREAK_ITERATION
245 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246 if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
247 id.compare((UnicodeString)"Thai-Latin") != 0)
248 #endif
249 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
250 /*", parse error " + parseError.code +*/
251 ", line " + parseError.line +
252 ", offset " + parseError.offset +
253 ", pre-context " + prettify(parseError.preContext, TRUE) +
254 ", post-context " +prettify(parseError.postContext,TRUE) +
255 ", Error: " + u_errorName(status));
256 // When createInstance fails, it deletes the failing
257 // entry from the available ID list. We detect this
258 // here by looking for a change in countAvailableIDs.
259 int32_t nn = Transliterator::countAvailableIDs();
260 if (nn == (n - 1)) {
261 n = nn;
262 --i; // Compensate for deleted entry
263 }
264 } else {
265 logln(UnicodeString("OK: ") + name + " (" + id + ")");
266
267 // Now test toRules
268 UnicodeString rules;
269 t->toRules(rules, TRUE);
270 Transliterator *u = Transliterator::createFromRules("x",
271 rules, UTRANS_FORWARD, parseError,status);
272 if (u == 0) {
273 errln(UnicodeString("FAIL: ") + id +
274 ".createFromRules() => bad rules" +
275 /*", parse error " + parseError.code +*/
276 ", line " + parseError.line +
277 ", offset " + parseError.offset +
278 ", context " + prettify(parseError.preContext, TRUE) +
279 ", rules: " + prettify(rules, TRUE));
280 } else {
281 delete u;
282 }
283 delete t;
284 }
285 }
286 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
287 assertSuccess("snext()", ec);
288 delete avail;
289
290 // Now test the failure path
291 UParseError parseError;
292 UErrorCode status = U_ZERO_ERROR;
293 UnicodeString id("<Not a valid Transliterator ID>");
294 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
295 if (t != 0) {
296 errln("FAIL: " + id + " returned a transliterator");
297 delete t;
298 } else {
299 logln("OK: Bogus ID handled properly");
300 }
301 }
302
TestSimpleRules(void)303 void TransliteratorTest::TestSimpleRules(void) {
304 /* Example: rules 1. ab>x|y
305 * 2. yc>z
306 *
307 * []|eabcd start - no match, copy e to tranlated buffer
308 * [e]|abcd match rule 1 - copy output & adjust cursor
309 * [ex|y]cd match rule 2 - copy output & adjust cursor
310 * [exz]|d no match, copy d to transliterated buffer
311 * [exzd]| done
312 */
313 expect(UnicodeString("ab>x|y;", "") +
314 "yc>z",
315 "eabcd", "exzd");
316
317 /* Another set of rules:
318 * 1. ab>x|yzacw
319 * 2. za>q
320 * 3. qc>r
321 * 4. cw>n
322 *
323 * []|ab Rule 1
324 * [x|yzacw] No match
325 * [xy|zacw] Rule 2
326 * [xyq|cw] Rule 4
327 * [xyqn]| Done
328 */
329 expect(UnicodeString("ab>x|yzacw;") +
330 "za>q;" +
331 "qc>r;" +
332 "cw>n",
333 "ab", "xyqn");
334
335 /* Test categories
336 */
337 UErrorCode status = U_ZERO_ERROR;
338 UParseError parseError;
339 Transliterator *t = Transliterator::createFromRules(
340 "<ID>",
341 UnicodeString("$dummy=").append((UChar)0xE100) +
342 UnicodeString(";"
343 "$vowel=[aeiouAEIOU];"
344 "$lu=[:Lu:];"
345 "$vowel } $lu > '!';"
346 "$vowel > '&';"
347 "'!' { $lu > '^';"
348 "$lu > '*';"
349 "a > ERROR", ""),
350 UTRANS_FORWARD, parseError,
351 status);
352 if (U_FAILURE(status)) {
353 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
354 return;
355 }
356 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
357 delete t;
358 }
359
360 /**
361 * Test inline set syntax and set variable syntax.
362 */
TestInlineSet(void)363 void TransliteratorTest::TestInlineSet(void) {
364 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
365 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
366
367 expect(UnicodeString(
368 "$digit = [0-9];"
369 "$alpha = [a-zA-Z];"
370 "$alphanumeric = [$digit $alpha];" // ***
371 "$special = [^$alphanumeric];" // ***
372 "$alphanumeric > '-';"
373 "$special > '*';", ""),
374
375 "thx-1138", "---*----");
376 }
377
378 /**
379 * Create some inverses and confirm that they work. We have to be
380 * careful how we do this, since the inverses will not be true
381 * inverses -- we can't throw any random string at the composition
382 * of the transliterators and expect the identity function. F x
383 * F' != I. However, if we are careful about the input, we will
384 * get the expected results.
385 */
TestRuleBasedInverse(void)386 void TransliteratorTest::TestRuleBasedInverse(void) {
387 UnicodeString RULES =
388 UnicodeString("abc>zyx;") +
389 "ab>yz;" +
390 "bc>zx;" +
391 "ca>xy;" +
392 "a>x;" +
393 "b>y;" +
394 "c>z;" +
395
396 "abc<zyx;" +
397 "ab<yz;" +
398 "bc<zx;" +
399 "ca<xy;" +
400 "a<x;" +
401 "b<y;" +
402 "c<z;" +
403
404 "";
405
406 const char* DATA[] = {
407 // Careful here -- random strings will not work. If we keep
408 // the left side to the domain and the right side to the range
409 // we will be okay though (left, abc; right xyz).
410 "a", "x",
411 "abcacab", "zyxxxyy",
412 "caccb", "xyzzy",
413 };
414
415 int32_t DATA_length = UPRV_LENGTHOF(DATA);
416
417 UErrorCode status = U_ZERO_ERROR;
418 UParseError parseError;
419 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
420 UTRANS_FORWARD, parseError, status);
421 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
422 UTRANS_REVERSE, parseError, status);
423 if (U_FAILURE(status)) {
424 errln("FAIL: RBT constructor failed");
425 return;
426 }
427 for (int32_t i=0; i<DATA_length; i+=2) {
428 expect(*fwd, DATA[i], DATA[i+1]);
429 expect(*rev, DATA[i+1], DATA[i]);
430 }
431 delete fwd;
432 delete rev;
433 }
434
435 /**
436 * Basic test of keyboard.
437 */
TestKeyboard(void)438 void TransliteratorTest::TestKeyboard(void) {
439 UParseError parseError;
440 UErrorCode status = U_ZERO_ERROR;
441 Transliterator *t = Transliterator::createFromRules("<ID>",
442 UnicodeString("psch>Y;")
443 +"ps>y;"
444 +"ch>x;"
445 +"a>A;",
446 UTRANS_FORWARD, parseError,
447 status);
448 if (U_FAILURE(status)) {
449 errln("FAIL: RBT constructor failed");
450 return;
451 }
452 const char* DATA[] = {
453 // insertion, buffer
454 "a", "A",
455 "p", "Ap",
456 "s", "Aps",
457 "c", "Apsc",
458 "a", "AycA",
459 "psch", "AycAY",
460 0, "AycAY", // null means finishKeyboardTransliteration
461 };
462
463 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
464 delete t;
465 }
466
467 /**
468 * Basic test of keyboard with cursor.
469 */
TestKeyboard2(void)470 void TransliteratorTest::TestKeyboard2(void) {
471 UParseError parseError;
472 UErrorCode status = U_ZERO_ERROR;
473 Transliterator *t = Transliterator::createFromRules("<ID>",
474 UnicodeString("ych>Y;")
475 +"ps>|y;"
476 +"ch>x;"
477 +"a>A;",
478 UTRANS_FORWARD, parseError,
479 status);
480 if (U_FAILURE(status)) {
481 errln("FAIL: RBT constructor failed");
482 return;
483 }
484 const char* DATA[] = {
485 // insertion, buffer
486 "a", "A",
487 "p", "Ap",
488 "s", "Aps", // modified for rollback - "Ay",
489 "c", "Apsc", // modified for rollback - "Ayc",
490 "a", "AycA",
491 "p", "AycAp",
492 "s", "AycAps", // modified for rollback - "AycAy",
493 "c", "AycApsc", // modified for rollback - "AycAyc",
494 "h", "AycAY",
495 0, "AycAY", // null means finishKeyboardTransliteration
496 };
497
498 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
499 delete t;
500 }
501
502 /**
503 * Test keyboard transliteration with back-replacement.
504 */
TestKeyboard3(void)505 void TransliteratorTest::TestKeyboard3(void) {
506 // We want th>z but t>y. Furthermore, during keyboard
507 // transliteration we want t>y then yh>z if t, then h are
508 // typed.
509 UnicodeString RULES("t>|y;"
510 "yh>z;");
511
512 const char* DATA[] = {
513 // Column 1: characters to add to buffer (as if typed)
514 // Column 2: expected appearance of buffer after
515 // keyboard xliteration.
516 "a", "a",
517 "b", "ab",
518 "t", "abt", // modified for rollback - "aby",
519 "c", "abyc",
520 "t", "abyct", // modified for rollback - "abycy",
521 "h", "abycz",
522 0, "abycz", // null means finishKeyboardTransliteration
523 };
524
525 UParseError parseError;
526 UErrorCode status = U_ZERO_ERROR;
527 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
528 if (U_FAILURE(status)) {
529 errln("FAIL: RBT constructor failed");
530 return;
531 }
532 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
533 delete t;
534 }
535
keyboardAux(const Transliterator & t,const char * DATA[],int32_t DATA_length)536 void TransliteratorTest::keyboardAux(const Transliterator& t,
537 const char* DATA[], int32_t DATA_length) {
538 UErrorCode status = U_ZERO_ERROR;
539 UTransPosition index={0, 0, 0, 0};
540 UnicodeString s;
541 for (int32_t i=0; i<DATA_length; i+=2) {
542 UnicodeString log;
543 if (DATA[i] != 0) {
544 log = s + " + "
545 + DATA[i]
546 + " -> ";
547 t.transliterate(s, index, DATA[i], status);
548 } else {
549 log = s + " => ";
550 t.finishTransliteration(s, index);
551 }
552 // Show the start index '{' and the cursor '|'
553 UnicodeString a, b, c;
554 s.extractBetween(0, index.contextStart, a);
555 s.extractBetween(index.contextStart, index.start, b);
556 s.extractBetween(index.start, s.length(), c);
557 log.append(a).
558 append((UChar)LEFT_BRACE).
559 append(b).
560 append((UChar)PIPE).
561 append(c);
562 if (s == DATA[i+1] && U_SUCCESS(status)) {
563 logln(log);
564 } else {
565 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
566 }
567 }
568 }
569
TestArabic(void)570 void TransliteratorTest::TestArabic(void) {
571 // Test disabled for 2.0 until new Arabic transliterator can be written.
572 // /*
573 // const char* DATA[] = {
574 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
575 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
576 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
577 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
578 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
579 // "\u062c\u0645\u064a\u0644\u0629",
580 // };
581 // */
582 //
583 // UChar ar_raw[] = {
584 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
585 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
586 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
588 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
589 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
590 // };
591 // UnicodeString ar(ar_raw);
592 // UErrorCode status=U_ZERO_ERROR;
593 // UParseError parseError;
594 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
595 // if (t == 0) {
596 // errln("FAIL: createInstance failed");
597 // return;
598 // }
599 // expect(*t, "Arabic", ar);
600 // delete t;
601 }
602
603 /**
604 * Compose the Kana transliterator forward and reverse and try
605 * some strings that should come out unchanged.
606 */
TestCompoundKana(void)607 void TransliteratorTest::TestCompoundKana(void) {
608 UParseError parseError;
609 UErrorCode status = U_ZERO_ERROR;
610 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
611 if (t == 0) {
612 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
613 } else {
614 expect(*t, "aaaaa", "aaaaa");
615 delete t;
616 }
617 }
618
619 /**
620 * Compose the hex transliterators forward and reverse.
621 */
TestCompoundHex(void)622 void TransliteratorTest::TestCompoundHex(void) {
623 UParseError parseError;
624 UErrorCode status = U_ZERO_ERROR;
625 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
626 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
627 Transliterator* transab[] = { a, b };
628 Transliterator* transba[] = { b, a };
629 if (a == 0 || b == 0) {
630 errln("FAIL: construction failed");
631 delete a;
632 delete b;
633 return;
634 }
635 // Do some basic tests of a
636 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
637 // Do some basic tests of b
638 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
639
640 Transliterator* ab = new CompoundTransliterator(transab, 2);
641 UnicodeString s("abcde", "");
642 expect(*ab, s, s);
643
644 UnicodeString str(s);
645 a->transliterate(str);
646 Transliterator* ba = new CompoundTransliterator(transba, 2);
647 expect(*ba, str, str);
648
649 delete ab;
650 delete ba;
651 delete a;
652 delete b;
653 }
654
655 int gTestFilterClassID = 0;
656 /**
657 * Used by TestFiltering().
658 */
659 class TestFilter : public UnicodeFilter {
clone() const660 virtual UnicodeFunctor* clone() const {
661 return new TestFilter(*this);
662 }
contains(UChar32 c) const663 virtual UBool contains(UChar32 c) const {
664 return c != (UChar)0x0063 /*c*/;
665 }
666 // Stubs
toPattern(UnicodeString & result,UBool) const667 virtual UnicodeString& toPattern(UnicodeString& result,
668 UBool /*escapeUnprintable*/) const {
669 return result;
670 }
matchesIndexValue(uint8_t) const671 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
672 return FALSE;
673 }
addMatchSetTo(UnicodeSet &) const674 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
675 public:
getDynamicClassID() const676 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
677 };
678
679 /**
680 * Do some basic tests of filtering.
681 */
TestFiltering(void)682 void TransliteratorTest::TestFiltering(void) {
683 UParseError parseError;
684 UErrorCode status = U_ZERO_ERROR;
685 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
686 if (hex == 0) {
687 errln("FAIL: createInstance(Any-Hex) failed");
688 return;
689 }
690 hex->adoptFilter(new TestFilter());
691 UnicodeString s("abcde");
692 hex->transliterate(s);
693 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
694 if (s == exp) {
695 logln(UnicodeString("Ok: \"") + exp + "\"");
696 } else {
697 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
698 }
699
700 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
701 UnicodeFilter *f = hex->orphanFilter();
702 if (f == NULL){
703 errln("FAIL: orphanFilter() should get a UnicodeFilter");
704 } else {
705 delete f;
706 }
707 delete hex;
708 }
709
710 /**
711 * Test anchors
712 */
TestAnchors(void)713 void TransliteratorTest::TestAnchors(void) {
714 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
715 "aaa",
716 "012");
717 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
718 "aaa",
719 "012");
720 expect(UnicodeString("^ab > 01 ;"
721 " ab > |8 ;"
722 " b > k ;"
723 " 8x$ > 45 ;"
724 " 8x > 77 ;", ""),
725
726 "ababbabxabx",
727 "018k7745");
728 expect(UnicodeString("$s = [z$] ;"
729 "$s{ab > 01 ;"
730 " ab > |8 ;"
731 " b > k ;"
732 " 8x}$s > 45 ;"
733 " 8x > 77 ;", ""),
734
735 "abzababbabxzabxabx",
736 "01z018k45z01x45");
737 }
738
739 /**
740 * Test pattern quoting and escape mechanisms.
741 */
TestPatternQuoting(void)742 void TransliteratorTest::TestPatternQuoting(void) {
743 // Array of 3n items
744 // Each item is <rules>, <input>, <expected output>
745 const UnicodeString DATA[] = {
746 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
747 UnicodeString(UChar(0x4E01)),
748 "[male adult]"
749 };
750
751 for (int32_t i=0; i<3; i+=3) {
752 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
753 UParseError parseError;
754 UErrorCode status = U_ZERO_ERROR;
755 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
756 if (U_FAILURE(status)) {
757 errln("RBT constructor failed");
758 } else {
759 expect(*t, DATA[i+1], DATA[i+2]);
760 }
761 delete t;
762 }
763 }
764
765 /**
766 * Regression test for bugs found in Greek transliteration.
767 */
TestJ277(void)768 void TransliteratorTest::TestJ277(void) {
769 UErrorCode status = U_ZERO_ERROR;
770 UParseError parseError;
771 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
772 if (gl == NULL) {
773 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
774 return;
775 }
776
777 UChar sigma = 0x3C3;
778 UChar upsilon = 0x3C5;
779 UChar nu = 0x3BD;
780 // UChar PHI = 0x3A6;
781 UChar alpha = 0x3B1;
782 // UChar omega = 0x3C9;
783 // UChar omicron = 0x3BF;
784 // UChar epsilon = 0x3B5;
785
786 // sigma upsilon nu -> syn
787 UnicodeString syn;
788 syn.append(sigma).append(upsilon).append(nu);
789 expect(*gl, syn, "syn");
790
791 // sigma alpha upsilon nu -> saun
792 UnicodeString sayn;
793 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
794 expect(*gl, sayn, "saun");
795
796 // Again, using a smaller rule set
797 UnicodeString rules(
798 "$alpha = \\u03B1;"
799 "$nu = \\u03BD;"
800 "$sigma = \\u03C3;"
801 "$ypsilon = \\u03C5;"
802 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
803 "s <> $sigma;"
804 "a <> $alpha;"
805 "u <> $vowel { $ypsilon;"
806 "y <> $ypsilon;"
807 "n <> $nu;",
808 "");
809 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
810 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
811 expect(*mini, syn, "syn");
812 expect(*mini, sayn, "saun");
813 delete mini;
814 mini = NULL;
815
816 #if !UCONFIG_NO_FORMATTING
817 // Transliterate the Greek locale data
818 Locale el("el");
819 DateFormatSymbols syms(el, status);
820 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
821 int32_t i, count;
822 const UnicodeString* data = syms.getMonths(count);
823 for (i=0; i<count; ++i) {
824 if (data[i].length() == 0) {
825 continue;
826 }
827 UnicodeString out(data[i]);
828 gl->transliterate(out);
829 UBool ok = TRUE;
830 if (data[i].length() >= 2 && out.length() >= 2 &&
831 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
832 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
833 ok = FALSE;
834 }
835 }
836 if (ok) {
837 logln(prettify(data[i] + " -> " + out));
838 } else {
839 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
840 }
841 }
842 #endif
843
844 delete gl;
845 }
846
847 /**
848 * Prefix, suffix support in hex transliterators
849 */
TestJ243(void)850 void TransliteratorTest::TestJ243(void) {
851 UErrorCode ec = U_ZERO_ERROR;
852
853 // Test default Hex-Any, which should handle
854 // \u, \U, u+, and U+
855 Transliterator *hex =
856 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
857 if (assertSuccess("getInstance", ec)) {
858 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
859 }
860 delete hex;
861
862 // // Try a custom Hex-Unicode
863 // // \uXXXX and &#xXXXX;
864 // ec = U_ZERO_ERROR;
865 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
866 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
867 // "abcd5fx0123");
868 // // Try custom Any-Hex (default is tested elsewhere)
869 // ec = U_ZERO_ERROR;
870 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
871 // expect(hex3, "012", "012");
872 }
873
874 /**
875 * Parsers need better syntax error messages.
876 */
TestJ329(void)877 void TransliteratorTest::TestJ329(void) {
878
879 struct { UBool containsErrors; const char* rule; } DATA[] = {
880 { FALSE, "a > b; c > d" },
881 { TRUE, "a > b; no operator; c > d" },
882 };
883 int32_t DATA_length = UPRV_LENGTHOF(DATA);
884
885 for (int32_t i=0; i<DATA_length; ++i) {
886 UErrorCode status = U_ZERO_ERROR;
887 UParseError parseError;
888 Transliterator *rbt = Transliterator::createFromRules("<ID>",
889 DATA[i].rule,
890 UTRANS_FORWARD,
891 parseError,
892 status);
893 UBool gotError = U_FAILURE(status);
894 UnicodeString desc(DATA[i].rule);
895 desc.append(gotError ? " -> error" : " -> no error");
896 if (gotError) {
897 desc = desc + ", ParseError code=" + u_errorName(status) +
898 " line=" + parseError.line +
899 " offset=" + parseError.offset +
900 " context=" + parseError.preContext;
901 }
902 if (gotError == DATA[i].containsErrors) {
903 logln(UnicodeString("Ok: ") + desc);
904 } else {
905 errln(UnicodeString("FAIL: ") + desc);
906 }
907 delete rbt;
908 }
909 }
910
911 /**
912 * Test segments and segment references.
913 */
TestSegments(void)914 void TransliteratorTest::TestSegments(void) {
915 // Array of 3n items
916 // Each item is <rules>, <input>, <expected output>
917 UnicodeString DATA[] = {
918 "([a-z]) '.' ([0-9]) > $2 '-' $1",
919 "abc.123.xyz.456",
920 "ab1-c23.xy4-z56",
921
922 // nested
923 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
924 "a1 b2",
925 "a1.a.1 b2.b.2",
926 };
927 int32_t DATA_length = UPRV_LENGTHOF(DATA);
928
929 for (int32_t i=0; i<DATA_length; i+=3) {
930 logln("Pattern: " + prettify(DATA[i]));
931 UParseError parseError;
932 UErrorCode status = U_ZERO_ERROR;
933 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
934 if (U_FAILURE(status)) {
935 errln("FAIL: RBT constructor");
936 } else {
937 expect(*t, DATA[i+1], DATA[i+2]);
938 }
939 delete t;
940 }
941 }
942
943 /**
944 * Test cursor positioning outside of the key
945 */
TestCursorOffset(void)946 void TransliteratorTest::TestCursorOffset(void) {
947 // Array of 3n items
948 // Each item is <rules>, <input>, <expected output>
949 UnicodeString DATA[] = {
950 "pre {alpha} post > | @ ALPHA ;"
951 "eALPHA > beta ;"
952 "pre {beta} post > BETA @@ | ;"
953 "post > xyz",
954
955 "prealphapost prebetapost",
956
957 "prbetaxyz preBETApost",
958 };
959 int32_t DATA_length = UPRV_LENGTHOF(DATA);
960
961 for (int32_t i=0; i<DATA_length; i+=3) {
962 logln("Pattern: " + prettify(DATA[i]));
963 UParseError parseError;
964 UErrorCode status = U_ZERO_ERROR;
965 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
966 if (U_FAILURE(status)) {
967 errln("FAIL: RBT constructor");
968 } else {
969 expect(*t, DATA[i+1], DATA[i+2]);
970 }
971 delete t;
972 }
973 }
974
975 /**
976 * Test zero length and > 1 char length variable values. Test
977 * use of variable refs in UnicodeSets.
978 */
TestArbitraryVariableValues(void)979 void TransliteratorTest::TestArbitraryVariableValues(void) {
980 // Array of 3n items
981 // Each item is <rules>, <input>, <expected output>
982 UnicodeString DATA[] = {
983 "$abe = ab;"
984 "$pat = x[yY]z;"
985 "$ll = 'a-z';"
986 "$llZ = [$ll];"
987 "$llY = [$ll$pat];"
988 "$emp = ;"
989
990 "$abe > ABE;"
991 "$pat > END;"
992 "$llZ > 1;"
993 "$llY > 2;"
994 "7$emp 8 > 9;"
995 "",
996
997 "ab xYzxyz stY78",
998 "ABE ENDEND 1129",
999 };
1000 int32_t DATA_length = UPRV_LENGTHOF(DATA);
1001
1002 for (int32_t i=0; i<DATA_length; i+=3) {
1003 logln("Pattern: " + prettify(DATA[i]));
1004 UParseError parseError;
1005 UErrorCode status = U_ZERO_ERROR;
1006 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1007 if (U_FAILURE(status)) {
1008 errln("FAIL: RBT constructor");
1009 } else {
1010 expect(*t, DATA[i+1], DATA[i+2]);
1011 }
1012 delete t;
1013 }
1014 }
1015
1016 /**
1017 * Confirm that the contextStart, contextLimit, start, and limit
1018 * behave correctly. J474.
1019 */
TestPositionHandling(void)1020 void TransliteratorTest::TestPositionHandling(void) {
1021 // Array of 3n items
1022 // Each item is <rules>, <input>, <expected output>
1023 const char* DATA[] = {
1024 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025 "xtat txtb", // pos 0,9,0,9
1026 "xTTaSS TTxUUb",
1027
1028 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029 "xtat txtb", // pos 2,9,3,8
1030 "xtaSS TTxUUb",
1031
1032 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033 "xtat txtb", // pos 3,8,3,8
1034 "xtaTT TTxTTb",
1035 };
1036
1037 // Array of 4n positions -- these go with the DATA array
1038 // They are: contextStart, contextLimit, start, limit
1039 int32_t POS[] = {
1040 0, 9, 0, 9,
1041 2, 9, 3, 8,
1042 3, 8, 3, 8,
1043 };
1044
1045 int32_t n = UPRV_LENGTHOF(DATA) / 3;
1046 for (int32_t i=0; i<n; i++) {
1047 UErrorCode status = U_ZERO_ERROR;
1048 UParseError parseError;
1049 Transliterator *t = Transliterator::createFromRules("<ID>",
1050 DATA[3*i], UTRANS_FORWARD, parseError, status);
1051 if (U_FAILURE(status)) {
1052 delete t;
1053 errln("FAIL: RBT constructor");
1054 return;
1055 }
1056 UTransPosition pos;
1057 pos.contextStart= POS[4*i];
1058 pos.contextLimit = POS[4*i+1];
1059 pos.start = POS[4*i+2];
1060 pos.limit = POS[4*i+3];
1061 UnicodeString rsource(DATA[3*i+1]);
1062 t->transliterate(rsource, pos, status);
1063 if (U_FAILURE(status)) {
1064 delete t;
1065 errln("FAIL: transliterate");
1066 return;
1067 }
1068 t->finishTransliteration(rsource, pos);
1069 expectAux(DATA[3*i],
1070 DATA[3*i+1],
1071 rsource,
1072 DATA[3*i+2]);
1073 delete t;
1074 }
1075 }
1076
1077 /**
1078 * Test the Hiragana-Katakana transliterator.
1079 */
TestHiraganaKatakana(void)1080 void TransliteratorTest::TestHiraganaKatakana(void) {
1081 UParseError parseError;
1082 UErrorCode status = U_ZERO_ERROR;
1083 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1084 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1085 if (hk == 0 || kh == 0) {
1086 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1087 delete hk;
1088 delete kh;
1089 return;
1090 }
1091
1092 // Array of 3n items
1093 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094 const char* DATA[] = {
1095 "both",
1096 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097 "\\u30A2\\u30F8\\u30F2\\u30B0",
1098
1099 "kh",
1100 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1102 };
1103 int32_t DATA_length = UPRV_LENGTHOF(DATA);
1104
1105 for (int32_t i=0; i<DATA_length; i+=3) {
1106 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1107 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1108 switch (*DATA[i]) {
1109 case 0x68: //'h': // Hiragana-Katakana
1110 expect(*hk, h, k);
1111 break;
1112 case 0x6B: //'k': // Katakana-Hiragana
1113 expect(*kh, k, h);
1114 break;
1115 case 0x62: //'b': // both
1116 expect(*hk, h, k);
1117 expect(*kh, k, h);
1118 break;
1119 }
1120 }
1121 delete hk;
1122 delete kh;
1123 }
1124
1125 /**
1126 * Test cloning / copy constructor of RBT.
1127 */
TestCopyJ476(void)1128 void TransliteratorTest::TestCopyJ476(void) {
1129 // The real test here is what happens when the destructors are
1130 // called. So we let one object get destructed, and check to
1131 // see that its copy still works.
1132 Transliterator *t2 = 0;
1133 {
1134 UParseError parseError;
1135 UErrorCode status = U_ZERO_ERROR;
1136 Transliterator *t1 = Transliterator::createFromRules("t1",
1137 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1138 if (U_FAILURE(status)) {
1139 errln("FAIL: RBT constructor");
1140 return;
1141 }
1142 t2 = t1->clone(); // Call copy constructor under the covers.
1143 expect(*t1, "abcfoofoo", "ABcbar");
1144 delete t1;
1145 }
1146 expect(*t2, "abcfoofoo", "ABcbar");
1147 delete t2;
1148 }
1149
1150 /**
1151 * Test inter-Indic transliterators. These are composed.
1152 * ICU4C Jitterbug 483.
1153 */
TestInterIndic(void)1154 void TransliteratorTest::TestInterIndic(void) {
1155 UnicodeString ID("Devanagari-Gujarati", "");
1156 UErrorCode status = U_ZERO_ERROR;
1157 UParseError parseError;
1158 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1159 if (dg == 0) {
1160 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1161 return;
1162 }
1163 UnicodeString id = dg->getID();
1164 if (id != ID) {
1165 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1166 }
1167 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169 expect(*dg, dev, guj);
1170 delete dg;
1171 }
1172
1173 /**
1174 * Test filter syntax in IDs. (J918)
1175 */
TestFilterIDs(void)1176 void TransliteratorTest::TestFilterIDs(void) {
1177 // Array of 3n strings:
1178 // <id>, <inverse id>, <input>, <expected output>
1179 const char* DATA[] = {
1180 "[aeiou]Any-Hex", // ID
1181 "[aeiou]Hex-Any", // expected inverse ID
1182 "quizzical", // src
1183 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1184
1185 "[aeiou]Any-Hex;[^5]Hex-Any",
1186 "[^5]Any-Hex;[aeiou]Hex-Any",
1187 "quizzical",
1188 "q\\u0075izzical",
1189
1190 "[abc]Null",
1191 "[abc]Null",
1192 "xyz",
1193 "xyz",
1194 };
1195 enum { DATA_length = UPRV_LENGTHOF(DATA) };
1196
1197 for (int i=0; i<DATA_length; i+=4) {
1198 UnicodeString ID(DATA[i], "");
1199 UnicodeString uID(DATA[i+1], "");
1200 UnicodeString data2(DATA[i+2], "");
1201 UnicodeString data3(DATA[i+3], "");
1202 UParseError parseError;
1203 UErrorCode status = U_ZERO_ERROR;
1204 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1205 if (t == 0) {
1206 errln("FAIL: createInstance(" + ID + ") returned NULL");
1207 return;
1208 }
1209 expect(*t, data2, data3);
1210
1211 // Check the ID
1212 if (ID != t->getID()) {
1213 errln("FAIL: createInstance(" + ID + ").getID() => " +
1214 t->getID());
1215 }
1216
1217 // Check the inverse
1218 Transliterator *u = t->createInverse(status);
1219 if (u == 0) {
1220 errln("FAIL: " + ID + ".createInverse() returned NULL");
1221 } else if (u->getID() != uID) {
1222 errln("FAIL: " + ID + ".createInverse().getID() => " +
1223 u->getID() + ", expected " + uID);
1224 }
1225
1226 delete t;
1227 delete u;
1228 }
1229 }
1230
1231 /**
1232 * Test the case mapping transliterators.
1233 */
TestCaseMap(void)1234 void TransliteratorTest::TestCaseMap(void) {
1235 UParseError parseError;
1236 UErrorCode status = U_ZERO_ERROR;
1237 Transliterator* toUpper =
1238 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239 Transliterator* toLower =
1240 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241 Transliterator* toTitle =
1242 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1243 if (toUpper==0 || toLower==0 || toTitle==0) {
1244 errln("FAIL: createInstance returned NULL");
1245 delete toUpper;
1246 delete toLower;
1247 delete toTitle;
1248 return;
1249 }
1250
1251 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1252 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254 "the quick brown foX jumped over the lazY dogs.");
1255 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1256 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1257
1258 delete toUpper;
1259 delete toLower;
1260 delete toTitle;
1261 }
1262
1263 /**
1264 * Test the name mapping transliterators.
1265 */
TestNameMap(void)1266 void TransliteratorTest::TestNameMap(void) {
1267 UParseError parseError;
1268 UErrorCode status = U_ZERO_ERROR;
1269 Transliterator* uni2name =
1270 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1271 Transliterator* name2uni =
1272 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1273 if (uni2name==0 || name2uni==0) {
1274 errln("FAIL: createInstance returned NULL");
1275 delete uni2name;
1276 delete name2uni;
1277 return;
1278 }
1279
1280 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1283 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1285
1286 delete uni2name;
1287 delete name2uni;
1288
1289 // round trip
1290 Transliterator* t =
1291 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1292 if (t==0) {
1293 errln("FAIL: createInstance returned NULL");
1294 delete t;
1295 return;
1296 }
1297
1298 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1300 expect(*t, s, s);
1301 delete t;
1302 }
1303
1304 /**
1305 * Test liberalized ID syntax. 1006c
1306 */
TestLiberalizedID(void)1307 void TransliteratorTest::TestLiberalizedID(void) {
1308 // Some test cases have an expected getID() value of NULL. This
1309 // means I have disabled the test case for now. This stuff is
1310 // still under development, and I haven't decided whether to make
1311 // getID() return canonical case yet. It will all get rewritten
1312 // with the move to Source-Target/Variant IDs anyway. [aliu]
1313 const char* DATA[] = {
1314 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1315 " Null ", "Null", "whitespace",
1316 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1317 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1318 };
1319 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1320 UParseError parseError;
1321 UErrorCode status= U_ZERO_ERROR;
1322 for (int32_t i=0; i<DATA_length; i+=3) {
1323 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1324 if (t == 0) {
1325 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1326 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1327 } else {
1328 UnicodeString exp;
1329 if (DATA[i+1]) {
1330 exp = UnicodeString(DATA[i+1], "");
1331 }
1332 // Don't worry about getID() if the expected char*
1333 // is NULL -- see above.
1334 if (exp.length() == 0 || exp == t->getID()) {
1335 logln(UnicodeString("Ok: ") + DATA[i+2] +
1336 " create ID \"" + DATA[i] + "\" => \"" +
1337 exp + "\"");
1338 } else {
1339 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1340 " create ID \"" + DATA[i] + "\" => \"" +
1341 t->getID() + "\", exp \"" + exp + "\"");
1342 }
1343 delete t;
1344 }
1345 }
1346 }
1347
1348 /* test for Jitterbug 912 */
TestCreateInstance()1349 void TransliteratorTest::TestCreateInstance(){
1350 const char* FORWARD = "F";
1351 const char* REVERSE = "R";
1352 const char* DATA[] = {
1353 // Column 1: id
1354 // Column 2: direction
1355 // Column 3: expected ID, or "" if expect failure
1356 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1357
1358 // JB#2689: bad compound causes crash
1359 "InvalidSource-InvalidTarget", FORWARD, "",
1360 "InvalidSource-InvalidTarget", REVERSE, "",
1361 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1362 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1363 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1364 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1365
1366 NULL
1367 };
1368
1369 for (int32_t i=0; DATA[i]; i+=3) {
1370 UParseError err;
1371 UErrorCode ec = U_ZERO_ERROR;
1372 UnicodeString id(DATA[i]);
1373 UTransDirection dir = (DATA[i+1]==FORWARD)?
1374 UTRANS_FORWARD:UTRANS_REVERSE;
1375 UnicodeString expID(DATA[i+2]);
1376 Transliterator* t =
1377 Transliterator::createInstance(id,dir,err,ec);
1378 UnicodeString newID;
1379 if (t) {
1380 newID = t->getID();
1381 }
1382 UBool ok = (newID == expID);
1383 if (!t) {
1384 newID = u_errorName(ec);
1385 }
1386 if (ok) {
1387 logln((UnicodeString)"Ok: createInstance(" +
1388 id + "," + DATA[i+1] + ") => " + newID);
1389 } else {
1390 dataerrln((UnicodeString)"FAIL: createInstance(" +
1391 id + "," + DATA[i+1] + ") => " + newID +
1392 ", expected " + expID);
1393 }
1394 delete t;
1395 }
1396 }
1397
1398 /**
1399 * Test the normalization transliterator.
1400 */
TestNormalizationTransliterator()1401 void TransliteratorTest::TestNormalizationTransliterator() {
1402 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404 const char* CANON[] = {
1405 // Input Decomposed Composed
1406 "cat", "cat", "cat" ,
1407 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1408
1409 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1410 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1411
1412 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1413 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1414 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1415
1416 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1418
1419 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1420 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1421 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1422
1423 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1424 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1425
1426 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1427 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1428
1429 "Henry IV", "Henry IV", "Henry IV" ,
1430 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1431
1432 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1433 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1434 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1435 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1436 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1437
1438 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1439 0 // end
1440 };
1441
1442 const char* COMPAT[] = {
1443 // Input Decomposed Composed
1444 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1445
1446 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1447 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1448
1449 "Henry IV", "Henry IV", "Henry IV" ,
1450 "Henry \\u2163", "Henry IV", "Henry IV" ,
1451
1452 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1453 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1454
1455 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1456 0 // end
1457 };
1458
1459 int32_t i;
1460 UParseError parseError;
1461 UErrorCode status = U_ZERO_ERROR;
1462 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1463 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1464 if (!NFD || !NFC) {
1465 dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1466 delete NFD;
1467 delete NFC;
1468 return;
1469 }
1470 for (i=0; CANON[i]; i+=3) {
1471 UnicodeString in = CharsToUnicodeString(CANON[i]);
1472 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1473 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1474 expect(*NFD, in, expd);
1475 expect(*NFC, in, expc);
1476 }
1477 delete NFD;
1478 delete NFC;
1479
1480 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1481 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1482 if (!NFKD || !NFKC) {
1483 dataerrln("FAIL: createInstance failed");
1484 delete NFKD;
1485 delete NFKC;
1486 return;
1487 }
1488 for (i=0; COMPAT[i]; i+=3) {
1489 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1490 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1491 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1492 expect(*NFKD, in, expkd);
1493 expect(*NFKC, in, expkc);
1494 }
1495 delete NFKD;
1496 delete NFKC;
1497
1498 UParseError pe;
1499 status = U_ZERO_ERROR;
1500 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1501 UTRANS_FORWARD,
1502 pe, status);
1503 if (t == 0) {
1504 errln("FAIL: createInstance failed");
1505 }
1506 expect(*t, CharsToUnicodeString("\\u010dx"),
1507 CharsToUnicodeString("c\\u030C"));
1508 delete t;
1509 }
1510
1511 /**
1512 * Test compound RBT rules.
1513 */
TestCompoundRBT(void)1514 void TransliteratorTest::TestCompoundRBT(void) {
1515 // Careful with spacing and ';' here: Phrase this exactly
1516 // as toRules() is going to return it. If toRules() changes
1517 // with regard to spacing or ';', then adjust this string.
1518 UnicodeString rule("::Hex-Any;\n"
1519 "::Any-Lower;\n"
1520 "a > '.A.';\n"
1521 "b > '.B.';\n"
1522 "::[^t]Any-Upper;", "");
1523 UParseError parseError;
1524 UErrorCode status = U_ZERO_ERROR;
1525 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1526 if (t == 0) {
1527 errln("FAIL: createFromRules failed");
1528 return;
1529 }
1530 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1532 UnicodeString r;
1533 t->toRules(r, TRUE);
1534 if (r == rule) {
1535 logln((UnicodeString)"OK: toRules() => " + r);
1536 } else {
1537 errln((UnicodeString)"FAIL: toRules() => " + r +
1538 ", expected " + rule);
1539 }
1540 delete t;
1541
1542 // Now test toRules
1543 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1544 if (t == 0) {
1545 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1546 return;
1547 }
1548 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549 t->toRules(r, TRUE);
1550 if (r != exp) {
1551 errln((UnicodeString)"FAIL: toRules() => " + r +
1552 ", expected " + exp);
1553 } else {
1554 logln((UnicodeString)"OK: toRules() => " + r);
1555 }
1556 delete t;
1557
1558 // Round trip the result of toRules
1559 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1560 if (t == 0) {
1561 errln("FAIL: createFromRules #2 failed");
1562 return;
1563 } else {
1564 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1565 }
1566
1567 // Test toRules again
1568 t->toRules(r, TRUE);
1569 if (r != exp) {
1570 errln((UnicodeString)"FAIL: toRules() => " + r +
1571 ", expected " + exp);
1572 } else {
1573 logln((UnicodeString)"OK: toRules() => " + r);
1574 }
1575
1576 delete t;
1577
1578 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1579 // to what the regenerated ID will look like.
1580 UnicodeString id("Upper(Lower);(NFKC)", "");
1581 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1582 if (t == 0) {
1583 errln("FAIL: createInstance #2 failed");
1584 return;
1585 }
1586 if (t->getID() == id) {
1587 logln((UnicodeString)"OK: created " + id);
1588 } else {
1589 errln((UnicodeString)"FAIL: createInstance(" + id +
1590 ").getID() => " + t->getID());
1591 }
1592
1593 Transliterator *u = t->createInverse(status);
1594 if (u == 0) {
1595 errln("FAIL: createInverse failed");
1596 delete t;
1597 return;
1598 }
1599 exp = "NFKC();Lower(Upper)";
1600 if (u->getID() == exp) {
1601 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1602 u->getID());
1603 } else {
1604 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1605 u->getID());
1606 }
1607 delete t;
1608 delete u;
1609 }
1610
1611 /**
1612 * Compound filter semantics were orginially not implemented
1613 * correctly. Originally, each component filter f(i) is replaced by
1614 * f'(i) = f(i) && g, where g is the filter for the compound
1615 * transliterator.
1616 *
1617 * From Mark:
1618 *
1619 * Suppose and I have a transliterator X. Internally X is
1620 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1621 *
1622 * The compound should convert all greek characters (through latin) to
1623 * cyrillic, then lowercase the result. The filter should say "don't
1624 * touch 'A' in the original". But because an intermediate result
1625 * happens to go through "A", the Greek Alpha gets hung up.
1626 */
TestCompoundFilter(void)1627 void TransliteratorTest::TestCompoundFilter(void) {
1628 UParseError parseError;
1629 UErrorCode status = U_ZERO_ERROR;
1630 Transliterator *t = Transliterator::createInstance
1631 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1632 if (t == 0) {
1633 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1634 return;
1635 }
1636 t->adoptFilter(new UnicodeSet("[^A]", status));
1637 if (U_FAILURE(status)) {
1638 errln("FAIL: UnicodeSet ct failed");
1639 delete t;
1640 return;
1641 }
1642
1643 // Only the 'A' at index 1 should remain unchanged
1644 expect(*t,
1645 CharsToUnicodeString("BA\\u039A\\u0391"),
1646 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1647 delete t;
1648 }
1649
TestRemove(void)1650 void TransliteratorTest::TestRemove(void) {
1651 UParseError parseError;
1652 UErrorCode status = U_ZERO_ERROR;
1653 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1654 if (t == 0) {
1655 errln("FAIL: createInstance failed");
1656 return;
1657 }
1658
1659 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1660
1661 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662 // duplicating the filter
1663 Transliterator* t2 = t->clone();
1664 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1665
1666 delete t;
1667 delete t2;
1668 }
1669
TestToRules(void)1670 void TransliteratorTest::TestToRules(void) {
1671 const char* RBT = "rbt";
1672 const char* SET = "set";
1673 static const char* DATA[] = {
1674 RBT,
1675 "$a=\\u4E61; [$a] > A;",
1676 "[\\u4E61] > A;",
1677
1678 RBT,
1679 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680 "[[:Zs:][:Zl:]]{a} > A;",
1681
1682 SET,
1683 "[[:Zs:][:Zl:]]",
1684 "[[:Zs:][:Zl:]]",
1685
1686 SET,
1687 "[:Ps:]",
1688 "[:Ps:]",
1689
1690 SET,
1691 "[:L:]",
1692 "[:L:]",
1693
1694 SET,
1695 "[[:L:]-[A]]",
1696 "[[:L:]-[A]]",
1697
1698 SET,
1699 "[~[:Lu:][:Ll:]]",
1700 "[~[:Lu:][:Ll:]]",
1701
1702 SET,
1703 "[~[a-z]]",
1704 "[~[a-z]]",
1705
1706 RBT,
1707 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708 "[^[:Zs:]]{a} > A;",
1709
1710 RBT,
1711 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712 "[[a-z]-[:Zs:]]{a} > A;",
1713
1714 RBT,
1715 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716 "[[:Zs:]&[a-z]]{a} > A;",
1717
1718 RBT,
1719 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720 "[x[:Zs:]]{a} > A;",
1721
1722 RBT,
1723 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724 "$macron = \\u0304 ;"
1725 "$evowel = [aeiouyAEIOUY] ;"
1726 "$iotasub = \\u0345 ;"
1727 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729
1730 RBT,
1731 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733 };
1734 static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1735
1736 for (int32_t d=0; d < DATA_length; d+=3) {
1737 if (DATA[d] == RBT) {
1738 // Transliterator test
1739 UParseError parseError;
1740 UErrorCode status = U_ZERO_ERROR;
1741 Transliterator *t = Transliterator::createFromRules("ID",
1742 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1743 if (t == 0) {
1744 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1745 return;
1746 }
1747 UnicodeString rules, escapedRules;
1748 t->toRules(rules, FALSE);
1749 t->toRules(escapedRules, TRUE);
1750 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1751 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1752 if (rules == expRules) {
1753 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754 " => " + rules);
1755 } else {
1756 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1757 " => " + rules + ", exp " + expRules);
1758 }
1759 if (escapedRules == expEscapedRules) {
1760 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761 " => " + escapedRules);
1762 } else {
1763 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1764 " => " + escapedRules + ", exp " + expEscapedRules);
1765 }
1766 delete t;
1767
1768 } else {
1769 // UnicodeSet test
1770 UErrorCode status = U_ZERO_ERROR;
1771 UnicodeString pat(DATA[d+1], -1, US_INV);
1772 UnicodeString expToPat(DATA[d+2], -1, US_INV);
1773 UnicodeSet set(pat, status);
1774 if (U_FAILURE(status)) {
1775 errln("FAIL: UnicodeSet ct failed");
1776 return;
1777 }
1778 // Adjust spacing etc. as necessary.
1779 UnicodeString toPat;
1780 set.toPattern(toPat);
1781 if (expToPat == toPat) {
1782 logln((UnicodeString)"Ok: " + pat +
1783 " => " + toPat);
1784 } else {
1785 errln((UnicodeString)"FAIL: " + pat +
1786 " => " + prettify(toPat, TRUE) +
1787 ", exp " + prettify(pat, TRUE));
1788 }
1789 }
1790 }
1791 }
1792
TestContext()1793 void TransliteratorTest::TestContext() {
1794 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1795 expect("de > x; {d}e > y;",
1796 "de",
1797 "ye",
1798 &pos);
1799
1800 expect("ab{c} > z;",
1801 "xadabdabcy",
1802 "xadabdabzy");
1803 }
1804
TestSupplemental()1805 void TransliteratorTest::TestSupplemental() {
1806
1807 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1808 "a > $a; $s > i;"),
1809 CharsToUnicodeString("ab\\U0001030Fx"),
1810 CharsToUnicodeString("\\U00010300bix"));
1811
1812 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813 "$b=[A-Z\\U00010400-\\U0001044D];"
1814 "($a)($b) > $2 $1;"),
1815 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1817
1818 // k|ax\\U00010300xm
1819
1820 // k|a\\U00010400\\U00010300xm
1821 // ky|\\U00010400\\U00010300xm
1822 // ky\\U00010400|\\U00010300xm
1823
1824 // ky\\U00010400|\\U00010300\\U00010400m
1825 // ky\\U00010400y|\\U00010400m
1826 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827 "$a {x} > | @ \\U00010400;"
1828 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829 CharsToUnicodeString("kax\\U00010300xm"),
1830 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1831
1832 expectT("Any-Name",
1833 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1835
1836 expectT("Any-Hex/Unicode",
1837 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1839
1840 expectT("Any-Hex/C",
1841 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1843
1844 expectT("Any-Hex/Perl",
1845 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1847
1848 expectT("Any-Hex/Java",
1849 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1851
1852 expectT("Any-Hex/XML",
1853 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854 "𐌰􏼀󠁡 ");
1855
1856 expectT("Any-Hex/XML10",
1857 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858 "𐌰􏼀󠁡 ");
1859
1860 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1863 }
1864
TestQuantifier()1865 void TransliteratorTest::TestQuantifier() {
1866
1867 // Make sure @ in a quantified anteContext works
1868 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869 "AAAAAb",
1870 "aaa(aac)");
1871
1872 // Make sure @ in a quantified postContext works
1873 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874 "baaaaa",
1875 "caa(aaa)");
1876
1877 // Make sure @ in a quantified postContext with seg ref works
1878 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879 "baaaaa",
1880 "baa(aaa)");
1881
1882 // Make sure @ past ante context doesn't enter ante context
1883 UTransPosition pos = {0, 5, 3, 5};
1884 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1885 "xxxab",
1886 "xxx(ac)",
1887 &pos);
1888
1889 // Make sure @ past post context doesn't pass limit
1890 UTransPosition pos2 = {0, 4, 0, 2};
1891 expect("{b} a+ > c @@ |; x > y; a > A;",
1892 "baxx",
1893 "caxx",
1894 &pos2);
1895
1896 // Make sure @ past post context doesn't enter post context
1897 expect("{b} a+ > c @@ |; x > y; a > A;",
1898 "baxx",
1899 "cayy");
1900
1901 expect("(ab)? c > d;",
1902 "c abc ababc",
1903 "d d abd");
1904
1905 // NOTE: The (ab)+ when referenced just yields a single "ab",
1906 // not the full sequence of them. This accords with perl behavior.
1907 expect("(ab)+ {x} > '(' $1 ')';",
1908 "x abx ababxy",
1909 "x ab(ab) abab(ab)y");
1910
1911 expect("b+ > x;",
1912 "ac abc abbc abbbc",
1913 "ac axc axc axc");
1914
1915 expect("[abc]+ > x;",
1916 "qac abrc abbcs abtbbc",
1917 "qx xrx xs xtx");
1918
1919 expect("q{(ab)+} > x;",
1920 "qa qab qaba qababc qaba",
1921 "qa qx qxa qxc qxa");
1922
1923 expect("q(ab)* > x;",
1924 "qa qab qaba qababc",
1925 "xa x xa xc");
1926
1927 // NOTE: The (ab)+ when referenced just yields a single "ab",
1928 // not the full sequence of them. This accords with perl behavior.
1929 expect("q(ab)* > '(' $1 ')';",
1930 "qa qab qaba qababc",
1931 "()a (ab) (ab)a (ab)c");
1932
1933 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1934 // quoted string
1935 expect("'ab'+ > x;",
1936 "bb ab ababb",
1937 "bb x xb");
1938
1939 // $foo+ and $foo* -- the quantifier should apply to the entire
1940 // variable reference
1941 expect("$var = ab; $var+ > x;",
1942 "bb ab ababb",
1943 "bb x xb");
1944 }
1945
1946 class TestTrans : public Transliterator {
1947 public:
TestTrans(const UnicodeString & id)1948 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1949 }
clone(void) const1950 virtual Transliterator* clone(void) const {
1951 return new TestTrans(getID());
1952 }
handleTransliterate(Replaceable &,UTransPosition & offsets,UBool) const1953 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1954 UBool /*isIncremental*/) const
1955 {
1956 offsets.start = offsets.limit;
1957 }
1958 virtual UClassID getDynamicClassID() const;
1959 static UClassID U_EXPORT2 getStaticClassID();
1960 };
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1962
1963 /**
1964 * Test Source-Target/Variant.
1965 */
1966 void TransliteratorTest::TestSTV(void) {
1967 int32_t ns = Transliterator::countAvailableSources();
1968 if (ns < 0 || ns > 255) {
1969 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1970 return;
1971 }
1972 int32_t i, j;
1973 for (i=0; i<ns; ++i) {
1974 UnicodeString source;
1975 Transliterator::getAvailableSource(i, source);
1976 logln((UnicodeString)"" + i + ": " + source);
1977 if (source.length() == 0) {
1978 errln("FAIL: empty source");
1979 continue;
1980 }
1981 int32_t nt = Transliterator::countAvailableTargets(source);
1982 if (nt < 0 || nt > 255) {
1983 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1984 continue;
1985 }
1986 for (int32_t j=0; j<nt; ++j) {
1987 UnicodeString target;
1988 Transliterator::getAvailableTarget(j, source, target);
1989 logln((UnicodeString)" " + j + ": " + target);
1990 if (target.length() == 0) {
1991 errln("FAIL: empty target");
1992 continue;
1993 }
1994 int32_t nv = Transliterator::countAvailableVariants(source, target);
1995 if (nv < 0 || nv > 255) {
1996 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1997 continue;
1998 }
1999 for (int32_t k=0; k<nv; ++k) {
2000 UnicodeString variant;
2001 Transliterator::getAvailableVariant(k, source, target, variant);
2002 if (variant.length() == 0) {
2003 logln((UnicodeString)" " + k + ": <empty>");
2004 } else {
2005 logln((UnicodeString)" " + k + ": " + variant);
2006 }
2007 }
2008 }
2009 }
2010
2011 // Test registration
2012 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2015 for (i=0; i<3; ++i) {
2016 Transliterator *t = new TestTrans(IDS[i]);
2017 if (t == 0) {
2018 errln("FAIL: out of memory");
2019 return;
2020 }
2021 if (t->getID() != IDS[i]) {
2022 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2023 delete t;
2024 return;
2025 }
2026 Transliterator::registerInstance(t);
2027 UErrorCode status = U_ZERO_ERROR;
2028 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029 if (t == NULL) {
2030 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2031 IDS[i]);
2032 } else {
2033 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2034 IDS[i]);
2035 delete t;
2036 }
2037 Transliterator::unregister(IDS[i]);
2038 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2039 if (t != NULL) {
2040 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2041 IDS[i]);
2042 delete t;
2043 }
2044 }
2045
2046 // Make sure getAvailable API reflects removal
2047 int32_t n = Transliterator::countAvailableIDs();
2048 for (i=0; i<n; ++i) {
2049 UnicodeString id = Transliterator::getAvailableID(i);
2050 for (j=0; j<3; ++j) {
2051 if (id.caseCompare(FULL_IDS[j],0)==0) {
2052 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2053 }
2054 }
2055 }
2056 n = Transliterator::countAvailableTargets("Any");
2057 for (i=0; i<n; ++i) {
2058 UnicodeString t;
2059 Transliterator::getAvailableTarget(i, "Any", t);
2060 if (t.caseCompare(IDS[0],0)==0) {
2061 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2062 }
2063 }
2064 n = Transliterator::countAvailableSources();
2065 for (i=0; i<n; ++i) {
2066 UnicodeString s;
2067 Transliterator::getAvailableSource(i, s);
2068 for (j=0; j<3; ++j) {
2069 if (SOURCES[j] == NULL) continue;
2070 if (s.caseCompare(SOURCES[j],0)==0) {
2071 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2072 }
2073 }
2074 }
2075 }
2076
2077 /**
2078 * Test inverse of Greek-Latin; Title()
2079 */
TestCompoundInverse(void)2080 void TransliteratorTest::TestCompoundInverse(void) {
2081 UParseError parseError;
2082 UErrorCode status = U_ZERO_ERROR;
2083 Transliterator *t = Transliterator::createInstance
2084 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2085 if (t == 0) {
2086 dataerrln("FAIL: createInstance - %s", u_errorName(status));
2087 return;
2088 }
2089 UnicodeString exp("(Title);Latin-Greek");
2090 if (t->getID() == exp) {
2091 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092 t->getID());
2093 } else {
2094 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095 t->getID() + "\", expected \"" + exp + "\"");
2096 }
2097 delete t;
2098 }
2099
2100 /**
2101 * Test NFD chaining with RBT
2102 */
TestNFDChainRBT()2103 void TransliteratorTest::TestNFDChainRBT() {
2104 UParseError pe;
2105 UErrorCode ec = U_ZERO_ERROR;
2106 Transliterator* t = Transliterator::createFromRules(
2107 "TEST", "::NFD; aa > Q; a > q;",
2108 UTRANS_FORWARD, pe, ec);
2109 if (t == NULL || U_FAILURE(ec)) {
2110 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2111 return;
2112 }
2113 expect(*t, "aa", "Q");
2114 delete t;
2115
2116 // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=- UnicodeString s, s2;
2118 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=- expect(*t, s, s2);
2122 //=- delete t;
2123 //=-
2124 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=- expect(*t, s2, s);
2126 //=- delete t;
2127 //=-
2128 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=- expect(*t, s, s);
2131 //=- delete t;
2132
2133 // const char* source[] = {
2134 // /*
2135 // "\\u015Br\\u012Bmad",
2136 // "bhagavadg\\u012Bt\\u0101",
2137 // "adhy\\u0101ya",
2138 // "arjuna",
2139 // "vi\\u1E63\\u0101da",
2140 // "y\\u014Dga",
2141 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 // "uv\\u0101cr\\u0325",
2143 // */
2144 // "rmk\\u1E63\\u0113t",
2145 // //"dharmak\\u1E63\\u0113tr\\u0113",
2146 // /*
2147 // "kuruk\\u1E63\\u0113tr\\u0113",
2148 // "samav\\u0113t\\u0101",
2149 // "yuyutsava-\\u1E25",
2150 // "m\\u0101mak\\u0101-\\u1E25",
2151 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2152 // "kimakurvata",
2153 // "san\\u0304java",
2154 // */
2155 //
2156 // 0
2157 // };
2158 // const char* expected[] = {
2159 // /*
2160 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 // "\\u092f\\u094b\\u0917",
2166 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168 // */
2169 // "\\u0927",
2170 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 // /*
2172 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 // "\\u0938\\u0902\\u091c\\u0935",
2179 // */
2180 // 0
2181 // };
2182 // UErrorCode status = U_ZERO_ERROR;
2183 // UParseError parseError;
2184 // UnicodeString message;
2185 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 // if(U_FAILURE(status)){
2188 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2189 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 // delete latinToDevToLatin;
2191 // delete devToLatinToDev;
2192 // return;
2193 // }
2194 // UnicodeString gotResult;
2195 // for(int i= 0; source[i] != 0; i++){
2196 // gotResult = source[i];
2197 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2199 // }
2200 // delete latinToDevToLatin;
2201 // delete devToLatinToDev;
2202 }
2203
2204 /**
2205 * Inverse of "Null" should be "Null". (J21)
2206 */
TestNullInverse()2207 void TransliteratorTest::TestNullInverse() {
2208 UParseError pe;
2209 UErrorCode ec = U_ZERO_ERROR;
2210 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2211 if (t == 0 || U_FAILURE(ec)) {
2212 errln("FAIL: createInstance");
2213 return;
2214 }
2215 Transliterator *u = t->createInverse(ec);
2216 if (u == 0 || U_FAILURE(ec)) {
2217 errln("FAIL: createInverse");
2218 delete t;
2219 return;
2220 }
2221 if (u->getID() != "Null") {
2222 errln("FAIL: Inverse of Null should be Null");
2223 }
2224 delete t;
2225 delete u;
2226 }
2227
2228 /**
2229 * Check ID of inverse of alias. (J22)
2230 */
TestAliasInverseID()2231 void TransliteratorTest::TestAliasInverseID() {
2232 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2233 UParseError pe;
2234 UErrorCode ec = U_ZERO_ERROR;
2235 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2236 if (t == 0 || U_FAILURE(ec)) {
2237 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2238 return;
2239 }
2240 Transliterator *u = t->createInverse(ec);
2241 if (u == 0 || U_FAILURE(ec)) {
2242 errln("FAIL: createInverse");
2243 delete t;
2244 return;
2245 }
2246 UnicodeString exp = "Hangul-Latin";
2247 UnicodeString got = u->getID();
2248 if (got != exp) {
2249 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2250 ", expected " + exp);
2251 }
2252 delete t;
2253 delete u;
2254 }
2255
2256 /**
2257 * Test IDs of inverses of compound transliterators. (J20)
2258 */
TestCompoundInverseID()2259 void TransliteratorTest::TestCompoundInverseID() {
2260 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2261 UParseError pe;
2262 UErrorCode ec = U_ZERO_ERROR;
2263 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2264 if (t == 0 || U_FAILURE(ec)) {
2265 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2266 return;
2267 }
2268 Transliterator *u = t->createInverse(ec);
2269 if (u == 0 || U_FAILURE(ec)) {
2270 errln("FAIL: createInverse");
2271 delete t;
2272 return;
2273 }
2274 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2275 UnicodeString got = u->getID();
2276 if (got != exp) {
2277 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2278 ", expected " + exp);
2279 }
2280 delete t;
2281 delete u;
2282 }
2283
2284 /**
2285 * Test undefined variable.
2286
2287 */
TestUndefinedVariable()2288 void TransliteratorTest::TestUndefinedVariable() {
2289 UnicodeString rule = "$initial } a <> \\u1161;";
2290 UParseError pe;
2291 UErrorCode ec = U_ZERO_ERROR;
2292 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2293 delete t;
2294 if (U_FAILURE(ec)) {
2295 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2296 u_errorName(ec));
2297 return;
2298 }
2299 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2300 u_errorName(ec));
2301 }
2302
2303 /**
2304 * Test empty context.
2305 */
TestEmptyContext()2306 void TransliteratorTest::TestEmptyContext() {
2307 expect(" { a } > b;", "xay a ", "xby b ");
2308 }
2309
2310 /**
2311 * Test compound filter ID syntax
2312 */
TestCompoundFilterID(void)2313 void TransliteratorTest::TestCompoundFilterID(void) {
2314 static const char* DATA[] = {
2315 // Col. 1 = ID or rule set (latter must start with #)
2316
2317 // = columns > 1 are null if expect col. 1 to be illegal =
2318
2319 // Col. 2 = direction, "F..." or "R..."
2320 // Col. 3 = source string
2321 // Col. 4 = exp result
2322
2323 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2324 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2325 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329 NULL,
2330 };
2331
2332 for (int32_t i=0; DATA[i]; i+=4) {
2333 UnicodeString id = CharsToUnicodeString(DATA[i]);
2334 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2335 UTRANS_REVERSE : UTRANS_FORWARD;
2336 UnicodeString source;
2337 UnicodeString exp;
2338 if (DATA[i+2] != NULL) {
2339 source = CharsToUnicodeString(DATA[i+2]);
2340 exp = CharsToUnicodeString(DATA[i+3]);
2341 }
2342 UBool expOk = (DATA[i+1] != NULL);
2343 Transliterator* t = NULL;
2344 UParseError pe;
2345 UErrorCode ec = U_ZERO_ERROR;
2346 if (id.charAt(0) == 0x23/*#*/) {
2347 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2348 } else {
2349 t = Transliterator::createInstance(id, direction, pe, ec);
2350 }
2351 UBool ok = (t != NULL && U_SUCCESS(ec));
2352 UnicodeString transID;
2353 if (t!=0) {
2354 transID = t->getID();
2355 }
2356 else {
2357 transID = UnicodeString("NULL", "");
2358 }
2359 if (ok == expOk) {
2360 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2361 u_errorName(ec));
2362 if (source.length() != 0) {
2363 expect(*t, source, exp);
2364 }
2365 delete t;
2366 } else {
2367 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368 u_errorName(ec));
2369 }
2370 }
2371 }
2372
2373 /**
2374 * Test new property set syntax
2375 */
TestPropertySet()2376 void TransliteratorTest::TestPropertySet() {
2377 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380 }
2381
2382 /**
2383 * Test various failure points of the new 2.0 engine.
2384 */
TestNewEngine()2385 void TransliteratorTest::TestNewEngine() {
2386 UParseError pe;
2387 UErrorCode ec = U_ZERO_ERROR;
2388 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389 if (t == 0 || U_FAILURE(ec)) {
2390 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391 return;
2392 }
2393 // Katakana should be untouched
2394 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397 delete t;
2398
2399 #if 1
2400 // This test will only work if Transliterator.ROLLBACK is
2401 // true. Otherwise, this test will fail, revealing a
2402 // limitation of global filters in incremental mode.
2403 Transliterator *a =
2404 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405 Transliterator *A =
2406 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407 if (U_FAILURE(ec)) {
2408 delete a;
2409 delete A;
2410 return;
2411 }
2412
2413 Transliterator* array[3];
2414 array[0] = a;
2415 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416 array[2] = A;
2417 if (U_FAILURE(ec)) {
2418 errln("FAIL: createInstance NFD");
2419 delete a;
2420 delete A;
2421 delete array[1];
2422 return;
2423 }
2424
2425 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426 if (U_FAILURE(ec)) {
2427 errln("FAIL: UnicodeSet constructor");
2428 delete a;
2429 delete A;
2430 delete array[1];
2431 delete t;
2432 return;
2433 }
2434
2435 expect(*t, "aAaA", "bAbA");
2436
2437 assertTrue("countElements", t->countElements() == 3);
2438 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441 assertSuccess("getElement", ec);
2442
2443 delete a;
2444 delete A;
2445 delete array[1];
2446 delete t;
2447 #endif
2448
2449 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450 "a",
2451 "ax");
2452
2453 UnicodeString gr = CharsToUnicodeString(
2454 "$ddot = \\u0308 ;"
2455 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456 "$rough = \\u0314 ;"
2457 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458 "\\u03b1 <> a ;"
2459 "$rough <> h ;");
2460
2461 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 }
2463
2464 /**
2465 * Test quantified segment behavior. We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467 */
TestQuantifiedSegment(void)2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2469 // The normal case
2470 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472 // The tricky case; the quantifier is around the segment
2473 expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475 // Tricky case in reverse direction
2476 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478 // Check post-context segment
2479 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481 // Test toRule/toPattern for non-quantified segment.
2482 // Careful with spacing here.
2483 UnicodeString r("([a-c]){q} > x $1 x;");
2484 UParseError pe;
2485 UErrorCode ec = U_ZERO_ERROR;
2486 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487 if (U_FAILURE(ec)) {
2488 errln("FAIL: createFromRules");
2489 delete t;
2490 return;
2491 }
2492 UnicodeString rr;
2493 t->toRules(rr, TRUE);
2494 if (r != rr) {
2495 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496 } else {
2497 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498 }
2499 delete t;
2500
2501 // Test toRule/toPattern for quantified segment.
2502 // Careful with spacing here.
2503 r = "([a-c])+{q} > x $1 x;";
2504 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505 if (U_FAILURE(ec)) {
2506 errln("FAIL: createFromRules");
2507 delete t;
2508 return;
2509 }
2510 t->toRules(rr, TRUE);
2511 if (r != rr) {
2512 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513 } else {
2514 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515 }
2516 delete t;
2517 }
2518
2519 //======================================================================
2520 // Ram's tests
2521 //======================================================================
TestDevanagariLatinRT()2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523 const int MAX_LEN= 52;
2524 const char* const source[MAX_LEN] = {
2525 "bh\\u0101rata",
2526 "kra",
2527 "k\\u1E63a",
2528 "khra",
2529 "gra",
2530 "\\u1E45ra",
2531 "cra",
2532 "chra",
2533 "j\\u00F1a",
2534 "jhra",
2535 "\\u00F1ra",
2536 "\\u1E6Dya",
2537 "\\u1E6Dhra",
2538 "\\u1E0Dya",
2539 //"r\\u0323ya", // \u095c is not valid in Devanagari
2540 "\\u1E0Dhya",
2541 "\\u1E5Bhra",
2542 "\\u1E47ra",
2543 "tta",
2544 "thra",
2545 "dda",
2546 "dhra",
2547 "nna",
2548 "pra",
2549 "phra",
2550 "bra",
2551 "bhra",
2552 "mra",
2553 "\\u1E49ra",
2554 //"l\\u0331ra",
2555 "yra",
2556 "\\u1E8Fra",
2557 //"l-",
2558 "vra",
2559 "\\u015Bra",
2560 "\\u1E63ra",
2561 "sra",
2562 "hma",
2563 "\\u1E6D\\u1E6Da",
2564 "\\u1E6D\\u1E6Dha",
2565 "\\u1E6Dh\\u1E6Dha",
2566 "\\u1E0D\\u1E0Da",
2567 "\\u1E0D\\u1E0Dha",
2568 "\\u1E6Dya",
2569 "\\u1E6Dhya",
2570 "\\u1E0Dya",
2571 "\\u1E0Dhya",
2572 // Not roundtrippable --
2573 // \\u0939\\u094d\\u094d\\u092E - hma
2574 // \\u0939\\u094d\\u092E - hma
2575 // CharsToUnicodeString("hma"),
2576 "hya",
2577 "\\u015Br\\u0325",
2578 "\\u015Bca",
2579 "\\u0115",
2580 "san\\u0304j\\u012Bb s\\u0113nagupta",
2581 "\\u0101nand vaddir\\u0101ju",
2582 "\\u0101",
2583 "a"
2584 };
2585 const char* const expected[MAX_LEN] = {
2586 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2587 "\\u0915\\u094D\\u0930", /* kra */
2588 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2589 "\\u0916\\u094D\\u0930", /* khra */
2590 "\\u0917\\u094D\\u0930", /* gra */
2591 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2592 "\\u091A\\u094D\\u0930", /* cra */
2593 "\\u091B\\u094D\\u0930", /* chra */
2594 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2595 "\\u091D\\u094D\\u0930", /* jhra */
2596 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2597 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2598 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2599 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2600 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2601 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2602 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2603 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2604 "\\u0924\\u094D\\u0924", /* tta */
2605 "\\u0925\\u094D\\u0930", /* thra */
2606 "\\u0926\\u094D\\u0926", /* dda */
2607 "\\u0927\\u094D\\u0930", /* dhra */
2608 "\\u0928\\u094D\\u0928", /* nna */
2609 "\\u092A\\u094D\\u0930", /* pra */
2610 "\\u092B\\u094D\\u0930", /* phra */
2611 "\\u092C\\u094D\\u0930", /* bra */
2612 "\\u092D\\u094D\\u0930", /* bhra */
2613 "\\u092E\\u094D\\u0930", /* mra */
2614 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2615 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2616 "\\u092F\\u094D\\u0930", /* yra */
2617 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2618 //"l-",
2619 "\\u0935\\u094D\\u0930", /* vra */
2620 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2621 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2622 "\\u0938\\u094D\\u0930", /* sra */
2623 "\\u0939\\u094d\\u092E", /* hma */
2624 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2625 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2626 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2627 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2628 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2629 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2630 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2631 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2632 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2633 // "hma", /* hma */
2634 "\\u0939\\u094D\\u092F", /* hya */
2635 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2636 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2637 "\\u090d", /* e\\u0306 */
2638 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640 "\\u0906",
2641 "\\u0905",
2642 };
2643 UErrorCode status = U_ZERO_ERROR;
2644 UParseError parseError;
2645 UnicodeString message;
2646 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648 if(U_FAILURE(status)){
2649 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2650 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651 return;
2652 }
2653 UnicodeString gotResult;
2654 for(int i= 0; i<MAX_LEN; i++){
2655 gotResult = source[i];
2656 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658 }
2659 delete latinToDev;
2660 delete devToLatin;
2661 }
2662
TestTeluguLatinRT()2663 void TransliteratorTest::TestTeluguLatinRT(){
2664 const int MAX_LEN=10;
2665 const char* const source[MAX_LEN] = {
2666 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2667 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2668 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2669 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2670 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2671 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2672 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2673 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2674 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2675 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2676 };
2677
2678 const char* const expected[MAX_LEN] = {
2679 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689 };
2690
2691 UErrorCode status = U_ZERO_ERROR;
2692 UParseError parseError;
2693 UnicodeString message;
2694 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696 if(U_FAILURE(status)){
2697 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2698 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699 return;
2700 }
2701 UnicodeString gotResult;
2702 for(int i= 0; i<MAX_LEN; i++){
2703 gotResult = source[i];
2704 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706 }
2707 delete latinToDev;
2708 delete devToLatin;
2709 }
2710
TestSanskritLatinRT()2711 void TransliteratorTest::TestSanskritLatinRT(){
2712 const int MAX_LEN =16;
2713 const char* const source[MAX_LEN] = {
2714 "rmk\\u1E63\\u0113t",
2715 "\\u015Br\\u012Bmad",
2716 "bhagavadg\\u012Bt\\u0101",
2717 "adhy\\u0101ya",
2718 "arjuna",
2719 "vi\\u1E63\\u0101da",
2720 "y\\u014Dga",
2721 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722 "uv\\u0101cr\\u0325",
2723 "dharmak\\u1E63\\u0113tr\\u0113",
2724 "kuruk\\u1E63\\u0113tr\\u0113",
2725 "samav\\u0113t\\u0101",
2726 "yuyutsava\\u1E25",
2727 "m\\u0101mak\\u0101\\u1E25",
2728 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729 "kimakurvata",
2730 "san\\u0304java",
2731 };
2732 const char* const expected[MAX_LEN] = {
2733 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739 "\\u092f\\u094b\\u0917",
2740 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749 "\\u0938\\u0902\\u091c\\u0935",
2750 };
2751 UErrorCode status = U_ZERO_ERROR;
2752 UParseError parseError;
2753 UnicodeString message;
2754 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756 if(U_FAILURE(status)){
2757 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2758 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759 return;
2760 }
2761 UnicodeString gotResult;
2762 for(int i= 0; i<MAX_LEN; i++){
2763 gotResult = source[i];
2764 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766 }
2767 delete latinToDev;
2768 delete devToLatin;
2769 }
2770
2771
TestCompoundLatinRT()2772 void TransliteratorTest::TestCompoundLatinRT(){
2773 const char* const source[] = {
2774 "rmk\\u1E63\\u0113t",
2775 "\\u015Br\\u012Bmad",
2776 "bhagavadg\\u012Bt\\u0101",
2777 "adhy\\u0101ya",
2778 "arjuna",
2779 "vi\\u1E63\\u0101da",
2780 "y\\u014Dga",
2781 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782 "uv\\u0101cr\\u0325",
2783 "dharmak\\u1E63\\u0113tr\\u0113",
2784 "kuruk\\u1E63\\u0113tr\\u0113",
2785 "samav\\u0113t\\u0101",
2786 "yuyutsava\\u1E25",
2787 "m\\u0101mak\\u0101\\u1E25",
2788 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789 "kimakurvata",
2790 "san\\u0304java"
2791 };
2792 const int MAX_LEN = UPRV_LENGTHOF(source);
2793 const char* const expected[MAX_LEN] = {
2794 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800 "\\u092f\\u094b\\u0917",
2801 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810 "\\u0938\\u0902\\u091c\\u0935"
2811 };
2812 if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814 return;
2815 }
2816
2817 UErrorCode status = U_ZERO_ERROR;
2818 UParseError parseError;
2819 UnicodeString message;
2820 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825 if(U_FAILURE(status)){
2826 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2827 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828 return;
2829 }
2830 UnicodeString gotResult;
2831 for(int i= 0; i<MAX_LEN; i++){
2832 gotResult = source[i];
2833 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837 }
2838 delete(latinToDevToLatin);
2839 delete(devToLatinToDev);
2840 delete(devToTelToDev);
2841 delete(latinToTelToLatin);
2842 }
2843
2844 /**
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2846 */
TestGurmukhiDevanagari()2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848 // the rule says:
2849 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2850 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2851 UErrorCode status = U_ZERO_ERROR;
2852 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854 UParseError parseError;
2855
2856 UnicodeSetIterator vIter(vowel);
2857 UnicodeSetIterator nvIter(non_vowel);
2858 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859 if(U_FAILURE(status)) {
2860 dataerrln("Error creating transliterator %s", u_errorName(status));
2861 delete trans;
2862 return;
2863 }
2864 UnicodeString src (" \\u0902", -1, US_INV);
2865 UnicodeString expected(" \\u0A02", -1, US_INV);
2866 src = src.unescape();
2867 expected= expected.unescape();
2868
2869 while(vIter.next()){
2870 src.setCharAt(0,(UChar) vIter.getCodepoint());
2871 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872 expect(*trans,src,expected);
2873 }
2874
2875 expected.setCharAt(1,0x0A70);
2876 while(nvIter.next()){
2877 //src.setCharAt(0,(char) nvIter.codepoint);
2878 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880 expect(*trans,src,expected);
2881 }
2882 delete trans;
2883 }
2884 /**
2885 * Test instantiation from a locale.
2886 */
TestLocaleInstantiation(void)2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2888 UParseError pe;
2889 UErrorCode ec = U_ZERO_ERROR;
2890 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891 if (U_FAILURE(ec)) {
2892 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893 delete t;
2894 return;
2895 }
2896 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897 delete t;
2898
2899 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900 if (U_FAILURE(ec)) {
2901 errln("FAIL: createInstance(en-el)");
2902 delete t;
2903 return;
2904 }
2905 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906 delete t;
2907 }
2908
2909 /**
2910 * Test title case handling of accent (should ignore accents)
2911 */
TestTitleAccents(void)2912 void TransliteratorTest::TestTitleAccents(void) {
2913 UParseError pe;
2914 UErrorCode ec = U_ZERO_ERROR;
2915 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916 if (U_FAILURE(ec)) {
2917 errln("FAIL: createInstance(Title)");
2918 delete t;
2919 return;
2920 }
2921 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922 delete t;
2923 }
2924
2925 /**
2926 * Basic test of a locale resource based rule.
2927 */
TestLocaleResource()2928 void TransliteratorTest::TestLocaleResource() {
2929 const char* DATA[] = {
2930 // id from to
2931 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2932 "Latin-el", "b", "\\u03bc\\u03c0",
2933 "Latin-Greek", "b", "\\u03B2",
2934 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2935 "el-Latin", "\\u03B2", "v",
2936 "Greek-Latin", "\\u03B2", "b",
2937 };
2938 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939 for (int32_t i=0; i<DATA_length; i+=3) {
2940 UParseError pe;
2941 UErrorCode ec = U_ZERO_ERROR;
2942 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943 if (U_FAILURE(ec)) {
2944 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945 delete t;
2946 continue;
2947 }
2948 expect(*t, CharsToUnicodeString(DATA[i+1]),
2949 CharsToUnicodeString(DATA[i+2]));
2950 delete t;
2951 }
2952 }
2953
2954 /**
2955 * Make sure parse errors reference the right line.
2956 */
TestParseError()2957 void TransliteratorTest::TestParseError() {
2958 static const char* rule =
2959 "a > b;\n"
2960 "# more stuff\n"
2961 "d << b;";
2962 UErrorCode ec = U_ZERO_ERROR;
2963 UParseError pe;
2964 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965 delete t;
2966 if (U_FAILURE(ec)) {
2967 UnicodeString err(pe.preContext);
2968 err.append((UChar)124/*|*/).append(pe.postContext);
2969 if (err.indexOf("d << b") >= 0) {
2970 logln("Ok: " + err);
2971 } else {
2972 errln("FAIL: " + err);
2973 }
2974 }
2975 else {
2976 errln("FAIL: no syntax error");
2977 }
2978 static const char* maskingRule =
2979 "a>x;\n"
2980 "# more stuff\n"
2981 "ab>y;";
2982 ec = U_ZERO_ERROR;
2983 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984 if (ec != U_RULE_MASK_ERROR) {
2985 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986 }
2987 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988 errln("FAIL: did not get expected precontext");
2989 }
2990 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991 errln("FAIL: did not get expected postcontext");
2992 }
2993 }
2994
2995 /**
2996 * Make sure sets on output are disallowed.
2997 */
TestOutputSet()2998 void TransliteratorTest::TestOutputSet() {
2999 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000 UErrorCode ec = U_ZERO_ERROR;
3001 UParseError pe;
3002 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003 delete t;
3004 if (U_FAILURE(ec)) {
3005 UnicodeString err(pe.preContext);
3006 err.append((UChar)124/*|*/).append(pe.postContext);
3007 logln("Ok: " + err);
3008 return;
3009 }
3010 errln("FAIL: No syntax error");
3011 }
3012
3013 /**
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3016 */
TestVariableRange()3017 void TransliteratorTest::TestVariableRange() {
3018 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019 UErrorCode ec = U_ZERO_ERROR;
3020 UParseError pe;
3021 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022 delete t;
3023 if (U_FAILURE(ec)) {
3024 UnicodeString err(pe.preContext);
3025 err.append((UChar)124/*|*/).append(pe.postContext);
3026 logln("Ok: " + err);
3027 return;
3028 }
3029 errln("FAIL: No syntax error");
3030 }
3031
3032 /**
3033 * Test invalid post context error handling
3034 */
TestInvalidPostContext()3035 void TransliteratorTest::TestInvalidPostContext() {
3036 UnicodeString rule = "a}b{c>d;";
3037 UErrorCode ec = U_ZERO_ERROR;
3038 UParseError pe;
3039 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040 delete t;
3041 if (U_FAILURE(ec)) {
3042 UnicodeString err(pe.preContext);
3043 err.append((UChar)124/*|*/).append(pe.postContext);
3044 if (err.indexOf("a}b{c") >= 0) {
3045 logln("Ok: " + err);
3046 } else {
3047 errln("FAIL: " + err);
3048 }
3049 return;
3050 }
3051 errln("FAIL: No syntax error");
3052 }
3053
3054 /**
3055 * Test ID form variants
3056 */
TestIDForms()3057 void TransliteratorTest::TestIDForms() {
3058 const char* DATA[] = {
3059 "NFC", NULL, "NFD",
3060 "nfd", NULL, "NFC", // make sure case is ignored
3061 "Any-NFKD", NULL, "Any-NFKC",
3062 "Null", NULL, "Null",
3063 "-nfkc", "nfkc", "NFKD",
3064 "-nfkc/", "nfkc", "NFKD",
3065 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068 "Source-", NULL, NULL,
3069 "Source/Variant-", NULL, NULL,
3070 "Source-/Variant", NULL, NULL,
3071 "/Variant", NULL, NULL,
3072 "/Variant-", NULL, NULL,
3073 "-/Variant", NULL, NULL,
3074 "-/", NULL, NULL,
3075 "-", NULL, NULL,
3076 "/", NULL, NULL,
3077 };
3078 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079
3080 for (int32_t i=0; i<DATA_length; i+=3) {
3081 const char* ID = DATA[i];
3082 const char* expID = DATA[i+1];
3083 const char* expInvID = DATA[i+2];
3084 UBool expValid = (expInvID != NULL);
3085 if (expID == NULL) {
3086 expID = ID;
3087 }
3088 UParseError pe;
3089 UErrorCode ec = U_ZERO_ERROR;
3090 Transliterator *t =
3091 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092 if (U_FAILURE(ec)) {
3093 if (!expValid) {
3094 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095 } else {
3096 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097 }
3098 delete t;
3099 continue;
3100 }
3101 Transliterator *u = t->createInverse(ec);
3102 if (U_FAILURE(ec)) {
3103 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104 delete t;
3105 delete u;
3106 continue;
3107 }
3108 if (t->getID() == expID &&
3109 u->getID() == expInvID) {
3110 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111 } else {
3112 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113 t->getID() + " x getInverse() => " + u->getID() +
3114 ", expected " + expInvID);
3115 }
3116 delete t;
3117 delete u;
3118 }
3119 }
3120
// NUL-terminated UTF-16 strings used by checkRules() as search and
// replacement text for UnicodeString::findAndReplace().
static const UChar SPACE[] = {32,0};   // U+0020 SPACE
static const UChar NEWLINE[] = {10,0}; // U+000A LINE FEED
static const UChar RETURN[] = {13,0};  // U+000D CARRIAGE RETURN
static const UChar EMPTY[] = {0};      // empty string (terminator only)
3125
checkRules(const UnicodeString & label,Transliterator & t2,const UnicodeString & testRulesForward)3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127 const UnicodeString& testRulesForward) {
3128 UnicodeString rules2; t2.toRules(rules2, TRUE);
3129 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130 rules2.findAndReplace(SPACE, EMPTY);
3131 rules2.findAndReplace(NEWLINE, EMPTY);
3132 rules2.findAndReplace(RETURN, EMPTY);
3133
3134 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136 if (rules2 != testRules) {
3137 errln(label);
3138 logln((UnicodeString)"GENERATED RULES: " + rules2);
3139 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3140 }
3141 }
3142
3143 /**
3144 * Mark's toRules test.
3145 */
TestToRulesMark()3146 void TransliteratorTest::TestToRulesMark() {
3147 const char* testRules =
3148 "::[[:Latin:][:Mark:]];"
3149 "::NFKD (NFC);"
3150 "::Lower (Lower);"
3151 "a <> \\u03B1;" // alpha
3152 "::NFKC (NFD);"
3153 "::Upper (Lower);"
3154 "::Lower ();"
3155 "::([[:Greek:][:Mark:]]);"
3156 ;
3157 const char* testRulesForward =
3158 "::[[:Latin:][:Mark:]];"
3159 "::NFKD(NFC);"
3160 "::Lower(Lower);"
3161 "a > \\u03B1;"
3162 "::NFKC(NFD);"
3163 "::Upper (Lower);"
3164 "::Lower ();"
3165 ;
3166 const char* testRulesBackward =
3167 "::[[:Greek:][:Mark:]];"
3168 "::Lower (Upper);"
3169 "::NFD(NFKC);"
3170 "\\u03B1 > a;"
3171 "::Lower(Lower);"
3172 "::NFC(NFKD);"
3173 ;
3174 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177 UParseError pe;
3178 UErrorCode ec = U_ZERO_ERROR;
3179 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3180 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3181
3182 if (U_FAILURE(ec)) {
3183 delete t2;
3184 delete t3;
3185 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186 return;
3187 }
3188
3189 expect(*t2, source, target);
3190 expect(*t3, target, source);
3191
3192 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194
3195 delete t2;
3196 delete t3;
3197 }
3198
3199 /**
3200 * Test Escape and Unescape transliterators.
3201 */
TestEscape()3202 void TransliteratorTest::TestEscape() {
3203 UParseError pe;
3204 UErrorCode ec;
3205 Transliterator *t;
3206
3207 ec = U_ZERO_ERROR;
3208 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3209 if (U_FAILURE(ec)) {
3210 errln((UnicodeString)"FAIL: createInstance");
3211 } else {
3212 expect(*t,
3213 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3214 "@12Q");
3215 }
3216 delete t;
3217
3218 ec = U_ZERO_ERROR;
3219 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3220 if (U_FAILURE(ec)) {
3221 errln((UnicodeString)"FAIL: createInstance");
3222 } else {
3223 expect(*t,
3224 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3226 }
3227 delete t;
3228
3229 ec = U_ZERO_ERROR;
3230 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3231 if (U_FAILURE(ec)) {
3232 errln((UnicodeString)"FAIL: createInstance");
3233 } else {
3234 expect(*t,
3235 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3237 }
3238 delete t;
3239
3240 ec = U_ZERO_ERROR;
3241 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3242 if (U_FAILURE(ec)) {
3243 errln((UnicodeString)"FAIL: createInstance");
3244 } else {
3245 expect(*t,
3246 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3248 }
3249 delete t;
3250 }
3251
3252
TestAnchorMasking()3253 void TransliteratorTest::TestAnchorMasking(){
3254 UnicodeString rule ("^a > Q; a > q;");
3255 UErrorCode status= U_ZERO_ERROR;
3256 UParseError parseError;
3257
3258 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3259 if(U_FAILURE(status)){
3260 errln(UnicodeString("FAIL: ") + "ID" +
3261 ".createFromRules() => bad rules" +
3262 /*", parse error " + parseError.code +*/
3263 ", line " + parseError.line +
3264 ", offset " + parseError.offset +
3265 ", context " + prettify(parseError.preContext, TRUE) +
3266 ", rules: " + prettify(rule, TRUE));
3267 }
3268 delete t;
3269 }
3270
3271 /**
3272 * Make sure display names of variants look reasonable.
3273 */
TestDisplayName()3274 void TransliteratorTest::TestDisplayName() {
3275 #if UCONFIG_NO_FORMATTING
3276 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277 return;
3278 #else
3279 static const char* DATA[] = {
3280 // ID, forward name, reverse name
3281 // Update the text as necessary -- the important thing is
3282 // not the text itself, but how various cases are handled.
3283
3284 // Basic test
3285 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286
3287 // Variants
3288 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289
3290 // Target-only IDs
3291 "NFC", "Any to NFC", "Any to NFD",
3292 };
3293
3294 int32_t DATA_length = UPRV_LENGTHOF(DATA);
3295
3296 Locale US("en", "US");
3297
3298 for (int32_t i=0; i<DATA_length; i+=3) {
3299 UnicodeString name;
3300 Transliterator::getDisplayName(DATA[i], US, name);
3301 if (name != DATA[i+1]) {
3302 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3303 name + ", expected " + DATA[i+1]);
3304 } else {
3305 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3306 }
3307 UErrorCode ec = U_ZERO_ERROR;
3308 UParseError pe;
3309 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3310 if (U_FAILURE(ec)) {
3311 delete t;
3312 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3313 continue;
3314 }
3315 name = Transliterator::getDisplayName(t->getID(), US, name);
3316 if (name != DATA[i+2]) {
3317 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3318 name + ", expected " + DATA[i+2]);
3319 } else {
3320 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3321 }
3322 delete t;
3323 }
3324 #endif
3325 }
3326
TestSpecialCases(void)3327 void TransliteratorTest::TestSpecialCases(void) {
3328 const UnicodeString registerRules[] = {
3329 "Any-Dev1", "x > X; y > Y;",
3330 "Any-Dev2", "XY > Z",
3331 "Greek-Latin/FAKE",
3332 CharsToUnicodeString
3333 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334 "" // END MARKER
3335 };
3336
3337 const UnicodeString testCases[] = {
3338 // NORMALIZATION
3339 // should add more test cases
3340 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344
3345 // mp -> b BUG
3346 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3348
3349 // check for devanagari bug
3350 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3351
3352 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3355
3356 //TODO: enable this test once Titlecase works right
3357 /*
3358 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3360 */
3361 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3363 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3364 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3365
3366 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3368
3369 // FORMS OF S
3370 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3378 // Tatiana bug
3379 // Upper: TAT\\u02B9\\u00C2NA
3380 // Lower: tat\\u02B9\\u00E2na
3381 // Title: Tat\\u02B9\\u00E2na
3382 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3388
3389 "" // END MARKER
3390 };
3391
3392 UParseError pos;
3393 int32_t i;
3394 for (i = 0; registerRules[i].length()!=0; i+=2) {
3395 UErrorCode status = U_ZERO_ERROR;
3396
3397 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3398 registerRules[i+1], UTRANS_FORWARD, pos, status);
3399 if (U_FAILURE(status)) {
3400 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3401 } else {
3402 Transliterator::registerInstance(t);
3403 }
3404 }
3405 for (i = 0; testCases[i].length()!=0; i+=3) {
3406 UErrorCode ec = U_ZERO_ERROR;
3407 UParseError pe;
3408 const UnicodeString& name = testCases[i];
3409 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3410 if (U_FAILURE(ec)) {
3411 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3412 delete t;
3413 continue;
3414 }
3415 const UnicodeString& id = t->getID();
3416 const UnicodeString& source = testCases[i+1];
3417 UnicodeString target;
3418
3419 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3420
3421 if (testCases[i+2].length() > 0) {
3422 target = testCases[i+2];
3423 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3424 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3425 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3426 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3427 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3428 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3429 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3430 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3431 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3432 target = source;
3433 target.toLower(Locale::getUS());
3434 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3435 target = source;
3436 target.toUpper(Locale::getUS());
3437 }
3438 if (U_FAILURE(ec)) {
3439 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3440 continue;
3441 }
3442
3443 expect(*t, source, target);
3444 delete t;
3445 }
3446 for (i = 0; registerRules[i].length()!=0; i+=2) {
3447 Transliterator::unregister(registerRules[i]);
3448 }
3449 }
3450
Char32ToEscapedChars(UChar32 ch,char * buffer)3451 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3452 if (ch <= 0xFFFF) {
3453 sprintf(buffer, "\\u%04x", (int)ch);
3454 } else {
3455 sprintf(buffer, "\\U%08x", (int)ch);
3456 }
3457 return buffer;
3458 }
3459
TestSurrogateCasing(void)3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461 // check that casing handles surrogates
3462 // titlecase is currently defective
3463 char buffer[20];
3464 UChar buffer2[20];
3465 UChar32 dee;
3466 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3467 UnicodeString DEE(u_totitle(dee));
3468 if (DEE != DESERET_DEE) {
3469 err("Fails titlecase of surrogates");
3470 err(Char32ToEscapedChars(dee, buffer));
3471 err(", ");
3472 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3473 }
3474
3475 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3476 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3477 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3478 UErrorCode status= U_ZERO_ERROR;
3479
3480 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3481 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3482 errln("Fails: Can't uppercase surrogates.");
3483 }
3484
3485 status= U_ZERO_ERROR;
3486 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3487 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3488 errln("Fails: Can't lowercase surrogates.");
3489 }
3490 }
3491
_trans(Transliterator & t,const UnicodeString & src,UnicodeString & result)3492 static void _trans(Transliterator& t, const UnicodeString& src,
3493 UnicodeString& result) {
3494 result = src;
3495 t.transliterate(result);
3496 }
3497
_trans(const UnicodeString & id,const UnicodeString & src,UnicodeString & result,UErrorCode ec)3498 static void _trans(const UnicodeString& id, const UnicodeString& src,
3499 UnicodeString& result, UErrorCode ec) {
3500 UParseError pe;
3501 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3502 if (U_SUCCESS(ec)) {
3503 _trans(*t, src, result);
3504 }
3505 delete t;
3506 }
3507
/**
 * Looks up source in a flat list of (key, value) pairs.
 *
 * @param source key to look for; compared with default case folding
 * @param pairs  key/value entries, terminated by an empty key
 * @return the value paired with the first matching key, or an empty string
 *         when no key matches
 */
static UnicodeString _findMatch(const UnicodeString& source,
                                const UnicodeString* pairs) {
    const UnicodeString* entry = pairs;
    while (entry->length() > 0) {
        if (source.caseCompare(*entry, U_FOLD_CASE_DEFAULT) == 0) {
            return entry[1];
        }
        entry += 2;  // step over the key and its value
    }
    return UnicodeString();
}
3518
3519 // Check to see that incremental gets at least part way through a reasonable string.
3520
TestIncrementalProgress(void)3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522 UErrorCode ec = U_ZERO_ERROR;
3523 UnicodeString latinTest = "The Quick Brown Fox.";
3524 UnicodeString devaTest;
3525 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3526 UnicodeString kataTest;
3527 _trans("Latin-Katakana", latinTest, kataTest, ec);
3528 if (U_FAILURE(ec)) {
3529 errln("FAIL: Internal error");
3530 return;
3531 }
3532 const UnicodeString tests[] = {
3533 "Any", latinTest,
3534 "Latin", latinTest,
3535 "Halfwidth", latinTest,
3536 "Devanagari", devaTest,
3537 "Katakana", kataTest,
3538 "" // END MARKER
3539 };
3540
3541 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542 int32_t i = 0, j=0, k=0;
3543 int32_t sources = Transliterator::countAvailableSources();
3544 for (i = 0; i < sources; i++) {
3545 UnicodeString source;
3546 Transliterator::getAvailableSource(i, source);
3547 UnicodeString test = _findMatch(source, tests);
3548 if (test.length() == 0) {
3549 logln((UnicodeString)"Skipping " + source + "-X");
3550 continue;
3551 }
3552 int32_t targets = Transliterator::countAvailableTargets(source);
3553 for (j = 0; j < targets; j++) {
3554 UnicodeString target;
3555 Transliterator::getAvailableTarget(j, source, target);
3556 int32_t variants = Transliterator::countAvailableVariants(source, target);
3557 for (k =0; k< variants; k++) {
3558 UnicodeString variant;
3559 UParseError err;
3560 UErrorCode status = U_ZERO_ERROR;
3561
3562 Transliterator::getAvailableVariant(k, source, target, variant);
3563 UnicodeString id = source + "-" + target + "/" + variant;
3564
3565 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3566 if (U_FAILURE(status)) {
3567 dataerrln((UnicodeString)"FAIL: Could not create " + id);
3568 delete t;
3569 continue;
3570 }
3571 status = U_ZERO_ERROR;
3572 CheckIncrementalAux(t, test);
3573
3574 UnicodeString rev;
3575 _trans(*t, test, rev);
3576 Transliterator *inv = t->createInverse(status);
3577 if (U_FAILURE(status)) {
3578 // The following are forward-only, it is OK that creating an inverse will not work:
3579 // 1. Devanagari-Arabic
3580 // 2. Any-*/BGN
3581 // 2a. Any-*/BGN_1981
3582 // 3. Any-*/UNGEGN
3583 // 4. Any-*/MNS
3584 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3585 if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3586 && !(id.startsWith((UnicodeString)"Any-") &&
3587 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3588 )
3589 #if UCONFIG_NO_BREAK_ITERATION
3590 && id.compare((UnicodeString)"Latin-Thai/") != 0
3591 #endif
3592 )
3593 {
3594 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3595 }
3596 delete t;
3597 delete inv;
3598 continue;
3599 }
3600 CheckIncrementalAux(inv, rev);
3601 delete t;
3602 delete inv;
3603 }
3604 }
3605 }
3606 }
3607
CheckIncrementalAux(const Transliterator * t,const UnicodeString & input)3608 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3609 const UnicodeString& input) {
3610 UErrorCode ec = U_ZERO_ERROR;
3611 UTransPosition pos;
3612 UnicodeString test = input;
3613
3614 pos.contextStart = 0;
3615 pos.contextLimit = input.length();
3616 pos.start = 0;
3617 pos.limit = input.length();
3618
3619 t->transliterate(test, pos, ec);
3620 if (U_FAILURE(ec)) {
3621 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3622 return;
3623 }
3624 UBool gotError = FALSE;
3625 (void)gotError; // Suppress set but not used warning.
3626
3627 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3628
3629 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3630 errln((UnicodeString)"No Progress, " +
3631 t->getID() + ": " + formatInput(test, input, pos));
3632 gotError = TRUE;
3633 } else {
3634 logln((UnicodeString)"PASS Progress, " +
3635 t->getID() + ": " + formatInput(test, input, pos));
3636 }
3637 t->finishTransliteration(test, pos);
3638 if (pos.start != pos.limit) {
3639 errln((UnicodeString)"Incomplete, " +
3640 t->getID() + ": " + formatInput(test, input, pos));
3641 gotError = TRUE;
3642 }
3643 }
3644
TestFunction()3645 void TransliteratorTest::TestFunction() {
3646 // Careful with spacing and ';' here: Phrase this exactly
3647 // as toRules() is going to return it. If toRules() changes
3648 // with regard to spacing or ';', then adjust this string.
3649 UnicodeString rule =
3650 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3651
3652 UParseError pe;
3653 UErrorCode ec = U_ZERO_ERROR;
3654 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3655 if (t == NULL) {
3656 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3657 return;
3658 }
3659
3660 UnicodeString r;
3661 t->toRules(r, TRUE);
3662 if (r == rule) {
3663 logln((UnicodeString)"OK: toRules() => " + r);
3664 } else {
3665 errln((UnicodeString)"FAIL: toRules() => " + r +
3666 ", expected " + rule);
3667 }
3668
3669 expect(*t, "The Quick Brown Fox",
3670 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3671
3672 delete t;
3673 }
3674
TestInvalidBackRef(void)3675 void TransliteratorTest::TestInvalidBackRef(void) {
3676 UnicodeString rule = ". > $1;";
3677 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3678 UParseError pe;
3679 UErrorCode ec = U_ZERO_ERROR;
3680 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3681 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3682
3683 if (t != NULL) {
3684 errln("FAIL: createFromRules should have returned NULL");
3685 delete t;
3686 }
3687
3688 if (t2 != NULL) {
3689 errln("FAIL: createFromRules should have returned NULL");
3690 delete t2;
3691 }
3692
3693 if (U_SUCCESS(ec)) {
3694 errln("FAIL: Ok: . > $1; => no error");
3695 } else {
3696 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3697 }
3698 }
3699
TestMulticharStringSet()3700 void TransliteratorTest::TestMulticharStringSet() {
3701 // Basic testing
3702 const char* rule =
3703 " [{aa}] > x;"
3704 " a > y;"
3705 " [b{bc}] > z;"
3706 "[{gd}] { e > q;"
3707 " e } [{fg}] > r;" ;
3708
3709 UParseError pe;
3710 UErrorCode ec = U_ZERO_ERROR;
3711 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3712 if (t == NULL || U_FAILURE(ec)) {
3713 delete t;
3714 errln("FAIL: createFromRules failed");
3715 return;
3716 }
3717
3718 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3719 "y x yz z d gd de gdq gdqfg ddrfg");
3720 delete t;
3721
3722 // Overlapped string test. Make sure that when multiple
3723 // strings can match that the longest one is matched.
3724 rule =
3725 " [a {ab} {abc}] > x;"
3726 " b > y;"
3727 " c > z;"
3728 " q [t {st} {rst}] { e > p;" ;
3729
3730 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3731 if (t == NULL || U_FAILURE(ec)) {
3732 delete t;
3733 errln("FAIL: createFromRules failed");
3734 return;
3735 }
3736
3737 expect(*t, "a ab abc qte qste qrste",
3738 "x x x qtp qstp qrstp");
3739 delete t;
3740 }
3741
3742 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3743 // BEGIN TestUserFunction support factory
3744
// Prototype transliterators, indexed by the integer token given to
// registerFactory(); _TUFFactory() returns clones of these.
Transliterator* _TUFF[4];
// Heap-allocated copies of the IDs under which the matching _TUFF entries
// were registered; _TUFUnreg() uses them to unregister.
UnicodeString* _TUFID[4];
3747
_TUFFactory(const UnicodeString &,Transliterator::Token context)3748 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3749 Transliterator::Token context) {
3750 return _TUFF[context.integer]->clone();
3751 }
3752
_TUFReg(const UnicodeString & ID,Transliterator * t,int32_t n)3753 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3754 _TUFF[n] = t;
3755 _TUFID[n] = new UnicodeString(ID);
3756 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3757 }
3758
_TUFUnreg(int32_t n)3759 static void _TUFUnreg(int32_t n) {
3760 if (_TUFF[n] != NULL) {
3761 Transliterator::unregister(*_TUFID[n]);
3762 delete _TUFF[n];
3763 delete _TUFID[n];
3764 }
3765 }
3766
3767 // END TestUserFunction support factory
3768 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3769
3770 /**
3771 * Test that user-registered transliterators can be used under function
3772 * syntax.
3773 */
TestUserFunction()3774 void TransliteratorTest::TestUserFunction() {
3775
3776 Transliterator* t;
3777 UParseError pe;
3778 UErrorCode ec = U_ZERO_ERROR;
3779
3780 // Setup our factory
3781 int32_t i;
3782 for (i=0; i<4; ++i) {
3783 _TUFF[i] = NULL;
3784 }
3785
3786 // There's no need to register inverses if we don't use them
3787 t = Transliterator::createFromRules("gif",
3788 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3789 UTRANS_FORWARD, pe, ec);
3790 if (t == NULL || U_FAILURE(ec)) {
3791 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3792 return;
3793 }
3794 _TUFReg("Any-gif", t, 0);
3795
3796 t = Transliterator::createFromRules("RemoveCurly",
3797 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3798 UTRANS_FORWARD, pe, ec);
3799 if (t == NULL || U_FAILURE(ec)) {
3800 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3801 goto FAIL;
3802 }
3803 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3804 _TUFReg("Any-RemoveCurly", t, 1);
3805
3806 logln("Trying &hex");
3807 t = Transliterator::createFromRules("hex2",
3808 "(.) > &hex($1);",
3809 UTRANS_FORWARD, pe, ec);
3810 if (t == NULL || U_FAILURE(ec)) {
3811 errln("FAIL: createFromRules");
3812 goto FAIL;
3813 }
3814 logln("Registering");
3815 _TUFReg("Any-hex2", t, 2);
3816 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3817 if (t == NULL || U_FAILURE(ec)) {
3818 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3819 goto FAIL;
3820 }
3821 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3822 delete t;
3823
3824 logln("Trying &gif");
3825 t = Transliterator::createFromRules("gif2",
3826 "(.) > &Gif(&Hex2($1));",
3827 UTRANS_FORWARD, pe, ec);
3828 if (t == NULL || U_FAILURE(ec)) {
3829 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3830 goto FAIL;
3831 }
3832 logln("Registering");
3833 _TUFReg("Any-gif2", t, 3);
3834 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3835 if (t == NULL || U_FAILURE(ec)) {
3836 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3837 goto FAIL;
3838 }
3839 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3840 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3841 delete t;
3842
3843 // Test that filters are allowed after &
3844 t = Transliterator::createFromRules("test",
3845 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3846 UTRANS_FORWARD, pe, ec);
3847 if (t == NULL || U_FAILURE(ec)) {
3848 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3849 goto FAIL;
3850 }
3851 expect(*t, "abc",
3852 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3853 delete t;
3854
3855 FAIL:
3856 for (i=0; i<4; ++i) {
3857 _TUFUnreg(i);
3858 }
3859 }
3860
3861 /**
3862 * Test the Any-X transliterators.
3863 */
TestAnyX(void)3864 void TransliteratorTest::TestAnyX(void) {
3865 UParseError parseError;
3866 UErrorCode status = U_ZERO_ERROR;
3867 Transliterator* anyLatin =
3868 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3869 if (anyLatin==0) {
3870 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3871 delete anyLatin;
3872 return;
3873 }
3874
3875 expect(*anyLatin,
3876 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3877 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3878
3879 delete anyLatin;
3880 }
3881
3882 /**
3883 * Test Any-X transliterators with sample letters from all scripts.
3884 */
TestAny(void)3885 void TransliteratorTest::TestAny(void) {
3886 UErrorCode status = U_ZERO_ERROR;
3887 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3888 // function call parameters going on in this test.
3889 UnicodeSet alphabetic("[:alphabetic:]", status);
3890 if (U_FAILURE(status)) {
3891 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3892 return;
3893 }
3894 alphabetic.freeze();
3895
3896 UnicodeString testString;
3897 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3898 const char *scriptName = uscript_getShortName((UScriptCode)i);
3899 if (scriptName == NULL) {
3900 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3901 return;
3902 }
3903
3904 UnicodeSet sample;
3905 sample.applyPropertyAlias("script", scriptName, status);
3906 if (U_FAILURE(status)) {
3907 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3908 return;
3909 }
3910 sample.retainAll(alphabetic);
3911 for (int32_t count=0; count<5; count++) {
3912 UChar32 c = sample.charAt(count);
3913 if (c == -1) {
3914 break;
3915 }
3916 testString.append(c);
3917 }
3918 }
3919
3920 UParseError parseError;
3921 Transliterator* anyLatin =
3922 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3923 if (U_FAILURE(status)) {
3924 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3925 return;
3926 }
3927
3928 logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3929 anyLatin->transliterate(testString);
3930 logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3931 delete anyLatin;
3932 }
3933
3934
3935 /**
3936 * Test the source and target set API. These are only implemented
3937 * for RBT and CompoundTransliterator at this time.
3938 */
TestSourceTargetSet()3939 void TransliteratorTest::TestSourceTargetSet() {
3940 UErrorCode ec = U_ZERO_ERROR;
3941
3942 // Rules
3943 const char* r =
3944 "a > b; "
3945 "r [x{lu}] > q;";
3946
3947 // Expected source
3948 UnicodeSet expSrc("[arx{lu}]", ec);
3949
3950 // Expected target
3951 UnicodeSet expTrg("[bq]", ec);
3952
3953 UParseError pe;
3954 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3955
3956 if (U_FAILURE(ec)) {
3957 delete t;
3958 errln("FAIL: Couldn't set up test");
3959 return;
3960 }
3961
3962 UnicodeSet src; t->getSourceSet(src);
3963 UnicodeSet trg; t->getTargetSet(trg);
3964
3965 if (src == expSrc && trg == expTrg) {
3966 UnicodeString a, b;
3967 logln((UnicodeString)"Ok: " +
3968 r + " => source = " + src.toPattern(a, TRUE) +
3969 ", target = " + trg.toPattern(b, TRUE));
3970 } else {
3971 UnicodeString a, b, c, d;
3972 errln((UnicodeString)"FAIL: " +
3973 r + " => source = " + src.toPattern(a, TRUE) +
3974 ", expected " + expSrc.toPattern(b, TRUE) +
3975 "; target = " + trg.toPattern(c, TRUE) +
3976 ", expected " + expTrg.toPattern(d, TRUE));
3977 }
3978
3979 delete t;
3980 }
3981
3982 /**
3983 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3984 */
TestPatternWhiteSpace()3985 void TransliteratorTest::TestPatternWhiteSpace() {
3986 // Rules
3987 const char* r = "a > \\u200E b;";
3988
3989 UErrorCode ec = U_ZERO_ERROR;
3990 UParseError pe;
3991 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3992
3993 if (U_FAILURE(ec)) {
3994 errln("FAIL: Couldn't set up test");
3995 } else {
3996 expect(*t, "a", "b");
3997 }
3998 delete t;
3999
4000 // UnicodeSet
4001 ec = U_ZERO_ERROR;
4002 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4003
4004 if (U_FAILURE(ec)) {
4005 errln("FAIL: Couldn't set up test");
4006 } else {
4007 if (set.contains(0x200E)) {
4008 errln("FAIL: U+200E not being ignored by UnicodeSet");
4009 }
4010 }
4011 }
4012 //======================================================================
4013 // this method is in TestUScript.java
4014 //======================================================================
TestAllCodepoints()4015 void TransliteratorTest::TestAllCodepoints(){
4016 UScriptCode code= USCRIPT_INVALID_CODE;
4017 char id[256]={'\0'};
4018 char abbr[256]={'\0'};
4019 char newId[256]={'\0'};
4020 char newAbbrId[256]={'\0'};
4021 char oldId[256]={'\0'};
4022 char oldAbbrId[256]={'\0'};
4023
4024 UErrorCode status =U_ZERO_ERROR;
4025 UParseError pe;
4026
4027 for(uint32_t i = 0; i<=0x10ffff; i++){
4028 code = uscript_getScript(i,&status);
4029 if(code == USCRIPT_INVALID_CODE){
4030 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4031 }
4032 const char* myId = uscript_getName(code);
4033 if(!myId) {
4034 dataerrln("Valid script code returned NULL name. Check your data!");
4035 return;
4036 }
4037 uprv_strcpy(id,myId);
4038 uprv_strcpy(abbr,uscript_getShortName(code));
4039
4040 uprv_strcpy(newId,"[:");
4041 uprv_strcat(newId,id);
4042 uprv_strcat(newId,":];NFD");
4043
4044 uprv_strcpy(newAbbrId,"[:");
4045 uprv_strcat(newAbbrId,abbr);
4046 uprv_strcat(newAbbrId,":];NFD");
4047
4048 if(uprv_strcmp(newId,oldId)!=0){
4049 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4050 if(t==NULL || U_FAILURE(status)){
4051 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4052 }
4053 delete t;
4054 }
4055 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4056 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4057 if(t==NULL || U_FAILURE(status)){
4058 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4059 }
4060 delete t;
4061 }
4062 uprv_strcpy(oldId,newId);
4063 uprv_strcpy(oldAbbrId, newAbbrId);
4064
4065 }
4066
4067 }
4068
/*
 * TEST_TRANSLIT_ID(id, cls)
 *   Create a forward transliterator for the ID string `id` and verify that
 *   its dynamic class ID equals cls::getStaticClassID().
 *   `id` is also passed to a "%s" format, so it must be a C string.
 *   The instance is dereferenced only when creation reported success AND
 *   returned a non-NULL pointer; it is deleted in every case.
 *   Expands to a braced block and relies on errln()/dataerrln() being
 *   callable at the point of use.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode ec = U_ZERO_ERROR; \
    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
    if (t == NULL || U_FAILURE(ec)) { \
        dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
    } else { \
        if (t->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    } \
    delete t; \
}
4082
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *   Compile `rule` into a forward transliterator with ID "_" and verify
 *   that its dynamic class ID equals cls::getStaticClassID().
 *   `rule` must be a string literal: it is pasted next to another literal
 *   to build the failure message.
 *   The instance is dereferenced only when creation reported success AND
 *   returned a non-NULL pointer; it is deleted in every case.
 *   Expands to a braced block and relies on errln() being callable at the
 *   point of use.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode ec = U_ZERO_ERROR; \
    UParseError pe; \
    Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
    if (t == NULL || U_FAILURE(ec)) { \
        errln("FAIL: Couldn't create " rule); \
    } else { \
        if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    } \
    delete t; \
}
4097
/**
 * Class-ID boilerplate check: for each ID (or rule) below, create the
 * transliterator and verify that its dynamic class ID matches the static
 * class ID of the named implementation class.  The work is done by the
 * TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE macros defined above.
 */
void TransliteratorTest::TestBoilerplate() {
    // Transliterators obtained by ID
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Transliterator compiled from a rule string
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4113
TestAlternateSyntax()4114 void TransliteratorTest::TestAlternateSyntax() {
4115 // U+2206 == &
4116 // U+2190 == <
4117 // U+2192 == >
4118 // U+2194 == <>
4119 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4120 "abc",
4121 "xbz");
4122 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4123 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4124 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4125 }
4126
// Rule sets used by TestBeginEnd() and TestBeginEndToRules().  Entries are
// referred to by index from BEGIN_END_TEST_CASES, so the position of every
// entry matters: rule sets that depend on the (currently disabled)
// ::BEGIN/::END syntax are kept as comments and replaced by an empty-string
// placeholder so that the remaining indexes do not shift.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]  (the one rule set that is also instantiated in reverse by the tests)
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4300
4301 /*
4302 (This entire test is commented out below and will need some heavy revision when we re-add
4303 the ::BEGIN/::END stuff)
4304 static const char* BOGUS_BEGIN_END_RULES[] = {
4305 // [7]
4306 "::BEGIN;"
4307 "abc > xy;"
4308 "::BEGIN;"
4309 "aba > z;"
4310 "::END;"
4311 "::END;",
4312
4313 // [8]
4314 "abc > xy;"
4315 " aba > z;"
4316 "::END;",
4317
4318 // [9]
4319 "::BEGIN;"
4320 "::Upper;"
4321 "::END;"
4322 };
4323 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4324 */
4325
// Test cases for TestBeginEnd() and TestBeginEndToRules(), stored as a flat
// list of triples: { rule set, input, expected output }.  The tests step
// through the array three entries at a time, so entries must only be added
// or removed as whole triples.  Lines that are commented out belong to rule
// sets that are currently placeholders in BEGIN_END_RULES.
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
//  BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
//  BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
//  BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

    BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
//  BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
    BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

//  BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//  BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of array entries (three per test case), not the number of cases.
static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4354
TestBeginEnd()4355 void TransliteratorTest::TestBeginEnd() {
4356 // run through the list of test cases above
4357 int32_t i = 0;
4358 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4359 expect((UnicodeString)"Test case #" + (i / 3),
4360 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4361 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4362 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4363 }
4364
4365 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4366 UParseError parseError;
4367 UErrorCode status = U_ZERO_ERROR;
4368 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4369 UTRANS_REVERSE, parseError, status);
4370 if (reversed == 0 || U_FAILURE(status)) {
4371 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4372 } else {
4373 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4374 }
4375 delete reversed;
4376
4377 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4378 // that all of them cause errors
4379 /*
4380 (commented out until we have the real ::BEGIN/::END stuff in place
4381 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4382 UParseError parseError;
4383 UErrorCode status = U_ZERO_ERROR;
4384 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4385 UTRANS_FORWARD, parseError, status);
4386 if (!U_FAILURE(status)) {
4387 delete t;
4388 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4389 }
4390 }
4391 */
4392 }
4393
TestBeginEndToRules()4394 void TransliteratorTest::TestBeginEndToRules() {
4395 // run through the same list of test cases we used above, but this time, instead of just
4396 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4397 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4398 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4399 // to (i.e., does the same thing as) the original rule set
4400 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4401 UParseError parseError;
4402 UErrorCode status = U_ZERO_ERROR;
4403 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4404 UTRANS_FORWARD, parseError, status);
4405 if (U_FAILURE(status)) {
4406 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4407 } else {
4408 UnicodeString rules;
4409 t->toRules(rules, TRUE);
4410 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4411 UTRANS_FORWARD, parseError, status);
4412 if (U_FAILURE(status)) {
4413 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4414 parseError, status);
4415 delete t;
4416 } else {
4417 expect(*t2,
4418 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4419 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4420 delete t;
4421 delete t2;
4422 }
4423 }
4424 }
4425
4426 // do the same thing for the reversible test case
4427 UParseError parseError;
4428 UErrorCode status = U_ZERO_ERROR;
4429 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4430 UTRANS_REVERSE, parseError, status);
4431 if (U_FAILURE(status)) {
4432 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4433 } else {
4434 UnicodeString rules;
4435 reversed->toRules(rules, FALSE);
4436 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4437 parseError, status);
4438 if (U_FAILURE(status)) {
4439 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4440 parseError, status);
4441 delete reversed;
4442 } else {
4443 expect(*reversed2,
4444 UnicodeString("xy XY XYZ yz YZ"),
4445 UnicodeString("xy abc xaba yz aba"));
4446 delete reversed;
4447 delete reversed2;
4448 }
4449 }
4450 }
4451
TestRegisterAlias()4452 void TransliteratorTest::TestRegisterAlias() {
4453 UnicodeString longID("Lower;[aeiou]Upper");
4454 UnicodeString shortID("Any-CapVowels");
4455 UnicodeString reallyShortID("CapVowels");
4456
4457 Transliterator::registerAlias(shortID, longID);
4458
4459 UErrorCode err = U_ZERO_ERROR;
4460 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4461 if (U_FAILURE(err)) {
4462 errln("Failed to instantiate transliterator with long ID");
4463 Transliterator::unregister(shortID);
4464 return;
4465 }
4466 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4467 if (U_FAILURE(err)) {
4468 errln("Failed to instantiate transliterator with short ID");
4469 delete t1;
4470 Transliterator::unregister(shortID);
4471 return;
4472 }
4473
4474 if (t1->getID() != longID)
4475 errln("Transliterator instantiated with long ID doesn't have long ID");
4476 if (t2->getID() != reallyShortID)
4477 errln("Transliterator instantiated with short ID doesn't have short ID");
4478
4479 UnicodeString rules1;
4480 UnicodeString rules2;
4481
4482 t1->toRules(rules1, TRUE);
4483 t2->toRules(rules2, TRUE);
4484 if (rules1 != rules2)
4485 errln("Alias transliterators aren't the same");
4486
4487 delete t1;
4488 delete t2;
4489 Transliterator::unregister(shortID);
4490
4491 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4492 if (U_SUCCESS(err)) {
4493 errln("Instantiation with short ID succeeded after short ID was unregistered");
4494 delete t1;
4495 }
4496
4497 // try the same thing again, but this time with something other than
4498 // an instance of CompoundTransliterator
4499 UnicodeString realID("Latin-Greek");
4500 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4501 Transliterator::registerAlias(fakeID, realID);
4502
4503 err = U_ZERO_ERROR;
4504 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4505 if (U_FAILURE(err)) {
4506 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4507 Transliterator::unregister(realID);
4508 return;
4509 }
4510 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4511 if (U_FAILURE(err)) {
4512 errln("Failed to instantiate transliterator with fake ID");
4513 delete t1;
4514 Transliterator::unregister(realID);
4515 return;
4516 }
4517
4518 t1->toRules(rules1, TRUE);
4519 t2->toRules(rules2, TRUE);
4520 if (rules1 != rules2)
4521 errln("Alias transliterators aren't the same");
4522
4523 delete t1;
4524 delete t2;
4525 Transliterator::unregister(fakeID);
4526 }
4527
TestRuleStripping()4528 void TransliteratorTest::TestRuleStripping() {
4529 /*
4530 #
4531 \uE001>\u0C01; # SIGN
4532 */
4533 static const UChar rule[] = {
4534 0x0023,0x0020,0x000D,0x000A,
4535 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4536 };
4537 static const UChar expectedRule[] = {
4538 0xE001,0x003E,0x0C01,0x003B,0
4539 };
4540 UChar result[UPRV_LENGTHOF(rule)];
4541 UErrorCode status = U_ZERO_ERROR;
4542 int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4543 if (len != u_strlen(expectedRule)) {
4544 errln("utrans_stripRules return len = %d", len);
4545 }
4546 if (u_strncmp(expectedRule, result, len) != 0) {
4547 errln("utrans_stripRules did not return expected string");
4548 }
4549 }
4550
4551 /**
4552 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4553 */
TestHalfwidthFullwidth(void)4554 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4555 UParseError parseError;
4556 UErrorCode status = U_ZERO_ERROR;
4557 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4558 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4559 if (hf == 0 || fh == 0) {
4560 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4561 delete hf;
4562 delete fh;
4563 return;
4564 }
4565
4566 // Array of 2n items
4567 // Each item is
4568 // "hf"|"fh"|"both",
4569 // <Halfwidth>,
4570 // <Fullwidth>
4571 const char* DATA[] = {
4572 "both",
4573 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4574 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4575 };
4576 int32_t DATA_length = UPRV_LENGTHOF(DATA);
4577
4578 for (int32_t i=0; i<DATA_length; i+=3) {
4579 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4580 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4581 switch (*DATA[i]) {
4582 case 0x68: //'h': // Halfwidth-Fullwidth only
4583 expect(*hf, h, f);
4584 break;
4585 case 0x66: //'f': // Fullwidth-Halfwidth only
4586 expect(*fh, f, h);
4587 break;
4588 case 0x62: //'b': // both directions
4589 expect(*hf, h, f);
4590 expect(*fh, f, h);
4591 break;
4592 }
4593 }
4594 delete hf;
4595 delete fh;
4596 }
4597
4598
4599 /**
4600 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4601 * TODO: confirm that the expected results are correct.
4602 * For now, test just confirms that C++ and Java give identical results.
4603 */
TestThai(void)4604 void TransliteratorTest::TestThai(void) {
4605 #if !UCONFIG_NO_BREAK_ITERATION
4606 UParseError parseError;
4607 UErrorCode status = U_ZERO_ERROR;
4608 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4609 if (tr == 0) {
4610 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4611 return;
4612 }
4613 if (U_FAILURE(status)) {
4614 errln("FAIL: createInstance failed with %s", u_errorName(status));
4615 return;
4616 }
4617 const char *thaiText =
4618 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4619 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4620 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4621 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4622 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4623 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4624 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4625 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4626 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4627 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4628 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4629 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4630 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4631 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4632 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4633 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4634 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4635 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4636 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4637 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4638 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4639 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4640 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4641 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4642 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4643 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4644 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4645 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4646 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4647 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4648
4649 const char *latinText =
4650 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4651 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4652 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4653 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4654 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4655 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4656 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4657 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4658 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4659 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4660 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4661 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4662 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4663 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4664 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4665 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4666 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4667 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4668
4669
4670 UnicodeString xlitText(thaiText);
4671 xlitText = xlitText.unescape();
4672 tr->transliterate(xlitText);
4673
4674 UnicodeString expectedText(latinText);
4675 expectedText = expectedText.unescape();
4676 expect(*tr, xlitText, expectedText);
4677
4678 delete tr;
4679 #endif
4680 }
4681
4682
4683 //======================================================================
4684 // Support methods
4685 //======================================================================
expectT(const UnicodeString & id,const UnicodeString & source,const UnicodeString & expectedResult)4686 void TransliteratorTest::expectT(const UnicodeString& id,
4687 const UnicodeString& source,
4688 const UnicodeString& expectedResult) {
4689 UErrorCode ec = U_ZERO_ERROR;
4690 UParseError pe;
4691 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4692 if (U_FAILURE(ec)) {
4693 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
4694 delete t;
4695 return;
4696 }
4697 expect(*t, source, expectedResult);
4698 delete t;
4699 }
4700
reportParseError(const UnicodeString & message,const UParseError & parseError,const UErrorCode & status)4701 void TransliteratorTest::reportParseError(const UnicodeString& message,
4702 const UParseError& parseError,
4703 const UErrorCode& status) {
4704 dataerrln(message +
4705 /*", parse error " + parseError.code +*/
4706 ", line " + parseError.line +
4707 ", offset " + parseError.offset +
4708 ", pre-context " + prettify(parseError.preContext, TRUE) +
4709 ", post-context " + prettify(parseError.postContext,TRUE) +
4710 ", Error: " + u_errorName(status));
4711 }
4712
expect(const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4713 void TransliteratorTest::expect(const UnicodeString& rules,
4714 const UnicodeString& source,
4715 const UnicodeString& expectedResult,
4716 UTransPosition *pos) {
4717 expect("<ID>", rules, source, expectedResult, pos);
4718 }
4719
expect(const UnicodeString & id,const UnicodeString & rules,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4720 void TransliteratorTest::expect(const UnicodeString& id,
4721 const UnicodeString& rules,
4722 const UnicodeString& source,
4723 const UnicodeString& expectedResult,
4724 UTransPosition *pos) {
4725 UErrorCode status = U_ZERO_ERROR;
4726 UParseError parseError;
4727 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4728 if (U_FAILURE(status)) {
4729 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4730 } else {
4731 expect(*t, source, expectedResult, pos);
4732 }
4733 delete t;
4734 }
4735
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,const Transliterator & reverseTransliterator)4736 void TransliteratorTest::expect(const Transliterator& t,
4737 const UnicodeString& source,
4738 const UnicodeString& expectedResult,
4739 const Transliterator& reverseTransliterator) {
4740 expect(t, source, expectedResult);
4741 expect(reverseTransliterator, expectedResult, source);
4742 }
4743
expect(const Transliterator & t,const UnicodeString & source,const UnicodeString & expectedResult,UTransPosition * pos)4744 void TransliteratorTest::expect(const Transliterator& t,
4745 const UnicodeString& source,
4746 const UnicodeString& expectedResult,
4747 UTransPosition *pos) {
4748 if (pos == 0) {
4749 UnicodeString result(source);
4750 t.transliterate(result);
4751 expectAux(t.getID() + ":String", source, result, expectedResult);
4752 }
4753 UTransPosition index={0, 0, 0, 0};
4754 if (pos != 0) {
4755 index = *pos;
4756 }
4757
4758 UnicodeString rsource(source);
4759 if (pos == 0) {
4760 t.transliterate(rsource);
4761 } else {
4762 // Do it all at once -- below we do it incrementally
4763 t.finishTransliteration(rsource, *pos);
4764 }
4765 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4766
4767 // Test keyboard (incremental) transliteration -- this result
4768 // must be the same after we finalize (see below).
4769 UnicodeString log;
4770 rsource.remove();
4771 if (pos != 0) {
4772 rsource = source;
4773 formatInput(log, rsource, index);
4774 log.append(" -> ");
4775 UErrorCode status = U_ZERO_ERROR;
4776 t.transliterate(rsource, index, status);
4777 formatInput(log, rsource, index);
4778 } else {
4779 for (int32_t i=0; i<source.length(); ++i) {
4780 if (i != 0) {
4781 log.append(" + ");
4782 }
4783 log.append(source.charAt(i)).append(" -> ");
4784 UErrorCode status = U_ZERO_ERROR;
4785 t.transliterate(rsource, index, source.charAt(i), status);
4786 formatInput(log, rsource, index);
4787 }
4788 }
4789
4790 // As a final step in keyboard transliteration, we must call
4791 // transliterate to finish off any pending partial matches that
4792 // were waiting for more input.
4793 t.finishTransliteration(rsource, index);
4794 log.append(" => ").append(rsource);
4795
4796 expectAux(t.getID() + ":Keyboard", log,
4797 rsource == expectedResult,
4798 expectedResult);
4799 }
4800
4801
4802 /**
4803 * @param appendTo result is appended to this param.
4804 * @param input the string being transliterated
4805 * @param pos the index struct
4806 */
formatInput(UnicodeString & appendTo,const UnicodeString & input,const UTransPosition & pos)4807 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4808 const UnicodeString& input,
4809 const UTransPosition& pos) {
4810 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4811 // the {} indicate the context start and limit, and the ||
4812 // indicate the start and limit.
4813 if (0 <= pos.contextStart &&
4814 pos.contextStart <= pos.start &&
4815 pos.start <= pos.limit &&
4816 pos.limit <= pos.contextLimit &&
4817 pos.contextLimit <= input.length()) {
4818
4819 UnicodeString a, b, c, d, e;
4820 input.extractBetween(0, pos.contextStart, a);
4821 input.extractBetween(pos.contextStart, pos.start, b);
4822 input.extractBetween(pos.start, pos.limit, c);
4823 input.extractBetween(pos.limit, pos.contextLimit, d);
4824 input.extractBetween(pos.contextLimit, input.length(), e);
4825 appendTo.append(a).append((UChar)123/*{*/).append(b).
4826 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4827 append((UChar)125/*}*/).append(e);
4828 } else {
4829 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4830 pos.contextStart + ", s=" + pos.start + ", l=" +
4831 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4832 input);
4833 }
4834 return appendTo;
4835 }
4836
expectAux(const UnicodeString & tag,const UnicodeString & source,const UnicodeString & result,const UnicodeString & expectedResult)4837 void TransliteratorTest::expectAux(const UnicodeString& tag,
4838 const UnicodeString& source,
4839 const UnicodeString& result,
4840 const UnicodeString& expectedResult) {
4841 expectAux(tag, source + " -> " + result,
4842 result == expectedResult,
4843 expectedResult);
4844 }
4845
expectAux(const UnicodeString & tag,const UnicodeString & summary,UBool pass,const UnicodeString & expectedResult)4846 void TransliteratorTest::expectAux(const UnicodeString& tag,
4847 const UnicodeString& summary, UBool pass,
4848 const UnicodeString& expectedResult) {
4849 if (pass) {
4850 logln(UnicodeString("(")+tag+") " + prettify(summary));
4851 } else {
4852 dataerrln(UnicodeString("FAIL: (")+tag+") "
4853 + prettify(summary)
4854 + ", expected " + prettify(expectedResult));
4855 }
4856 }
4857
4858 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
4859