1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2016 International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * Date Name Description
9 * 06/23/00 aliu Creation.
10 *******************************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_TRANSLITERATION
16
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "unicode/utrans.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uset.h"
23 #include "cintltst.h"
24 #include "cmemory.h"
25
26 #define TEST(x) addTest(root, &x, "utrans/" # x)
27
28 static void TestAPI(void);
29 static void TestSimpleRules(void);
30 static void TestFilter(void);
31 static void TestOpenInverse(void);
32 static void TestClone(void);
33 static void TestRegisterUnregister(void);
34 static void TestExtractBetween(void);
35 static void TestUnicodeIDs(void);
36 static void TestGetRulesAndSourceSet(void);
37 static void TestDataVariantsCompounds(void);
38
39 static void _expectRules(const char*, const char*, const char*);
40 static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
41
42 void addUTransTest(TestNode** root);
43
44
45 void
addUTransTest(TestNode ** root)46 addUTransTest(TestNode** root) {
47 TEST(TestAPI);
48 TEST(TestSimpleRules);
49 TEST(TestFilter);
50 TEST(TestOpenInverse);
51 TEST(TestClone);
52 TEST(TestRegisterUnregister);
53 TEST(TestExtractBetween);
54 TEST(TestUnicodeIDs);
55 TEST(TestGetRulesAndSourceSet);
56 TEST(TestDataVariantsCompounds);
57 }
58
59 /*------------------------------------------------------------------
60 * Replaceable glue
61 *
62 * To test the Replaceable glue we have to dummy up a C-based
63 * Replaceable callback. This code is for testing purposes only.
64 *------------------------------------------------------------------*/
65
66 typedef struct XReplaceable {
67 UChar* text; /* MUST BE null-terminated */
68 } XReplaceable;
69
/*
 * Points rep->text at a freshly malloc'ed UChar copy of cstring, produced
 * with u_uastrcpy().  The allocation result is not checked.  Release the
 * storage with FreeXReplaceable().
 */
static void InitXReplaceable(XReplaceable* rep, const char* cstring) {
    size_t units = strlen(cstring) + 1;    /* + 1 for the terminating NUL */

    rep->text = malloc(units * sizeof *rep->text);
    u_uastrcpy(rep->text, cstring);
}
74
/*
 * Releases the string owned by rep and resets rep->text to NULL, so calling
 * this twice on the same object is harmless.
 */
static void FreeXReplaceable(XReplaceable* rep) {
    /* free(NULL) is a no-op, so no guard is required. */
    free(rep->text);
    rep->text = NULL;
}
81
82 /* UReplaceableCallbacks callback */
Xlength(const UReplaceable * rep)83 static int32_t Xlength(const UReplaceable* rep) {
84 const XReplaceable* x = (const XReplaceable*)rep;
85 return u_strlen(x->text);
86 }
87
88 /* UReplaceableCallbacks callback */
XcharAt(const UReplaceable * rep,int32_t offset)89 static UChar XcharAt(const UReplaceable* rep, int32_t offset) {
90 const XReplaceable* x = (const XReplaceable*)rep;
91 return x->text[offset];
92 }
93
94 /* UReplaceableCallbacks callback */
Xchar32At(const UReplaceable * rep,int32_t offset)95 static UChar32 Xchar32At(const UReplaceable* rep, int32_t offset) {
96 const XReplaceable* x = (const XReplaceable*)rep;
97 return x->text[offset];
98 }
99
100 /* UReplaceableCallbacks callback */
Xreplace(UReplaceable * rep,int32_t start,int32_t limit,const UChar * text,int32_t textLength)101 static void Xreplace(UReplaceable* rep, int32_t start, int32_t limit,
102 const UChar* text, int32_t textLength) {
103 XReplaceable* x = (XReplaceable*)rep;
104 int32_t newLen = Xlength(rep) + limit - start + textLength;
105 UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
106 u_strncpy(newText, x->text, start);
107 u_strncpy(newText + start, text, textLength);
108 u_strcpy(newText + start + textLength, x->text + limit);
109 free(x->text);
110 x->text = newText;
111 }
112
113 /* UReplaceableCallbacks callback */
Xcopy(UReplaceable * rep,int32_t start,int32_t limit,int32_t dest)114 static void Xcopy(UReplaceable* rep, int32_t start, int32_t limit, int32_t dest) {
115 XReplaceable* x = (XReplaceable*)rep;
116 int32_t newLen = Xlength(rep) + limit - start;
117 UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
118 u_strncpy(newText, x->text, dest);
119 u_strncpy(newText + dest, x->text + start, limit - start);
120 u_strcpy(newText + dest + limit - start, x->text + dest);
121 free(x->text);
122 x->text = newText;
123 }
124
125 /* UReplaceableCallbacks callback */
Xextract(UReplaceable * rep,int32_t start,int32_t limit,UChar * dst)126 static void Xextract(UReplaceable* rep, int32_t start, int32_t limit, UChar* dst) {
127 XReplaceable* x = (XReplaceable*)rep;
128 int32_t len = limit - start;
129 u_strncpy(dst, x->text, len);
130 }
131
/* Fills in *callbacks with the XReplaceable implementations defined in this file. */
static void InitXReplaceableCallbacks(UReplaceableCallbacks* callbacks) {
    /* callbacks that only read the text */
    callbacks->length   = Xlength;
    callbacks->charAt   = XcharAt;
    callbacks->char32At = Xchar32At;
    callbacks->extract  = Xextract;

    /* callbacks that replace the stored string */
    callbacks->replace  = Xreplace;
    callbacks->copy     = Xcopy;
}
140
141 /*------------------------------------------------------------------
142 * Tests
143 *------------------------------------------------------------------*/
144
TestAPI()145 static void TestAPI() {
146 enum { BUF_CAP = 128 };
147 char buf[BUF_CAP], buf2[BUF_CAP];
148 UErrorCode status = U_ZERO_ERROR;
149 UTransliterator* trans = NULL;
150 int32_t i, n;
151
152 /* Test getAvailableIDs */
153 n = utrans_countAvailableIDs();
154 if (n < 1) {
155 log_err("FAIL: utrans_countAvailableIDs() returned %d\n", n);
156 } else {
157 log_verbose("System ID count: %d\n", n);
158 }
159 for (i=0; i<n; ++i) {
160 utrans_getAvailableID(i, buf, BUF_CAP);
161 if (*buf == 0) {
162 log_err("FAIL: System transliterator %d: \"\"\n", i);
163 } else {
164 log_verbose("System transliterator %d: \"%s\"\n", i, buf);
165 }
166 }
167
168 /* Test open */
169 utrans_getAvailableID(0, buf, BUF_CAP);
170 trans = utrans_open(buf, UTRANS_FORWARD,NULL,0,NULL, &status);
171 if (U_FAILURE(status)) {
172 log_err("FAIL: utrans_open(%s) failed, error=%s\n",
173 buf, u_errorName(status));
174 }
175
176 else {
177 /* Test getID */
178 utrans_getID(trans, buf2, BUF_CAP);
179 if (0 != strcmp(buf, buf2)) {
180 log_err("FAIL: utrans_getID(%s) returned %s\n",
181 buf, buf2);
182 }
183 utrans_close(trans);
184 }
185 }
186
TestUnicodeIDs()187 static void TestUnicodeIDs() {
188 UEnumeration *uenum;
189 UTransliterator *utrans;
190 const UChar *id, *id2;
191 int32_t idLength, id2Length, count, count2;
192
193 UErrorCode errorCode;
194
195 errorCode=U_ZERO_ERROR;
196 uenum=utrans_openIDs(&errorCode);
197 if(U_FAILURE(errorCode)) {
198 log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
199 return;
200 }
201
202 count=uenum_count(uenum, &errorCode);
203 if(U_FAILURE(errorCode) || count<1) {
204 log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
205 }
206
207 count=0;
208 for(;;) {
209 id=uenum_unext(uenum, &idLength, &errorCode);
210 if(U_FAILURE(errorCode)) {
211 log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
212 break;
213 }
214 if(id==NULL) {
215 break;
216 }
217
218 if(++count>10) {
219 /* try to actually open only a few transliterators */
220 continue;
221 }
222
223 utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
224 if(U_FAILURE(errorCode)) {
225 log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
226 continue;
227 }
228
229 id2=utrans_getUnicodeID(utrans, &id2Length);
230 if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
231 log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
232 }
233
234 utrans_close(utrans);
235 }
236
237 uenum_reset(uenum, &errorCode);
238 if(U_FAILURE(errorCode) || count<1) {
239 log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
240 } else {
241 count2=uenum_count(uenum, &errorCode);
242 if(U_FAILURE(errorCode) || count<1) {
243 log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
244 } else if(count!=count2) {
245 log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
246 }
247 }
248
249 uenum_close(uenum);
250 }
251
TestOpenInverse()252 static void TestOpenInverse(){
253 UErrorCode status=U_ZERO_ERROR;
254 UTransliterator* t1=NULL;
255 UTransliterator* inverse1=NULL;
256 enum { BUF_CAP = 128 };
257 char buf1[BUF_CAP];
258 int32_t i=0;
259
260 const char TransID[][25]={
261 "Halfwidth-Fullwidth",
262 "Fullwidth-Halfwidth",
263 "Greek-Latin" ,
264 "Latin-Greek",
265 /*"Arabic-Latin", // Removed in 2.0*/
266 /*"Latin-Arabic", // Removed in 2.0*/
267 "Katakana-Latin",
268 "Latin-Katakana",
269 /*"Hebrew-Latin", // Removed in 2.0*/
270 /*"Latin-Hebrew", // Removed in 2.0*/
271 "Cyrillic-Latin",
272 "Latin-Cyrillic",
273 "Devanagari-Latin",
274 "Latin-Devanagari",
275 "Any-Hex",
276 "Hex-Any"
277 };
278
279 for(i=0; i<UPRV_LENGTHOF(TransID); i=i+2){
280 status = U_ZERO_ERROR;
281 t1=utrans_open(TransID[i], UTRANS_FORWARD,NULL,0,NULL, &status);
282 if(t1 == NULL || U_FAILURE(status)){
283 log_data_err("FAIL: in instantiation for id=%s -> %s (Are you missing data?)\n", TransID[i], u_errorName(status));
284 continue;
285 }
286 inverse1=utrans_openInverse(t1, &status);
287 if(U_FAILURE(status)){
288 log_err("FAIL: utrans_openInverse() failed for id=%s. Error=%s\n", TransID[i], myErrorName(status));
289 continue;
290 }
291 utrans_getID(inverse1, buf1, BUF_CAP);
292 if(strcmp(buf1, TransID[i+1]) != 0){
293 log_err("FAIL :openInverse() for %s returned %s instead of %s\n", TransID[i], buf1, TransID[i+1]);
294 }
295 utrans_close(t1);
296 utrans_close(inverse1);
297 }
298 }
299
TestClone()300 static void TestClone(){
301 UErrorCode status=U_ZERO_ERROR;
302 UTransliterator* t1=NULL;
303 UTransliterator* t2=NULL;
304 UTransliterator* t3=NULL;
305 UTransliterator* t4=NULL;
306 enum { BUF_CAP = 128 };
307 char buf1[BUF_CAP], buf2[BUF_CAP], buf3[BUF_CAP];
308
309 t1=utrans_open("Latin-Devanagari", UTRANS_FORWARD, NULL,0,NULL,&status);
310 if(U_FAILURE(status)){
311 log_data_err("FAIL: construction -> %s (Are you missing data?)\n", u_errorName(status));
312 return;
313 }
314 t2=utrans_open("Latin-Greek", UTRANS_FORWARD, NULL,0,NULL,&status);
315 if(U_FAILURE(status)){
316 log_err("FAIL: construction\n");
317 utrans_close(t1);
318 return;
319 }
320
321 t3=utrans_clone(t1, &status);
322 t4=utrans_clone(t2, &status);
323
324 utrans_getID(t1, buf1, BUF_CAP);
325 utrans_getID(t2, buf2, BUF_CAP);
326 utrans_getID(t3, buf3, BUF_CAP);
327
328 if(strcmp(buf1, buf3) != 0 ||
329 strcmp(buf1, buf2) == 0) {
330 log_err("FAIL: utrans_clone() failed\n");
331 }
332
333 utrans_getID(t4, buf3, BUF_CAP);
334
335 if(strcmp(buf2, buf3) != 0 ||
336 strcmp(buf1, buf3) == 0) {
337 log_err("FAIL: utrans_clone() failed\n");
338 }
339
340 utrans_close(t1);
341 utrans_close(t2);
342 utrans_close(t3);
343 utrans_close(t4);
344
345 }
346
TestRegisterUnregister()347 static void TestRegisterUnregister(){
348 UErrorCode status=U_ZERO_ERROR;
349 UTransliterator* t1=NULL;
350 UTransliterator* rules=NULL, *rules2;
351 UTransliterator* inverse1=NULL;
352 UChar rule[]={ 0x0061, 0x003c, 0x003e, 0x0063}; /*a<>b*/
353
354 U_STRING_DECL(ID, "TestA-TestB", 11);
355 U_STRING_INIT(ID, "TestA-TestB", 11);
356
357 /* Make sure it doesn't exist */
358 t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
359 if(t1 != NULL || U_SUCCESS(status)) {
360 log_err("FAIL: TestA-TestB already registered\n");
361 return;
362 }
363 status=U_ZERO_ERROR;
364 /* Check inverse too */
365 inverse1=utrans_open("TestA-TestB", UTRANS_REVERSE, NULL,0,NULL,&status);
366 if(inverse1 != NULL || U_SUCCESS(status)) {
367 log_err("FAIL: TestA-TestB already registered\n");
368 return;
369 }
370 status=U_ZERO_ERROR;
371 /* Create it */
372 rules=utrans_open("TestA-TestB",UTRANS_FORWARD, rule, 4, NULL, &status);
373 if(U_FAILURE(status)){
374 log_err("FAIL: utrans_openRules(a<>B) failed with error=%s\n", myErrorName(status));
375 return;
376 }
377
378 /* clone it so we can register it a second time */
379 rules2=utrans_clone(rules, &status);
380 if(U_FAILURE(status)) {
381 log_err("FAIL: utrans_clone(a<>B) failed with error=%s\n", myErrorName(status));
382 return;
383 }
384
385 status=U_ZERO_ERROR;
386 /* Register it */
387 utrans_register(rules, &status);
388 if(U_FAILURE(status)){
389 log_err("FAIL: utrans_register failed with error=%s\n", myErrorName(status));
390 return;
391 }
392 status=U_ZERO_ERROR;
393 /* Now check again -- should exist now*/
394 t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
395 if(U_FAILURE(status) || t1 == NULL){
396 log_err("FAIL: TestA-TestB not registered\n");
397 return;
398 }
399 utrans_close(t1);
400
401 /*unregister the instance*/
402 status=U_ZERO_ERROR;
403 utrans_unregister("TestA-TestB");
404 /* now Make sure it doesn't exist */
405 t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
406 if(U_SUCCESS(status) || t1 != NULL) {
407 log_err("FAIL: TestA-TestB isn't unregistered\n");
408 return;
409 }
410 utrans_close(t1);
411
412 /* now with utrans_unregisterID(const UChar *) */
413 status=U_ZERO_ERROR;
414 utrans_register(rules2, &status);
415 if(U_FAILURE(status)){
416 log_err("FAIL: 2nd utrans_register failed with error=%s\n", myErrorName(status));
417 return;
418 }
419 status=U_ZERO_ERROR;
420 /* Now check again -- should exist now*/
421 t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
422 if(U_FAILURE(status) || t1 == NULL){
423 log_err("FAIL: 2nd TestA-TestB not registered\n");
424 return;
425 }
426 utrans_close(t1);
427
428 /*unregister the instance*/
429 status=U_ZERO_ERROR;
430 utrans_unregisterID(ID, -1);
431 /* now Make sure it doesn't exist */
432 t1=utrans_openU(ID, -1, UTRANS_FORWARD,NULL,0,NULL, &status);
433 if(U_SUCCESS(status) || t1 != NULL) {
434 log_err("FAIL: 2nd TestA-TestB isn't unregistered\n");
435 return;
436 }
437
438 utrans_close(t1);
439 utrans_close(inverse1);
440 }
441
TestSimpleRules()442 static void TestSimpleRules() {
443 /* Test rules */
444 /* Example: rules 1. ab>x|y
445 * 2. yc>z
446 *
447 * []|eabcd start - no match, copy e to translated buffer
448 * [e]|abcd match rule 1 - copy output & adjust cursor
449 * [ex|y]cd match rule 2 - copy output & adjust cursor
450 * [exz]|d no match, copy d to transliterated buffer
451 * [exzd]| done
452 */
453 _expectRules("ab>x|y;"
454 "yc>z",
455 "eabcd", "exzd");
456
457 /* Another set of rules:
458 * 1. ab>x|yzacw
459 * 2. za>q
460 * 3. qc>r
461 * 4. cw>n
462 *
463 * []|ab Rule 1
464 * [x|yzacw] No match
465 * [xy|zacw] Rule 2
466 * [xyq|cw] Rule 4
467 * [xyqn]| Done
468 */
469 _expectRules("ab>x|yzacw;"
470 "za>q;"
471 "qc>r;"
472 "cw>n",
473 "ab", "xyqn");
474
475 /* Test categories
476 */
477 _expectRules("$dummy=" "\\uE100" ";" /* careful here with E100 */
478 "$vowel=[aeiouAEIOU];"
479 "$lu=[:Lu:];"
480 "$vowel } $lu > '!';"
481 "$vowel > '&';"
482 "'!' { $lu > '^';"
483 "$lu > '*';"
484 "a > ERROR",
485 "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
486
487 /* Test multiple passes
488 */
489 _expectRules("abc > xy;"
490 "::Null;"
491 "aba > z;",
492 "abc ababc aba", "xy abxy z");
493 }
494
TestFilter()495 static void TestFilter() {
496 UErrorCode status = U_ZERO_ERROR;
497 UChar filt[128];
498 UChar buf[128];
499 UChar exp[128];
500 char *cbuf;
501 int32_t limit;
502 const char* DATA[] = {
503 "[^c]", /* Filter out 'c' */
504 "abcde",
505 "\\u0061\\u0062c\\u0064\\u0065",
506
507 "", /* No filter */
508 "abcde",
509 "\\u0061\\u0062\\u0063\\u0064\\u0065"
510 };
511 int32_t DATA_length = UPRV_LENGTHOF(DATA);
512 int32_t i;
513
514 UTransliterator* hex = utrans_open("Any-Hex", UTRANS_FORWARD, NULL,0,NULL,&status);
515
516 if (hex == 0 || U_FAILURE(status)) {
517 log_err("FAIL: utrans_open(Unicode-Hex) failed, error=%s\n",
518 u_errorName(status));
519 goto exit;
520 }
521
522 for (i=0; i<DATA_length; i+=3) {
523 /*u_uastrcpy(filt, DATA[i]);*/
524 u_charsToUChars(DATA[i], filt, (int32_t)strlen(DATA[i])+1);
525 utrans_setFilter(hex, filt, -1, &status);
526
527 if (U_FAILURE(status)) {
528 log_err("FAIL: utrans_setFilter() failed, error=%s\n",
529 u_errorName(status));
530 goto exit;
531 }
532
533 /*u_uastrcpy(buf, DATA[i+1]);*/
534 u_charsToUChars(DATA[i+1], buf, (int32_t)strlen(DATA[i+1])+1);
535 limit = 5;
536 utrans_transUChars(hex, buf, NULL, 128, 0, &limit, &status);
537
538 if (U_FAILURE(status)) {
539 log_err("FAIL: utrans_transUChars() failed, error=%s\n",
540 u_errorName(status));
541 goto exit;
542 }
543
544 cbuf=aescstrdup(buf, -1);
545 u_charsToUChars(DATA[i+2], exp, (int32_t)strlen(DATA[i+2])+1);
546 if (0 == u_strcmp(buf, exp)) {
547 log_verbose("Ok: %s | %s -> %s\n", DATA[i+1], DATA[i], cbuf);
548 } else {
549 log_err("FAIL: %s | %s -> %s, expected %s\n", DATA[i+1], DATA[i], cbuf, DATA[i+2]);
550 }
551 }
552
553 exit:
554 utrans_close(hex);
555 }
556
557 /**
558 * Test the UReplaceableCallback extractBetween support. We use a
559 * transliterator known to rely on this call.
560 */
TestExtractBetween()561 static void TestExtractBetween() {
562
563 UTransliterator *trans;
564 UErrorCode status = U_ZERO_ERROR;
565 UParseError parseErr;
566
567 trans = utrans_open("Lower", UTRANS_FORWARD, NULL, -1,
568 &parseErr, &status);
569
570 if (U_FAILURE(status)) {
571 log_err("FAIL: utrans_open(Lower) failed, error=%s\n",
572 u_errorName(status));
573 } else {
574 _expect(trans, "ABC", "abc");
575
576 utrans_close(trans);
577 }
578 }
579
580 /**
581 * Test utrans_toRules, utrans_getSourceSet
582 */
583
584 /* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
585 filter & source set 4-20 chars */
586 static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
587 static const char* transSimpleCName = "yo-yo_BJ";
588
589 enum { kUBufMax = 512 };
TestGetRulesAndSourceSet()590 static void TestGetRulesAndSourceSet() {
591 UErrorCode status = U_ZERO_ERROR;
592 UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
593 if ( U_SUCCESS(status) ) {
594 USet* uset;
595 UChar ubuf[kUBufMax];
596 int32_t ulen;
597
598 status = U_ZERO_ERROR;
599 ulen = utrans_toRules(utrans, false, ubuf, kUBufMax, &status);
600 if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
601 log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
602 u_errorName(status), ulen);
603 }
604
605 status = U_ZERO_ERROR;
606 ulen = utrans_toRules(utrans, false, NULL, 0, &status);
607 if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
608 log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
609 u_errorName(status), ulen);
610 }
611
612 status = U_ZERO_ERROR;
613 ulen = utrans_toRules(utrans, true, ubuf, kUBufMax, &status);
614 if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
615 log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
616 u_errorName(status), ulen);
617 }
618
619 status = U_ZERO_ERROR;
620 uset = utrans_getSourceSet(utrans, false, NULL, &status);
621 ulen = uset_toPattern(uset, ubuf, kUBufMax, false, &status);
622 uset_close(uset);
623 if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
624 log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
625 u_errorName(status), ulen);
626 }
627
628 status = U_ZERO_ERROR;
629 uset = utrans_getSourceSet(utrans, true, NULL, &status);
630 ulen = uset_toPattern(uset, ubuf, kUBufMax, false, &status);
631 uset_close(uset);
632 if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
633 log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
634 u_errorName(status), ulen);
635 }
636
637 utrans_close(utrans);
638 } else {
639 log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
640 transSimpleCName, u_errorName(status));
641 }
642 }
643
/*
 * One transliteration test case.  All three strings are passed through
 * u_unescape() before use, so they may contain \uXXXX escapes.
 */
typedef struct TransIDSourceTarg {
    const char * transID;       /* ID of the transliterator to open */
    const char * sourceText;    /* text to transliterate */
    const char * targetText;    /* expected result */
} TransIDSourceTarg;
649
650 static const TransIDSourceTarg dataVarCompItems[] = {
651 { "Simplified-Traditional",
652 "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
653 "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
654 { "Halfwidth-Fullwidth",
655 "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
656 "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
657 { "Han-Latin/Names; Latin-Bopomofo",
658 "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
659 "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA, \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA, \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB, \\u3129\\u311D\\u02CB \\u3108\\u311F" },
660 { "Greek-Latin",
661 "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
662 "A \\u0100I H\\u0100I RH" },
663 /* The following transform is provisional and not present in ICU 60
664 { "Greek-Latin/BGN",
665 "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
666 "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
667 */
668 { "Greek-Latin/UNGEGN",
669 "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
670 "A A A R" },
671 { NULL, NULL, NULL }
672 };
673
674 enum { kBBufMax = 1024 };
TestDataVariantsCompounds()675 static void TestDataVariantsCompounds() {
676 const TransIDSourceTarg* itemsPtr;
677 for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
678 UErrorCode status = U_ZERO_ERROR;
679 UChar utrid[kUBufMax];
680 int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
681 UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
682 if (U_FAILURE(status)) {
683 log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
684 continue;
685 }
686 UChar text[kUBufMax];
687 int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax);
688 int32_t textLim = textLen;
689 utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
690 if (U_FAILURE(status)) {
691 log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
692 } else {
693 UChar expect[kUBufMax];
694 int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax);
695 if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
696 char btext[kBBufMax], bexpect[kBBufMax];
697 u_austrncpy(btext, text, kUBufMax);
698 u_austrncpy(bexpect, expect, kUBufMax);
699 log_err("FAIL: utrans_transUChars(%s),\n expect %s\n get %s\n", itemsPtr->transID, bexpect, btext);
700 }
701 }
702 utrans_close(utrans);
703 }
704 }
705
_expectRules(const char * crules,const char * cfrom,const char * cto)706 static void _expectRules(const char* crules,
707 const char* cfrom,
708 const char* cto) {
709 /* u_uastrcpy has no capacity param for the buffer -- so just
710 * make all buffers way too big */
711 enum { CAP = 256 };
712 UChar rules[CAP];
713 UTransliterator *trans;
714 UErrorCode status = U_ZERO_ERROR;
715 UParseError parseErr;
716
717 u_uastrcpy(rules, crules);
718
719 trans = utrans_open(crules /*use rules as ID*/, UTRANS_FORWARD, rules, -1,
720 &parseErr, &status);
721 if (U_FAILURE(status)) {
722 utrans_close(trans);
723 log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
724 crules, u_errorName(status));
725 return;
726 }
727
728 _expect(trans, cfrom, cto);
729
730 utrans_close(trans);
731 }
732
_expect(const UTransliterator * trans,const char * cfrom,const char * cto)733 static void _expect(const UTransliterator* trans,
734 const char* cfrom,
735 const char* cto) {
736 /* u_uastrcpy has no capacity param for the buffer -- so just
737 * make all buffers way too big */
738 enum { CAP = 256 };
739 UChar from[CAP];
740 UChar to[CAP];
741 UChar buf[CAP];
742 const UChar *ID;
743 int32_t IDLength;
744 const char *id;
745
746 UErrorCode status = U_ZERO_ERROR;
747 int32_t limit;
748 UTransPosition pos;
749 XReplaceable xrep;
750 XReplaceable *xrepPtr = &xrep;
751 UReplaceableCallbacks xrepVtable;
752
753 u_uastrcpy(from, cfrom);
754 u_uastrcpy(to, cto);
755
756 ID = utrans_getUnicodeID(trans, &IDLength);
757 id = aescstrdup(ID, IDLength);
758
759 /* utrans_transUChars() */
760 u_strcpy(buf, from);
761 limit = u_strlen(buf);
762 utrans_transUChars(trans, buf, NULL, CAP, 0, &limit, &status);
763 if (U_FAILURE(status)) {
764 log_err("FAIL: utrans_transUChars() failed, error=%s\n",
765 u_errorName(status));
766 return;
767 }
768
769 if (0 == u_strcmp(buf, to)) {
770 log_verbose("Ok: utrans_transUChars(%s) x %s -> %s\n",
771 id, cfrom, cto);
772 } else {
773 char actual[CAP];
774 u_austrcpy(actual, buf);
775 log_err("FAIL: utrans_transUChars(%s) x %s -> %s, expected %s\n",
776 id, cfrom, actual, cto);
777 }
778
779 /* utrans_transIncrementalUChars() */
780 u_strcpy(buf, from);
781 pos.start = pos.contextStart = 0;
782 pos.limit = pos.contextLimit = u_strlen(buf);
783 utrans_transIncrementalUChars(trans, buf, NULL, CAP, &pos, &status);
784 utrans_transUChars(trans, buf, NULL, CAP, pos.start, &pos.limit, &status);
785 if (U_FAILURE(status)) {
786 log_err("FAIL: utrans_transIncrementalUChars() failed, error=%s\n",
787 u_errorName(status));
788 return;
789 }
790
791 if (0 == u_strcmp(buf, to)) {
792 log_verbose("Ok: utrans_transIncrementalUChars(%s) x %s -> %s\n",
793 id, cfrom, cto);
794 } else {
795 char actual[CAP];
796 u_austrcpy(actual, buf);
797 log_err("FAIL: utrans_transIncrementalUChars(%s) x %s -> %s, expected %s\n",
798 id, cfrom, actual, cto);
799 }
800
801 /* utrans_trans() */
802 InitXReplaceableCallbacks(&xrepVtable);
803 InitXReplaceable(&xrep, cfrom);
804 limit = u_strlen(from);
805 utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, 0, &limit, &status);
806 if (U_FAILURE(status)) {
807 log_err("FAIL: utrans_trans() failed, error=%s\n",
808 u_errorName(status));
809 FreeXReplaceable(&xrep);
810 return;
811 }
812
813 if (0 == u_strcmp(xrep.text, to)) {
814 log_verbose("Ok: utrans_trans(%s) x %s -> %s\n",
815 id, cfrom, cto);
816 } else {
817 char actual[CAP];
818 u_austrcpy(actual, xrep.text);
819 log_err("FAIL: utrans_trans(%s) x %s -> %s, expected %s\n",
820 id, cfrom, actual, cto);
821 }
822 FreeXReplaceable(&xrep);
823
824 /* utrans_transIncremental() */
825 InitXReplaceable(&xrep, cfrom);
826 pos.start = pos.contextStart = 0;
827 pos.limit = pos.contextLimit = u_strlen(from);
828 utrans_transIncremental(trans, (UReplaceable*)xrepPtr, &xrepVtable, &pos, &status);
829 utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, pos.start, &pos.limit, &status);
830 if (U_FAILURE(status)) {
831 log_err("FAIL: utrans_transIncremental() failed, error=%s\n",
832 u_errorName(status));
833 FreeXReplaceable(&xrep);
834 return;
835 }
836
837 if (0 == u_strcmp(xrep.text, to)) {
838 log_verbose("Ok: utrans_transIncremental(%s) x %s -> %s\n",
839 id, cfrom, cto);
840 } else {
841 char actual[CAP];
842 u_austrcpy(actual, xrep.text);
843 log_err("FAIL: utrans_transIncremental(%s) x %s -> %s, expected %s\n",
844 id, cfrom, actual, cto);
845 }
846 FreeXReplaceable(&xrep);
847 }
848
849 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
850