• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *   Copyright (C) 1997-2016 International Business Machines
6  *   Corporation and others.  All Rights Reserved.
7  *******************************************************************************
8  *   Date        Name        Description
9  *   06/23/00    aliu        Creation.
10  *******************************************************************************
11  */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "unicode/utrans.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uset.h"
23 #include "cintltst.h"
24 #include "cmemory.h"
25 
26 #define TEST(x) addTest(root, &x, "utrans/" # x)
27 
28 static void TestAPI(void);
29 static void TestSimpleRules(void);
30 static void TestFilter(void);
31 static void TestOpenInverse(void);
32 static void TestClone(void);
33 static void TestRegisterUnregister(void);
34 static void TestExtractBetween(void);
35 static void TestUnicodeIDs(void);
36 static void TestGetRulesAndSourceSet(void);
37 static void TestDataVariantsCompounds(void);
38 
39 static void _expectRules(const char*, const char*, const char*);
40 static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
41 
42 void addUTransTest(TestNode** root);
43 
44 
45 void
addUTransTest(TestNode ** root)46 addUTransTest(TestNode** root) {
47     TEST(TestAPI);
48     TEST(TestSimpleRules);
49     TEST(TestFilter);
50     TEST(TestOpenInverse);
51     TEST(TestClone);
52     TEST(TestRegisterUnregister);
53     TEST(TestExtractBetween);
54     TEST(TestUnicodeIDs);
55     TEST(TestGetRulesAndSourceSet);
56     TEST(TestDataVariantsCompounds);
57 }
58 
59 /*------------------------------------------------------------------
60  * Replaceable glue
61  *
62  * To test the Replaceable glue we have to dummy up a C-based
63  * Replaceable callback.  This code is for testing purposes only.
64  *------------------------------------------------------------------*/
65 
66 typedef struct XReplaceable {
67     UChar* text;    /* MUST BE null-terminated */
68 } XReplaceable;
69 
/**
 * Fills rep with a freshly allocated UChar copy of cstring.
 * Any previous value of rep->text is overwritten without being freed.
 *
 * @param rep      Container to initialize.
 * @param cstring  Null-terminated char string to convert with u_uastrcpy().
 */
static void InitXReplaceable(XReplaceable* rep, const char* cstring) {
    /* one UChar per input char, plus the terminator */
    size_t unitCount = strlen(cstring) + 1;
    rep->text = malloc(unitCount * sizeof(UChar));
    u_uastrcpy(rep->text, cstring);
}
74 
/**
 * Releases the text owned by rep and clears the pointer so that a
 * second call is harmless.
 *
 * @param rep  Container whose buffer is released.
 */
static void FreeXReplaceable(XReplaceable* rep) {
    /* free(NULL) is a no-op, so no guard is needed */
    free(rep->text);
    rep->text = NULL;
}
81 
82 /* UReplaceableCallbacks callback */
Xlength(const UReplaceable * rep)83 static int32_t Xlength(const UReplaceable* rep) {
84     const XReplaceable* x = (const XReplaceable*)rep;
85     return u_strlen(x->text);
86 }
87 
88 /* UReplaceableCallbacks callback */
XcharAt(const UReplaceable * rep,int32_t offset)89 static UChar XcharAt(const UReplaceable* rep, int32_t offset) {
90     const XReplaceable* x = (const XReplaceable*)rep;
91     return x->text[offset];
92 }
93 
94 /* UReplaceableCallbacks callback */
Xchar32At(const UReplaceable * rep,int32_t offset)95 static UChar32 Xchar32At(const UReplaceable* rep, int32_t offset) {
96     const XReplaceable* x = (const XReplaceable*)rep;
97     return x->text[offset];
98 }
99 
100 /* UReplaceableCallbacks callback */
/**
 * replace callback: substitutes the range [start, limit) of the wrapped
 * text with textLength units taken from text.  The result is built in a
 * newly allocated buffer, which then replaces the old one.
 *
 * @param rep         Actually a pointer to an XReplaceable.
 * @param start       First index of the range to replace.
 * @param limit       Index just past the range to replace.
 * @param text        Replacement units (need not be null-terminated).
 * @param textLength  Number of units to take from text.
 */
static void Xreplace(UReplaceable* rep, int32_t start, int32_t limit,
              const UChar* text, int32_t textLength) {
    XReplaceable* x = (XReplaceable*)rep;
    /* The replaced range (limit - start units) goes away and textLength
     * units come in; the old code added the range instead of subtracting
     * it and therefore over-allocated. */
    int32_t newLen = Xlength(rep) - (limit - start) + textLength;
    UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
    u_strncpy(newText, x->text, start);                       /* prefix */
    u_strncpy(newText + start, text, textLength);             /* replacement */
    u_strcpy(newText + start + textLength, x->text + limit);  /* tail + NUL */
    free(x->text);
    x->text = newText;
}
112 
113 /* UReplaceableCallbacks callback */
/**
 * copy callback: inserts a copy of the range [start, limit) of the
 * wrapped text at index dest.  The result is assembled in a newly
 * allocated buffer, which then replaces the old one.
 *
 * @param rep    Actually a pointer to an XReplaceable.
 * @param start  First index of the range to copy.
 * @param limit  Index just past the range to copy.
 * @param dest   Index at which the copy is inserted.
 */
static void Xcopy(UReplaceable* rep, int32_t start, int32_t limit, int32_t dest) {
    XReplaceable* self = (XReplaceable*)rep;
    int32_t span = limit - start;
    int32_t grownLen = Xlength(rep) + span;
    UChar* grown = (UChar*) malloc(sizeof(UChar) * (grownLen+1));

    /* text before the insertion point */
    u_strncpy(grown, self->text, dest);
    /* the copied range */
    u_strncpy(grown + dest, self->text + start, span);
    /* everything from the insertion point on, including the NUL */
    u_strcpy(grown + dest + span, self->text + dest);

    free(self->text);
    self->text = grown;
}
124 
125 /* UReplaceableCallbacks callback */
/**
 * extract callback: copies the range [start, limit) of the wrapped text
 * into dst.  dst is not null-terminated by this function unless the
 * source text ends inside the range.
 *
 * @param rep    Actually a pointer to an XReplaceable.
 * @param start  First index of the range to extract.
 * @param limit  Index just past the range to extract.
 * @param dst    Destination buffer with room for limit - start units.
 */
static void Xextract(UReplaceable* rep, int32_t start, int32_t limit, UChar* dst) {
    XReplaceable* x = (XReplaceable*)rep;
    int32_t len = limit - start;
    /* Read from the requested offset; copying from x->text itself would
     * always return the first len units regardless of start. */
    u_strncpy(dst, x->text + start, len);
}
131 
/**
 * Points every slot of the callback table at the X* implementations of
 * this file.
 *
 * @param callbacks  Table to fill in.
 */
static void InitXReplaceableCallbacks(UReplaceableCallbacks* callbacks) {
    /* read-only accessors */
    callbacks->length   = Xlength;
    callbacks->charAt   = XcharAt;
    callbacks->char32At = Xchar32At;
    callbacks->extract  = Xextract;
    /* mutators */
    callbacks->replace  = Xreplace;
    callbacks->copy     = Xcopy;
}
140 
141 /*------------------------------------------------------------------
142  * Tests
143  *------------------------------------------------------------------*/
144 
TestAPI()145 static void TestAPI() {
146     enum { BUF_CAP = 128 };
147     char buf[BUF_CAP], buf2[BUF_CAP];
148     UErrorCode status = U_ZERO_ERROR;
149     UTransliterator* trans = NULL;
150     int32_t i, n;
151 
152     /* Test getAvailableIDs */
153     n = utrans_countAvailableIDs();
154     if (n < 1) {
155         log_err("FAIL: utrans_countAvailableIDs() returned %d\n", n);
156     } else {
157         log_verbose("System ID count: %d\n", n);
158     }
159     for (i=0; i<n; ++i) {
160         utrans_getAvailableID(i, buf, BUF_CAP);
161         if (*buf == 0) {
162             log_err("FAIL: System transliterator %d: \"\"\n", i);
163         } else {
164             log_verbose("System transliterator %d: \"%s\"\n", i, buf);
165         }
166     }
167 
168     /* Test open */
169     utrans_getAvailableID(0, buf, BUF_CAP);
170     trans = utrans_open(buf, UTRANS_FORWARD,NULL,0,NULL, &status);
171     if (U_FAILURE(status)) {
172         log_err("FAIL: utrans_open(%s) failed, error=%s\n",
173                 buf, u_errorName(status));
174     }
175 
176     else {
177         /* Test getID */
178         utrans_getID(trans, buf2, BUF_CAP);
179         if (0 != strcmp(buf, buf2)) {
180             log_err("FAIL: utrans_getID(%s) returned %s\n",
181                     buf, buf2);
182         }
183         utrans_close(trans);
184     }
185 }
186 
TestUnicodeIDs()187 static void TestUnicodeIDs() {
188     UEnumeration *uenum;
189     UTransliterator *utrans;
190     const UChar *id, *id2;
191     int32_t idLength, id2Length, count, count2;
192 
193     UErrorCode errorCode;
194 
195     errorCode=U_ZERO_ERROR;
196     uenum=utrans_openIDs(&errorCode);
197     if(U_FAILURE(errorCode)) {
198         log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
199         return;
200     }
201 
202     count=uenum_count(uenum, &errorCode);
203     if(U_FAILURE(errorCode) || count<1) {
204         log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
205     }
206 
207     count=0;
208     for(;;) {
209         id=uenum_unext(uenum, &idLength, &errorCode);
210         if(U_FAILURE(errorCode)) {
211             log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
212             break;
213         }
214         if(id==NULL) {
215             break;
216         }
217 
218         if(++count>10) {
219             /* try to actually open only a few transliterators */
220             continue;
221         }
222 
223         utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
224         if(U_FAILURE(errorCode)) {
225             log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
226             continue;
227         }
228 
229         id2=utrans_getUnicodeID(utrans, &id2Length);
230         if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
231             log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
232         }
233 
234         utrans_close(utrans);
235     }
236 
237     uenum_reset(uenum, &errorCode);
238     if(U_FAILURE(errorCode) || count<1) {
239         log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
240     } else {
241         count2=uenum_count(uenum, &errorCode);
242         if(U_FAILURE(errorCode) || count<1) {
243             log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
244         } else if(count!=count2) {
245             log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
246         }
247     }
248 
249     uenum_close(uenum);
250 }
251 
TestOpenInverse()252 static void TestOpenInverse(){
253     UErrorCode status=U_ZERO_ERROR;
254     UTransliterator* t1=NULL;
255     UTransliterator* inverse1=NULL;
256     enum { BUF_CAP = 128 };
257     char buf1[BUF_CAP];
258     int32_t i=0;
259 
260     const char TransID[][25]={
261            "Halfwidth-Fullwidth",
262            "Fullwidth-Halfwidth",
263            "Greek-Latin" ,
264            "Latin-Greek",
265            /*"Arabic-Latin", // Removed in 2.0*/
266            /*"Latin-Arabic", // Removed in 2.0*/
267            "Katakana-Latin",
268            "Latin-Katakana",
269            /*"Hebrew-Latin", // Removed in 2.0*/
270            /*"Latin-Hebrew", // Removed in 2.0*/
271            "Cyrillic-Latin",
272            "Latin-Cyrillic",
273            "Devanagari-Latin",
274            "Latin-Devanagari",
275            "Any-Hex",
276            "Hex-Any"
277          };
278 
279     for(i=0; i<UPRV_LENGTHOF(TransID); i=i+2){
280         status = U_ZERO_ERROR;
281         t1=utrans_open(TransID[i], UTRANS_FORWARD,NULL,0,NULL, &status);
282         if(t1 == NULL || U_FAILURE(status)){
283             log_data_err("FAIL: in instantiation for id=%s -> %s (Are you missing data?)\n", TransID[i], u_errorName(status));
284             continue;
285         }
286         inverse1=utrans_openInverse(t1, &status);
287         if(U_FAILURE(status)){
288             log_err("FAIL: utrans_openInverse() failed for id=%s. Error=%s\n", TransID[i], myErrorName(status));
289             continue;
290         }
291         utrans_getID(inverse1, buf1, BUF_CAP);
292         if(strcmp(buf1, TransID[i+1]) != 0){
293             log_err("FAIL :openInverse() for %s returned %s instead of %s\n", TransID[i], buf1, TransID[i+1]);
294         }
295         utrans_close(t1);
296         utrans_close(inverse1);
297    }
298 }
299 
TestClone()300 static void TestClone(){
301     UErrorCode status=U_ZERO_ERROR;
302     UTransliterator* t1=NULL;
303     UTransliterator* t2=NULL;
304     UTransliterator* t3=NULL;
305     UTransliterator* t4=NULL;
306     enum { BUF_CAP = 128 };
307     char buf1[BUF_CAP], buf2[BUF_CAP], buf3[BUF_CAP];
308 
309     t1=utrans_open("Latin-Devanagari", UTRANS_FORWARD, NULL,0,NULL,&status);
310     if(U_FAILURE(status)){
311         log_data_err("FAIL: construction -> %s (Are you missing data?)\n", u_errorName(status));
312         return;
313     }
314     t2=utrans_open("Latin-Greek", UTRANS_FORWARD, NULL,0,NULL,&status);
315     if(U_FAILURE(status)){
316         log_err("FAIL: construction\n");
317         utrans_close(t1);
318         return;
319     }
320 
321     t3=utrans_clone(t1, &status);
322     t4=utrans_clone(t2, &status);
323 
324     utrans_getID(t1, buf1, BUF_CAP);
325     utrans_getID(t2, buf2, BUF_CAP);
326     utrans_getID(t3, buf3, BUF_CAP);
327 
328     if(strcmp(buf1, buf3) != 0 ||
329         strcmp(buf1, buf2) == 0) {
330         log_err("FAIL: utrans_clone() failed\n");
331     }
332 
333     utrans_getID(t4, buf3, BUF_CAP);
334 
335     if(strcmp(buf2, buf3) != 0 ||
336         strcmp(buf1, buf3) == 0) {
337         log_err("FAIL: utrans_clone() failed\n");
338     }
339 
340     utrans_close(t1);
341     utrans_close(t2);
342     utrans_close(t3);
343     utrans_close(t4);
344 
345 }
346 
TestRegisterUnregister()347 static void TestRegisterUnregister(){
348     UErrorCode status=U_ZERO_ERROR;
349     UTransliterator* t1=NULL;
350     UTransliterator* rules=NULL, *rules2;
351     UTransliterator* inverse1=NULL;
352     UChar rule[]={ 0x0061, 0x003c, 0x003e, 0x0063}; /*a<>b*/
353 
354     U_STRING_DECL(ID, "TestA-TestB", 11);
355     U_STRING_INIT(ID, "TestA-TestB", 11);
356 
357     /* Make sure it doesn't exist */
358     t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
359     if(t1 != NULL || U_SUCCESS(status)) {
360         log_err("FAIL: TestA-TestB already registered\n");
361         return;
362     }
363     status=U_ZERO_ERROR;
364     /* Check inverse too */
365     inverse1=utrans_open("TestA-TestB", UTRANS_REVERSE, NULL,0,NULL,&status);
366     if(inverse1 != NULL || U_SUCCESS(status)) {
367         log_err("FAIL: TestA-TestB already registered\n");
368         return;
369     }
370     status=U_ZERO_ERROR;
371     /* Create it */
372     rules=utrans_open("TestA-TestB",UTRANS_FORWARD, rule, 4, NULL, &status);
373     if(U_FAILURE(status)){
374         log_err("FAIL: utrans_openRules(a<>B) failed with error=%s\n", myErrorName(status));
375         return;
376     }
377 
378     /* clone it so we can register it a second time */
379     rules2=utrans_clone(rules, &status);
380     if(U_FAILURE(status)) {
381         log_err("FAIL: utrans_clone(a<>B) failed with error=%s\n", myErrorName(status));
382         return;
383     }
384 
385     status=U_ZERO_ERROR;
386     /* Register it */
387     utrans_register(rules, &status);
388     if(U_FAILURE(status)){
389         log_err("FAIL: utrans_register failed with error=%s\n", myErrorName(status));
390         return;
391     }
392     status=U_ZERO_ERROR;
393     /* Now check again -- should exist now*/
394     t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
395     if(U_FAILURE(status) || t1 == NULL){
396         log_err("FAIL: TestA-TestB not registered\n");
397         return;
398     }
399     utrans_close(t1);
400 
401     /*unregister the instance*/
402     status=U_ZERO_ERROR;
403     utrans_unregister("TestA-TestB");
404     /* now Make sure it doesn't exist */
405     t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
406     if(U_SUCCESS(status) || t1 != NULL) {
407         log_err("FAIL: TestA-TestB isn't unregistered\n");
408         return;
409     }
410     utrans_close(t1);
411 
412     /* now with utrans_unregisterID(const UChar *) */
413     status=U_ZERO_ERROR;
414     utrans_register(rules2, &status);
415     if(U_FAILURE(status)){
416         log_err("FAIL: 2nd utrans_register failed with error=%s\n", myErrorName(status));
417         return;
418     }
419     status=U_ZERO_ERROR;
420     /* Now check again -- should exist now*/
421     t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
422     if(U_FAILURE(status) || t1 == NULL){
423         log_err("FAIL: 2nd TestA-TestB not registered\n");
424         return;
425     }
426     utrans_close(t1);
427 
428     /*unregister the instance*/
429     status=U_ZERO_ERROR;
430     utrans_unregisterID(ID, -1);
431     /* now Make sure it doesn't exist */
432     t1=utrans_openU(ID, -1, UTRANS_FORWARD,NULL,0,NULL, &status);
433     if(U_SUCCESS(status) || t1 != NULL) {
434         log_err("FAIL: 2nd TestA-TestB isn't unregistered\n");
435         return;
436     }
437 
438     utrans_close(t1);
439     utrans_close(inverse1);
440 }
441 
TestSimpleRules()442 static void TestSimpleRules() {
443     /* Test rules */
444     /* Example: rules 1. ab>x|y
445      *                2. yc>z
446      *
447      * []|eabcd  start - no match, copy e to translated buffer
448      * [e]|abcd  match rule 1 - copy output & adjust cursor
449      * [ex|y]cd  match rule 2 - copy output & adjust cursor
450      * [exz]|d   no match, copy d to transliterated buffer
451      * [exzd]|   done
452      */
453     _expectRules("ab>x|y;"
454                  "yc>z",
455                  "eabcd", "exzd");
456 
457     /* Another set of rules:
458      *    1. ab>x|yzacw
459      *    2. za>q
460      *    3. qc>r
461      *    4. cw>n
462      *
463      * []|ab       Rule 1
464      * [x|yzacw]   No match
465      * [xy|zacw]   Rule 2
466      * [xyq|cw]    Rule 4
467      * [xyqn]|     Done
468      */
469     _expectRules("ab>x|yzacw;"
470                  "za>q;"
471                  "qc>r;"
472                  "cw>n",
473                  "ab", "xyqn");
474 
475     /* Test categories
476      */
477     _expectRules("$dummy=" "\\uE100" ";" /* careful here with E100 */
478                  "$vowel=[aeiouAEIOU];"
479                  "$lu=[:Lu:];"
480                  "$vowel } $lu > '!';"
481                  "$vowel > '&';"
482                  "'!' { $lu > '^';"
483                  "$lu > '*';"
484                  "a > ERROR",
485                  "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
486 
487     /* Test multiple passes
488     */
489     _expectRules("abc > xy;"
490                  "::Null;"
491                  "aba > z;",
492                  "abc ababc aba", "xy abxy z");
493 }
494 
TestFilter()495 static void TestFilter() {
496     UErrorCode status = U_ZERO_ERROR;
497     UChar filt[128];
498     UChar buf[128];
499     UChar exp[128];
500     char *cbuf;
501     int32_t limit;
502     const char* DATA[] = {
503         "[^c]", /* Filter out 'c' */
504         "abcde",
505         "\\u0061\\u0062c\\u0064\\u0065",
506 
507         "", /* No filter */
508         "abcde",
509         "\\u0061\\u0062\\u0063\\u0064\\u0065"
510     };
511     int32_t DATA_length = UPRV_LENGTHOF(DATA);
512     int32_t i;
513 
514     UTransliterator* hex = utrans_open("Any-Hex", UTRANS_FORWARD, NULL,0,NULL,&status);
515 
516     if (hex == 0 || U_FAILURE(status)) {
517         log_err("FAIL: utrans_open(Unicode-Hex) failed, error=%s\n",
518                 u_errorName(status));
519         goto exit;
520     }
521 
522     for (i=0; i<DATA_length; i+=3) {
523         /*u_uastrcpy(filt, DATA[i]);*/
524         u_charsToUChars(DATA[i], filt, (int32_t)strlen(DATA[i])+1);
525         utrans_setFilter(hex, filt, -1, &status);
526 
527         if (U_FAILURE(status)) {
528             log_err("FAIL: utrans_setFilter() failed, error=%s\n",
529                     u_errorName(status));
530             goto exit;
531         }
532 
533         /*u_uastrcpy(buf, DATA[i+1]);*/
534         u_charsToUChars(DATA[i+1], buf, (int32_t)strlen(DATA[i+1])+1);
535         limit = 5;
536         utrans_transUChars(hex, buf, NULL, 128, 0, &limit, &status);
537 
538         if (U_FAILURE(status)) {
539             log_err("FAIL: utrans_transUChars() failed, error=%s\n",
540                     u_errorName(status));
541             goto exit;
542         }
543 
544         cbuf=aescstrdup(buf, -1);
545         u_charsToUChars(DATA[i+2], exp, (int32_t)strlen(DATA[i+2])+1);
546         if (0 == u_strcmp(buf, exp)) {
547             log_verbose("Ok: %s | %s -> %s\n", DATA[i+1], DATA[i], cbuf);
548         } else {
549             log_err("FAIL: %s | %s -> %s, expected %s\n", DATA[i+1], DATA[i], cbuf, DATA[i+2]);
550         }
551     }
552 
553  exit:
554     utrans_close(hex);
555 }
556 
557 /**
558  * Test the UReplaceableCallback extractBetween support.  We use a
559  * transliterator known to rely on this call.
560  */
TestExtractBetween()561 static void TestExtractBetween() {
562 
563     UTransliterator *trans;
564     UErrorCode status = U_ZERO_ERROR;
565     UParseError parseErr;
566 
567     trans = utrans_open("Lower", UTRANS_FORWARD, NULL, -1,
568                         &parseErr, &status);
569 
570     if (U_FAILURE(status)) {
571         log_err("FAIL: utrans_open(Lower) failed, error=%s\n",
572                 u_errorName(status));
573     } else {
574         _expect(trans, "ABC", "abc");
575 
576         utrans_close(trans);
577     }
578 }
579 
580 /**
581  * Test utrans_toRules, utrans_getSourceSet
582  */
583 
584 /* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
585    filter & source set 4-20 chars */
586 static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
587 static const char* transSimpleCName = "yo-yo_BJ";
588 
589 enum { kUBufMax = 512 };
TestGetRulesAndSourceSet()590 static void TestGetRulesAndSourceSet() {
591     UErrorCode status = U_ZERO_ERROR;
592     UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
593     if ( U_SUCCESS(status) ) {
594         USet* uset;
595         UChar ubuf[kUBufMax];
596         int32_t ulen;
597 
598         status = U_ZERO_ERROR;
599         ulen = utrans_toRules(utrans, false, ubuf, kUBufMax, &status);
600         if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
601             log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
602                     u_errorName(status), ulen);
603         }
604 
605         status = U_ZERO_ERROR;
606         ulen = utrans_toRules(utrans, false, NULL, 0, &status);
607         if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
608             log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
609                     u_errorName(status), ulen);
610         }
611 
612         status = U_ZERO_ERROR;
613         ulen = utrans_toRules(utrans, true, ubuf, kUBufMax, &status);
614         if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
615             log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
616                     u_errorName(status), ulen);
617         }
618 
619         status = U_ZERO_ERROR;
620         uset = utrans_getSourceSet(utrans, false, NULL, &status);
621         ulen = uset_toPattern(uset, ubuf, kUBufMax, false, &status);
622         uset_close(uset);
623         if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
624             log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
625                     u_errorName(status), ulen);
626         }
627 
628         status = U_ZERO_ERROR;
629         uset = utrans_getSourceSet(utrans, true, NULL, &status);
630         ulen = uset_toPattern(uset, ubuf, kUBufMax, false, &status);
631         uset_close(uset);
632         if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
633             log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
634                     u_errorName(status), ulen);
635         }
636 
637         utrans_close(utrans);
638     } else {
639         log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
640                 transSimpleCName, u_errorName(status));
641     }
642 }
643 
/**
 * One data-driven case for TestDataVariantsCompounds().  All three
 * strings are passed through u_unescape() before use.
 */
typedef struct {
    const char * transID;     /* ID handed to utrans_openU() */
    const char * sourceText;  /* text to transliterate */
    const char * targetText;  /* expected result */
} TransIDSourceTarg;
649 
650 static const TransIDSourceTarg dataVarCompItems[] = {
651     { "Simplified-Traditional",
652        "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
653        "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
654     { "Halfwidth-Fullwidth",
655       "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
656       "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
657     { "Han-Latin/Names; Latin-Bopomofo",
658        "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
659        "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA, \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA, \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB, \\u3129\\u311D\\u02CB \\u3108\\u311F" },
660     { "Greek-Latin",
661       "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
662       "A \\u0100I H\\u0100I RH" },
663 /* The following transform is provisional and not present in ICU 60
664     { "Greek-Latin/BGN",
665       "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
666       "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
667 */
668     { "Greek-Latin/UNGEGN",
669       "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
670       "A A A R" },
671     { NULL, NULL, NULL }
672 };
673 
/* Capacity, in bytes, of the char buffers used to print mismatching text. */
enum {
    kBBufMax = 1024
};
TestDataVariantsCompounds()675 static void TestDataVariantsCompounds() {
676     const TransIDSourceTarg* itemsPtr;
677     for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
678         UErrorCode status = U_ZERO_ERROR;
679         UChar utrid[kUBufMax];
680         int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
681         UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
682         if (U_FAILURE(status)) {
683             log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
684             continue;
685         }
686         UChar text[kUBufMax];
687         int32_t textLen =  u_unescape(itemsPtr->sourceText, text, kUBufMax);
688         int32_t textLim = textLen;
689         utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
690         if (U_FAILURE(status)) {
691             log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
692         } else {
693             UChar expect[kUBufMax];
694             int32_t expectLen =  u_unescape(itemsPtr->targetText, expect, kUBufMax);
695             if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
696                 char btext[kBBufMax], bexpect[kBBufMax];
697                 u_austrncpy(btext, text, kUBufMax);
698                 u_austrncpy(bexpect, expect, kUBufMax);
699                 log_err("FAIL: utrans_transUChars(%s),\n       expect %s\n       get    %s\n", itemsPtr->transID, bexpect, btext);
700             }
701         }
702         utrans_close(utrans);
703     }
704 }
705 
_expectRules(const char * crules,const char * cfrom,const char * cto)706 static void _expectRules(const char* crules,
707                   const char* cfrom,
708                   const char* cto) {
709     /* u_uastrcpy has no capacity param for the buffer -- so just
710      * make all buffers way too big */
711     enum { CAP = 256 };
712     UChar rules[CAP];
713     UTransliterator *trans;
714     UErrorCode status = U_ZERO_ERROR;
715     UParseError parseErr;
716 
717     u_uastrcpy(rules, crules);
718 
719     trans = utrans_open(crules /*use rules as ID*/, UTRANS_FORWARD, rules, -1,
720                              &parseErr, &status);
721     if (U_FAILURE(status)) {
722         utrans_close(trans);
723         log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
724                 crules, u_errorName(status));
725         return;
726     }
727 
728     _expect(trans, cfrom, cto);
729 
730     utrans_close(trans);
731 }
732 
_expect(const UTransliterator * trans,const char * cfrom,const char * cto)733 static void _expect(const UTransliterator* trans,
734              const char* cfrom,
735              const char* cto) {
736     /* u_uastrcpy has no capacity param for the buffer -- so just
737      * make all buffers way too big */
738     enum { CAP = 256 };
739     UChar from[CAP];
740     UChar to[CAP];
741     UChar buf[CAP];
742     const UChar *ID;
743     int32_t IDLength;
744     const char *id;
745 
746     UErrorCode status = U_ZERO_ERROR;
747     int32_t limit;
748     UTransPosition pos;
749     XReplaceable xrep;
750     XReplaceable *xrepPtr = &xrep;
751     UReplaceableCallbacks xrepVtable;
752 
753     u_uastrcpy(from, cfrom);
754     u_uastrcpy(to, cto);
755 
756     ID = utrans_getUnicodeID(trans, &IDLength);
757     id = aescstrdup(ID, IDLength);
758 
759     /* utrans_transUChars() */
760     u_strcpy(buf, from);
761     limit = u_strlen(buf);
762     utrans_transUChars(trans, buf, NULL, CAP, 0, &limit, &status);
763     if (U_FAILURE(status)) {
764         log_err("FAIL: utrans_transUChars() failed, error=%s\n",
765                 u_errorName(status));
766         return;
767     }
768 
769     if (0 == u_strcmp(buf, to)) {
770         log_verbose("Ok: utrans_transUChars(%s) x %s -> %s\n",
771                     id, cfrom, cto);
772     } else {
773         char actual[CAP];
774         u_austrcpy(actual, buf);
775         log_err("FAIL: utrans_transUChars(%s) x %s -> %s, expected %s\n",
776                 id, cfrom, actual, cto);
777     }
778 
779     /* utrans_transIncrementalUChars() */
780     u_strcpy(buf, from);
781     pos.start = pos.contextStart = 0;
782     pos.limit = pos.contextLimit = u_strlen(buf);
783     utrans_transIncrementalUChars(trans, buf, NULL, CAP, &pos, &status);
784     utrans_transUChars(trans, buf, NULL, CAP, pos.start, &pos.limit, &status);
785     if (U_FAILURE(status)) {
786         log_err("FAIL: utrans_transIncrementalUChars() failed, error=%s\n",
787                 u_errorName(status));
788         return;
789     }
790 
791     if (0 == u_strcmp(buf, to)) {
792         log_verbose("Ok: utrans_transIncrementalUChars(%s) x %s -> %s\n",
793                     id, cfrom, cto);
794     } else {
795         char actual[CAP];
796         u_austrcpy(actual, buf);
797         log_err("FAIL: utrans_transIncrementalUChars(%s) x %s -> %s, expected %s\n",
798                 id, cfrom, actual, cto);
799     }
800 
801     /* utrans_trans() */
802     InitXReplaceableCallbacks(&xrepVtable);
803     InitXReplaceable(&xrep, cfrom);
804     limit = u_strlen(from);
805     utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, 0, &limit, &status);
806     if (U_FAILURE(status)) {
807         log_err("FAIL: utrans_trans() failed, error=%s\n",
808                 u_errorName(status));
809         FreeXReplaceable(&xrep);
810         return;
811     }
812 
813     if (0 == u_strcmp(xrep.text, to)) {
814         log_verbose("Ok: utrans_trans(%s) x %s -> %s\n",
815                     id, cfrom, cto);
816     } else {
817         char actual[CAP];
818         u_austrcpy(actual, xrep.text);
819         log_err("FAIL: utrans_trans(%s) x %s -> %s, expected %s\n",
820                 id, cfrom, actual, cto);
821     }
822     FreeXReplaceable(&xrep);
823 
824     /* utrans_transIncremental() */
825     InitXReplaceable(&xrep, cfrom);
826     pos.start = pos.contextStart = 0;
827     pos.limit = pos.contextLimit = u_strlen(from);
828     utrans_transIncremental(trans, (UReplaceable*)xrepPtr, &xrepVtable, &pos, &status);
829     utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, pos.start, &pos.limit, &status);
830     if (U_FAILURE(status)) {
831         log_err("FAIL: utrans_transIncremental() failed, error=%s\n",
832                 u_errorName(status));
833         FreeXReplaceable(&xrep);
834         return;
835     }
836 
837     if (0 == u_strcmp(xrep.text, to)) {
838         log_verbose("Ok: utrans_transIncremental(%s) x %s -> %s\n",
839                     id, cfrom, cto);
840     } else {
841         char actual[CAP];
842         u_austrcpy(actual, xrep.text);
843         log_err("FAIL: utrans_transIncremental(%s) x %s -> %s, expected %s\n",
844                 id, cfrom, actual, cto);
845     }
846     FreeXReplaceable(&xrep);
847 }
848 
849 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
850