/*
**********************************************************************
* Copyright (c) 2002-2009, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
*/
#include "unicode/uset.h"
#include "unicode/ustring.h"
#include "cintltst.h"
#include <stdlib.h>
#include <string.h>

/* Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so it composes safely inside
 * larger expressions. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Registers test function x under the path "uset/<x>"; expects a
 * TestNode** named 'root' to be in scope at the point of use. */
#define TEST(x) addTest(root, &x, "uset/" # x)

17 static void TestAPI(void);
18 static void Testj2269(void);
19 static void TestSerialized(void);
20 static void TestNonInvariantPattern(void);
21 static void TestBadPattern(void);
22 static void TestFreezable(void);
23 static void TestSpan(void);
24 
25 void addUSetTest(TestNode** root);
26 
27 static void expect(const USet* set,
28                    const char* inList,
29                    const char* outList,
30                    UErrorCode* ec);
31 static void expectContainment(const USet* set,
32                               const char* list,
33                               UBool isIn);
34 static char oneUCharToChar(UChar32 c);
35 static void expectItems(const USet* set,
36                         const char* items);
37 
38 void
addUSetTest(TestNode ** root)39 addUSetTest(TestNode** root) {
40     TEST(TestAPI);
41     TEST(Testj2269);
42     TEST(TestSerialized);
43     TEST(TestNonInvariantPattern);
44     TEST(TestBadPattern);
45     TEST(TestFreezable);
46     TEST(TestSpan);
47 }
48 
/*------------------------------------------------------------------
 * Tests
 *------------------------------------------------------------------*/

Testj2269()53 static void Testj2269() {
54   UErrorCode status = U_ZERO_ERROR;
55   UChar a[4] = { 0x61, 0x62, 0x63, 0 };
56   USet *s = uset_open(1, 0);
57   uset_addString(s, a, 3);
58   a[0] = 0x63; a[1] = 0x63;
59   expect(s, "{abc}", "{ccc}", &status);
60   uset_close(s);
61 }
62 
63 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
64 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
65 
66 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
67 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
68 
69 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
70 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
71 
72 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
73 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
74 
75 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
76 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
77 
78 /**
79  * Basic API test for uset.x
80  */
TestAPI()81 static void TestAPI() {
82     USet* set;
83     USet* set2;
84     UErrorCode ec;
85 
86     /* [] */
87     set = uset_openEmpty();
88     expect(set, "", "abc{ab}", NULL);
89     uset_close(set);
90 
91     set = uset_open(1, 0);
92     expect(set, "", "abc{ab}", NULL);
93     uset_close(set);
94 
95     set = uset_open(1, 1);
96     uset_clear(set);
97     expect(set, "", "abc{ab}", NULL);
98     uset_close(set);
99 
100     /* [ABC] */
101     set = uset_open(0x0041, 0x0043);
102     expect(set, "ABC", "DEF{ab}", NULL);
103     uset_close(set);
104 
105     /* [a-c{ab}] */
106     ec = U_ZERO_ERROR;
107     set = uset_openPattern(PAT, PAT_LEN, &ec);
108     if(U_FAILURE(ec)) {
109         log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
110         return;
111     }
112     if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
113         log_err("uset_resemblesPattern of PAT failed\n");
114     }
115     expect(set, "abc{ab}", "def{bc}", &ec);
116 
117     /* [a-d{ab}] */
118     uset_add(set, 0x64);
119     expect(set, "abcd{ab}", "ef{bc}", NULL);
120 
121     /* [acd{ab}{bc}] */
122     uset_remove(set, 0x62);
123     uset_addString(set, STR_bc, STR_bc_LEN);
124     expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
125 
126     /* [acd{bc}] */
127     uset_removeString(set, STR_ab, STR_ab_LEN);
128     expect(set, "acd{bc}", "bfg{ab}", NULL);
129 
130     /* [^acd{bc}] */
131     uset_complement(set);
132     expect(set, "bef{bc}", "acd{ac}", NULL);
133 
134     /* [a-e{bc}] */
135     uset_complement(set);
136     uset_addRange(set, 0x0062, 0x0065);
137     expect(set, "abcde{bc}", "fg{ab}", NULL);
138 
139     /* [de{bc}] */
140     uset_removeRange(set, 0x0050, 0x0063);
141     expect(set, "de{bc}", "bcfg{ab}", NULL);
142 
143     /* [g-l] */
144     uset_set(set, 0x0067, 0x006C);
145     expect(set, "ghijkl", "de{bc}", NULL);
146 
147     if (uset_indexOf(set, 0x0067) != 0) {
148         log_err("uset_indexOf failed finding correct index of 'g'\n");
149     }
150 
151     if (uset_charAt(set, 0) != 0x0067) {
152         log_err("uset_charAt failed finding correct char 'g' at index 0\n");
153     }
154 
155     /* How to test this one...? */
156     uset_compact(set);
157 
158     /* [g-i] */
159     uset_retain(set, 0x0067, 0x0069);
160     expect(set, "ghi", "dejkl{bc}", NULL);
161 
162     /* UCHAR_ASCII_HEX_DIGIT */
163     uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
164     if(U_FAILURE(ec)) {
165         log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
166         return;
167     }
168     expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
169 
170     /* [ab] */
171     uset_clear(set);
172     uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
173     expect(set, "ab", "def{ab}", NULL);
174     if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
175         log_err("set should not conatin all characters of \"bc\" \n");
176     }
177 
178     /* [] */
179     set2 = uset_open(1, 1);
180     uset_clear(set2);
181 
182     /* space */
183     uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
184     expect(set2, " ", "abcdefghi{bc}", NULL);
185 
186     /* [a-c] */
187     uset_set(set2, 0x0061, 0x0063);
188     /* [g-i] */
189     uset_set(set, 0x0067, 0x0069);
190 
191     /* [a-c g-i] */
192     if (uset_containsSome(set, set2)) {
193         log_err("set should not contain some of set2 yet\n");
194     }
195     uset_complementAll(set, set2);
196     if (!uset_containsSome(set, set2)) {
197         log_err("set should contain some of set2\n");
198     }
199     expect(set, "abcghi", "def{bc}", NULL);
200 
201     /* [g-i] */
202     uset_removeAll(set, set2);
203     expect(set, "ghi", "abcdef{bc}", NULL);
204 
205     /* [a-c g-i] */
206     uset_addAll(set2, set);
207     expect(set2, "abcghi", "def{bc}", NULL);
208 
209     /* [g-i] */
210     uset_retainAll(set2, set);
211     expect(set2, "ghi", "abcdef{bc}", NULL);
212 
213     uset_close(set);
214     uset_close(set2);
215 }
216 
/*------------------------------------------------------------------
 * Support
 *------------------------------------------------------------------*/

221 /**
222  * Verifies that the given set contains the characters and strings in
223  * inList, and does not contain those in outList.  Also verifies that
224  * 'set' is not NULL and that 'ec' succeeds.
225  * @param set the set to test, or NULL (on error)
226  * @param inList list of set contents, in iteration order.  Format is
227  * list of individual strings, in iteration order, followed by sorted
228  * list of strings, delimited by {}.  This means we do not test
229  * characters '{' or '}' and we do not test strings containing those
230  * characters either.
231  * @param outList list of things not in the set.  Same format as
232  * inList.
233  * @param ec an error code, checked for success.  May be NULL in which
234  * case it is ignored.
235  */
expect(const USet * set,const char * inList,const char * outList,UErrorCode * ec)236 static void expect(const USet* set,
237                    const char* inList,
238                    const char* outList,
239                    UErrorCode* ec) {
240     if (ec!=NULL && U_FAILURE(*ec)) {
241         log_err("FAIL: %s\n", u_errorName(*ec));
242         return;
243     }
244     if (set == NULL) {
245         log_err("FAIL: USet is NULL\n");
246         return;
247     }
248     expectContainment(set, inList, TRUE);
249     expectContainment(set, outList, FALSE);
250     expectItems(set, inList);
251 }
252 
/**
 * Logs whether uset_containsRange() agrees with the expectation for the
 * inclusive range [rangeStart, rangeEnd].
 *
 * @param set        the set under test
 * @param pat        printable pattern of the set, used in log output
 * @param rangeStart first code point of the range
 * @param rangeEnd   last code point of the range
 * @param isIn       TRUE if the range is expected to be contained
 */
static void expectRangeContainment(const USet* set,
                                   const char* pat,
                                   int32_t rangeStart,
                                   int32_t rangeEnd,
                                   UBool isIn) {
    if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
        log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
                    (isIn ? "contains" : "does not contain"),
                    rangeStart, rangeEnd);
    } else {
        log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
                (isIn ? "does not contain" : "contains"),
                rangeStart, rangeEnd);
    }
}

/**
 * Verifies that every entry of 'list' is (isIn == TRUE) or is not
 * (isIn == FALSE) contained in 'set'.
 *
 * Single characters are checked with uset_contains(), "{...}" entries
 * with uset_containsString().  Runs of consecutive characters in the
 * list are additionally checked as a range with uset_containsRange().
 *
 * @param set  the set under test
 * @param list characters and {}-delimited strings, as described for
 *             expect()
 * @param isIn expected containment of every list entry
 */
static void expectContainment(const USet* set,
                              const char* list,
                              UBool isIn) {
    const char* p = list;
    UChar ustr[4096];
    char *pat;
    UErrorCode ec;
    int32_t rangeStart = -1, rangeEnd = -1, length;

    ec = U_ZERO_ERROR;
    /* The capacity is counted in UChars, not bytes. */
    length = uset_toPattern(set, ustr,
                            (int32_t)(sizeof(ustr)/sizeof(ustr[0])),
                            TRUE, &ec);
    if(U_FAILURE(ec)) {
        log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
        return;
    }
    /* Printable copy of the pattern; ustr is reused as scratch below. */
    pat=aescstrdup(ustr, length);

    while (*p) {
        if (*p=='{') {
            const char* stringStart = ++p;
            int32_t stringLength = 0;
            char strCopy[64];

            /* Find the closing brace without running off the list. */
            while (*p != 0 && *p != '}') {
                ++p;
            }
            if (*p == 0) {
                log_err("FAIL: unterminated '{' in test list \"%s\"\n", list);
                return;
            }
            stringLength = (int32_t)(p - stringStart);
            ++p; /* step over '}' */

            if (stringLength >= (int32_t)sizeof(strCopy)) {
                log_err("FAIL: string too long in test list \"%s\"\n", list);
                return;
            }
            memcpy(strCopy, stringStart, (size_t)stringLength);
            strCopy[stringLength] = 0;

            u_charsToUChars(stringStart, ustr, stringLength);

            if (uset_containsString(set, ustr, stringLength) == isIn) {
                log_verbose("Ok: %s %s \"%s\"\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            strCopy);
            } else {
                log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
                        (isIn ? "does not contain" : "contains"),
                        strCopy);
            }
        }

        else {
            UChar32 c;

            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

            if (uset_contains(set, c) == isIn) {
                log_verbose("Ok: %s %s '%c'\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            *p);
            } else {
                log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
                        (isIn ? "does not contain" : "contains"),
                        *p);
            }

            /* Test the range API too by looking for ranges */
            if (c == rangeEnd+1) {
                /* c extends the current run of consecutive characters. */
                rangeEnd = c;
            } else {
                /* The run is broken: check the finished one, start anew. */
                if (rangeStart >= 0) {
                    expectRangeContainment(set, pat, rangeStart, rangeEnd, isIn);
                }
                rangeStart = rangeEnd = c;
            }

            ++p;
        }
    }

    /* Check the run that was still open when the list ended. */
    if (rangeStart >= 0) {
        expectRangeContainment(set, pat, rangeStart, rangeEnd, isIn);
    }
}

346 /* This only works for invariant BMP chars */
oneUCharToChar(UChar32 c)347 static char oneUCharToChar(UChar32 c) {
348     UChar ubuf[1];
349     char buf[1];
350     ubuf[0] = (UChar) c;
351     u_UCharsToChars(ubuf, buf, 1);
352     return buf[0];
353 }
354 
expectItems(const USet * set,const char * items)355 static void expectItems(const USet* set,
356                         const char* items) {
357     const char* p = items;
358     UChar ustr[4096], itemStr[4096];
359     char buf[4096];
360     char *pat;
361     UErrorCode ec;
362     int32_t expectedSize = 0;
363     int32_t itemCount = uset_getItemCount(set);
364     int32_t itemIndex = 0;
365     UChar32 start = 1, end = 0;
366     int32_t itemLen = 0, length;
367 
368     ec = U_ZERO_ERROR;
369     length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
370     if (U_FAILURE(ec)) {
371         log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
372         return;
373     }
374     pat=aescstrdup(ustr, length);
375 
376     if (uset_isEmpty(set) != (strlen(items)==0)) {
377         log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
378                 pat,
379                 strlen(items)==0 ? "TRUE" : "FALSE");
380     }
381 
382     /* Don't test patterns starting with "[^" */
383     if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
384         return;
385     }
386 
387     while (*p) {
388 
389         ++expectedSize;
390 
391         if (start > end || start == -1) {
392             /* Fetch our next item */
393             if (itemIndex >= itemCount) {
394                 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat);
395                 return;
396             }
397 
398             itemLen = uset_getItem(set, itemIndex, &start, &end,
399                                    itemStr, sizeof(itemStr), &ec);
400             if (U_FAILURE(ec) || itemLen < 0) {
401                 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
402                 return;
403             }
404 
405             if (itemLen == 0) {
406                 log_verbose("Ok: %s item %d is %c-%c\n", pat,
407                             itemIndex, oneUCharToChar(start),
408                             oneUCharToChar(end));
409             } else {
410                 itemStr[itemLen] = 0;
411                 u_UCharsToChars(itemStr, buf, itemLen+1);
412                 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
413             }
414 
415             ++itemIndex;
416         }
417 
418         if (*p=='{') {
419             const char* stringStart = ++p;
420             int32_t stringLength = 0;
421             char strCopy[64];
422 
423             while (*p++ != '}') {
424             }
425             stringLength = (int32_t)(p - stringStart - 1);
426             strncpy(strCopy, stringStart, stringLength);
427             strCopy[stringLength] = 0;
428 
429             u_charsToUChars(stringStart, ustr, stringLength);
430             ustr[stringLength] = 0;
431 
432             if (itemLen == 0) {
433                 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
434                         pat, strCopy);
435                 return;
436             }
437 
438             if (u_strcmp(ustr, itemStr) != 0) {
439                 log_err("FAIL: for %s expect \"%s\" next\n",
440                         pat, strCopy);
441                 return;
442             }
443         }
444 
445         else {
446             UChar32 c;
447 
448             u_charsToUChars(p, ustr, 1);
449             c = ustr[0];
450 
451             if (itemLen != 0) {
452                 log_err("FAIL: for %s expect '%c' next, but got a string\n",
453                         pat, *p);
454                 return;
455             }
456 
457             if (c != start++) {
458                 log_err("FAIL: for %s expect '%c' next\n",
459                         pat, *p);
460                 return;
461             }
462 
463             ++p;
464         }
465     }
466 
467     if (uset_size(set) == expectedSize) {
468         log_verbose("Ok: %s size is %d\n", pat, expectedSize);
469     } else {
470         log_err("FAIL: %s size is %d, expected %d\n",
471                 pat, uset_size(set), expectedSize);
472     }
473 }
474 
475 static void
TestSerialized()476 TestSerialized() {
477     uint16_t buffer[1000];
478     USerializedSet sset;
479     USet *set;
480     UErrorCode errorCode;
481     UChar32 c;
482     int32_t length;
483 
484     /* use a pattern that generates both BMP and supplementary code points */
485     U_STRING_DECL(pattern, "[:Cf:]", 6);
486     U_STRING_INIT(pattern, "[:Cf:]", 6);
487 
488     errorCode=U_ZERO_ERROR;
489     set=uset_openPattern(pattern, -1, &errorCode);
490     if(U_FAILURE(errorCode)) {
491         log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode));
492         return;
493     }
494 
495     length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
496     if(U_FAILURE(errorCode)) {
497         log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
498         uset_close(set);
499         return;
500     }
501 
502     uset_getSerializedSet(&sset, buffer, length);
503     for(c=0; c<=0x10ffff; ++c) {
504         if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
505             log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
506             break;
507         }
508     }
509 
510     uset_close(set);
511 }
512 
513 /**
514  * Make sure that when non-invariant chars are passed to uset_openPattern
515  * they do not cause an ugly failure mode (e.g. assertion failure).
516  * JB#3795.
517  */
518 static void
TestNonInvariantPattern()519 TestNonInvariantPattern() {
520     UErrorCode ec = U_ZERO_ERROR;
521     /* The critical part of this test is that the following pattern
522        must contain a non-invariant character. */
523     static const char *pattern = "[:ccc!=0:]";
524     UChar buf[256];
525     int32_t len = u_unescape(pattern, buf, 256);
526     /* This test 'fails' by having an assertion failure within the
527        following call.  It passes by running to completion with no
528        assertion failure. */
529     USet *set = uset_openPattern(buf, len, &ec);
530     uset_close(set);
531 }
532 
TestBadPattern(void)533 static void TestBadPattern(void) {
534     UErrorCode status = U_ZERO_ERROR;
535     USet *pat;
536     U_STRING_DECL(pattern, "[", 1);
537     U_STRING_INIT(pattern, "[", 1);
538     pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
539     if (pat != NULL || U_SUCCESS(status)) {
540         log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
541     }
542 }
543 
openIDSet()544 static USet *openIDSet() {
545     UErrorCode errorCode = U_ZERO_ERROR;
546     U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
547     U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
548     return uset_openPattern(pattern, 15, &errorCode);
549 }
550 
TestFreezable()551 static void TestFreezable() {
552     USet *idSet;
553     USet *frozen;
554     USet *thawed;
555 
556     idSet=openIDSet();
557 
558     if (idSet == NULL) {
559         log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
560         uset_close(idSet);
561         return;
562     }
563 
564     frozen=uset_clone(idSet);
565 
566     if (frozen == NULL) {
567         log_err("uset_Clone() returned NULL\n");
568         return;
569     }
570 
571     if(!uset_equals(frozen, idSet)) {
572         log_err("uset_clone() did not make an equal copy\n");
573     }
574 
575     uset_freeze(frozen);
576     uset_addRange(frozen, 0xd802, 0xd805);
577 
578     if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
579         log_err("uset_freeze() or uset_isFrozen() does not work\n");
580     }
581 
582     thawed=uset_cloneAsThawed(frozen);
583 
584     if (thawed == NULL) {
585         log_err("uset_cloneAsThawed(frozen) returned NULL");
586         uset_close(frozen);
587         uset_close(idSet);
588         return;
589     }
590 
591     uset_addRange(thawed, 0xd802, 0xd805);
592 
593     if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
594         log_err("uset_cloneAsThawed() does not work\n");
595     }
596 
597     uset_close(idSet);
598     uset_close(frozen);
599     uset_close(thawed);
600 }
601 
TestSpan()602 static void TestSpan() {
603     static const UChar s16[2]={ 0xe01, 0x3000 };
604     static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
605 
606     USet *idSet=openIDSet();
607 
608     if (idSet == NULL) {
609         log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
610         return;
611     }
612 
613     if(
614         1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
615         0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
616         2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
617         1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
618     ) {
619         log_err("uset_span() or uset_spanBack() does not work\n");
620     }
621 
622     if(
623         3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
624         0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
625         6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
626         3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
627     ) {
628         log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
629     }
630 
631     uset_freeze(idSet);
632 
633     if(
634         1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
635         0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
636         2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
637         1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
638     ) {
639         log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
640     }
641 
642     if(
643         3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
644         0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
645         6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
646         3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
647     ) {
648         log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
649     }
650 
651     uset_close(idSet);
652 }
653 
/*eof*/