• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2007, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
9 #include "cintltst.h"
10 #include <stdlib.h>
11 #include <string.h>
12 
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the macro can be used safely
 * inside larger expressions.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/*
 * Registers test function x under the path "uset/<x>".  Relies on a
 * variable named 'root' being in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "uset/" # x)
16 
17 static void TestAPI(void);
18 static void Testj2269(void);
19 static void TestSerialized(void);
20 static void TestNonInvariantPattern(void);
21 static void TestBadPattern(void);
22 static void TestFreezable(void);
23 static void TestSpan(void);
24 
25 void addUSetTest(TestNode** root);
26 
27 static void expect(const USet* set,
28                    const char* inList,
29                    const char* outList,
30                    UErrorCode* ec);
31 static void expectContainment(const USet* set,
32                               const char* list,
33                               UBool isIn);
34 static char oneUCharToChar(UChar32 c);
35 static void expectItems(const USet* set,
36                         const char* items);
37 
38 void
addUSetTest(TestNode ** root)39 addUSetTest(TestNode** root) {
40     TEST(TestAPI);
41     TEST(Testj2269);
42     TEST(TestSerialized);
43     TEST(TestNonInvariantPattern);
44     TEST(TestBadPattern);
45     TEST(TestFreezable);
46     TEST(TestSpan);
47 }
48 
49 /*------------------------------------------------------------------
50  * Tests
51  *------------------------------------------------------------------*/
52 
Testj2269()53 static void Testj2269() {
54   UErrorCode status = U_ZERO_ERROR;
55   UChar a[4] = { 0x61, 0x62, 0x63, 0 };
56   USet *s = uset_open(1, 0);
57   uset_addString(s, a, 3);
58   a[0] = 0x63; a[1] = 0x63;
59   expect(s, "{abc}", "{ccc}", &status);
60   uset_close(s);
61 }
62 
63 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
64 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
65 
66 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
67 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
68 
69 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
70 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
71 
72 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
73 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
74 
75 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
76 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
77 
78 /**
79  * Basic API test for uset.x
80  */
TestAPI()81 static void TestAPI() {
82     USet* set;
83     USet* set2;
84     UErrorCode ec;
85 
86     /* [] */
87     set = uset_open(1, 1);
88     uset_clear(set);
89     expect(set, "", "abc{ab}", NULL);
90     uset_close(set);
91 
92     /* [ABC] */
93     set = uset_open(0x0041, 0x0043);
94     expect(set, "ABC", "DEF{ab}", NULL);
95     uset_close(set);
96 
97     /* [a-c{ab}] */
98     ec = U_ZERO_ERROR;
99     set = uset_openPattern(PAT, PAT_LEN, &ec);
100     if(U_FAILURE(ec)) {
101         log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
102         return;
103     }
104     if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
105         log_err("uset_resemblesPattern of PAT failed\n");
106     }
107     expect(set, "abc{ab}", "def{bc}", &ec);
108 
109     /* [a-d{ab}] */
110     uset_add(set, 0x64);
111     expect(set, "abcd{ab}", "ef{bc}", NULL);
112 
113     /* [acd{ab}{bc}] */
114     uset_remove(set, 0x62);
115     uset_addString(set, STR_bc, STR_bc_LEN);
116     expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
117 
118     /* [acd{bc}] */
119     uset_removeString(set, STR_ab, STR_ab_LEN);
120     expect(set, "acd{bc}", "bfg{ab}", NULL);
121 
122     /* [^acd{bc}] */
123     uset_complement(set);
124     expect(set, "bef{bc}", "acd{ac}", NULL);
125 
126     /* [a-e{bc}] */
127     uset_complement(set);
128     uset_addRange(set, 0x0062, 0x0065);
129     expect(set, "abcde{bc}", "fg{ab}", NULL);
130 
131     /* [de{bc}] */
132     uset_removeRange(set, 0x0050, 0x0063);
133     expect(set, "de{bc}", "bcfg{ab}", NULL);
134 
135     /* [g-l] */
136     uset_set(set, 0x0067, 0x006C);
137     expect(set, "ghijkl", "de{bc}", NULL);
138 
139     if (uset_indexOf(set, 0x0067) != 0) {
140         log_err("uset_indexOf failed finding correct index of 'g'\n");
141     }
142 
143     if (uset_charAt(set, 0) != 0x0067) {
144         log_err("uset_charAt failed finding correct char 'g' at index 0\n");
145     }
146 
147     /* How to test this one...? */
148     uset_compact(set);
149 
150     /* [g-i] */
151     uset_retain(set, 0x0067, 0x0069);
152     expect(set, "ghi", "dejkl{bc}", NULL);
153 
154     /* UCHAR_ASCII_HEX_DIGIT */
155     uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
156     if(U_FAILURE(ec)) {
157         log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
158         return;
159     }
160     expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
161 
162     /* [ab] */
163     uset_clear(set);
164     uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
165     expect(set, "ab", "def{ab}", NULL);
166     if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
167         log_err("set should not conatin all characters of \"bc\" \n");
168     }
169 
170     /* [] */
171     set2 = uset_open(1, 1);
172     uset_clear(set2);
173 
174     /* space */
175     uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
176     expect(set2, " ", "abcdefghi{bc}", NULL);
177 
178     /* [a-c] */
179     uset_set(set2, 0x0061, 0x0063);
180     /* [g-i] */
181     uset_set(set, 0x0067, 0x0069);
182 
183     /* [a-c g-i] */
184     if (uset_containsSome(set, set2)) {
185         log_err("set should not contain some of set2 yet\n");
186     }
187     uset_complementAll(set, set2);
188     if (!uset_containsSome(set, set2)) {
189         log_err("set should contain some of set2\n");
190     }
191     expect(set, "abcghi", "def{bc}", NULL);
192 
193     /* [g-i] */
194     uset_removeAll(set, set2);
195     expect(set, "ghi", "abcdef{bc}", NULL);
196 
197     /* [a-c g-i] */
198     uset_addAll(set2, set);
199     expect(set2, "abcghi", "def{bc}", NULL);
200 
201     /* [g-i] */
202     uset_retainAll(set2, set);
203     expect(set2, "ghi", "abcdef{bc}", NULL);
204 
205     uset_close(set);
206     uset_close(set2);
207 }
208 
209 /*------------------------------------------------------------------
210  * Support
211  *------------------------------------------------------------------*/
212 
213 /**
214  * Verifies that the given set contains the characters and strings in
215  * inList, and does not contain those in outList.  Also verifies that
216  * 'set' is not NULL and that 'ec' succeeds.
217  * @param set the set to test, or NULL (on error)
218  * @param inList list of set contents, in iteration order.  Format is
219  * list of individual strings, in iteration order, followed by sorted
220  * list of strings, delimited by {}.  This means we do not test
221  * characters '{' or '}' and we do not test strings containing those
222  * characters either.
223  * @param outList list of things not in the set.  Same format as
224  * inList.
225  * @param ec an error code, checked for success.  May be NULL in which
226  * case it is ignored.
227  */
expect(const USet * set,const char * inList,const char * outList,UErrorCode * ec)228 static void expect(const USet* set,
229                    const char* inList,
230                    const char* outList,
231                    UErrorCode* ec) {
232     if (ec!=NULL && U_FAILURE(*ec)) {
233         log_err("FAIL: %s\n", u_errorName(*ec));
234         return;
235     }
236     if (set == NULL) {
237         log_err("FAIL: USet is NULL\n");
238         return;
239     }
240     expectContainment(set, inList, TRUE);
241     expectContainment(set, outList, FALSE);
242     expectItems(set, inList);
243 }
244 
expectContainment(const USet * set,const char * list,UBool isIn)245 static void expectContainment(const USet* set,
246                               const char* list,
247                               UBool isIn) {
248     const char* p = list;
249     UChar ustr[4096];
250     char *pat;
251     UErrorCode ec;
252     int32_t rangeStart = -1, rangeEnd = -1, length;
253 
254     ec = U_ZERO_ERROR;
255     length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
256     if(U_FAILURE(ec)) {
257         log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
258         return;
259     }
260     pat=aescstrdup(ustr, length);
261 
262     while (*p) {
263         if (*p=='{') {
264             const char* stringStart = ++p;
265             int32_t stringLength = 0;
266             char strCopy[64];
267 
268             while (*p++ != '}') {
269             }
270             stringLength = (int32_t)(p - stringStart - 1);
271             strncpy(strCopy, stringStart, stringLength);
272             strCopy[stringLength] = 0;
273 
274             u_charsToUChars(stringStart, ustr, stringLength);
275 
276             if (uset_containsString(set, ustr, stringLength) == isIn) {
277                 log_verbose("Ok: %s %s \"%s\"\n", pat,
278                             (isIn ? "contains" : "does not contain"),
279                             strCopy);
280             } else {
281                 log_err("FAIL: %s %s \"%s\"\n", pat,
282                         (isIn ? "does not contain" : "contains"),
283                         strCopy);
284             }
285         }
286 
287         else {
288             UChar32 c;
289 
290             u_charsToUChars(p, ustr, 1);
291             c = ustr[0];
292 
293             if (uset_contains(set, c) == isIn) {
294                 log_verbose("Ok: %s %s '%c'\n", pat,
295                             (isIn ? "contains" : "does not contain"),
296                             *p);
297             } else {
298                 log_err("FAIL: %s %s '%c'\n", pat,
299                         (isIn ? "does not contain" : "contains"),
300                         *p);
301             }
302 
303             /* Test the range API too by looking for ranges */
304             if (c == rangeEnd+1) {
305                 rangeEnd = c;
306             } else {
307                 if (rangeStart >= 0) {
308                     if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
309                         log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
310                                     (isIn ? "contains" : "does not contain"),
311                                     rangeStart, rangeEnd);
312                     } else {
313                         log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
314                                 (isIn ? "does not contain" : "contains"),
315                                 rangeStart, rangeEnd);
316                     }
317                 }
318                 rangeStart = rangeEnd = c;
319             }
320 
321             ++p;
322         }
323     }
324 
325     if (rangeStart >= 0) {
326         if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
327             log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
328                         (isIn ? "contains" : "does not contain"),
329                         rangeStart, rangeEnd);
330         } else {
331             log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
332                     (isIn ? "does not contain" : "contains"),
333                     rangeStart, rangeEnd);
334         }
335     }
336 }
337 
338 /* This only works for invariant BMP chars */
oneUCharToChar(UChar32 c)339 static char oneUCharToChar(UChar32 c) {
340     UChar ubuf[1];
341     char buf[1];
342     ubuf[0] = (UChar) c;
343     u_UCharsToChars(ubuf, buf, 1);
344     return buf[0];
345 }
346 
expectItems(const USet * set,const char * items)347 static void expectItems(const USet* set,
348                         const char* items) {
349     const char* p = items;
350     UChar ustr[4096], itemStr[4096];
351     char buf[4096];
352     char *pat;
353     UErrorCode ec;
354     int32_t expectedSize = 0;
355     int32_t itemCount = uset_getItemCount(set);
356     int32_t itemIndex = 0;
357     UChar32 start = 1, end = 0;
358     int32_t itemLen = 0, length;
359 
360     ec = U_ZERO_ERROR;
361     length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
362     if (U_FAILURE(ec)) {
363         log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
364         return;
365     }
366     pat=aescstrdup(ustr, length);
367 
368     if (uset_isEmpty(set) != (strlen(items)==0)) {
369         log_err("FAIL: %s should return %s from isEmpty\n",
370                 pat,
371                 strlen(items)==0 ? "TRUE" : "FALSE");
372     }
373 
374     /* Don't test patterns starting with "[^" */
375     if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
376         return;
377     }
378 
379     while (*p) {
380 
381         ++expectedSize;
382 
383         if (start > end || start == -1) {
384             /* Fetch our next item */
385             if (itemIndex >= itemCount) {
386                 log_err("FAIL: ran out of items iterating %s\n", pat);
387                 return;
388             }
389 
390             itemLen = uset_getItem(set, itemIndex, &start, &end,
391                                    itemStr, sizeof(itemStr), &ec);
392             if (U_FAILURE(ec) || itemLen < 0) {
393                 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
394                 return;
395             }
396 
397             if (itemLen == 0) {
398                 log_verbose("Ok: %s item %d is %c-%c\n", pat,
399                             itemIndex, oneUCharToChar(start),
400                             oneUCharToChar(end));
401             } else {
402                 itemStr[itemLen] = 0;
403                 u_UCharsToChars(itemStr, buf, itemLen+1);
404                 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
405             }
406 
407             ++itemIndex;
408         }
409 
410         if (*p=='{') {
411             const char* stringStart = ++p;
412             int32_t stringLength = 0;
413             char strCopy[64];
414 
415             while (*p++ != '}') {
416             }
417             stringLength = (int32_t)(p - stringStart - 1);
418             strncpy(strCopy, stringStart, stringLength);
419             strCopy[stringLength] = 0;
420 
421             u_charsToUChars(stringStart, ustr, stringLength);
422             ustr[stringLength] = 0;
423 
424             if (itemLen == 0) {
425                 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
426                         pat, strCopy);
427                 return;
428             }
429 
430             if (u_strcmp(ustr, itemStr) != 0) {
431                 log_err("FAIL: for %s expect \"%s\" next\n",
432                         pat, strCopy);
433                 return;
434             }
435         }
436 
437         else {
438             UChar32 c;
439 
440             u_charsToUChars(p, ustr, 1);
441             c = ustr[0];
442 
443             if (itemLen != 0) {
444                 log_err("FAIL: for %s expect '%c' next, but got a string\n",
445                         pat, *p);
446                 return;
447             }
448 
449             if (c != start++) {
450                 log_err("FAIL: for %s expect '%c' next\n",
451                         pat, *p);
452                 return;
453             }
454 
455             ++p;
456         }
457     }
458 
459     if (uset_size(set) == expectedSize) {
460         log_verbose("Ok: %s size is %d\n", pat, expectedSize);
461     } else {
462         log_err("FAIL: %s size is %d, expected %d\n",
463                 pat, uset_size(set), expectedSize);
464     }
465 }
466 
467 static void
TestSerialized()468 TestSerialized() {
469     uint16_t buffer[1000];
470     USerializedSet sset;
471     USet *set;
472     UErrorCode errorCode;
473     UChar32 c;
474     int32_t length;
475 
476     /* use a pattern that generates both BMP and supplementary code points */
477     U_STRING_DECL(pattern, "[:Cf:]", 6);
478     U_STRING_INIT(pattern, "[:Cf:]", 6);
479 
480     errorCode=U_ZERO_ERROR;
481     set=uset_openPattern(pattern, -1, &errorCode);
482     if(U_FAILURE(errorCode)) {
483         log_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode));
484         return;
485     }
486 
487     length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
488     if(U_FAILURE(errorCode)) {
489         log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
490         uset_close(set);
491         return;
492     }
493 
494     uset_getSerializedSet(&sset, buffer, length);
495     for(c=0; c<=0x10ffff; ++c) {
496         if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
497             log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
498             break;
499         }
500     }
501 
502     uset_close(set);
503 }
504 
505 /**
506  * Make sure that when non-invariant chars are passed to uset_openPattern
507  * they do not cause an ugly failure mode (e.g. assertion failure).
508  * JB#3795.
509  */
510 static void
TestNonInvariantPattern()511 TestNonInvariantPattern() {
512     UErrorCode ec = U_ZERO_ERROR;
513     /* The critical part of this test is that the following pattern
514        must contain a non-invariant character. */
515     static const char *pattern = "[:ccc!=0:]";
516     UChar buf[256];
517     int32_t len = u_unescape(pattern, buf, 256);
518     /* This test 'fails' by having an assertion failure within the
519        following call.  It passes by running to completion with no
520        assertion failure. */
521     USet *set = uset_openPattern(buf, len, &ec);
522     uset_close(set);
523 }
524 
TestBadPattern(void)525 static void TestBadPattern(void) {
526     UErrorCode status = U_ZERO_ERROR;
527     USet *pat;
528     U_STRING_DECL(pattern, "[", 1);
529     U_STRING_INIT(pattern, "[", 1);
530     pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
531     if (pat != NULL || U_SUCCESS(status)) {
532         log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
533     }
534 }
535 
openIDSet()536 static USet *openIDSet() {
537     UErrorCode errorCode = U_ZERO_ERROR;
538     U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
539     U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
540     return uset_openPattern(pattern, 15, &errorCode);
541 }
542 
TestFreezable()543 static void TestFreezable() {
544     USet *idSet;
545     USet *frozen;
546     USet *thawed;
547 
548     idSet=openIDSet();
549 
550     if (idSet == NULL) {
551         log_err("openIDSet() returned NULL");
552         uset_close(idSet);
553         return;
554     }
555 
556     frozen=uset_clone(idSet);
557 
558     if (frozen == NULL) {
559         log_err("uset_Clone() returned NULL");
560         return;
561     }
562 
563     if(!uset_equals(frozen, idSet)) {
564         log_err("uset_clone() did not make an equal copy\n");
565     }
566 
567     uset_freeze(frozen);
568     uset_addRange(frozen, 0xd802, 0xd805);
569 
570     if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
571         log_err("uset_freeze() or uset_isFrozen() does not work\n");
572     }
573 
574     thawed=uset_cloneAsThawed(frozen);
575 
576     if (thawed == NULL) {
577         log_err("uset_cloneAsThawed(frozen) returned NULL");
578         uset_close(frozen);
579         uset_close(idSet);
580         return;
581     }
582 
583     uset_addRange(thawed, 0xd802, 0xd805);
584 
585     if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
586         log_err("uset_cloneAsThawed() does not work\n");
587     }
588 
589     uset_close(idSet);
590     uset_close(frozen);
591     uset_close(thawed);
592 }
593 
TestSpan()594 static void TestSpan() {
595     static const UChar s16[2]={ 0xe01, 0x3000 };
596     static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
597 
598     USet *idSet=openIDSet();
599 
600     if (idSet == NULL) {
601         log_err("openIDSet() returned NULL");
602         return;
603     }
604 
605     if(
606         1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
607         0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
608         2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
609         1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
610     ) {
611         log_err("uset_span() or uset_spanBack() does not work\n");
612     }
613 
614     if(
615         3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
616         0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
617         6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
618         3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
619     ) {
620         log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
621     }
622 
623     uset_freeze(idSet);
624 
625     if(
626         1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
627         0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
628         2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
629         1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
630     ) {
631         log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
632     }
633 
634     if(
635         3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
636         0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
637         6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
638         3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
639     ) {
640         log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
641     }
642 
643     uset_close(idSet);
644 }
645 
646 /*eof*/
647