• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
9 #include "cintltst.h"
10 #include "cmemory.h"
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #define TEST(x) addTest(root, &x, "uset/" # x)
15 
16 static void TestAPI(void);
17 static void Testj2269(void);
18 static void TestSerialized(void);
19 static void TestNonInvariantPattern(void);
20 static void TestBadPattern(void);
21 static void TestFreezable(void);
22 static void TestSpan(void);
23 
24 void addUSetTest(TestNode** root);
25 
26 static void expect(const USet* set,
27                    const char* inList,
28                    const char* outList,
29                    UErrorCode* ec);
30 static void expectContainment(const USet* set,
31                               const char* list,
32                               UBool isIn);
33 static char oneUCharToChar(UChar32 c);
34 static void expectItems(const USet* set,
35                         const char* items);
36 
37 void
addUSetTest(TestNode ** root)38 addUSetTest(TestNode** root) {
39     TEST(TestAPI);
40     TEST(Testj2269);
41     TEST(TestSerialized);
42     TEST(TestNonInvariantPattern);
43     TEST(TestBadPattern);
44     TEST(TestFreezable);
45     TEST(TestSpan);
46 }
47 
48 /*------------------------------------------------------------------
49  * Tests
50  *------------------------------------------------------------------*/
51 
Testj2269()52 static void Testj2269() {
53   UErrorCode status = U_ZERO_ERROR;
54   UChar a[4] = { 0x61, 0x62, 0x63, 0 };
55   USet *s = uset_open(1, 0);
56   uset_addString(s, a, 3);
57   a[0] = 0x63; a[1] = 0x63;
58   expect(s, "{abc}", "{ccc}", &status);
59   uset_close(s);
60 }
61 
62 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
63 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
64 
65 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
66 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
67 
68 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
69 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
70 
71 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
72 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
73 
74 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
75 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
76 
77 /**
78  * Basic API test for uset.x
79  */
TestAPI()80 static void TestAPI() {
81     USet* set;
82     USet* set2;
83     UErrorCode ec;
84 
85     /* [] */
86     set = uset_openEmpty();
87     expect(set, "", "abc{ab}", NULL);
88     uset_close(set);
89 
90     set = uset_open(1, 0);
91     expect(set, "", "abc{ab}", NULL);
92     uset_close(set);
93 
94     set = uset_open(1, 1);
95     uset_clear(set);
96     expect(set, "", "abc{ab}", NULL);
97     uset_close(set);
98 
99     /* [ABC] */
100     set = uset_open(0x0041, 0x0043);
101     expect(set, "ABC", "DEF{ab}", NULL);
102     uset_close(set);
103 
104     /* [a-c{ab}] */
105     ec = U_ZERO_ERROR;
106     set = uset_openPattern(PAT, PAT_LEN, &ec);
107     if(U_FAILURE(ec)) {
108         log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
109         return;
110     }
111     if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
112         log_err("uset_resemblesPattern of PAT failed\n");
113     }
114     expect(set, "abc{ab}", "def{bc}", &ec);
115 
116     /* [a-d{ab}] */
117     uset_add(set, 0x64);
118     expect(set, "abcd{ab}", "ef{bc}", NULL);
119 
120     /* [acd{ab}{bc}] */
121     uset_remove(set, 0x62);
122     uset_addString(set, STR_bc, STR_bc_LEN);
123     expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
124 
125     /* [acd{bc}] */
126     uset_removeString(set, STR_ab, STR_ab_LEN);
127     expect(set, "acd{bc}", "bfg{ab}", NULL);
128 
129     /* [^acd{bc}] */
130     uset_complement(set);
131     expect(set, "bef{bc}", "acd{ac}", NULL);
132 
133     /* [a-e{bc}] */
134     uset_complement(set);
135     uset_addRange(set, 0x0062, 0x0065);
136     expect(set, "abcde{bc}", "fg{ab}", NULL);
137 
138     /* [de{bc}] */
139     uset_removeRange(set, 0x0050, 0x0063);
140     expect(set, "de{bc}", "bcfg{ab}", NULL);
141 
142     /* [g-l] */
143     uset_set(set, 0x0067, 0x006C);
144     expect(set, "ghijkl", "de{bc}", NULL);
145 
146     if (uset_indexOf(set, 0x0067) != 0) {
147         log_err("uset_indexOf failed finding correct index of 'g'\n");
148     }
149 
150     if (uset_charAt(set, 0) != 0x0067) {
151         log_err("uset_charAt failed finding correct char 'g' at index 0\n");
152     }
153 
154     /* How to test this one...? */
155     uset_compact(set);
156 
157     /* [g-i] */
158     uset_retain(set, 0x0067, 0x0069);
159     expect(set, "ghi", "dejkl{bc}", NULL);
160 
161     /* UCHAR_ASCII_HEX_DIGIT */
162     uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
163     if(U_FAILURE(ec)) {
164         log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
165         return;
166     }
167     expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
168 
169     /* [ab] */
170     uset_clear(set);
171     uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
172     expect(set, "ab", "def{ab}", NULL);
173     if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
174         log_err("set should not conatin all characters of \"bc\" \n");
175     }
176 
177     /* [] */
178     set2 = uset_open(1, 1);
179     uset_clear(set2);
180 
181     /* space */
182     uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
183     expect(set2, " ", "abcdefghi{bc}", NULL);
184 
185     /* [a-c] */
186     uset_set(set2, 0x0061, 0x0063);
187     /* [g-i] */
188     uset_set(set, 0x0067, 0x0069);
189 
190     /* [a-c g-i] */
191     if (uset_containsSome(set, set2)) {
192         log_err("set should not contain some of set2 yet\n");
193     }
194     uset_complementAll(set, set2);
195     if (!uset_containsSome(set, set2)) {
196         log_err("set should contain some of set2\n");
197     }
198     expect(set, "abcghi", "def{bc}", NULL);
199 
200     /* [g-i] */
201     uset_removeAll(set, set2);
202     expect(set, "ghi", "abcdef{bc}", NULL);
203 
204     /* [a-c g-i] */
205     uset_addAll(set2, set);
206     expect(set2, "abcghi", "def{bc}", NULL);
207 
208     /* [g-i] */
209     uset_retainAll(set2, set);
210     expect(set2, "ghi", "abcdef{bc}", NULL);
211 
212     uset_close(set);
213     uset_close(set2);
214 }
215 
216 /*------------------------------------------------------------------
217  * Support
218  *------------------------------------------------------------------*/
219 
220 /**
221  * Verifies that the given set contains the characters and strings in
222  * inList, and does not contain those in outList.  Also verifies that
223  * 'set' is not NULL and that 'ec' succeeds.
224  * @param set the set to test, or NULL (on error)
225  * @param inList list of set contents, in iteration order.  Format is
226  * list of individual strings, in iteration order, followed by sorted
227  * list of strings, delimited by {}.  This means we do not test
228  * characters '{' or '}' and we do not test strings containing those
229  * characters either.
230  * @param outList list of things not in the set.  Same format as
231  * inList.
232  * @param ec an error code, checked for success.  May be NULL in which
233  * case it is ignored.
234  */
expect(const USet * set,const char * inList,const char * outList,UErrorCode * ec)235 static void expect(const USet* set,
236                    const char* inList,
237                    const char* outList,
238                    UErrorCode* ec) {
239     if (ec!=NULL && U_FAILURE(*ec)) {
240         log_err("FAIL: %s\n", u_errorName(*ec));
241         return;
242     }
243     if (set == NULL) {
244         log_err("FAIL: USet is NULL\n");
245         return;
246     }
247     expectContainment(set, inList, TRUE);
248     expectContainment(set, outList, FALSE);
249     expectItems(set, inList);
250 }
251 
/**
 * Checks uset_containsRange() for one contiguous run of characters and
 * logs the outcome.
 * @param set        the set under test
 * @param pat        printable pattern of the set, used in log output only
 * @param rangeStart first code point of the run
 * @param rangeEnd   last code point of the run (inclusive)
 * @param isIn       the containment result that counts as success
 */
static void expectContainmentRange(const USet* set,
                                   const char* pat,
                                   int32_t rangeStart,
                                   int32_t rangeEnd,
                                   UBool isIn) {
    if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
        log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
                    (isIn ? "contains" : "does not contain"),
                    rangeStart, rangeEnd);
    } else {
        log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
                (isIn ? "does not contain" : "contains"),
                rangeStart, rangeEnd);
    }
}

/**
 * Verifies that every character and {string} named in 'list' is in the
 * set (isIn == TRUE) or absent from it (isIn == FALSE).  Runs of
 * consecutive characters in the list are additionally checked as a whole
 * with uset_containsRange().
 *
 * A list with an unterminated '{', or with a string too long for the
 * local display buffer, is reported as a test failure instead of being
 * read or written out of bounds.
 *
 * @param set  the set under test
 * @param list characters followed by {}-delimited strings
 * @param isIn the containment result that counts as success
 */
static void expectContainment(const USet* set,
                              const char* list,
                              UBool isIn) {
    const char* p = list;
    UChar ustr[4096];
    /* Capacities handed to ICU are counted in UChars, not in bytes. */
    const int32_t ustrCapacity = (int32_t)(sizeof(ustr) / sizeof(ustr[0]));
    char *pat;
    UErrorCode ec;
    int32_t rangeStart = -1, rangeEnd = -1, length;

    ec = U_ZERO_ERROR;
    length = uset_toPattern(set, ustr, ustrCapacity, TRUE, &ec);
    if(U_FAILURE(ec)) {
        log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
        return;
    }
    /* Take the printable copy now: ustr is reused as scratch space below. */
    pat=aescstrdup(ustr, length);

    while (*p) {
        if (*p=='{') {
            const char* stringStart = p + 1;
            const char* stringEnd = strchr(stringStart, '}');
            int32_t stringLength;
            char strCopy[64];

            if (stringEnd == NULL) {
                log_err("FAIL: unterminated '{' in list \"%s\"\n", list);
                return;
            }
            stringLength = (int32_t)(stringEnd - stringStart);
            if (stringLength >= (int32_t)sizeof(strCopy)) {
                log_err("FAIL: string too long in list \"%s\"\n", list);
                return;
            }
            p = stringEnd + 1;   /* continue after the closing brace */

            memcpy(strCopy, stringStart, (size_t)stringLength);
            strCopy[stringLength] = 0;

            u_charsToUChars(stringStart, ustr, stringLength);

            if (uset_containsString(set, ustr, stringLength) == isIn) {
                log_verbose("Ok: %s %s \"%s\"\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            strCopy);
            } else {
                log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
                        (isIn ? "does not contain" : "contains"),
                        strCopy);
            }
        }

        else {
            UChar32 c;

            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

            if (uset_contains(set, c) == isIn) {
                log_verbose("Ok: %s %s '%c'\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            *p);
            } else {
                log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
                        (isIn ? "does not contain" : "contains"),
                        *p);
            }

            /* Test the range API too by looking for ranges */
            if (c == rangeEnd+1) {
                rangeEnd = c;
            } else {
                /* The run ended: check it, then start a new one at c. */
                if (rangeStart >= 0) {
                    expectContainmentRange(set, pat, rangeStart, rangeEnd, isIn);
                }
                rangeStart = rangeEnd = c;
            }

            ++p;
        }
    }

    /* Check the run that was still open when the list ended. */
    if (rangeStart >= 0) {
        expectContainmentRange(set, pat, rangeStart, rangeEnd, isIn);
    }
}
344 
345 /* This only works for invariant BMP chars */
oneUCharToChar(UChar32 c)346 static char oneUCharToChar(UChar32 c) {
347     UChar ubuf[1];
348     char buf[1];
349     ubuf[0] = (UChar) c;
350     u_UCharsToChars(ubuf, buf, 1);
351     return buf[0];
352 }
353 
expectItems(const USet * set,const char * items)354 static void expectItems(const USet* set,
355                         const char* items) {
356     const char* p = items;
357     UChar ustr[4096], itemStr[4096];
358     char buf[4096];
359     char *pat;
360     UErrorCode ec;
361     int32_t expectedSize = 0;
362     int32_t itemCount = uset_getItemCount(set);
363     int32_t itemIndex = 0;
364     UChar32 start = 1, end = 0;
365     int32_t itemLen = 0, length;
366 
367     ec = U_ZERO_ERROR;
368     length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
369     if (U_FAILURE(ec)) {
370         log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
371         return;
372     }
373     pat=aescstrdup(ustr, length);
374 
375     if (uset_isEmpty(set) != (strlen(items)==0)) {
376         log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
377                 pat,
378                 strlen(items)==0 ? "TRUE" : "FALSE");
379     }
380 
381     /* Don't test patterns starting with "[^" */
382     if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
383         return;
384     }
385 
386     while (*p) {
387 
388         ++expectedSize;
389 
390         if (start > end || start == -1) {
391             /* Fetch our next item */
392             if (itemIndex >= itemCount) {
393                 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat);
394                 return;
395             }
396 
397             itemLen = uset_getItem(set, itemIndex, &start, &end,
398                                    itemStr, sizeof(itemStr), &ec);
399             if (U_FAILURE(ec) || itemLen < 0) {
400                 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
401                 return;
402             }
403 
404             if (itemLen == 0) {
405                 log_verbose("Ok: %s item %d is %c-%c\n", pat,
406                             itemIndex, oneUCharToChar(start),
407                             oneUCharToChar(end));
408             } else {
409                 itemStr[itemLen] = 0;
410                 u_UCharsToChars(itemStr, buf, itemLen+1);
411                 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
412             }
413 
414             ++itemIndex;
415         }
416 
417         if (*p=='{') {
418             const char* stringStart = ++p;
419             int32_t stringLength = 0;
420             char strCopy[64];
421 
422             while (*p++ != '}') {
423             }
424             stringLength = (int32_t)(p - stringStart - 1);
425             strncpy(strCopy, stringStart, stringLength);
426             strCopy[stringLength] = 0;
427 
428             u_charsToUChars(stringStart, ustr, stringLength);
429             ustr[stringLength] = 0;
430 
431             if (itemLen == 0) {
432                 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
433                         pat, strCopy);
434                 return;
435             }
436 
437             if (u_strcmp(ustr, itemStr) != 0) {
438                 log_err("FAIL: for %s expect \"%s\" next\n",
439                         pat, strCopy);
440                 return;
441             }
442         }
443 
444         else {
445             UChar32 c;
446 
447             u_charsToUChars(p, ustr, 1);
448             c = ustr[0];
449 
450             if (itemLen != 0) {
451                 log_err("FAIL: for %s expect '%c' next, but got a string\n",
452                         pat, *p);
453                 return;
454             }
455 
456             if (c != start++) {
457                 log_err("FAIL: for %s expect '%c' next\n",
458                         pat, *p);
459                 return;
460             }
461 
462             ++p;
463         }
464     }
465 
466     if (uset_size(set) == expectedSize) {
467         log_verbose("Ok: %s size is %d\n", pat, expectedSize);
468     } else {
469         log_err("FAIL: %s size is %d, expected %d\n",
470                 pat, uset_size(set), expectedSize);
471     }
472 }
473 
474 static void
TestSerialized()475 TestSerialized() {
476     uint16_t buffer[1000];
477     USerializedSet sset;
478     USet *set;
479     UErrorCode errorCode;
480     UChar32 c;
481     int32_t length;
482 
483     /* use a pattern that generates both BMP and supplementary code points */
484     U_STRING_DECL(pattern, "[:Cf:]", 6);
485     U_STRING_INIT(pattern, "[:Cf:]", 6);
486 
487     errorCode=U_ZERO_ERROR;
488     set=uset_openPattern(pattern, -1, &errorCode);
489     if(U_FAILURE(errorCode)) {
490         log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode));
491         return;
492     }
493 
494     length=uset_serialize(set, buffer, UPRV_LENGTHOF(buffer), &errorCode);
495     if(U_FAILURE(errorCode)) {
496         log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
497         uset_close(set);
498         return;
499     }
500 
501     uset_getSerializedSet(&sset, buffer, length);
502     for(c=0; c<=0x10ffff; ++c) {
503         if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
504             log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
505             break;
506         }
507     }
508 
509     uset_close(set);
510 }
511 
512 /**
513  * Make sure that when non-invariant chars are passed to uset_openPattern
514  * they do not cause an ugly failure mode (e.g. assertion failure).
515  * JB#3795.
516  */
517 static void
TestNonInvariantPattern()518 TestNonInvariantPattern() {
519     UErrorCode ec = U_ZERO_ERROR;
520     /* The critical part of this test is that the following pattern
521        must contain a non-invariant character. */
522     static const char *pattern = "[:ccc!=0:]";
523     UChar buf[256];
524     int32_t len = u_unescape(pattern, buf, 256);
525     /* This test 'fails' by having an assertion failure within the
526        following call.  It passes by running to completion with no
527        assertion failure. */
528     USet *set = uset_openPattern(buf, len, &ec);
529     uset_close(set);
530 }
531 
TestBadPattern(void)532 static void TestBadPattern(void) {
533     UErrorCode status = U_ZERO_ERROR;
534     USet *pat;
535     U_STRING_DECL(pattern, "[", 1);
536     U_STRING_INIT(pattern, "[", 1);
537     pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
538     if (pat != NULL || U_SUCCESS(status)) {
539         log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
540     }
541 }
542 
openIDSet()543 static USet *openIDSet() {
544     UErrorCode errorCode = U_ZERO_ERROR;
545     U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
546     U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
547     return uset_openPattern(pattern, 15, &errorCode);
548 }
549 
TestFreezable()550 static void TestFreezable() {
551     USet *idSet;
552     USet *frozen;
553     USet *thawed;
554 
555     idSet=openIDSet();
556 
557     if (idSet == NULL) {
558         log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
559         uset_close(idSet);
560         return;
561     }
562 
563     frozen=uset_clone(idSet);
564 
565     if (frozen == NULL) {
566         log_err("uset_Clone() returned NULL\n");
567         return;
568     }
569 
570     if(!uset_equals(frozen, idSet)) {
571         log_err("uset_clone() did not make an equal copy\n");
572     }
573 
574     uset_freeze(frozen);
575     uset_addRange(frozen, 0xd802, 0xd805);
576 
577     if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
578         log_err("uset_freeze() or uset_isFrozen() does not work\n");
579     }
580 
581     thawed=uset_cloneAsThawed(frozen);
582 
583     if (thawed == NULL) {
584         log_err("uset_cloneAsThawed(frozen) returned NULL");
585         uset_close(frozen);
586         uset_close(idSet);
587         return;
588     }
589 
590     uset_addRange(thawed, 0xd802, 0xd805);
591 
592     if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
593         log_err("uset_cloneAsThawed() does not work\n");
594     }
595 
596     uset_close(idSet);
597     uset_close(frozen);
598     uset_close(thawed);
599 }
600 
TestSpan()601 static void TestSpan() {
602     static const UChar s16[2]={ 0xe01, 0x3000 };
603     static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
604 
605     USet *idSet=openIDSet();
606 
607     if (idSet == NULL) {
608         log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
609         return;
610     }
611 
612     if(
613         1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
614         0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
615         2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
616         1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
617     ) {
618         log_err("uset_span() or uset_spanBack() does not work\n");
619     }
620 
621     if(
622         3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
623         0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
624         6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
625         3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
626     ) {
627         log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
628     }
629 
630     uset_freeze(idSet);
631 
632     if(
633         1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
634         0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
635         2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
636         1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
637     ) {
638         log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
639     }
640 
641     if(
642         3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
643         0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
644         6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
645         3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
646     ) {
647         log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
648     }
649 
650     uset_close(idSet);
651 }
652 
653 /*eof*/
654