/*
**********************************************************************
* Copyright (c) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
9 #include "cintltst.h"
10 #include <stdlib.h>
11 #include <string.h>
12
/**
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/**
 * Registers test function x under the path "uset/<x>".
 * Requires a variable named 'root' to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "uset/" # x)
16
17 static void TestAPI(void);
18 static void Testj2269(void);
19 static void TestSerialized(void);
20 static void TestNonInvariantPattern(void);
21 static void TestBadPattern(void);
22 static void TestFreezable(void);
23 static void TestSpan(void);
24
25 void addUSetTest(TestNode** root);
26
27 static void expect(const USet* set,
28 const char* inList,
29 const char* outList,
30 UErrorCode* ec);
31 static void expectContainment(const USet* set,
32 const char* list,
33 UBool isIn);
34 static char oneUCharToChar(UChar32 c);
35 static void expectItems(const USet* set,
36 const char* items);
37
38 void
addUSetTest(TestNode ** root)39 addUSetTest(TestNode** root) {
40 TEST(TestAPI);
41 TEST(Testj2269);
42 TEST(TestSerialized);
43 TEST(TestNonInvariantPattern);
44 TEST(TestBadPattern);
45 TEST(TestFreezable);
46 TEST(TestSpan);
47 }
48
/*------------------------------------------------------------------
 * Tests
 *------------------------------------------------------------------*/
52
Testj2269()53 static void Testj2269() {
54 UErrorCode status = U_ZERO_ERROR;
55 UChar a[4] = { 0x61, 0x62, 0x63, 0 };
56 USet *s = uset_open(1, 0);
57 uset_addString(s, a, 3);
58 a[0] = 0x63; a[1] = 0x63;
59 expect(s, "{abc}", "{ccc}", &status);
60 uset_close(s);
61 }
62
63 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
64 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
65
66 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
67 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
68
69 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
70 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
71
72 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
73 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
74
75 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
76 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
77
78 /**
79 * Basic API test for uset.x
80 */
TestAPI()81 static void TestAPI() {
82 USet* set;
83 USet* set2;
84 UErrorCode ec;
85
86 /* [] */
87 set = uset_openEmpty();
88 expect(set, "", "abc{ab}", NULL);
89 uset_close(set);
90
91 set = uset_open(1, 0);
92 expect(set, "", "abc{ab}", NULL);
93 uset_close(set);
94
95 set = uset_open(1, 1);
96 uset_clear(set);
97 expect(set, "", "abc{ab}", NULL);
98 uset_close(set);
99
100 /* [ABC] */
101 set = uset_open(0x0041, 0x0043);
102 expect(set, "ABC", "DEF{ab}", NULL);
103 uset_close(set);
104
105 /* [a-c{ab}] */
106 ec = U_ZERO_ERROR;
107 set = uset_openPattern(PAT, PAT_LEN, &ec);
108 if(U_FAILURE(ec)) {
109 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
110 return;
111 }
112 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
113 log_err("uset_resemblesPattern of PAT failed\n");
114 }
115 expect(set, "abc{ab}", "def{bc}", &ec);
116
117 /* [a-d{ab}] */
118 uset_add(set, 0x64);
119 expect(set, "abcd{ab}", "ef{bc}", NULL);
120
121 /* [acd{ab}{bc}] */
122 uset_remove(set, 0x62);
123 uset_addString(set, STR_bc, STR_bc_LEN);
124 expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
125
126 /* [acd{bc}] */
127 uset_removeString(set, STR_ab, STR_ab_LEN);
128 expect(set, "acd{bc}", "bfg{ab}", NULL);
129
130 /* [^acd{bc}] */
131 uset_complement(set);
132 expect(set, "bef{bc}", "acd{ac}", NULL);
133
134 /* [a-e{bc}] */
135 uset_complement(set);
136 uset_addRange(set, 0x0062, 0x0065);
137 expect(set, "abcde{bc}", "fg{ab}", NULL);
138
139 /* [de{bc}] */
140 uset_removeRange(set, 0x0050, 0x0063);
141 expect(set, "de{bc}", "bcfg{ab}", NULL);
142
143 /* [g-l] */
144 uset_set(set, 0x0067, 0x006C);
145 expect(set, "ghijkl", "de{bc}", NULL);
146
147 if (uset_indexOf(set, 0x0067) != 0) {
148 log_err("uset_indexOf failed finding correct index of 'g'\n");
149 }
150
151 if (uset_charAt(set, 0) != 0x0067) {
152 log_err("uset_charAt failed finding correct char 'g' at index 0\n");
153 }
154
155 /* How to test this one...? */
156 uset_compact(set);
157
158 /* [g-i] */
159 uset_retain(set, 0x0067, 0x0069);
160 expect(set, "ghi", "dejkl{bc}", NULL);
161
162 /* UCHAR_ASCII_HEX_DIGIT */
163 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
164 if(U_FAILURE(ec)) {
165 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
166 return;
167 }
168 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
169
170 /* [ab] */
171 uset_clear(set);
172 uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
173 expect(set, "ab", "def{ab}", NULL);
174 if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
175 log_err("set should not conatin all characters of \"bc\" \n");
176 }
177
178 /* [] */
179 set2 = uset_open(1, 1);
180 uset_clear(set2);
181
182 /* space */
183 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
184 expect(set2, " ", "abcdefghi{bc}", NULL);
185
186 /* [a-c] */
187 uset_set(set2, 0x0061, 0x0063);
188 /* [g-i] */
189 uset_set(set, 0x0067, 0x0069);
190
191 /* [a-c g-i] */
192 if (uset_containsSome(set, set2)) {
193 log_err("set should not contain some of set2 yet\n");
194 }
195 uset_complementAll(set, set2);
196 if (!uset_containsSome(set, set2)) {
197 log_err("set should contain some of set2\n");
198 }
199 expect(set, "abcghi", "def{bc}", NULL);
200
201 /* [g-i] */
202 uset_removeAll(set, set2);
203 expect(set, "ghi", "abcdef{bc}", NULL);
204
205 /* [a-c g-i] */
206 uset_addAll(set2, set);
207 expect(set2, "abcghi", "def{bc}", NULL);
208
209 /* [g-i] */
210 uset_retainAll(set2, set);
211 expect(set2, "ghi", "abcdef{bc}", NULL);
212
213 uset_close(set);
214 uset_close(set2);
215 }
216
/*------------------------------------------------------------------
 * Support
 *------------------------------------------------------------------*/
220
221 /**
222 * Verifies that the given set contains the characters and strings in
223 * inList, and does not contain those in outList. Also verifies that
224 * 'set' is not NULL and that 'ec' succeeds.
225 * @param set the set to test, or NULL (on error)
226 * @param inList list of set contents, in iteration order. Format is
227 * list of individual strings, in iteration order, followed by sorted
228 * list of strings, delimited by {}. This means we do not test
229 * characters '{' or '}' and we do not test strings containing those
230 * characters either.
231 * @param outList list of things not in the set. Same format as
232 * inList.
233 * @param ec an error code, checked for success. May be NULL in which
234 * case it is ignored.
235 */
expect(const USet * set,const char * inList,const char * outList,UErrorCode * ec)236 static void expect(const USet* set,
237 const char* inList,
238 const char* outList,
239 UErrorCode* ec) {
240 if (ec!=NULL && U_FAILURE(*ec)) {
241 log_err("FAIL: %s\n", u_errorName(*ec));
242 return;
243 }
244 if (set == NULL) {
245 log_err("FAIL: USet is NULL\n");
246 return;
247 }
248 expectContainment(set, inList, TRUE);
249 expectContainment(set, outList, FALSE);
250 expectItems(set, inList);
251 }
252
/**
 * Checks uset_containsRange() for one inclusive code point range and
 * logs the outcome.
 *
 * @param set        the set to query
 * @param pat        printable pattern of the set, used in log output
 * @param rangeStart first code point of the range
 * @param rangeEnd   last code point of the range
 * @param isIn       the containment result that counts as success
 */
static void expectContainmentOfRange(const USet* set,
                                     const char* pat,
                                     int32_t rangeStart,
                                     int32_t rangeEnd,
                                     UBool isIn) {
    if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
        log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
                    (isIn ? "contains" : "does not contain"),
                    rangeStart, rangeEnd);
    } else {
        log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
                     (isIn ? "does not contain" : "contains"),
                     rangeStart, rangeEnd);
    }
}

/**
 * Verifies that every character and {string} in 'list' is contained in
 * the set (isIn is TRUE) or is not contained in it (isIn is FALSE).
 * Runs of consecutive characters in the list are additionally checked
 * through uset_containsRange().
 *
 * @param set  the set to test
 * @param list characters and {}-delimited strings, in the format
 *             described for expect()
 * @param isIn the containment result expected for every list entry
 */
static void expectContainment(const USet* set,
                              const char* list,
                              UBool isIn) {
    const char* p = list;
    UChar ustr[4096];
    char *pat;
    UErrorCode ec = U_ZERO_ERROR;
    int32_t rangeStart = -1, rangeEnd = -1, length;

    /* The capacity is a number of UChars, not a number of bytes. */
    length = uset_toPattern(set, ustr,
                            (int32_t)(sizeof(ustr)/sizeof(ustr[0])),
                            TRUE, &ec);
    if(U_FAILURE(ec)) {
        log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
        return;
    }
    pat=aescstrdup(ustr, length);

    while (*p) {
        if (*p=='{') {
            const char* stringStart = ++p;
            const char* stringLimit = strchr(stringStart, '}');
            int32_t stringLength;
            char strCopy[64];

            /* Reject malformed lists instead of reading past the NUL
               or overflowing strCopy. */
            if (stringLimit == NULL) {
                log_err("FAIL: missing '}' in test list \"%s\"\n", list);
                return;
            }
            stringLength = (int32_t)(stringLimit - stringStart);
            if (stringLength >= (int32_t)sizeof(strCopy)) {
                log_err("FAIL: string too long in test list \"%s\"\n", list);
                return;
            }
            memcpy(strCopy, stringStart, (size_t)stringLength);
            strCopy[stringLength] = 0;
            p = stringLimit + 1;   /* continue after the '}' */

            u_charsToUChars(stringStart, ustr, stringLength);

            if (uset_containsString(set, ustr, stringLength) == isIn) {
                log_verbose("Ok: %s %s \"%s\"\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            strCopy);
            } else {
                log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
                             (isIn ? "does not contain" : "contains"),
                             strCopy);
            }
        }

        else {
            UChar32 c;

            u_charsToUChars(p, ustr, 1);
            c = ustr[0];

            if (uset_contains(set, c) == isIn) {
                log_verbose("Ok: %s %s '%c'\n", pat,
                            (isIn ? "contains" : "does not contain"),
                            *p);
            } else {
                log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
                             (isIn ? "does not contain" : "contains"),
                             *p);
            }

            /* Test the range API too by looking for ranges */
            if (c == rangeEnd+1) {
                rangeEnd = c;
            } else {
                /* c does not extend the current run: check the run
                   collected so far, then start a new one at c. */
                if (rangeStart >= 0) {
                    expectContainmentOfRange(set, pat, rangeStart, rangeEnd, isIn);
                }
                rangeStart = rangeEnd = c;
            }

            ++p;
        }
    }

    /* Check the last run, if any. */
    if (rangeStart >= 0) {
        expectContainmentOfRange(set, pat, rangeStart, rangeEnd, isIn);
    }
}
345
346 /* This only works for invariant BMP chars */
oneUCharToChar(UChar32 c)347 static char oneUCharToChar(UChar32 c) {
348 UChar ubuf[1];
349 char buf[1];
350 ubuf[0] = (UChar) c;
351 u_UCharsToChars(ubuf, buf, 1);
352 return buf[0];
353 }
354
expectItems(const USet * set,const char * items)355 static void expectItems(const USet* set,
356 const char* items) {
357 const char* p = items;
358 UChar ustr[4096], itemStr[4096];
359 char buf[4096];
360 char *pat;
361 UErrorCode ec;
362 int32_t expectedSize = 0;
363 int32_t itemCount = uset_getItemCount(set);
364 int32_t itemIndex = 0;
365 UChar32 start = 1, end = 0;
366 int32_t itemLen = 0, length;
367
368 ec = U_ZERO_ERROR;
369 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
370 if (U_FAILURE(ec)) {
371 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
372 return;
373 }
374 pat=aescstrdup(ustr, length);
375
376 if (uset_isEmpty(set) != (strlen(items)==0)) {
377 log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
378 pat,
379 strlen(items)==0 ? "TRUE" : "FALSE");
380 }
381
382 /* Don't test patterns starting with "[^" */
383 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
384 return;
385 }
386
387 while (*p) {
388
389 ++expectedSize;
390
391 if (start > end || start == -1) {
392 /* Fetch our next item */
393 if (itemIndex >= itemCount) {
394 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat);
395 return;
396 }
397
398 itemLen = uset_getItem(set, itemIndex, &start, &end,
399 itemStr, sizeof(itemStr), &ec);
400 if (U_FAILURE(ec) || itemLen < 0) {
401 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
402 return;
403 }
404
405 if (itemLen == 0) {
406 log_verbose("Ok: %s item %d is %c-%c\n", pat,
407 itemIndex, oneUCharToChar(start),
408 oneUCharToChar(end));
409 } else {
410 itemStr[itemLen] = 0;
411 u_UCharsToChars(itemStr, buf, itemLen+1);
412 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
413 }
414
415 ++itemIndex;
416 }
417
418 if (*p=='{') {
419 const char* stringStart = ++p;
420 int32_t stringLength = 0;
421 char strCopy[64];
422
423 while (*p++ != '}') {
424 }
425 stringLength = (int32_t)(p - stringStart - 1);
426 strncpy(strCopy, stringStart, stringLength);
427 strCopy[stringLength] = 0;
428
429 u_charsToUChars(stringStart, ustr, stringLength);
430 ustr[stringLength] = 0;
431
432 if (itemLen == 0) {
433 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
434 pat, strCopy);
435 return;
436 }
437
438 if (u_strcmp(ustr, itemStr) != 0) {
439 log_err("FAIL: for %s expect \"%s\" next\n",
440 pat, strCopy);
441 return;
442 }
443 }
444
445 else {
446 UChar32 c;
447
448 u_charsToUChars(p, ustr, 1);
449 c = ustr[0];
450
451 if (itemLen != 0) {
452 log_err("FAIL: for %s expect '%c' next, but got a string\n",
453 pat, *p);
454 return;
455 }
456
457 if (c != start++) {
458 log_err("FAIL: for %s expect '%c' next\n",
459 pat, *p);
460 return;
461 }
462
463 ++p;
464 }
465 }
466
467 if (uset_size(set) == expectedSize) {
468 log_verbose("Ok: %s size is %d\n", pat, expectedSize);
469 } else {
470 log_err("FAIL: %s size is %d, expected %d\n",
471 pat, uset_size(set), expectedSize);
472 }
473 }
474
475 static void
TestSerialized()476 TestSerialized() {
477 uint16_t buffer[1000];
478 USerializedSet sset;
479 USet *set;
480 UErrorCode errorCode;
481 UChar32 c;
482 int32_t length;
483
484 /* use a pattern that generates both BMP and supplementary code points */
485 U_STRING_DECL(pattern, "[:Cf:]", 6);
486 U_STRING_INIT(pattern, "[:Cf:]", 6);
487
488 errorCode=U_ZERO_ERROR;
489 set=uset_openPattern(pattern, -1, &errorCode);
490 if(U_FAILURE(errorCode)) {
491 log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode));
492 return;
493 }
494
495 length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
496 if(U_FAILURE(errorCode)) {
497 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
498 uset_close(set);
499 return;
500 }
501
502 uset_getSerializedSet(&sset, buffer, length);
503 for(c=0; c<=0x10ffff; ++c) {
504 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
505 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
506 break;
507 }
508 }
509
510 uset_close(set);
511 }
512
513 /**
514 * Make sure that when non-invariant chars are passed to uset_openPattern
515 * they do not cause an ugly failure mode (e.g. assertion failure).
516 * JB#3795.
517 */
518 static void
TestNonInvariantPattern()519 TestNonInvariantPattern() {
520 UErrorCode ec = U_ZERO_ERROR;
521 /* The critical part of this test is that the following pattern
522 must contain a non-invariant character. */
523 static const char *pattern = "[:ccc!=0:]";
524 UChar buf[256];
525 int32_t len = u_unescape(pattern, buf, 256);
526 /* This test 'fails' by having an assertion failure within the
527 following call. It passes by running to completion with no
528 assertion failure. */
529 USet *set = uset_openPattern(buf, len, &ec);
530 uset_close(set);
531 }
532
TestBadPattern(void)533 static void TestBadPattern(void) {
534 UErrorCode status = U_ZERO_ERROR;
535 USet *pat;
536 U_STRING_DECL(pattern, "[", 1);
537 U_STRING_INIT(pattern, "[", 1);
538 pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
539 if (pat != NULL || U_SUCCESS(status)) {
540 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
541 }
542 }
543
openIDSet()544 static USet *openIDSet() {
545 UErrorCode errorCode = U_ZERO_ERROR;
546 U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
547 U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
548 return uset_openPattern(pattern, 15, &errorCode);
549 }
550
TestFreezable()551 static void TestFreezable() {
552 USet *idSet;
553 USet *frozen;
554 USet *thawed;
555
556 idSet=openIDSet();
557
558 if (idSet == NULL) {
559 log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
560 uset_close(idSet);
561 return;
562 }
563
564 frozen=uset_clone(idSet);
565
566 if (frozen == NULL) {
567 log_err("uset_Clone() returned NULL\n");
568 return;
569 }
570
571 if(!uset_equals(frozen, idSet)) {
572 log_err("uset_clone() did not make an equal copy\n");
573 }
574
575 uset_freeze(frozen);
576 uset_addRange(frozen, 0xd802, 0xd805);
577
578 if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
579 log_err("uset_freeze() or uset_isFrozen() does not work\n");
580 }
581
582 thawed=uset_cloneAsThawed(frozen);
583
584 if (thawed == NULL) {
585 log_err("uset_cloneAsThawed(frozen) returned NULL");
586 uset_close(frozen);
587 uset_close(idSet);
588 return;
589 }
590
591 uset_addRange(thawed, 0xd802, 0xd805);
592
593 if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
594 log_err("uset_cloneAsThawed() does not work\n");
595 }
596
597 uset_close(idSet);
598 uset_close(frozen);
599 uset_close(thawed);
600 }
601
TestSpan()602 static void TestSpan() {
603 static const UChar s16[2]={ 0xe01, 0x3000 };
604 static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
605
606 USet *idSet=openIDSet();
607
608 if (idSet == NULL) {
609 log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
610 return;
611 }
612
613 if(
614 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
615 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
616 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
617 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
618 ) {
619 log_err("uset_span() or uset_spanBack() does not work\n");
620 }
621
622 if(
623 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
624 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
625 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
626 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
627 ) {
628 log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
629 }
630
631 uset_freeze(idSet);
632
633 if(
634 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
635 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
636 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
637 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
638 ) {
639 log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
640 }
641
642 if(
643 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
644 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
645 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
646 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
647 ) {
648 log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
649 }
650
651 uset_close(idSet);
652 }
653
/*eof*/
655