1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7 #include "unicode/uset.h"
8 #include "unicode/ustring.h"
9 #include "cintltst.h"
10 #include <stdlib.h>
11 #include <string.h>
12
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression wherever it is used (e.g. as an operand of sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/*
 * Registers test function x through addTest() under the name "uset/<x>".
 * Relies on a variable named 'root' being in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "uset/" # x)
16
17 static void TestAPI(void);
18 static void Testj2269(void);
19 static void TestSerialized(void);
20 static void TestNonInvariantPattern(void);
21 static void TestBadPattern(void);
22 static void TestFreezable(void);
23 static void TestSpan(void);
24
25 void addUSetTest(TestNode** root);
26
27 static void expect(const USet* set,
28 const char* inList,
29 const char* outList,
30 UErrorCode* ec);
31 static void expectContainment(const USet* set,
32 const char* list,
33 UBool isIn);
34 static char oneUCharToChar(UChar32 c);
35 static void expectItems(const USet* set,
36 const char* items);
37
38 void
addUSetTest(TestNode ** root)39 addUSetTest(TestNode** root) {
40 TEST(TestAPI);
41 TEST(Testj2269);
42 TEST(TestSerialized);
43 TEST(TestNonInvariantPattern);
44 TEST(TestBadPattern);
45 TEST(TestFreezable);
46 TEST(TestSpan);
47 }
48
49 /*------------------------------------------------------------------
50 * Tests
51 *------------------------------------------------------------------*/
52
Testj2269()53 static void Testj2269() {
54 UErrorCode status = U_ZERO_ERROR;
55 UChar a[4] = { 0x61, 0x62, 0x63, 0 };
56 USet *s = uset_open(1, 0);
57 uset_addString(s, a, 3);
58 a[0] = 0x63; a[1] = 0x63;
59 expect(s, "{abc}", "{ccc}", &status);
60 uset_close(s);
61 }
62
63 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
64 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
65
66 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */
67 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1;
68
69 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */
70 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1;
71
72 static const UChar STR_bc[] = {98,99,0}; /* "bc" */
73 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
74
75 static const UChar STR_ab[] = {97,98,0}; /* "ab" */
76 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
77
78 /**
79 * Basic API test for uset.x
80 */
TestAPI()81 static void TestAPI() {
82 USet* set;
83 USet* set2;
84 UErrorCode ec;
85
86 /* [] */
87 set = uset_open(1, 1);
88 uset_clear(set);
89 expect(set, "", "abc{ab}", NULL);
90 uset_close(set);
91
92 /* [ABC] */
93 set = uset_open(0x0041, 0x0043);
94 expect(set, "ABC", "DEF{ab}", NULL);
95 uset_close(set);
96
97 /* [a-c{ab}] */
98 ec = U_ZERO_ERROR;
99 set = uset_openPattern(PAT, PAT_LEN, &ec);
100 if(U_FAILURE(ec)) {
101 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
102 return;
103 }
104 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
105 log_err("uset_resemblesPattern of PAT failed\n");
106 }
107 expect(set, "abc{ab}", "def{bc}", &ec);
108
109 /* [a-d{ab}] */
110 uset_add(set, 0x64);
111 expect(set, "abcd{ab}", "ef{bc}", NULL);
112
113 /* [acd{ab}{bc}] */
114 uset_remove(set, 0x62);
115 uset_addString(set, STR_bc, STR_bc_LEN);
116 expect(set, "acd{ab}{bc}", "bef{cd}", NULL);
117
118 /* [acd{bc}] */
119 uset_removeString(set, STR_ab, STR_ab_LEN);
120 expect(set, "acd{bc}", "bfg{ab}", NULL);
121
122 /* [^acd{bc}] */
123 uset_complement(set);
124 expect(set, "bef{bc}", "acd{ac}", NULL);
125
126 /* [a-e{bc}] */
127 uset_complement(set);
128 uset_addRange(set, 0x0062, 0x0065);
129 expect(set, "abcde{bc}", "fg{ab}", NULL);
130
131 /* [de{bc}] */
132 uset_removeRange(set, 0x0050, 0x0063);
133 expect(set, "de{bc}", "bcfg{ab}", NULL);
134
135 /* [g-l] */
136 uset_set(set, 0x0067, 0x006C);
137 expect(set, "ghijkl", "de{bc}", NULL);
138
139 if (uset_indexOf(set, 0x0067) != 0) {
140 log_err("uset_indexOf failed finding correct index of 'g'\n");
141 }
142
143 if (uset_charAt(set, 0) != 0x0067) {
144 log_err("uset_charAt failed finding correct char 'g' at index 0\n");
145 }
146
147 /* How to test this one...? */
148 uset_compact(set);
149
150 /* [g-i] */
151 uset_retain(set, 0x0067, 0x0069);
152 expect(set, "ghi", "dejkl{bc}", NULL);
153
154 /* UCHAR_ASCII_HEX_DIGIT */
155 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
156 if(U_FAILURE(ec)) {
157 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
158 return;
159 }
160 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
161
162 /* [ab] */
163 uset_clear(set);
164 uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
165 expect(set, "ab", "def{ab}", NULL);
166 if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
167 log_err("set should not conatin all characters of \"bc\" \n");
168 }
169
170 /* [] */
171 set2 = uset_open(1, 1);
172 uset_clear(set2);
173
174 /* space */
175 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
176 expect(set2, " ", "abcdefghi{bc}", NULL);
177
178 /* [a-c] */
179 uset_set(set2, 0x0061, 0x0063);
180 /* [g-i] */
181 uset_set(set, 0x0067, 0x0069);
182
183 /* [a-c g-i] */
184 if (uset_containsSome(set, set2)) {
185 log_err("set should not contain some of set2 yet\n");
186 }
187 uset_complementAll(set, set2);
188 if (!uset_containsSome(set, set2)) {
189 log_err("set should contain some of set2\n");
190 }
191 expect(set, "abcghi", "def{bc}", NULL);
192
193 /* [g-i] */
194 uset_removeAll(set, set2);
195 expect(set, "ghi", "abcdef{bc}", NULL);
196
197 /* [a-c g-i] */
198 uset_addAll(set2, set);
199 expect(set2, "abcghi", "def{bc}", NULL);
200
201 /* [g-i] */
202 uset_retainAll(set2, set);
203 expect(set2, "ghi", "abcdef{bc}", NULL);
204
205 uset_close(set);
206 uset_close(set2);
207 }
208
209 /*------------------------------------------------------------------
210 * Support
211 *------------------------------------------------------------------*/
212
213 /**
214 * Verifies that the given set contains the characters and strings in
215 * inList, and does not contain those in outList. Also verifies that
216 * 'set' is not NULL and that 'ec' succeeds.
217 * @param set the set to test, or NULL (on error)
218 * @param inList list of set contents, in iteration order. Format is
219 * list of individual strings, in iteration order, followed by sorted
220 * list of strings, delimited by {}. This means we do not test
221 * characters '{' or '}' and we do not test strings containing those
222 * characters either.
223 * @param outList list of things not in the set. Same format as
224 * inList.
225 * @param ec an error code, checked for success. May be NULL in which
226 * case it is ignored.
227 */
expect(const USet * set,const char * inList,const char * outList,UErrorCode * ec)228 static void expect(const USet* set,
229 const char* inList,
230 const char* outList,
231 UErrorCode* ec) {
232 if (ec!=NULL && U_FAILURE(*ec)) {
233 log_err("FAIL: %s\n", u_errorName(*ec));
234 return;
235 }
236 if (set == NULL) {
237 log_err("FAIL: USet is NULL\n");
238 return;
239 }
240 expectContainment(set, inList, TRUE);
241 expectContainment(set, outList, FALSE);
242 expectItems(set, inList);
243 }
244
expectContainment(const USet * set,const char * list,UBool isIn)245 static void expectContainment(const USet* set,
246 const char* list,
247 UBool isIn) {
248 const char* p = list;
249 UChar ustr[4096];
250 char *pat;
251 UErrorCode ec;
252 int32_t rangeStart = -1, rangeEnd = -1, length;
253
254 ec = U_ZERO_ERROR;
255 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
256 if(U_FAILURE(ec)) {
257 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
258 return;
259 }
260 pat=aescstrdup(ustr, length);
261
262 while (*p) {
263 if (*p=='{') {
264 const char* stringStart = ++p;
265 int32_t stringLength = 0;
266 char strCopy[64];
267
268 while (*p++ != '}') {
269 }
270 stringLength = (int32_t)(p - stringStart - 1);
271 strncpy(strCopy, stringStart, stringLength);
272 strCopy[stringLength] = 0;
273
274 u_charsToUChars(stringStart, ustr, stringLength);
275
276 if (uset_containsString(set, ustr, stringLength) == isIn) {
277 log_verbose("Ok: %s %s \"%s\"\n", pat,
278 (isIn ? "contains" : "does not contain"),
279 strCopy);
280 } else {
281 log_err("FAIL: %s %s \"%s\"\n", pat,
282 (isIn ? "does not contain" : "contains"),
283 strCopy);
284 }
285 }
286
287 else {
288 UChar32 c;
289
290 u_charsToUChars(p, ustr, 1);
291 c = ustr[0];
292
293 if (uset_contains(set, c) == isIn) {
294 log_verbose("Ok: %s %s '%c'\n", pat,
295 (isIn ? "contains" : "does not contain"),
296 *p);
297 } else {
298 log_err("FAIL: %s %s '%c'\n", pat,
299 (isIn ? "does not contain" : "contains"),
300 *p);
301 }
302
303 /* Test the range API too by looking for ranges */
304 if (c == rangeEnd+1) {
305 rangeEnd = c;
306 } else {
307 if (rangeStart >= 0) {
308 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
309 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
310 (isIn ? "contains" : "does not contain"),
311 rangeStart, rangeEnd);
312 } else {
313 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
314 (isIn ? "does not contain" : "contains"),
315 rangeStart, rangeEnd);
316 }
317 }
318 rangeStart = rangeEnd = c;
319 }
320
321 ++p;
322 }
323 }
324
325 if (rangeStart >= 0) {
326 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) {
327 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat,
328 (isIn ? "contains" : "does not contain"),
329 rangeStart, rangeEnd);
330 } else {
331 log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
332 (isIn ? "does not contain" : "contains"),
333 rangeStart, rangeEnd);
334 }
335 }
336 }
337
338 /* This only works for invariant BMP chars */
oneUCharToChar(UChar32 c)339 static char oneUCharToChar(UChar32 c) {
340 UChar ubuf[1];
341 char buf[1];
342 ubuf[0] = (UChar) c;
343 u_UCharsToChars(ubuf, buf, 1);
344 return buf[0];
345 }
346
expectItems(const USet * set,const char * items)347 static void expectItems(const USet* set,
348 const char* items) {
349 const char* p = items;
350 UChar ustr[4096], itemStr[4096];
351 char buf[4096];
352 char *pat;
353 UErrorCode ec;
354 int32_t expectedSize = 0;
355 int32_t itemCount = uset_getItemCount(set);
356 int32_t itemIndex = 0;
357 UChar32 start = 1, end = 0;
358 int32_t itemLen = 0, length;
359
360 ec = U_ZERO_ERROR;
361 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
362 if (U_FAILURE(ec)) {
363 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
364 return;
365 }
366 pat=aescstrdup(ustr, length);
367
368 if (uset_isEmpty(set) != (strlen(items)==0)) {
369 log_err("FAIL: %s should return %s from isEmpty\n",
370 pat,
371 strlen(items)==0 ? "TRUE" : "FALSE");
372 }
373
374 /* Don't test patterns starting with "[^" */
375 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
376 return;
377 }
378
379 while (*p) {
380
381 ++expectedSize;
382
383 if (start > end || start == -1) {
384 /* Fetch our next item */
385 if (itemIndex >= itemCount) {
386 log_err("FAIL: ran out of items iterating %s\n", pat);
387 return;
388 }
389
390 itemLen = uset_getItem(set, itemIndex, &start, &end,
391 itemStr, sizeof(itemStr), &ec);
392 if (U_FAILURE(ec) || itemLen < 0) {
393 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec));
394 return;
395 }
396
397 if (itemLen == 0) {
398 log_verbose("Ok: %s item %d is %c-%c\n", pat,
399 itemIndex, oneUCharToChar(start),
400 oneUCharToChar(end));
401 } else {
402 itemStr[itemLen] = 0;
403 u_UCharsToChars(itemStr, buf, itemLen+1);
404 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf);
405 }
406
407 ++itemIndex;
408 }
409
410 if (*p=='{') {
411 const char* stringStart = ++p;
412 int32_t stringLength = 0;
413 char strCopy[64];
414
415 while (*p++ != '}') {
416 }
417 stringLength = (int32_t)(p - stringStart - 1);
418 strncpy(strCopy, stringStart, stringLength);
419 strCopy[stringLength] = 0;
420
421 u_charsToUChars(stringStart, ustr, stringLength);
422 ustr[stringLength] = 0;
423
424 if (itemLen == 0) {
425 log_err("FAIL: for %s expect \"%s\" next, but got a char\n",
426 pat, strCopy);
427 return;
428 }
429
430 if (u_strcmp(ustr, itemStr) != 0) {
431 log_err("FAIL: for %s expect \"%s\" next\n",
432 pat, strCopy);
433 return;
434 }
435 }
436
437 else {
438 UChar32 c;
439
440 u_charsToUChars(p, ustr, 1);
441 c = ustr[0];
442
443 if (itemLen != 0) {
444 log_err("FAIL: for %s expect '%c' next, but got a string\n",
445 pat, *p);
446 return;
447 }
448
449 if (c != start++) {
450 log_err("FAIL: for %s expect '%c' next\n",
451 pat, *p);
452 return;
453 }
454
455 ++p;
456 }
457 }
458
459 if (uset_size(set) == expectedSize) {
460 log_verbose("Ok: %s size is %d\n", pat, expectedSize);
461 } else {
462 log_err("FAIL: %s size is %d, expected %d\n",
463 pat, uset_size(set), expectedSize);
464 }
465 }
466
467 static void
TestSerialized()468 TestSerialized() {
469 uint16_t buffer[1000];
470 USerializedSet sset;
471 USet *set;
472 UErrorCode errorCode;
473 UChar32 c;
474 int32_t length;
475
476 /* use a pattern that generates both BMP and supplementary code points */
477 U_STRING_DECL(pattern, "[:Cf:]", 6);
478 U_STRING_INIT(pattern, "[:Cf:]", 6);
479
480 errorCode=U_ZERO_ERROR;
481 set=uset_openPattern(pattern, -1, &errorCode);
482 if(U_FAILURE(errorCode)) {
483 log_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode));
484 return;
485 }
486
487 length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
488 if(U_FAILURE(errorCode)) {
489 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
490 uset_close(set);
491 return;
492 }
493
494 uset_getSerializedSet(&sset, buffer, length);
495 for(c=0; c<=0x10ffff; ++c) {
496 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) {
497 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c);
498 break;
499 }
500 }
501
502 uset_close(set);
503 }
504
505 /**
506 * Make sure that when non-invariant chars are passed to uset_openPattern
507 * they do not cause an ugly failure mode (e.g. assertion failure).
508 * JB#3795.
509 */
510 static void
TestNonInvariantPattern()511 TestNonInvariantPattern() {
512 UErrorCode ec = U_ZERO_ERROR;
513 /* The critical part of this test is that the following pattern
514 must contain a non-invariant character. */
515 static const char *pattern = "[:ccc!=0:]";
516 UChar buf[256];
517 int32_t len = u_unescape(pattern, buf, 256);
518 /* This test 'fails' by having an assertion failure within the
519 following call. It passes by running to completion with no
520 assertion failure. */
521 USet *set = uset_openPattern(buf, len, &ec);
522 uset_close(set);
523 }
524
TestBadPattern(void)525 static void TestBadPattern(void) {
526 UErrorCode status = U_ZERO_ERROR;
527 USet *pat;
528 U_STRING_DECL(pattern, "[", 1);
529 U_STRING_INIT(pattern, "[", 1);
530 pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
531 if (pat != NULL || U_SUCCESS(status)) {
532 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
533 }
534 }
535
openIDSet()536 static USet *openIDSet() {
537 UErrorCode errorCode = U_ZERO_ERROR;
538 U_STRING_DECL(pattern, "[:ID_Continue:]", 15);
539 U_STRING_INIT(pattern, "[:ID_Continue:]", 15);
540 return uset_openPattern(pattern, 15, &errorCode);
541 }
542
TestFreezable()543 static void TestFreezable() {
544 USet *idSet;
545 USet *frozen;
546 USet *thawed;
547
548 idSet=openIDSet();
549
550 if (idSet == NULL) {
551 log_err("openIDSet() returned NULL");
552 uset_close(idSet);
553 return;
554 }
555
556 frozen=uset_clone(idSet);
557
558 if (frozen == NULL) {
559 log_err("uset_Clone() returned NULL");
560 return;
561 }
562
563 if(!uset_equals(frozen, idSet)) {
564 log_err("uset_clone() did not make an equal copy\n");
565 }
566
567 uset_freeze(frozen);
568 uset_addRange(frozen, 0xd802, 0xd805);
569
570 if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
571 log_err("uset_freeze() or uset_isFrozen() does not work\n");
572 }
573
574 thawed=uset_cloneAsThawed(frozen);
575
576 if (thawed == NULL) {
577 log_err("uset_cloneAsThawed(frozen) returned NULL");
578 uset_close(frozen);
579 uset_close(idSet);
580 return;
581 }
582
583 uset_addRange(thawed, 0xd802, 0xd805);
584
585 if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
586 log_err("uset_cloneAsThawed() does not work\n");
587 }
588
589 uset_close(idSet);
590 uset_close(frozen);
591 uset_close(thawed);
592 }
593
TestSpan()594 static void TestSpan() {
595 static const UChar s16[2]={ 0xe01, 0x3000 };
596 static const char* s8="\xE0\xB8\x81\xE3\x80\x80";
597
598 USet *idSet=openIDSet();
599
600 if (idSet == NULL) {
601 log_err("openIDSet() returned NULL");
602 return;
603 }
604
605 if(
606 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
607 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
608 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
609 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
610 ) {
611 log_err("uset_span() or uset_spanBack() does not work\n");
612 }
613
614 if(
615 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
616 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
617 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
618 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
619 ) {
620 log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
621 }
622
623 uset_freeze(idSet);
624
625 if(
626 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) ||
627 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) ||
628 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) ||
629 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED)
630 ) {
631 log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
632 }
633
634 if(
635 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
636 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) ||
637 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) ||
638 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED)
639 ) {
640 log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
641 }
642
643 uset_close(idSet);
644 }
645
646 /*eof*/
647