• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 package ohos.global.icu.dev.test.lang;
11 
12 import java.text.NumberFormat;
13 import java.text.ParsePosition;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.Collection;
17 import java.util.Collections;
18 import java.util.Comparator;
19 import java.util.HashMap;
20 import java.util.HashSet;
21 import java.util.Iterator;
22 import java.util.LinkedHashSet;
23 import java.util.List;
24 import java.util.Set;
25 import java.util.SortedSet;
26 import java.util.TreeSet;
27 
28 import org.junit.Test;
29 import org.junit.runner.RunWith;
30 import org.junit.runners.JUnit4;
31 
32 import ohos.global.icu.dev.test.TestFmwk;
33 import ohos.global.icu.dev.util.CollectionUtilities;
34 import ohos.global.icu.impl.SortedSetRelation;
35 import ohos.global.icu.impl.Utility;
36 import ohos.global.icu.lang.UCharacter;
37 import ohos.global.icu.lang.UCharacterEnums.ECharacterCategory;
38 import ohos.global.icu.lang.UProperty;
39 import ohos.global.icu.lang.UScript;
40 import ohos.global.icu.text.SymbolTable;
41 import ohos.global.icu.text.UTF16;
42 import ohos.global.icu.text.UnicodeMatcher;
43 import ohos.global.icu.text.UnicodeSet;
44 import ohos.global.icu.text.UnicodeSet.ComparisonStyle;
45 import ohos.global.icu.text.UnicodeSet.EntryRange;
46 import ohos.global.icu.text.UnicodeSet.SpanCondition;
47 import ohos.global.icu.text.UnicodeSetIterator;
48 import ohos.global.icu.text.UnicodeSetSpanner;
49 import ohos.global.icu.text.UnicodeSetSpanner.CountMethod;
50 import ohos.global.icu.text.UnicodeSetSpanner.TrimOption;
51 import ohos.global.icu.util.OutputInt;
52 
53 
54 /**
55  * @test
56  * @summary General test of UnicodeSet
57  */
58 
59 @RunWith(JUnit4.class)
60 public class UnicodeSetTest extends TestFmwk {
61 
62     static final String NOT = "%%%%";
63 
isCccValue(int ccc)64     private static final boolean isCccValue(int ccc) {
65         switch (ccc) {
66         case 0:
67         case 1:
68         case 7:
69         case 8:
70         case 9:
71         case 200:
72         case 202:
73         case 216:
74         case 218:
75         case 220:
76         case 222:
77         case 224:
78         case 226:
79         case 228:
80         case 230:
81         case 232:
82         case 233:
83         case 234:
84         case 240:
85             return true;
86         default:
87             return false;
88         }
89     }
90 
91     @Test
TestPropertyAccess()92     public void TestPropertyAccess() {
93         int count = 0;
94         // test to see that all of the names work
95         for (int propNum = UProperty.BINARY_START; propNum < UProperty.INT_LIMIT; ++propNum) {
96             count++;
97             //Skipping tests in the non-exhaustive mode to shorten the test time ticket#6475
98             if(TestFmwk.getExhaustiveness()<=5 && count%5!=0){
99                 continue;
100             }
101             if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) { // skip the gap
102                 propNum = UProperty.INT_START;
103             }
104             for (int nameChoice = UProperty.NameChoice.SHORT; nameChoice <= UProperty.NameChoice.LONG; ++nameChoice) {
105                 String propName;
106                 try {
107                     propName = UCharacter.getPropertyName(propNum, nameChoice);
108                     if (propName == null) {
109                         if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
110                         throw new NullPointerException();
111                     }
112                 } catch (RuntimeException e1) {
113                     errln("Can't get property name for: "
114                             + "Property (" + propNum + ")"
115                             + ", NameChoice: " + nameChoice + ", "
116                             + e1.getClass().getName());
117                     continue;
118                 }
119                 logln("Property (" + propNum + "): " + propName);
120                 for (int valueNum = UCharacter.getIntPropertyMinValue(propNum); valueNum <= UCharacter.getIntPropertyMaxValue(propNum); ++valueNum) {
121                     String valueName;
122                     try {
123                         valueName = UCharacter.getPropertyValueName(propNum, valueNum, nameChoice);
124                         if (valueName == null) {
125                             if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
126                             if ((propNum == UProperty.CANONICAL_COMBINING_CLASS ||
127                                     propNum == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
128                                     propNum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) &&
129                                     !isCccValue(valueNum)) {
130                                 // Only a few of the canonical combining classes have names.
131                                 // Otherwise they are just integer values.
132                                 continue;
133                             } else {
134                                 throw new NullPointerException();
135                             }
136                         }
137                     } catch (RuntimeException e1) {
138                         errln("Can't get property value name for: "
139                                 + "Property (" + propNum + "): " + propName + ", "
140                                 + "Value (" + valueNum + ") "
141                                 + ", NameChoice: " + nameChoice + ", "
142                                 + e1.getClass().getName());
143                         continue;
144                     }
145                     logln("Value (" + valueNum + "): " + valueName);
146                     UnicodeSet testSet;
147                     try {
148                         testSet = new UnicodeSet("[:" + propName + "=" + valueName + ":]");
149                     } catch (RuntimeException e) {
150                         errln("Can't create UnicodeSet for: "
151                                 + "Property (" + propNum + "): " + propName + ", "
152                                 + "Value (" + valueNum + "): " + valueName + ", "
153                                 + e.getClass().getName());
154                         continue;
155                     }
156                     UnicodeSet collectedErrors = new UnicodeSet();
157                     for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
158                         int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
159                         if (value != valueNum) {
160                             collectedErrors.add(it.codepoint);
161                         }
162                     }
163                     if (collectedErrors.size() != 0) {
164                         errln("Property Value Differs: "
165                                 + "Property (" + propNum + "): " + propName + ", "
166                                 + "Value (" + valueNum + "): " + valueName + ", "
167                                 + "Differing values: " + collectedErrors.toPattern(true));
168                     }
169                 }
170             }
171         }
172     }
173 
174 
175     /**
176      * Test toPattern().
177      */
178     @Test
TestToPattern()179     public void TestToPattern() throws Exception {
180         // Test that toPattern() round trips with syntax characters
181         // and whitespace.
182         for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
183             checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(OTHER_TOPATTERN_TESTS[i]));
184         }
185         for (int i = 0; i <= 0x10FFFF; ++i) {
186             if ((i <= 0xFF && !UCharacter.isLetter(i)) || UCharacter.isWhitespace(i)) {
187                 // check various combinations to make sure they all work.
188                 if (i != 0 && !toPatternAux(i, i)) continue;
189                 if (!toPatternAux(0, i)) continue;
190                 if (!toPatternAux(i, 0xFFFF)) continue;
191             }
192         }
193 
194         // Test pattern behavior of multicharacter strings.
195         UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
196         expectToPattern(s, "[a-z{aa}{ab}]",
197                 new String[] {"aa", "ab", NOT, "ac"});
198         s.add("ac");
199         expectToPattern(s, "[a-z{aa}{ab}{ac}]",
200                 new String[] {"aa", "ab", "ac", NOT, "xy"});
201 
202         s.applyPattern("[a-z {\\{l} {r\\}}]");
203         expectToPattern(s, "[a-z{r\\}}{\\{l}]",
204                 new String[] {"{l", "r}", NOT, "xy"});
205         s.add("[]");
206         expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]",
207                 new String[] {"{l", "r}", "[]", NOT, "xy"});
208 
209         s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
210         expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
211                 new String[] {"\u4E01\u4E02", "\n\r"});
212 
213         s.clear();
214         s.add("abc");
215         s.add("abc");
216         expectToPattern(s, "[{abc}]",
217                 new String[] {"abc", NOT, "ab"});
218 
219         // JB#3400: For 2 character ranges prefer [ab] to [a-b]
220         s.clear();
221         s.add('a', 'b');
222         expectToPattern(s, "[ab]", null);
223 
224         // Cover applyPattern, applyPropertyAlias
225         s.clear();
226         s.applyPattern("[ab ]", true);
227         expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
228         s.clear();
229         s.applyPattern("[ab ]", false);
230         expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
231 
232         s.clear();
233         s.applyPropertyAlias("nv", "0.5");
234         s.retainAll(new UnicodeSet("[:age=6.0:]"));  // stabilize this test
235         expectToPattern(s, "[\\u00BD\\u0B73\\u0D74\\u0F2A\\u2CFD\\uA831\\U00010141\\U00010175\\U00010176\\U00010E7B]", null);
236         // Unicode 5.1 adds Malayalam 1/2 (\u0D74)
237         // Unicode 5.2 adds U+A831 NORTH INDIC FRACTION ONE HALF and U+10E7B RUMI FRACTION ONE HALF
238         // Unicode 6.0 adds U+0B73 ORIYA FRACTION ONE HALF
239 
240         s.clear();
241         s.applyPropertyAlias("gc", "Lu");
242         // TODO expectToPattern(s, what?)
243 
244         // RemoveAllStrings()
245         s.clear();
246         s.applyPattern("[a-z{abc}{def}]");
247         expectToPattern(s, "[a-z{abc}{def}]", null);
248         s.removeAllStrings();
249         expectToPattern(s, "[a-z]", null);
250     }
251 
252     static String[] OTHER_TOPATTERN_TESTS = {
253         "[[:latin:]&[:greek:]]",
254         "[[:latin:]-[:greek:]]",
255         "[:nonspacing mark:]"
256     };
257 
258 
toPatternAux(int start, int end)259     public boolean toPatternAux(int start, int end) {
260         // use Integer.toString because Utility.hex doesn't handle ints
261         String source = "0x" + Integer.toString(start,16).toUpperCase();
262         if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
263         UnicodeSet testSet = new UnicodeSet();
264         testSet.add(start, end);
265         return checkPat(source, testSet);
266     }
267 
checkPat(String source, UnicodeSet testSet)268     boolean checkPat (String source, UnicodeSet testSet) {
269         String pat = "";
270         try {
271             // What we want to make sure of is that a pattern generated
272             // by toPattern(), with or without escaped unprintables, can
273             // be passed back into the UnicodeSet constructor.
274             String pat0 = testSet.toPattern(true);
275             if (!checkPat(source + " (escaped)", testSet, pat0)) return false;
276 
277             //String pat1 = unescapeLeniently(pat0);
278             //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
279 
280             String pat2 = testSet.toPattern(false);
281             if (!checkPat(source, testSet, pat2)) return false;
282 
283             //String pat3 = unescapeLeniently(pat2);
284             //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
285 
286             //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
287             logln(source + " => " + pat0 + ", " + pat2);
288         } catch (Exception e) {
289             errln("EXCEPTION in toPattern: " + source + " => " + pat);
290             return false;
291         }
292         return true;
293     }
294 
checkPat(String source, UnicodeSet testSet, String pat)295     boolean checkPat (String source, UnicodeSet testSet, String pat) {
296         UnicodeSet testSet2 = new UnicodeSet(pat);
297         if (!testSet2.equals(testSet)) {
298             errln("Fail toPattern: " + source + "; " + pat + " => " +
299                     testSet2.toPattern(false) + ", expected " +
300                     testSet.toPattern(false));
301             return false;
302         }
303         return true;
304     }
305 
306     // NOTE: copied the following from Utility. There ought to be a version in there with a flag
307     // that does the Java stuff
308 
unescapeAt(String s, int[] offset16)309     public static int unescapeAt(String s, int[] offset16) {
310         int c;
311         int result = 0;
312         int n = 0;
313         int minDig = 0;
314         int maxDig = 0;
315         int bitsPerDigit = 4;
316         int dig;
317         int i;
318 
319         /* Check that offset is in range */
320         int offset = offset16[0];
321         int length = s.length();
322         if (offset < 0 || offset >= length) {
323             return -1;
324         }
325 
326         /* Fetch first UChar after '\\' */
327         c = UTF16.charAt(s, offset);
328         offset += UTF16.getCharCount(c);
329 
330         /* Convert hexadecimal and octal escapes */
331         switch (c) {
332         case 'u':
333             minDig = maxDig = 4;
334             break;
335             /*
336          case 'U':
337          minDig = maxDig = 8;
338          break;
339          case 'x':
340          minDig = 1;
341          maxDig = 2;
342          break;
343              */
344         default:
345             dig = UCharacter.digit(c, 8);
346             if (dig >= 0) {
347                 minDig = 1;
348                 maxDig = 3;
349                 n = 1; /* Already have first octal digit */
350                 bitsPerDigit = 3;
351                 result = dig;
352             }
353             break;
354         }
355         if (minDig != 0) {
356             while (offset < length && n < maxDig) {
357                 // TEMPORARY
358                 // TODO: Restore the char32-based code when UCharacter.digit
359                 // is working (Bug 66).
360 
361                 //c = UTF16.charAt(s, offset);
362                 //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
363                 c = s.charAt(offset);
364                 dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
365                 if (dig < 0) {
366                     break;
367                 }
368                 result = (result << bitsPerDigit) | dig;
369                 //offset += UTF16.getCharCount(c);
370                 ++offset;
371                 ++n;
372             }
373             if (n < minDig) {
374                 return -1;
375             }
376             offset16[0] = offset;
377             return result;
378         }
379 
380         /* Convert C-style escapes in table */
381         for (i=0; i<UNESCAPE_MAP.length; i+=2) {
382             if (c == UNESCAPE_MAP[i]) {
383                 offset16[0] = offset;
384                 return UNESCAPE_MAP[i+1];
385             } else if (c < UNESCAPE_MAP[i]) {
386                 break;
387             }
388         }
389 
390         /* If no special forms are recognized, then consider
391          * the backslash to generically escape the next character. */
392         offset16[0] = offset;
393         return c;
394     }
395 
396     /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
397     static private final char[] UNESCAPE_MAP = {
398         /*"   0x22, 0x22 */
399         /*'   0x27, 0x27 */
400         /*?   0x3F, 0x3F */
401         /*\   0x5C, 0x5C */
402         /*a*/ 0x61, 0x07,
403         /*b*/ 0x62, 0x08,
404         /*f*/ 0x66, 0x0c,
405         /*n*/ 0x6E, 0x0a,
406         /*r*/ 0x72, 0x0d,
407         /*t*/ 0x74, 0x09,
408         /*v*/ 0x76, 0x0b
409     };
410 
411     /**
412      * Convert all escapes in a given string using unescapeAt().
413      * Leave invalid escape sequences unchanged.
414      */
unescapeLeniently(String s)415     public static String unescapeLeniently(String s) {
416         StringBuffer buf = new StringBuffer();
417         int[] pos = new int[1];
418         for (int i=0; i<s.length(); ) {
419             char c = s.charAt(i++);
420             if (c == '\\') {
421                 pos[0] = i;
422                 int e = unescapeAt(s, pos);
423                 if (e < 0) {
424                     buf.append(c);
425                 } else {
426                     UTF16.append(buf, e);
427                     i = pos[0];
428                 }
429             } else {
430                 buf.append(c);
431             }
432         }
433         return buf.toString();
434     }
435 
436     @Test
TestPatterns()437     public void TestPatterns() {
438         UnicodeSet set = new UnicodeSet();
439         expectPattern(set, "[[a-m]&[d-z]&[k-y]]",  "km");
440         expectPattern(set, "[[a-z]-[m-y]-[d-r]]",  "aczz");
441         expectPattern(set, "[a\\-z]",  "--aazz");
442         expectPattern(set, "[-az]",  "--aazz");
443         expectPattern(set, "[az-]",  "--aazz");
444         expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
445 
446         // Throw in a test of complement
447         set.complement();
448         String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
449         expectPairs(set, exp);
450     }
451 
452     @Test
TestCategories()453     public void TestCategories() {
454         int failures = 0;
455         UnicodeSet set = new UnicodeSet("[:Lu:]");
456         expectContainment(set, "ABC", "abc");
457 
458         // Make sure generation of L doesn't pollute cached Lu set
459         // First generate L, then Lu
460         // not used int TOP = 0x200; // Don't need to go over the whole range:
461         set = new UnicodeSet("[:L:]");
462         for (int i=0; i<0x200; ++i) {
463             boolean l = UCharacter.isLetter(i);
464             if (l != set.contains((char)i)) {
465                 errln("FAIL: L contains " + (char)i + " = " +
466                         set.contains((char)i));
467                 if (++failures == 10) break;
468             }
469         }
470 
471         set = new UnicodeSet("[:Lu:]");
472         for (int i=0; i<0x200; ++i) {
473             boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
474             if (lu != set.contains((char)i)) {
475                 errln("FAIL: Lu contains " + (char)i + " = " +
476                         set.contains((char)i));
477                 if (++failures == 20) break;
478             }
479         }
480     }
481 
482     @Test
TestAddRemove()483     public void TestAddRemove() {
484         UnicodeSet set = new UnicodeSet();
485         set.add('a', 'z');
486         expectPairs(set, "az");
487         set.remove('m', 'p');
488         expectPairs(set, "alqz");
489         set.remove('e', 'g');
490         expectPairs(set, "adhlqz");
491         set.remove('d', 'i');
492         expectPairs(set, "acjlqz");
493         set.remove('c', 'r');
494         expectPairs(set, "absz");
495         set.add('f', 'q');
496         expectPairs(set, "abfqsz");
497         set.remove('a', 'g');
498         expectPairs(set, "hqsz");
499         set.remove('a', 'z');
500         expectPairs(set, "");
501 
502         // Try removing an entire set from another set
503         expectPattern(set, "[c-x]", "cx");
504         UnicodeSet set2 = new UnicodeSet();
505         expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
506         set.removeAll(set2);
507         expectPairs(set, "deluxx");
508 
509         // Try adding an entire set to another set
510         expectPattern(set, "[jackiemclean]", "aacceein");
511         expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
512         set.addAll(set2);
513         expectPairs(set, "aacehort");
514 
515         // Test commutativity
516         expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
517         expectPattern(set2, "[jackiemclean]", "aacceein");
518         set.addAll(set2);
519         expectPairs(set, "aacehort");
520     }
521 
522     /**
523      * Make sure minimal representation is maintained.
524      */
525     @Test
TestMinimalRep()526     public void TestMinimalRep() {
527         // This is pretty thoroughly tested by checkCanonicalRep()
528         // run against the exhaustive operation results.  Use the code
529         // here for debugging specific spot problems.
530 
531         // 1 overlap against 2
532         UnicodeSet set = new UnicodeSet("[h-km-q]");
533         UnicodeSet set2 = new UnicodeSet("[i-o]");
534         set.addAll(set2);
535         expectPairs(set, "hq");
536         // right
537         set.applyPattern("[a-m]");
538         set2.applyPattern("[e-o]");
539         set.addAll(set2);
540         expectPairs(set, "ao");
541         // left
542         set.applyPattern("[e-o]");
543         set2.applyPattern("[a-m]");
544         set.addAll(set2);
545         expectPairs(set, "ao");
546         // 1 overlap against 3
547         set.applyPattern("[a-eg-mo-w]");
548         set2.applyPattern("[d-q]");
549         set.addAll(set2);
550         expectPairs(set, "aw");
551     }
552 
553     @Test
TestAPI()554     public void TestAPI() {
555         // default ct
556         UnicodeSet set = new UnicodeSet();
557         if (!set.isEmpty() || set.getRangeCount() != 0) {
558             errln("FAIL, set should be empty but isn't: " +
559                     set);
560         }
561 
562         // clear(), isEmpty()
563         set.add('a');
564         if (set.isEmpty()) {
565             errln("FAIL, set shouldn't be empty but is: " +
566                     set);
567         }
568         set.clear();
569         if (!set.isEmpty()) {
570             errln("FAIL, set should be empty but isn't: " +
571                     set);
572         }
573 
574         // size()
575         set.clear();
576         if (set.size() != 0) {
577             errln("FAIL, size should be 0, but is " + set.size() +
578                     ": " + set);
579         }
580         set.add('a');
581         if (set.size() != 1) {
582             errln("FAIL, size should be 1, but is " + set.size() +
583                     ": " + set);
584         }
585         set.add('1', '9');
586         if (set.size() != 10) {
587             errln("FAIL, size should be 10, but is " + set.size() +
588                     ": " + set);
589         }
590         set.clear();
591         set.complement();
592         if (set.size() != 0x110000) {
593             errln("FAIL, size should be 0x110000, but is" + set.size());
594         }
595 
596         // contains(first, last)
597         set.clear();
598         set.applyPattern("[A-Y 1-8 b-d l-y]");
599         for (int i = 0; i<set.getRangeCount(); ++i) {
600             int a = set.getRangeStart(i);
601             int b = set.getRangeEnd(i);
602             if (!set.contains(a, b)) {
603                 errln("FAIL, should contain " + (char)a + '-' + (char)b +
604                         " but doesn't: " + set);
605             }
606             if (set.contains((char)(a-1), b)) {
607                 errln("FAIL, shouldn't contain " +
608                         (char)(a-1) + '-' + (char)b +
609                         " but does: " + set);
610             }
611             if (set.contains(a, (char)(b+1))) {
612                 errln("FAIL, shouldn't contain " +
613                         (char)a + '-' + (char)(b+1) +
614                         " but does: " + set);
615             }
616         }
617 
618         // Ported InversionList test.
619         UnicodeSet a = new UnicodeSet((char)3,(char)10);
620         UnicodeSet b = new UnicodeSet((char)7,(char)15);
621         UnicodeSet c = new UnicodeSet();
622 
623         logln("a [3-10]: " + a);
624         logln("b [7-15]: " + b);
625         c.set(a); c.addAll(b);
626         UnicodeSet exp = new UnicodeSet((char)3,(char)15);
627         if (c.equals(exp)) {
628             logln("c.set(a).add(b): " + c);
629         } else {
630             errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
631         }
632         c.complement();
633         exp.set((char)0, (char)2);
634         exp.add((char)16, UnicodeSet.MAX_VALUE);
635         if (c.equals(exp)) {
636             logln("c.complement(): " + c);
637         } else {
638             errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
639         }
640         c.complement();
641         exp.set((char)3, (char)15);
642         if (c.equals(exp)) {
643             logln("c.complement(): " + c);
644         } else {
645             errln("FAIL: c.complement() = " + c + ", expect " + exp);
646         }
647         c.set(a); c.complementAll(b);
648         exp.set((char)3,(char)6);
649         exp.add((char)11,(char) 15);
650         if (c.equals(exp)) {
651             logln("c.set(a).complement(b): " + c);
652         } else {
653             errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
654         }
655 
656         exp.set(c);
657         c = bitsToSet(setToBits(c));
658         if (c.equals(exp)) {
659             logln("bitsToSet(setToBits(c)): " + c);
660         } else {
661             errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
662         }
663 
664         // Additional tests for coverage JB#2118
665         //UnicodeSet::complement(class UnicodeString const &)
666         //UnicodeSet::complementAll(class UnicodeString const &)
667         //UnicodeSet::containsNone(class UnicodeSet const &)
668         //UnicodeSet::containsNone(long,long)
669         //UnicodeSet::containsSome(class UnicodeSet const &)
670         //UnicodeSet::containsSome(long,long)
671         //UnicodeSet::removeAll(class UnicodeString const &)
672         //UnicodeSet::retain(long)
673         //UnicodeSet::retainAll(class UnicodeString const &)
674         //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
675         //UnicodeSetIterator::getString(void)
676         set.clear();
677         set.complement("ab");
678         exp.applyPattern("[{ab}]");
679         if (!set.equals(exp)) { errln("FAIL: complement(\"ab\")"); return; }
680 
681         UnicodeSetIterator iset = new UnicodeSetIterator(set);
682         if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
683             errln("FAIL: UnicodeSetIterator.next/IS_STRING");
684         } else if (!iset.string.equals("ab")) {
685             errln("FAIL: UnicodeSetIterator.string");
686         }
687 
688         set.add((char)0x61, (char)0x7A);
689         set.complementAll("alan");
690         exp.applyPattern("[{ab}b-kmo-z]");
691         if (!set.equals(exp)) { errln("FAIL: complementAll(\"alan\")"); return; }
692 
693         exp.applyPattern("[a-z]");
694         if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
695         if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
696         exp.applyPattern("[aln]");
697         if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
698         if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
699 
700         if (set.containsNone((char)0x61, (char)0x7A)) {
701             errln("FAIL: containsNone(char, char)");
702         }
703         if (!set.containsSome((char)0x61, (char)0x7A)) {
704             errln("FAIL: containsSome(char, char)");
705         }
706         if (!set.containsNone((char)0x41, (char)0x5A)) {
707             errln("FAIL: containsNone(char, char)");
708         }
709         if (set.containsSome((char)0x41, (char)0x5A)) {
710             errln("FAIL: containsSome(char, char)");
711         }
712 
713         set.removeAll("liu");
714         exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
715         if (!set.equals(exp)) { errln("FAIL: removeAll(\"liu\")"); return; }
716 
717         set.retainAll("star");
718         exp.applyPattern("[rst]");
719         if (!set.equals(exp)) { errln("FAIL: retainAll(\"star\")"); return; }
720 
721         set.retain((char)0x73);
722         exp.applyPattern("[s]");
723         if (!set.equals(exp)) { errln("FAIL: retain('s')"); return; }
724 
725         // ICU 2.6 coverage tests
726         // public final UnicodeSet retain(String s);
727         // public final UnicodeSet remove(int c);
728         // public final UnicodeSet remove(String s);
729         // public int hashCode();
730         set.applyPattern("[a-z{ab}{cd}]");
731         set.retain("cd");
732         exp.applyPattern("[{cd}]");
733         if (!set.equals(exp)) { errln("FAIL: retain(\"cd\")"); return; }
734 
735         set.applyPattern("[a-z{ab}{cd}]");
736         set.remove((char)0x63);
737         exp.applyPattern("[abd-z{ab}{cd}]");
738         if (!set.equals(exp)) { errln("FAIL: remove('c')"); return; }
739 
740         set.remove("cd");
741         exp.applyPattern("[abd-z{ab}]");
742         if (!set.equals(exp)) { errln("FAIL: remove(\"cd\")"); return; }
743 
744         if (set.hashCode() != exp.hashCode()) {
745             errln("FAIL: hashCode() unequal");
746         }
747         exp.clear();
748         if (set.hashCode() == exp.hashCode()) {
749             errln("FAIL: hashCode() equal");
750         }
751 
752         {
753             //Cover addAll(Collection) and addAllTo(Collection)
754             //  Seems that there is a bug in addAll(Collection) operation
755             //    Ram also add a similar test to UtilityTest.java
756             logln("Testing addAll(Collection) ... ");
757             String[] array = {"a", "b", "c", "de"};
758             List list = Arrays.asList(array);
759             Set aset = new HashSet(list);
760             logln(" *** The source set's size is: " + aset.size());
761 
762             set.clear();
763             set.addAll(aset);
764             if (set.size() != aset.size()) {
765                 errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() +
766                         ", " + set.size() + " seen instead!");
767             } else {
768                 logln("OK: After addAll, the UnicodeSet size got " + set.size());
769             }
770 
771             List list2 = new ArrayList();
772             set.addAllTo(list2);
773 
774             //verify the result
775             log(" *** The elements are: ");
776             String s = set.toPattern(true);
777             logln(s);
778             Iterator myiter = list2.iterator();
779             while(myiter.hasNext()) {
780                 log(myiter.next().toString() + "  ");
781             }
782             logln("");  // a new line
783         }
784 
785     }
786 
787     @Test
TestStrings()788     public void TestStrings() {
789         //  Object[][] testList = {
790         //  {I_EQUALS,  UnicodeSet.fromAll("abc"),
791         //  new UnicodeSet("[a-c]")},
792         //
793         //  {I_EQUALS,  UnicodeSet.from("ch").add('a','z').add("ll"),
794         //  new UnicodeSet("[{ll}{ch}a-z]")},
795         //
796         //  {I_EQUALS,  UnicodeSet.from("ab}c"),
797         //  new UnicodeSet("[{ab\\}c}]")},
798         //
799         //  {I_EQUALS,  new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
800         //  new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
801         //  };
802         //
803         //  for (int i = 0; i < testList.length; ++i) {
804         //  expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
805         //  }
806 
807         UnicodeSet[][] testList = {
808                 {UnicodeSet.fromAll("abc"),
809                     new UnicodeSet("[a-c]")},
810 
811                     {UnicodeSet.from("ch").add('a','z').add("ll"),
812                         new UnicodeSet("[{ll}{ch}a-z]")},
813 
814                         {UnicodeSet.from("ab}c"),
815                             new UnicodeSet("[{ab\\}c}]")},
816 
817                             {new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
818                                 new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
819         };
820 
821         for (int i = 0; i < testList.length; ++i) {
822             if (!testList[i][0].equals(testList[i][1])) {
823                 errln("FAIL: sets unequal; see source code (" + i + ")");
824             }
825         }
826     }
827 
828     static final Integer
829     I_ANY = new Integer(SortedSetRelation.ANY),
830     I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
831     I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
832     I_NO_B = new Integer(SortedSetRelation.NO_B),
833     I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
834     I_EQUALS = new Integer(SortedSetRelation.EQUALS),
835     I_NO_A = new Integer(SortedSetRelation.NO_A),
836     I_NONE = new Integer(SortedSetRelation.NONE);
837 
838     @Test
TestSetRelation()839     public void TestSetRelation() {
840 
841         String[] choices = {"a", "b", "cd", "ef"};
842         int limit = 1 << choices.length;
843 
844         SortedSet iset = new TreeSet();
845         SortedSet jset = new TreeSet();
846 
847         for (int i = 0; i < limit; ++i) {
848             pick(i, choices, iset);
849             for (int j = 0; j < limit; ++j) {
850                 pick(j, choices, jset);
851                 checkSetRelation(iset, jset, "(" + i + ")");
852             }
853         }
854     }
855 
856     @Test
TestSetSpeed()857     public void TestSetSpeed() {
858         // skip unless verbose
859         if (!isVerbose()) return;
860 
861         SetSpeed2(100);
862         SetSpeed2(1000);
863     }
864 
SetSpeed2(int size)865     public void SetSpeed2(int size) {
866 
867         SortedSet iset = new TreeSet();
868         SortedSet jset = new TreeSet();
869 
870         for (int i = 0; i < size*2; i += 2) { // only even values
871             iset.add(new Integer(i));
872             jset.add(new Integer(i));
873         }
874 
875         int iterations = 1000000 / size;
876 
877         logln("Timing comparison of Java vs Utility");
878         logln("For about " + size + " objects that are almost all the same.");
879 
880         CheckSpeed(iset, jset, "when a = b", iterations);
881 
882         iset.add(new Integer(size + 1));    // add odd value in middle
883 
884         CheckSpeed(iset, jset, "when a contains b", iterations);
885         CheckSpeed(jset, iset, "when b contains a", iterations);
886 
887         jset.add(new Integer(size - 1));    // add different odd value in middle
888 
889         CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
890     }
891 
    /**
     * Runs both timing comparisons for the given pair of sets: the
     * containment comparison first, then the equality comparison.
     *
     * @param iset       first set, passed through unchanged
     * @param jset       second set, passed through unchanged
     * @param message    label included in the logged result lines
     * @param iterations number of timed repetitions per measurement
     */
    void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
        CheckSpeed2(iset, jset, message, iterations);
        CheckSpeed3(iset, jset, message, iterations);
    }
896 
CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations)897     void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
898         boolean x;
899         boolean y;
900 
901         // make sure code is loaded:
902         x = iset.containsAll(jset);
903         y = SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
904         if (x != y) errln("FAIL contains comparison");
905 
906         double start = System.currentTimeMillis();
907         for (int i = 0; i < iterations; ++i) {
908             x |= iset.containsAll(jset);
909         }
910         double middle = System.currentTimeMillis();
911         for (int i = 0; i < iterations; ++i) {
912             y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
913         }
914         double end = System.currentTimeMillis();
915 
916         double jtime = (middle - start)/iterations;
917         double utime = (end - middle)/iterations;
918 
919         NumberFormat nf = NumberFormat.getPercentInstance();
920         logln("Test contains: " + message + ": Java: " + jtime
921                 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
922     }
923 
CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations)924     void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
925         boolean x;
926         boolean y;
927 
928         // make sure code is loaded:
929         x = iset.equals(jset);
930         y = SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
931         if (x != y) errln("FAIL equality comparison");
932 
933 
934         double start = System.currentTimeMillis();
935         for (int i = 0; i < iterations; ++i) {
936             x |= iset.equals(jset);
937         }
938         double middle = System.currentTimeMillis();
939         for (int i = 0; i < iterations; ++i) {
940             y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
941         }
942         double end = System.currentTimeMillis();
943 
944         double jtime = (middle - start)/iterations;
945         double utime = (end - middle)/iterations;
946 
947         NumberFormat nf = NumberFormat.getPercentInstance();
948         logln("Test equals:   " + message + ": Java: " + jtime
949                 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
950     }
951 
pick(int bits, Object[] examples, SortedSet output)952     void pick(int bits, Object[] examples, SortedSet output) {
953         output.clear();
954         for (int k = 0; k < 32; ++k) {
955             if (((1<<k) & bits) != 0) output.add(examples[k]);
956         }
957     }
958 
    /**
     * Human-readable labels used in log and failure messages, indexed by the
     * integer relation filter (0 through 7) that checkSetRelation passes to
     * SortedSetRelation.hasRelation.
     */
    public static final String[] RELATION_NAME = {
        "both-are-null",
        "a-is-null",
        "equals",
        "is-contained-in",
        "b-is-null",
        "is-disjoint_with",
        "contains",
        "any", };
968 
dumbHasRelation(Collection A, int filter, Collection B)969     boolean dumbHasRelation(Collection A, int filter, Collection B) {
970         Collection ab = new TreeSet(A);
971         ab.retainAll(B);
972         if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0) return false;
973 
974         // A - B size == A.size - A&B.size
975         if (A.size() > ab.size() && (filter & SortedSetRelation.A_NOT_B) == 0) return false;
976 
977         // B - A size == B.size - A&B.size
978         if (B.size() > ab.size() && (filter & SortedSetRelation.B_NOT_A) == 0) return false;
979 
980 
981         return true;
982     }
983 
checkSetRelation(SortedSet a, SortedSet b, String message)984     void checkSetRelation(SortedSet a, SortedSet b, String message) {
985         for (int i = 0; i < 8; ++i) {
986 
987             boolean hasRelation = SortedSetRelation.hasRelation(a, i, b);
988             boolean dumbHasRelation = dumbHasRelation(a, i, b);
989 
990             logln(message + " " + hasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
991 
992             if (hasRelation != dumbHasRelation) {
993                 errln("FAIL: " +
994                         message + " " + dumbHasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
995             }
996         }
997         logln("");
998     }
999 
1000     /**
1001      * Test the [:Latin:] syntax.
1002      */
1003     @Test
TestScriptSet()1004     public void TestScriptSet() {
1005 
1006         expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
1007 
1008         expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
1009 
1010         /* Jitterbug 1423 */
1011         expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
1012 
1013     }
1014 
1015     /**
1016      * Test the [:Latin:] syntax.
1017      */
1018     @Test
TestPropertySet()1019     public void TestPropertySet() {
1020         String[] DATA = {
1021                 // Pattern, Chars IN, Chars NOT in
1022 
1023                 "[:Latin:]",
1024                 "aA",
1025                 "\u0391\u03B1",
1026 
1027                 "[\\p{Greek}]",
1028                 "\u0391\u03B1",
1029                 "aA",
1030 
1031                 "\\P{ GENERAL Category = upper case letter }",
1032                 "abc",
1033                 "ABC",
1034 
1035                 // Combining class: @since ICU 2.2
1036                 // Check both symbolic and numeric
1037                 "\\p{ccc=Nukta}",
1038                 "\u0ABC",
1039                 "abc",
1040 
1041                 "\\p{Canonical Combining Class = 11}",
1042                 "\u05B1",
1043                 "\u05B2",
1044 
1045                 "[:c c c = iota subscript :]",
1046                 "\u0345",
1047                 "xyz",
1048 
1049                 // Bidi class: @since ICU 2.2
1050                 "\\p{bidiclass=lefttoright}",
1051                 "abc",
1052                 "\u0671\u0672",
1053 
1054                 // Binary properties: @since ICU 2.2
1055                 "\\p{ideographic}",
1056                 "\u4E0A",
1057                 "x",
1058 
1059                 "[:math=false:]",
1060                 "q)*(", // )(and * were removed from math in Unicode 4.0.1
1061                 "+<>^",
1062 
1063                 // JB#1767 \N{}, \p{ASCII}
1064                 "[:Ascii:]",
1065                 "abc\u0000\u007F",
1066                 "\u0080\u4E00",
1067 
1068                 "[\\N{ latin small letter  a  }[:name= latin small letter z:]]",
1069                 "az",
1070                 "qrs",
1071 
1072                 // JB#2015
1073                 "[:any:]",
1074                 "a\\U0010FFFF",
1075                 "",
1076 
1077                 "[:nv=0.5:]",
1078                 "\u00BD\u0F2A",
1079                 "\u00BC",
1080 
1081                 // JB#2653: Age
1082                 "[:Age=1.1:]",
1083                 "\u03D6", // 1.1
1084                 "\u03D8\u03D9", // 3.2
1085 
1086                 "[:Age=3.1:]",
1087                 "\\u1800\\u3400\\U0002f800",
1088                 "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
1089 
1090                 // JB#2350: Case_Sensitive
1091                 "[:Case Sensitive:]",
1092                 "A\u1FFC\\U00010410",
1093                 ";\u00B4\\U00010500",
1094 
1095 
1096                 // Regex compatibility test
1097                 "[-b]", // leading '-' is literal
1098                 "-b",
1099                 "ac",
1100 
1101                 "[^-b]", // leading '-' is literal
1102                 "ac",
1103                 "-b",
1104 
1105                 "[b-]", // trailing '-' is literal
1106                 "-b",
1107                 "ac",
1108 
1109                 "[^b-]", // trailing '-' is literal
1110                 "ac",
1111                 "-b",
1112 
1113                 "[a-b-]", // trailing '-' is literal
1114                 "ab-",
1115                 "c=",
1116 
1117                 "[[a-q]&[p-z]-]", // trailing '-' is literal
1118                 "pq-",
1119                 "or=",
1120 
1121                 "[\\s|\\)|:|$|\\>]", // from regex tests
1122                 "s|):$>",
1123                 "\\abc",
1124 
1125                 "[\uDC00cd]", // JB#2906: isolated trail at start
1126                 "cd\uDC00",
1127                 "ab\uD800\\U00010000",
1128 
1129                 "[ab\uD800]", // JB#2906: isolated trail at start
1130                 "ab\uD800",
1131                 "cd\uDC00\\U00010000",
1132 
1133                 "[ab\uD800cd]", // JB#2906: isolated lead in middle
1134                 "abcd\uD800",
1135                 "ef\uDC00\\U00010000",
1136 
1137                 "[ab\uDC00cd]", // JB#2906: isolated trail in middle
1138                 "abcd\uDC00",
1139                 "ef\uD800\\U00010000",
1140 
1141                 "[:^lccc=0:]", // Lead canonical class
1142                 "\u0300\u0301",
1143                 "abcd\u00c0\u00c5",
1144 
1145                 "[:^tccc=0:]", // Trail canonical class
1146                 "\u0300\u0301\u00c0\u00c5",
1147                 "abcd",
1148 
1149                 "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
1150                 "\u0300\u0301\u00c0\u00c5",
1151                 "abcd",
1152 
1153                 "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
1154                 "",
1155                 "abcd\u0300\u0301\u00c0\u00c5",
1156 
1157                 "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
1158                 "\u0F73\u0F75\u0F81",
1159                 "abcd\u0300\u0301\u00c0\u00c5",
1160 
1161                 "[:Assigned:]",
1162                 "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
1163                 "\\u0888\\uFDD3\\uFFFE\\U00050005",
1164 
1165                 // Script_Extensions, new in Unicode 6.0
1166                 "[:scx=Arab:]",
1167                 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
1168                 "\\u061D\\uFDEF\\uFDFE",
1169 
1170                 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
1171                 // so scx-sc is missing U+FDF2.
1172                 "[[:Script_Extensions=Arabic:]-[:Arab:]]",
1173                 "\\u0640\\u064B\\u0650\\u0655",
1174                 "\\uFDF2"
1175         };
1176 
1177         for (int i=0; i<DATA.length; i+=3) {
1178             expectContainment(DATA[i], DATA[i+1], DATA[i+2]);
1179         }
1180     }
1181 
1182     @Test
TestUnicodeSetStrings()1183     public void TestUnicodeSetStrings() {
1184         UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
1185         logln(uset + " ~ " + uset.getRegexEquivalent());
1186         String[][] testStrings = {{"x", "none"},
1187                 {"bc", "all"},
1188                 {"cdbca", "all"},
1189                 {"a", "all"},
1190                 {"bcx", "some"},
1191                 {"ab", "some"},
1192                 {"acb", "some"},
1193                 {"bcda", "some"},
1194                 {"dccbx", "none"},
1195         };
1196         for (int i = 0; i < testStrings.length; ++i) {
1197             check(uset, testStrings[i][0], testStrings[i][1]);
1198         }
1199     }
1200 
1201 
check(UnicodeSet uset, String string, String desiredStatus)1202     private void check(UnicodeSet uset, String string, String desiredStatus) {
1203         boolean shouldContainAll = desiredStatus.equals("all");
1204         boolean shouldContainNone = desiredStatus.equals("none");
1205         if (uset.containsAll(string) != shouldContainAll) {
1206             errln("containsAll " +  string + " should be " + shouldContainAll);
1207         } else {
1208             logln("containsAll " +  string + " = " + shouldContainAll);
1209         }
1210         if (uset.containsNone(string) != shouldContainNone) {
1211             errln("containsNone " +  string + " should be " + shouldContainNone);
1212         } else {
1213             logln("containsNone " +  string + " = " + shouldContainNone);
1214         }
1215     }
1216 
1217     /**
1218      * Test cloning of UnicodeSet
1219      */
1220     @Test
TestClone()1221     public void TestClone() {
1222         UnicodeSet s = new UnicodeSet("[abcxyz]");
1223         UnicodeSet t = (UnicodeSet) s.clone();
1224         expectContainment(t, "abc", "def");
1225     }
1226 
1227     /**
1228      * Test the indexOf() and charAt() methods.
1229      */
1230     @Test
TestIndexOf()1231     public void TestIndexOf() {
1232         UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
1233         for (int i=0; i<set.size(); ++i) {
1234             int c = set.charAt(i);
1235             if (set.indexOf(c) != i) {
1236                 errln("FAIL: charAt(" + i + ") = " + c +
1237                         " => indexOf() => " + set.indexOf(c));
1238             }
1239         }
1240         int c = set.charAt(set.size());
1241         if (c != -1) {
1242             errln("FAIL: charAt(<out of range>) = " +
1243                     Utility.escape(String.valueOf(c)));
1244         }
1245         int j = set.indexOf('q');
1246         if (j != -1) {
1247             errln("FAIL: indexOf('q') = " + j);
1248         }
1249     }
1250 
1251     @Test
TestContainsString()1252     public void TestContainsString() {
1253         UnicodeSet x = new UnicodeSet("[a{bc}]");
1254         if (x.contains("abc")) errln("FAIL");
1255     }
1256 
1257     @Test
TestExhaustive()1258     public void TestExhaustive() {
1259         // exhaustive tests. Simulate UnicodeSets with integers.
1260         // That gives us very solid tests (except for large memory tests).
1261 
1262         char limit = (char)128;
1263 
1264         for (char i = 0; i < limit; ++i) {
1265             logln("Testing " + i + ", " + bitsToSet(i));
1266             _testComplement(i);
1267 
1268             // AS LONG AS WE ARE HERE, check roundtrip
1269             checkRoundTrip(bitsToSet(i));
1270 
1271             for (char j = 0; j < limit; ++j) {
1272                 _testAdd(i,j);
1273                 _testXor(i,j);
1274                 _testRetain(i,j);
1275                 _testRemove(i,j);
1276             }
1277         }
1278     }
1279 
1280     /**
1281      * Make sure each script name and abbreviated name can be used
1282      * to construct a UnicodeSet.
1283      */
1284     @Test
TestScriptNames()1285     public void TestScriptNames() {
1286         for (int i=0; i<UScript.CODE_LIMIT; ++i) {
1287             for (int j=0; j<2; ++j) {
1288                 String pat = "";
1289                 try {
1290                     String name =
1291                             (j==0) ? UScript.getName(i) : UScript.getShortName(i);
1292                             pat = "[:" + name + ":]";
1293                             UnicodeSet set = new UnicodeSet(pat);
1294                             logln("Ok: " + pat + " -> " + set.toPattern(false));
1295                 } catch (IllegalArgumentException e) {
1296                     if (pat.length() == 0) {
1297                         errln("FAIL (in UScript): No name for script " + i);
1298                     } else {
1299                         errln("FAIL: Couldn't create " + pat);
1300                     }
1301                 }
1302             }
1303         }
1304     }
1305 
1306     /**
1307      * Test closure API.
1308      */
1309     @Test
TestCloseOver()1310     public void TestCloseOver() {
1311         String CASE = String.valueOf(UnicodeSet.CASE);
1312         String[] DATA = {
1313                 // selector, input, output
1314                 CASE,
1315                 "[aq\u00DF{Bc}{bC}{Fi}]",
1316                 "[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
1317 
1318                 CASE,
1319                 "[\u01F1]", // 'DZ'
1320                 "[\u01F1\u01F2\u01F3]",
1321 
1322                 CASE,
1323                 "[\u1FB4]",
1324                 "[\u1FB4{\u03AC\u03B9}]",
1325 
1326                 CASE,
1327                 "[{F\uFB01}]",
1328                 "[\uFB03{ffi}]",
1329 
1330                 CASE,
1331                 "[a-z]","[A-Za-z\u017F\u212A]",
1332                 CASE,
1333                 "[abc]","[A-Ca-c]",
1334                 CASE,
1335                 "[ABC]","[A-Ca-c]",
1336         };
1337 
1338         UnicodeSet s = new UnicodeSet();
1339         UnicodeSet t = new UnicodeSet();
1340         for (int i=0; i<DATA.length; i+=3) {
1341             int selector = Integer.parseInt(DATA[i]);
1342             String pat = DATA[i+1];
1343             String exp = DATA[i+2];
1344             s.applyPattern(pat);
1345             s.closeOver(selector);
1346             t.applyPattern(exp);
1347             if (s.equals(t)) {
1348                 logln("Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
1349             } else {
1350                 errln("FAIL: " + pat + ".closeOver(" + selector + ") => " +
1351                         s.toPattern(true) + ", expected " + exp);
1352             }
1353         }
1354 
1355         // Test the pattern API
1356         s.applyPattern("[abc]", UnicodeSet.CASE);
1357         expectContainment(s, "abcABC", "defDEF");
1358         s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
1359         expectContainment(s, "defDEF", "abcABC");
1360     }
1361 
1362     @Test
TestEscapePattern()1363     public void TestEscapePattern() {
1364         // The following pattern must contain at least one range "c-d"
1365         // where c or d is a Pattern_White_Space.
1366         String pattern =
1367                 "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1368         String exp =
1369                 "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1370         // We test this with two passes; in the second pass we
1371         // pre-unescape the pattern.  Since U+200E is Pattern_White_Space,
1372         // this fails -- which is what we expect.
1373         for (int pass=1; pass<=2; ++pass) {
1374             String pat = pattern;
1375             if (pass==2) {
1376                 pat = Utility.unescape(pat);
1377             }
1378             // Pattern is only good for pass 1
1379             boolean isPatternValid = (pass==1);
1380 
1381             UnicodeSet set = null;
1382             try {
1383                 set = new UnicodeSet(pat);
1384             } catch (IllegalArgumentException e) {
1385                 set = null;
1386             }
1387             if ((set != null) != isPatternValid){
1388                 errln("FAIL: applyPattern(" +
1389                         Utility.escape(pat) + ") => " + set);
1390                 continue;
1391             }
1392             if (set == null) {
1393                 continue;
1394             }
1395             if (set.contains((char)0x0644)){
1396                 errln("FAIL: " + Utility.escape(pat) + " contains(U+0664)");
1397             }
1398 
1399             String newpat = set.toPattern(true);
1400             if (newpat.equals(exp)) {
1401                 logln(Utility.escape(pat) + " => " + newpat);
1402             } else {
1403                 errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
1404             }
1405 
1406             for (int i=0; i<set.getRangeCount(); ++i) {
1407                 StringBuffer str = new StringBuffer("Range ");
1408                 str.append((char)(0x30 + i))
1409                 .append(": ");
1410                 UTF16.append(str, set.getRangeStart(i));
1411                 str.append(" - ");
1412                 UTF16.append(str, set.getRangeEnd(i));
1413                 String s = Utility.escape(str.toString() + " (" + set.getRangeStart(i) + " - " +
1414                         set.getRangeEnd(i) + ")");
1415                 if (set.getRangeStart(i) < 0) {
1416                     errln("FAIL: " + s);
1417                 } else {
1418                     logln(s);
1419                 }
1420             }
1421         }
1422     }
1423 
1424     @Test
TestSymbolTable()1425     public void TestSymbolTable() {
1426         // Multiple test cases can be set up here.  Each test case
1427         // is terminated by null:
1428         // var, value, var, value,..., input pat., exp. output pat., null
1429         String DATA[] = {
1430                 "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
1431                 "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null,
1432                 "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null
1433         };
1434 
1435         for (int i=0; i<DATA.length; ++i) {
1436             TokenSymbolTable sym = new TokenSymbolTable();
1437 
1438             // Set up variables
1439             while (DATA[i+2] != null) {
1440                 sym.add(DATA[i], DATA[i+1]);
1441                 i += 2;
1442             }
1443 
1444             // Input pattern and expected output pattern
1445             String inpat = DATA[i], exppat = DATA[i+1];
1446             i += 2;
1447 
1448             ParsePosition pos = new ParsePosition(0);
1449             UnicodeSet us = new UnicodeSet(inpat, pos, sym);
1450 
1451             // results
1452             if (pos.getIndex() != inpat.length()) {
1453                 errln("Failed to read to end of string \""
1454                         + inpat + "\": read to "
1455                         + pos.getIndex() + ", length is "
1456                         + inpat.length());
1457             }
1458 
1459             UnicodeSet us2 = new UnicodeSet(exppat);
1460             if (!us.equals(us2)) {
1461                 errln("Failed, got " + us + ", expected " + us2);
1462             } else {
1463                 logln("Ok, got " + us);
1464             }
1465 
1466             //cover Unicode(String,ParsePosition,SymbolTable,int)
1467             ParsePosition inpos = new ParsePosition(0);
1468             UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym, UnicodeSet.IGNORE_SPACE);
1469             UnicodeSet expSet = new UnicodeSet(exppat);
1470             if (!inSet.equals(expSet)) {
1471                 errln("FAIL: Failed, got " + inSet + ", expected " + expSet);
1472             } else {
1473                 logln("OK: got " + inSet);
1474             }
1475         }
1476     }
1477 
1478     /**
1479      * Test that Posix style character classes [:digit:], etc.
1480      *   have the Unicode definitions from TR 18.
1481      */
1482     @Test
TestPosixClasses()1483     public void TestPosixClasses() {
1484         expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
1485         expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
1486         expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
1487         expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
1488         expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
1489         expectEqual("POSIX xdigit", "[:xdigit:]", "[\\p{DecimalNumber}\\p{HexDigit}]");
1490         expectEqual("POSIX alnum", "[:alnum:]", "[\\p{Alphabetic}\\p{DecimalNumber}]");
1491         expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
1492         expectEqual("POSIX blank", "[:blank:]", "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
1493         expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
1494         expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
1495         expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
1496     }
1497 
1498     @Test
TestHangulSyllable()1499     public void TestHangulSyllable() {
1500         final UnicodeSet lvt = new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]");
1501         assertNotEquals("LVT count", new UnicodeSet(), lvt);
1502         logln(lvt + ": " + lvt.size());
1503         final UnicodeSet lv = new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]");
1504         assertNotEquals("LV count", new UnicodeSet(), lv);
1505         logln(lv + ": " + lv.size());
1506     }
1507 
1508     /**
1509      * Test that frozen classes disallow changes. For 4217
1510      */
1511     @Test
TestFrozen()1512     public void TestFrozen() {
1513         UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
1514         test.freeze();
1515         checkModification(test, true);
1516         checkModification(test, false);
1517     }
1518 
1519     /**
1520      * Test Generic support
1521      */
1522     @Test
TestGenerics()1523     public void TestGenerics() {
1524         UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
1525         UnicodeSet set2 = new UnicodeSet("[e-f {ch}]").freeze();
1526         UnicodeSet set3 = new UnicodeSet("[d m-n {dh}]").freeze();
1527         // A useful range of sets for testing, including both characters and strings
1528         // set 1 contains set2
1529         // set 1 is overlaps with set 3
1530         // set 2 is disjoint with set 3
1531 
1532         //public Iterator<String> iterator() {
1533 
1534         ArrayList<String> oldList = new ArrayList<String>();
1535         for (UnicodeSetIterator it = new UnicodeSetIterator(set1); it.next();) {
1536             oldList.add(it.getString());
1537         }
1538 
1539         ArrayList<String> list1 = new ArrayList<String>();
1540         for (String s : set1) {
1541             list1.add(s);
1542         }
1543         assertEquals("iteration test", oldList, list1);
1544 
1545         //addAllTo(Iterable<T>, U)
1546         list1.clear();
1547         set1.addAllTo(list1);
1548         assertEquals("iteration test", oldList, list1);
1549 
1550         list1 = set1.addAllTo(new ArrayList<String>());
1551         assertEquals("addAllTo", oldList, list1);
1552 
1553         ArrayList<String> list2 = set2.addAllTo(new ArrayList<String>());
1554         ArrayList<String> list3 = set3.addAllTo(new ArrayList<String>());
1555 
1556         // put them into different order, to check that order doesn't matter
1557         TreeSet sorted1 = set1.addAllTo(new TreeSet<String>());
1558         TreeSet sorted2 = set2.addAllTo(new TreeSet<String>());
1559         TreeSet sorted3 = set3.addAllTo(new TreeSet<String>());
1560 
1561         //containsAll(Collection<String> collection)
1562         assertTrue("containsAll", set1.containsAll(list1));
1563         assertTrue("containsAll", set1.containsAll(sorted1));
1564         assertTrue("containsAll", set1.containsAll(list2));
1565         assertTrue("containsAll", set1.containsAll(sorted2));
1566         assertFalse("containsAll", set1.containsAll(list3));
1567         assertFalse("containsAll", set1.containsAll(sorted3));
1568         assertFalse("containsAll", set2.containsAll(list3));
1569         assertFalse("containsAll", set2.containsAll(sorted3));
1570 
1571         //containsSome(Collection<String>)
1572         assertTrue("containsSome", set1.containsSome(list1));
1573         assertTrue("containsSome", set1.containsSome(sorted1));
1574         assertTrue("containsSome", set1.containsSome(list2));
1575         assertTrue("containsSome", set1.containsSome(sorted2));
1576         assertTrue("containsSome", set1.containsSome(list3));
1577         assertTrue("containsSome", set1.containsSome(sorted3));
1578         assertFalse("containsSome", set2.containsSome(list3));
1579         assertFalse("containsSome", set2.containsSome(sorted3));
1580 
1581         //containsNone(Collection<String>)
1582         assertFalse("containsNone", set1.containsNone(list1));
1583         assertFalse("containsNone", set1.containsNone(sorted1));
1584         assertFalse("containsNone", set1.containsNone(list2));
1585         assertFalse("containsNone", set1.containsNone(sorted2));
1586         assertFalse("containsNone", set1.containsNone(list3));
1587         assertFalse("containsNone", set1.containsNone(sorted3));
1588         assertTrue("containsNone", set2.containsNone(list3));
1589         assertTrue("containsNone", set2.containsNone(sorted3));
1590 
1591         //addAll(String...)
1592         UnicodeSet other3 = new UnicodeSet().addAll("d", "m", "n", "dh");
1593         assertEquals("addAll", set3, other3);
1594 
1595         //removeAll(Collection<String>)
1596         UnicodeSet mod1 = new UnicodeSet(set1).removeAll(set2);
1597         UnicodeSet mod2 = new UnicodeSet(set1).removeAll(list2);
1598         assertEquals("remove all", mod1, mod2);
1599 
1600         //retainAll(Collection<String>)
1601         mod1 = new UnicodeSet(set1).retainAll(set2);
1602         mod2 = new UnicodeSet(set1).retainAll(set2.addAllTo(new LinkedHashSet<String>()));
1603         assertEquals("remove all", mod1, mod2);
1604     }
1605 
1606     @Test
TestComparison()1607     public void TestComparison() {
1608         UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
1609         UnicodeSet set2 = new UnicodeSet("[c-e {ch}]").freeze();
1610         UnicodeSet set3 = new UnicodeSet("[d m-n z {dh}]").freeze();
1611 
1612         //compareTo(UnicodeSet)
1613         // do indirectly, by sorting
1614         List<UnicodeSet> unsorted = Arrays.asList(set3, set2, set1);
1615         List<UnicodeSet> goalShortest = Arrays.asList(set2, set3, set1);
1616         List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
1617         List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);
1618 
1619         List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
1620         assertNotEquals("compareTo-shorter-first", unsorted, sorted);
1621         assertEquals("compareTo-shorter-first", goalShortest, sorted);
1622 
1623         TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
1624             @Override
1625             public int compare(UnicodeSet o1, UnicodeSet o2) {
1626                 // TODO Auto-generated method stub
1627                 return o1.compareTo(o2, ComparisonStyle.LONGER_FIRST);
1628             }});
1629         sorted1.addAll(unsorted);
1630         sorted = new ArrayList(sorted1);
1631         assertNotEquals("compareTo-longer-first", unsorted, sorted);
1632         assertEquals("compareTo-longer-first", goalLongest, sorted);
1633 
1634         sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
1635             @Override
1636             public int compare(UnicodeSet o1, UnicodeSet o2) {
1637                 // TODO Auto-generated method stub
1638                 return o1.compareTo(o2, ComparisonStyle.LEXICOGRAPHIC);
1639             }});
1640         sorted1.addAll(unsorted);
1641         sorted = new ArrayList(sorted1);
1642         assertNotEquals("compareTo-lex", unsorted, sorted);
1643         assertEquals("compareTo-lex", goalLex, sorted);
1644 
1645         //compare(String, int)
1646         // make a list of interesting combinations
1647         List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
1648         TreeSet<String> target = new TreeSet<String>();
1649         for (String s : sources) {
1650             target.add(s);
1651             for (String t : sources) {
1652                 target.add(s + t);
1653                 for (String u : sources) {
1654                     target.add(s + t + u);
1655                 }
1656             }
1657         }
1658         // now compare all the combinations. If any of them is a code point, use it.
1659         int maxErrorCount = 0;
1660         compare:
1661             for (String last : target) {
1662                 for (String curr : target) {
1663                     int lastCount = Character.codePointCount(last, 0, last.length());
1664                     int currCount = Character.codePointCount(curr, 0, curr.length());
1665                     int comparison;
1666                     if (lastCount == 1) {
1667                         comparison = UnicodeSet.compare(last.codePointAt(0), curr);
1668                     } else if (currCount == 1) {
1669                         comparison = UnicodeSet.compare(last, curr.codePointAt(0));
1670                     } else {
1671                         continue;
1672                     }
1673                     if (comparison != last.compareTo(curr)) {
1674                         // repeat for debugging
1675                         if (lastCount == 1) {
1676                             comparison = UnicodeSet.compare(last.codePointAt(0), curr);
1677                         } else if (currCount == 1) {
1678                             comparison = UnicodeSet.compare(last, curr.codePointAt(0));
1679                         }
1680                         if (maxErrorCount++ > 10) {
1681                             errln(maxErrorCount + " Failure in comparing " + last + " & " + curr + "\tOmitting others...");
1682                             break compare;
1683                         }
1684                         errln(maxErrorCount + " Failure in comparing " + last + " & " + curr);
1685                     }
1686                 }
1687             }
1688 
1689         //compare(Iterable<T>, Iterable<T>)
1690         int max = 10;
1691         List<String> test1 = new ArrayList<String>(max);
1692         List<String> test2 = new ArrayList<String>(max);
1693         for (int i = 0; i <= max; ++i) {
1694             test1.add("a" + i);
1695             test2.add("a" + (max - i)); // add in reverse order
1696         }
1697         assertNotEquals("compare iterable test", test1, test2);
1698         TreeSet<CharSequence> sortedTest1 = new TreeSet<CharSequence>(test1);
1699         TreeSet<CharSequence> sortedTest2 = new TreeSet<CharSequence>(test2);
1700         assertEquals("compare iterable test", sortedTest1, sortedTest2);
1701     }
1702 
1703     @Test
TestRangeConstructor()1704     public void TestRangeConstructor() {
1705         UnicodeSet w = new UnicodeSet().addAll(3,5);
1706         UnicodeSet s = new UnicodeSet(3,5);
1707         assertEquals("new constructor", w, s);
1708 
1709         w = new UnicodeSet().addAll(3,5).addAll(7,7);
1710         UnicodeSet t = new UnicodeSet(3,5, 7,7);
1711         assertEquals("new constructor", w, t);
1712         // check to make sure right exceptions are thrown
1713         Class expected = IllegalArgumentException.class;
1714         Class actual;
1715 
1716         try {
1717             actual = null;
1718             @SuppressWarnings("unused")
1719             UnicodeSet u = new UnicodeSet(5);
1720         } catch (IllegalArgumentException e) {
1721             actual = e.getClass();
1722         }
1723         assertEquals("exception if odd", expected, actual);
1724 
1725         try {
1726             actual = null;
1727             @SuppressWarnings("unused")
1728             UnicodeSet u = new UnicodeSet(3, 2, 7, 9);
1729         } catch (IllegalArgumentException e) {
1730             actual = e.getClass();
1731         }
1732         assertEquals("exception for start/end problem", expected, actual);
1733 
1734         try {
1735             actual = null;
1736             @SuppressWarnings("unused")
1737             UnicodeSet u = new UnicodeSet(3, 5, 6, 9);
1738         } catch (IllegalArgumentException e) {
1739             actual = e.getClass();
1740         }
1741         assertEquals("exception for end/start problem", expected, actual);
1742 
1743         CheckRangeSpeed(10000, new UnicodeSet("[:whitespace:]"));
1744         CheckRangeSpeed(1000, new UnicodeSet("[:letter:]"));
1745     }
1746 
1747     /**
1748      * @param iterations
1749      * @param testSet
1750      */
CheckRangeSpeed(int iterations, UnicodeSet testSet)1751     private void CheckRangeSpeed(int iterations, UnicodeSet testSet) {
1752         testSet.complement().complement();
1753         String testPattern = testSet.toString();
1754         // fill a set of pairs from the pattern
1755         int[] pairs = new int[testSet.getRangeCount()*2];
1756         int j = 0;
1757         for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.nextRange();) {
1758             pairs[j++] = it.codepoint;
1759             pairs[j++] = it.codepointEnd;
1760         }
1761         UnicodeSet fromRange = new UnicodeSet(testSet);
1762         assertEquals("from range vs pattern", testSet, fromRange);
1763 
1764         double start = System.currentTimeMillis();
1765         for (int i = 0; i < iterations; ++i) {
1766             fromRange = new UnicodeSet(testSet);
1767         }
1768         double middle = System.currentTimeMillis();
1769         for (int i = 0; i < iterations; ++i) {
1770             new UnicodeSet(testPattern);
1771         }
1772         double end = System.currentTimeMillis();
1773 
1774         double rangeConstructorTime = (middle - start)/iterations;
1775         double patternConstructorTime = (end - middle)/iterations;
1776         String message = "Range constructor:\t" + rangeConstructorTime + ";\tPattern constructor:\t" + patternConstructorTime + "\t\t"
1777                 + percent.format(rangeConstructorTime/patternConstructorTime-1);
1778         if (rangeConstructorTime < 2*patternConstructorTime) {
1779             logln(message);
1780         } else {
1781             errln(message);
1782         }
1783     }
1784 
1785     NumberFormat percent = NumberFormat.getPercentInstance();
1786     {
1787         percent.setMaximumFractionDigits(2);
1788     }
1789     // ****************************************
1790     // UTILITIES
1791     // ****************************************
1792 
checkModification(UnicodeSet original, boolean isFrozen)1793     public void checkModification(UnicodeSet original, boolean isFrozen) {
1794         main:
1795             for (int i = 0; ;++i) {
1796                 UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
1797                 boolean gotException = true;
1798                 boolean checkEquals = true;
1799                 try {
1800                     switch(i) {
1801                     case 0: test.add(0); break;
1802                     case 1: test.add(0,1); break;
1803                     case 2: test.add("a"); break;
1804                     case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
1805                     case 4: test.addAll("ab"); break;
1806                     case 5: test.addAll(new UnicodeSet("[ab]")); break;
1807                     case 6: test.applyIntPropertyValue(0,0); break;
1808                     case 7: test.applyPattern("[ab]"); break;
1809                     case 8: test.applyPattern("[ab]", true); break;
1810                     case 9: test.applyPattern("[ab]", 0); break;
1811                     case 10: test.applyPropertyAlias("hex","true"); break;
1812                     case 11: test.applyPropertyAlias("hex", "true", null); break;
1813                     case 12: test.closeOver(UnicodeSet.CASE); break;
1814                     case 13: test.compact(); checkEquals = false; break;
1815                     case 14: test.complement(0); break;
1816                     case 15: test.complement(0,0); break;
1817                     case 16: test.complement("ab"); break;
1818                     case 17: test.complementAll("ab"); break;
1819                     case 18: test.complementAll(new UnicodeSet("[ab]")); break;
1820                     case 19: test.remove(' '); break;
1821                     case 20: test.remove(' ','a'); break;
1822                     case 21: test.remove(" "); break;
1823                     case 22: test.removeAll(" a"); break;
1824                     case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
1825                     case 24: test.retain(' '); break;
1826                     case 25: test.retain(' ','a'); break;
1827                     case 26: test.retain(" "); break;
1828                     case 27: test.retainAll(" a"); break;
1829                     case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
1830                     case 29: test.set(0,1); break;
1831                     case 30: test.set(new UnicodeSet("[ab]")); break;
1832 
1833                     default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
1834                     case 35: return;
1835                     }
1836                     gotException = false;
1837                 } catch (UnsupportedOperationException e) {
1838                     // do nothing
1839                 }
1840                 if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
1841                 if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
1842                 if (checkEquals) {
1843                     if (test.equals(original)) {
1844                         if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
1845                     } else { // unequal
1846                         if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
1847                     }
1848                 }
1849             }
1850     }
1851 
    // The following code block is commented out to eliminate PrettyPrinter dependencies
1853 
1854     //    String[] prettyData = {
1855     //            "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
1856     //            "[:any:]",
1857     //            "[:whitespace:]",
1858     //            "[:linebreak=AL:]",
1859     //    };
1860     //
1861     //    public void TestPrettyPrinting() {
1862     //        try{
1863     //            PrettyPrinter pp = new PrettyPrinter();
1864     //
1865     //            int i = 0;
1866     //            for (; i < prettyData.length; ++i) {
1867     //                UnicodeSet test = new UnicodeSet(prettyData[i]);
1868     //                checkPrettySet(pp, i, test);
1869     //            }
1870     //            Random random = new Random(0);
1871     //            UnicodeSet test = new UnicodeSet();
1872     //
1873     //            // To keep runtimes under control, make the number of random test cases
1874     //            //   to try depends on the test framework exhaustive setting.
1875     //            //  params.inclusions = 5:   default exhaustive value
1876     //            //  params.inclusions = 10:  max exhaustive value.
1877     //            int iterations = 50;
1878     //            if (params.inclusion > 5) {
1879     //                iterations = (params.inclusion-5) * 200;
1880     //            }
1881     //            for (; i < iterations; ++i) {
1882     //                double start = random.nextGaussian() * 0x10000;
1883     //                if (start < 0) start = - start;
1884     //                if (start > 0x10FFFF) {
1885     //                    start = 0x10FFFF;
1886     //                }
1887     //                double end = random.nextGaussian() * 0x100;
1888     //                if (end < 0) end = -end;
1889     //                end = start + end;
1890     //                if (end > 0x10FFFF) {
1891     //                    end = 0x10FFFF;
1892     //                }
1893     //                test.complement((int)start, (int)end);
1894     //                checkPrettySet(pp, i, test);
1895     //            }
1896     //        }catch(RuntimeException ex){
1897     //            warnln("Could not load Collator");
1898     //        }
1899     //    }
1900     //
1901     //    private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
1902     //        String pretty = pp.toPattern(test);
1903     //        UnicodeSet retry = new UnicodeSet(pretty);
1904     //        if (!test.equals(retry)) {
1905     //            errln(i + ". Failed test: " + test + " != " + pretty);
1906     //        } else {
1907     //            logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
1908     //        }
1909     //    }
1910     //
1911     //    private String truncate(String string) {
1912     //        if (string.length() <= 100) return string;
1913     //        return string.substring(0,97) + "...";
1914     //    }
1915 
1916     public class TokenSymbolTable implements SymbolTable {
1917         HashMap contents = new HashMap();
1918 
1919         /**
1920          * (Non-SymbolTable API) Add the given variable and value to
1921          * the table.  Variable should NOT contain leading '$'.
1922          */
add(String var, String value)1923         public void add(String var, String value) {
1924             char[] buffer = new char[value.length()];
1925             value.getChars(0, value.length(), buffer, 0);
1926             add(var, buffer);
1927         }
1928 
1929         /**
1930          * (Non-SymbolTable API) Add the given variable and value to
1931          * the table.  Variable should NOT contain leading '$'.
1932          */
add(String var, char[] body)1933         public void add(String var, char[] body) {
1934             logln("TokenSymbolTable: add \"" + var + "\" => \"" +
1935                     new String(body) + "\"");
1936             contents.put(var, body);
1937         }
1938 
1939         /* (non-Javadoc)
1940          * @see ohos.global.icu.text.SymbolTable#lookup(java.lang.String)
1941          */
1942         @Override
lookup(String s)1943         public char[] lookup(String s) {
1944             logln("TokenSymbolTable: lookup \"" + s + "\" => \"" +
1945                     new String((char[]) contents.get(s)) + "\"");
1946             return (char[])contents.get(s);
1947         }
1948 
1949         /* (non-Javadoc)
1950          * @see ohos.global.icu.text.SymbolTable#lookupMatcher(int)
1951          */
1952         @Override
lookupMatcher(int ch)1953         public UnicodeMatcher lookupMatcher(int ch) {
1954             return null;
1955         }
1956 
1957         /* (non-Javadoc)
1958          * @see ohos.global.icu.text.SymbolTable#parseReference(java.lang.String,
1959      java.text.ParsePosition, int)
1960          */
1961         @Override
parseReference(String text, ParsePosition pos, int limit)1962         public String parseReference(String text, ParsePosition pos, int
1963                 limit) {
1964             int cp;
1965             int start = pos.getIndex();
1966             int i;
1967             for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
1968                 cp = UTF16.charAt(text, i);
1969                 if (!ohos.global.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
1970                     break;
1971                 }
1972             }
1973             logln("TokenSymbolTable: parse \"" + text + "\" from " +
1974                     start + " to " + i +
1975                     " => \"" + text.substring(start,i) + "\"");
1976             pos.setIndex(i);
1977             return text.substring(start,i);
1978         }
1979     }
1980 
1981     @Test
TestSurrogate()1982     public void TestSurrogate() {
1983         String DATA[] = {
1984                 // These should all behave identically
1985                 "[abc\\uD800\\uDC00]",
1986                 "[abc\uD800\uDC00]",
1987                 "[abc\\U00010000]",
1988         };
1989         for (int i=0; i<DATA.length; ++i) {
1990             logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
1991             UnicodeSet set = new UnicodeSet(DATA[i]);
1992             expectContainment(set,
1993                     CharsToUnicodeString("abc\\U00010000"),
1994                     "\uD800;\uDC00"); // split apart surrogate-pair
1995             if (set.size() != 4) {
1996                 errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " +
1997                         set.size() + ", expected 4"));
1998             }
1999         }
2000     }
2001 
2002     @Test
TestContains()2003     public void TestContains() {
2004         int limit = 256; // combinations to test
2005         for (int i = 0; i < limit; ++i) {
2006             logln("Trying: " + i);
2007             UnicodeSet x = bitsToSet(i);
2008             for (int j = 0; j < limit; ++j) {
2009                 UnicodeSet y = bitsToSet(j);
2010                 boolean containsNone = (i & j) == 0;
2011                 boolean containsAll = (i & j) == j;
2012                 boolean equals = i == j;
2013                 if (containsNone != x.containsNone(y)) {
2014                     x.containsNone(y); // repeat for debugging
2015                     errln("FAILED: " + x +  " containsSome " + y);
2016                 }
2017                 if (containsAll != x.containsAll(y)) {
2018                     x.containsAll(y); // repeat for debugging
2019                     errln("FAILED: " + x +  " containsAll " + y);
2020                 }
2021                 if (equals != x.equals(y)) {
2022                     x.equals(y); // repeat for debugging
2023                     errln("FAILED: " + x +  " equals " + y);
2024                 }
2025             }
2026         }
2027     }
2028 
_testComplement(int a)2029     void _testComplement(int a) {
2030         UnicodeSet x = bitsToSet(a);
2031         UnicodeSet z = bitsToSet(a);
2032         z.complement();
2033         int c = setToBits(z);
2034         if (c != (~a)) {
2035             errln("FAILED: add: ~" + x +  " != " + z);
2036             errln("FAILED: add: ~" + a + " != " + c);
2037         }
2038         checkCanonicalRep(z, "complement " + a);
2039     }
2040 
_testAdd(int a, int b)2041     void _testAdd(int a, int b) {
2042         UnicodeSet x = bitsToSet(a);
2043         UnicodeSet y = bitsToSet(b);
2044         UnicodeSet z = bitsToSet(a);
2045         z.addAll(y);
2046         int c = setToBits(z);
2047         if (c != (a | b)) {
2048             errln(Utility.escape("FAILED: add: " + x + " | " + y + " != " + z));
2049             errln("FAILED: add: " + a + " | " + b + " != " + c);
2050         }
2051         checkCanonicalRep(z, "add " + a + "," + b);
2052     }
2053 
_testRetain(int a, int b)2054     void _testRetain(int a, int b) {
2055         UnicodeSet x = bitsToSet(a);
2056         UnicodeSet y = bitsToSet(b);
2057         UnicodeSet z = bitsToSet(a);
2058         z.retainAll(y);
2059         int c = setToBits(z);
2060         if (c != (a & b)) {
2061             errln("FAILED: retain: " + x + " & " + y + " != " + z);
2062             errln("FAILED: retain: " + a + " & " + b + " != " + c);
2063         }
2064         checkCanonicalRep(z, "retain " + a + "," + b);
2065     }
2066 
_testRemove(int a, int b)2067     void _testRemove(int a, int b) {
2068         UnicodeSet x = bitsToSet(a);
2069         UnicodeSet y = bitsToSet(b);
2070         UnicodeSet z = bitsToSet(a);
2071         z.removeAll(y);
2072         int c = setToBits(z);
2073         if (c != (a &~ b)) {
2074             errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
2075             errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
2076         }
2077         checkCanonicalRep(z, "remove " + a + "," + b);
2078     }
2079 
_testXor(int a, int b)2080     void _testXor(int a, int b) {
2081         UnicodeSet x = bitsToSet(a);
2082         UnicodeSet y = bitsToSet(b);
2083         UnicodeSet z = bitsToSet(a);
2084         z.complementAll(y);
2085         int c = setToBits(z);
2086         if (c != (a ^ b)) {
2087             errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
2088             errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
2089         }
2090         checkCanonicalRep(z, "complement " + a + "," + b);
2091     }
2092 
2093     /**
2094      * Check that ranges are monotonically increasing and non-
2095      * overlapping.
2096      */
checkCanonicalRep(UnicodeSet set, String msg)2097     void checkCanonicalRep(UnicodeSet set, String msg) {
2098         int n = set.getRangeCount();
2099         if (n < 0) {
2100             errln("FAIL result of " + msg +
2101                     ": range count should be >= 0 but is " +
2102                     n + " for " + Utility.escape(set.toString()));
2103             return;
2104         }
2105         int last = 0;
2106         for (int i=0; i<n; ++i) {
2107             int start = set.getRangeStart(i);
2108             int end = set.getRangeEnd(i);
2109             if (start > end) {
2110                 errln("FAIL result of " + msg +
2111                         ": range " + (i+1) +
2112                         " start > end: " + start + ", " + end +
2113                         " for " + Utility.escape(set.toString()));
2114             }
2115             if (i > 0 && start <= last) {
2116                 errln("FAIL result of " + msg +
2117                         ": range " + (i+1) +
2118                         " overlaps previous range: " + start + ", " + end +
2119                         " for " + Utility.escape(set.toString()));
2120             }
2121             last = end;
2122         }
2123     }
2124 
2125     /**
2126      * Convert a bitmask to a UnicodeSet.
2127      */
bitsToSet(int a)2128     UnicodeSet bitsToSet(int a) {
2129         UnicodeSet result = new UnicodeSet();
2130         for (int i = 0; i < 32; ++i) {
2131             if ((a & (1<<i)) != 0) {
2132                 result.add((char)i,(char)i);
2133             }
2134         }
2135 
2136         return result;
2137     }
2138 
2139     /**
2140      * Convert a UnicodeSet to a bitmask.  Only the characters
2141      * U+0000 to U+0020 are represented in the bitmask.
2142      */
setToBits(UnicodeSet x)2143     static int setToBits(UnicodeSet x) {
2144         int result = 0;
2145         for (int i = 0; i < 32; ++i) {
2146             if (x.contains((char)i)) {
2147                 result |= (1<<i);
2148             }
2149         }
2150         return result;
2151     }
2152 
2153     /**
2154      * Return the representation of an inversion list based UnicodeSet
2155      * as a pairs list.  Ranges are listed in ascending Unicode order.
2156      * For example, the set [a-zA-M3] is represented as "33AMaz".
2157      */
getPairs(UnicodeSet set)2158     static String getPairs(UnicodeSet set) {
2159         StringBuffer pairs = new StringBuffer();
2160         for (int i=0; i<set.getRangeCount(); ++i) {
2161             int start = set.getRangeStart(i);
2162             int end = set.getRangeEnd(i);
2163             if (end > 0xFFFF) {
2164                 end = 0xFFFF;
2165                 i = set.getRangeCount(); // Should be unnecessary
2166             }
2167             pairs.append((char)start).append((char)end);
2168         }
2169         return pairs.toString();
2170     }
2171 
2172     /**
2173      * Test function. Make sure that the sets have the right relation
2174      */
2175 
expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message)2176     void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
2177         int relation = ((Integer) relationObj).intValue();
2178         UnicodeSet set1 = (UnicodeSet) set1Obj;
2179         UnicodeSet set2 = (UnicodeSet) set2Obj;
2180 
2181         // by-the-by, check the iterator
2182         checkRoundTrip(set1);
2183         checkRoundTrip(set2);
2184 
2185         boolean contains = set1.containsAll(set2);
2186         boolean isContained = set2.containsAll(set1);
2187         boolean disjoint = set1.containsNone(set2);
2188         boolean equals = set1.equals(set2);
2189 
2190         UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
2191         UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
2192         UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
2193 
2194         // test basic properties
2195 
2196         if (contains != (intersection.size() == set2.size())) {
2197             errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
2198         }
2199 
2200         if (contains != (intersection.equals(set2))) {
2201             errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
2202         }
2203 
2204         if (isContained != (intersection.size() == set1.size())) {
2205             errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
2206         }
2207 
2208         if (isContained != (intersection.equals(set1))) {
2209             errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
2210         }
2211 
2212         if ((contains && isContained) != equals) {
2213             errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
2214         }
2215 
2216         if (disjoint != (intersection.size() == 0)) {
2217             errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
2218         }
2219 
2220         // Now see if the expected relation is true
2221         int status = (minus12.size() != 0 ? 4 : 0)
2222                 | (intersection.size() != 0 ? 2 : 0)
2223                 | (minus21.size() != 0 ? 1 : 0);
2224 
2225         if (status != relation) {
2226             errln("FAIL relation incorrect" + message
2227                     + "; desired = " + RELATION_NAME[relation]
2228                             + "; found = " + RELATION_NAME[status]
2229                                     + "; set1 = " + set1.toPattern(true)
2230                                     + "; set2 = " + set2.toPattern(true)
2231                     );
2232         }
2233     }
2234 
2235     /**
2236      * Basic consistency check for a few items.
2237      * That the iterator works, and that we can create a pattern and
2238      * get the same thing back
2239      */
2240 
checkRoundTrip(UnicodeSet s)2241     void checkRoundTrip(UnicodeSet s) {
2242         String pat = s.toPattern(false);
2243         UnicodeSet t = copyWithIterator(s, false);
2244         checkEqual(s, t, "iterator roundtrip");
2245 
2246         t = copyWithIterator(s, true); // try range
2247         checkEqual(s, t, "iterator roundtrip");
2248 
2249         t = new UnicodeSet(pat);
2250         checkEqual(s, t, "toPattern(false)");
2251 
2252         pat = s.toPattern(true);
2253         t = new UnicodeSet(pat);
2254         checkEqual(s, t, "toPattern(true)");
2255     }
2256 
copyWithIterator(UnicodeSet s, boolean withRange)2257     UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
2258         UnicodeSet t = new UnicodeSet();
2259         UnicodeSetIterator it = new UnicodeSetIterator(s);
2260         if (withRange) {
2261             while (it.nextRange()) {
2262                 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
2263                     t.add(it.string);
2264                 } else {
2265                     t.add(it.codepoint, it.codepointEnd);
2266                 }
2267             }
2268         } else {
2269             while (it.next()) {
2270                 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
2271                     t.add(it.string);
2272                 } else {
2273                     t.add(it.codepoint);
2274                 }
2275             }
2276         }
2277         return t;
2278     }
2279 
checkEqual(UnicodeSet s, UnicodeSet t, String message)2280     boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
2281         if (!s.equals(t)) {
2282             errln("FAIL " + message
2283                     + "; source = " + s.toPattern(true)
2284                     + "; result = " + t.toPattern(true)
2285                     );
2286             return false;
2287         }
2288         return true;
2289     }
2290 
expectEqual(String name, String pat1, String pat2)2291     void expectEqual(String name, String pat1, String pat2) {
2292         UnicodeSet set1, set2;
2293         try {
2294             set1 = new UnicodeSet(pat1);
2295             set2 = new UnicodeSet(pat2);
2296         } catch (IllegalArgumentException e) {
2297             errln("FAIL: Couldn't create UnicodeSet from pattern for \"" + name + "\": " + e.getMessage());
2298             return;
2299         }
2300         if(!set1.equals(set2)) {
2301             errln("FAIL: Sets built from patterns differ for \"" + name + "\"");
2302         }
2303     }
2304 
2305     /**
2306      * Expect the given set to contain the characters in charsIn and
2307      * to not contain those in charsOut.
2308      */
expectContainment(String pat, String charsIn, String charsOut)2309     void expectContainment(String pat, String charsIn, String charsOut) {
2310         UnicodeSet set;
2311         try {
2312             set = new UnicodeSet(pat);
2313         } catch (IllegalArgumentException e) {
2314             errln("FAIL: Couldn't create UnicodeSet from pattern \"" +
2315                     pat + "\": " + e.getMessage());
2316             return;
2317         }
2318         expectContainment(set, charsIn, charsOut);
2319     }
2320 
2321     /**
2322      * Expect the given set to contain the characters in charsIn and
2323      * to not contain those in charsOut.
2324      */
expectContainment(UnicodeSet set, String charsIn, String charsOut)2325     void expectContainment(UnicodeSet set, String charsIn, String charsOut) {
2326         StringBuffer bad = new StringBuffer();
2327         if (charsIn != null) {
2328             charsIn = Utility.unescape(charsIn);
2329             for (int i=0; i<charsIn.length(); ) {
2330                 int c = UTF16.charAt(charsIn,i);
2331                 i += UTF16.getCharCount(c);
2332                 if (!set.contains(c)) {
2333                     UTF16.append(bad,c);
2334                 }
2335             }
2336             if (bad.length() > 0) {
2337                 errln(Utility.escape("FAIL: set " + set + " does not contain " + bad +
2338                         ", expected containment of " + charsIn));
2339             } else {
2340                 logln(Utility.escape("Ok: set " + set + " contains " + charsIn));
2341             }
2342         }
2343         if (charsOut != null) {
2344             charsOut = Utility.unescape(charsOut);
2345             bad.setLength(0);
2346             for (int i=0; i<charsOut.length(); ) {
2347                 int c = UTF16.charAt(charsOut,i);
2348                 i += UTF16.getCharCount(c);
2349                 if (set.contains(c)) {
2350                     UTF16.append(bad, c);
2351                 }
2352             }
2353             if (bad.length() > 0) {
2354                 errln(Utility.escape("FAIL: set " + set + " contains " + bad +
2355                         ", expected non-containment of " + charsOut));
2356             } else {
2357                 logln(Utility.escape("Ok: set " + set + " does not contain " + charsOut));
2358             }
2359         }
2360     }
2361 
expectPattern(UnicodeSet set, String pattern, String expectedPairs)2362     void expectPattern(UnicodeSet set,
2363             String pattern,
2364             String expectedPairs) {
2365         set.applyPattern(pattern);
2366         if (!getPairs(set).equals(expectedPairs)) {
2367             errln("FAIL: applyPattern(\"" + pattern +
2368                     "\") => pairs \"" +
2369                     Utility.escape(getPairs(set)) + "\", expected \"" +
2370                     Utility.escape(expectedPairs) + "\"");
2371         } else {
2372             logln("Ok:   applyPattern(\"" + pattern +
2373                     "\") => pairs \"" +
2374                     Utility.escape(getPairs(set)) + "\"");
2375         }
2376     }
2377 
expectToPattern(UnicodeSet set, String expPat, String[] expStrings)2378     void expectToPattern(UnicodeSet set,
2379             String expPat,
2380             String[] expStrings) {
2381         String pat = set.toPattern(true);
2382         if (pat.equals(expPat)) {
2383             logln("Ok:   toPattern() => \"" + pat + "\"");
2384         } else {
2385             errln("FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
2386             return;
2387         }
2388         if (expStrings == null) {
2389             return;
2390         }
2391         boolean in = true;
2392         for (int i=0; i<expStrings.length; ++i) {
2393             if (expStrings[i] == NOT) { // sic; pointer comparison
2394                 in = false;
2395                 continue;
2396             }
2397             boolean contained = set.contains(expStrings[i]);
2398             if (contained == in) {
2399                 logln("Ok: " + expPat +
2400                         (contained ? " contains {" : " does not contain {") +
2401                         Utility.escape(expStrings[i]) + "}");
2402             } else {
2403                 errln("FAIL: " + expPat +
2404                         (contained ? " contains {" : " does not contain {") +
2405                         Utility.escape(expStrings[i]) + "}");
2406             }
2407         }
2408     }
2409 
expectPairs(UnicodeSet set, String expectedPairs)2410     void expectPairs(UnicodeSet set, String expectedPairs) {
2411         if (!getPairs(set).equals(expectedPairs)) {
2412             errln("FAIL: Expected pair list \"" +
2413                     Utility.escape(expectedPairs) + "\", got \"" +
2414                     Utility.escape(getPairs(set)) + "\"");
2415         }
2416     }
CharsToUnicodeString(String s)2417     static final String CharsToUnicodeString(String s) {
2418         return Utility.unescape(s);
2419     }
2420 
2421     /* Test the method public UnicodeSet getSet() */
2422     @Test
TestGetSet()2423     public void TestGetSet() {
2424         UnicodeSetIterator us = new UnicodeSetIterator();
2425         try {
2426             us.getSet();
2427         } catch (Exception e) {
2428             errln("UnicodeSetIterator.getSet() was not suppose to given an " + "an exception.");
2429         }
2430     }
2431 
2432     /* Tests the method public UnicodeSet add(Collection<?> source) */
2433     @Test
TestAddCollection()2434     public void TestAddCollection() {
2435         UnicodeSet us = new UnicodeSet();
2436         Collection<?> s = null;
2437         try {
2438             us.add(s);
2439             errln("UnicodeSet.add(Collection<?>) was suppose to return an exception for a null parameter.");
2440         } catch (Exception e) {
2441         }
2442     }
2443 
2444     @Test
TestConstants()2445     public void TestConstants() {
2446         assertEquals("Empty", new UnicodeSet(), UnicodeSet.EMPTY);
2447         assertEquals("All", new UnicodeSet(0,0x10FFFF), UnicodeSet.ALL_CODE_POINTS);
2448     }
2449 
2450     @Test
TestIteration()2451     public void TestIteration() {
2452         UnicodeSet us1 = new UnicodeSet("[abcM{xy}]");
2453         assertEquals("", "M, a-c", CollectionUtilities.join(us1.ranges(), ", "));
2454 
2455         // Sample code
2456         for (@SuppressWarnings("unused") EntryRange range : us1.ranges()) {
2457             // do something with code points between range.codepointEnd and range.codepointEnd;
2458         }
2459         for (@SuppressWarnings("unused") String s : us1.strings()) {
2460             // do something with each string;
2461         }
2462 
2463         String[] tests = {
2464                 "[M-Qzab{XY}{ZW}]",
2465                 "[]",
2466                 "[a]",
2467                 "[a-c]",
2468                 "[{XY}]",
2469         };
2470         for (String test : tests) {
2471             UnicodeSet us = new UnicodeSet(test);
2472             UnicodeSetIterator it = new UnicodeSetIterator(us);
2473             for (EntryRange range : us.ranges()) {
2474                 final String title = range.toString();
2475                 logln(title);
2476                 it.nextRange();
2477                 assertEquals(title, it.codepoint, range.codepoint);
2478                 assertEquals(title, it.codepointEnd, range.codepointEnd);
2479             }
2480             for (String s : us.strings()) {
2481                 it.nextRange();
2482                 assertEquals("strings", it.string, s);
2483             }
2484             assertFalse("", it.next());
2485         }
2486     }
2487 
2488     @Test
TestReplaceAndDelete()2489     public void TestReplaceAndDelete() {
2490         UnicodeSetSpanner m;
2491 
2492         m = new UnicodeSetSpanner(new UnicodeSet("[._]"));
2493         assertEquals("", "abc", m.deleteFrom("_._a_._b_._c_._"));
2494         assertEquals("", "_.__.__.__._", m.deleteFrom("_._a_._b_._c_._", SpanCondition.NOT_CONTAINED));
2495 
2496         assertEquals("", "a_._b_._c", m.trim("_._a_._b_._c_._"));
2497         assertEquals("", "a_._b_._c_._", m.trim("_._a_._b_._c_._", TrimOption.LEADING));
2498         assertEquals("", "_._a_._b_._c", m.trim("_._a_._b_._c_._", TrimOption.TRAILING));
2499 
2500         assertEquals("", "a??b??c", m.replaceFrom("a_._b_._c", "??", CountMethod.WHOLE_SPAN));
2501         assertEquals("", "a??b??c", m.replaceFrom(m.trim("_._a_._b_._c_._"), "??", CountMethod.WHOLE_SPAN));
2502         assertEquals("", "XYXYXYaXYXYXYbXYXYXYcXYXYXY", m.replaceFrom("_._a_._b_._c_._", "XY"));
2503         assertEquals("", "XYaXYbXYcXY", m.replaceFrom("_._a_._b_._c_._", "XY", CountMethod.WHOLE_SPAN));
2504 
2505         m = new UnicodeSetSpanner(new UnicodeSet("\\p{uppercase}"));
2506         assertEquals("", "TQBF", m.deleteFrom("The Quick Brown Fox.", SpanCondition.NOT_CONTAINED));
2507 
2508         m = new UnicodeSetSpanner(m.getUnicodeSet().addAll(new UnicodeSet("\\p{lowercase}")));
2509         assertEquals("", "TheQuickBrownFox", m.deleteFrom("The Quick Brown Fox.", SpanCondition.NOT_CONTAINED));
2510 
2511         m = new UnicodeSetSpanner(new UnicodeSet("[{ab}]"));
2512         assertEquals("", "XXc acb", m.replaceFrom("ababc acb", "X"));
2513         assertEquals("", "Xc acb", m.replaceFrom("ababc acb", "X", CountMethod.WHOLE_SPAN));
2514         assertEquals("", "ababX", m.replaceFrom("ababc acb", "X", CountMethod.WHOLE_SPAN, SpanCondition.NOT_CONTAINED));
2515     }
2516 
2517     @Test
TestCodePoints()2518     public void TestCodePoints() {
2519         // test supplemental code points and strings clusters
2520         checkCodePoints("x\u0308", "z\u0308", CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, null, 1);
2521         checkCodePoints("��", "��", CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, null, 1);
2522         checkCodePoints("��", "��", CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, null, 1);
2523     }
2524 
checkCodePoints(String a, String b, CountMethod quantifier, SpanCondition spanCondition, String expectedReplaced, int expectedCount)2525     private void checkCodePoints(String a, String b, CountMethod quantifier, SpanCondition spanCondition,
2526             String expectedReplaced, int expectedCount) {
2527         final String ab = a+b;
2528         UnicodeSetSpanner m = new UnicodeSetSpanner(new UnicodeSet("[{" + a + "}]"));
2529         assertEquals("new UnicodeSetSpanner(\"[{" + a + "}]\").countIn(\"" + ab + "\")",
2530                 expectedCount,
2531                 callCountIn(m, ab, quantifier, spanCondition)
2532                 );
2533 
2534         if (expectedReplaced == null) {
2535             expectedReplaced = "-" + b;
2536         }
2537         assertEquals("new UnicodeSetSpanner(\"[{" + a + "}]\").replaceFrom(\"" + ab + "\", \"-\")",
2538                 expectedReplaced, m.replaceFrom(ab, "-", quantifier));
2539     }
2540 
2541     @Test
TestCountIn()2542     public void TestCountIn() {
2543         UnicodeSetSpanner m = new UnicodeSetSpanner(new UnicodeSet("[ab]"));
2544         checkCountIn(m, CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, "abc", 2);
2545         checkCountIn(m, CountMethod.WHOLE_SPAN, SpanCondition.SIMPLE, "abc", 1);
2546         checkCountIn(m, CountMethod.MIN_ELEMENTS, SpanCondition.NOT_CONTAINED, "acccb", 3);
2547     }
2548 
checkCountIn(UnicodeSetSpanner m, CountMethod countMethod, SpanCondition spanCondition, String target, int expected)2549     public void checkCountIn(UnicodeSetSpanner m, CountMethod countMethod, SpanCondition spanCondition, String target, int expected) {
2550         final String message = "countIn " + countMethod + ", " + spanCondition;
2551         assertEquals(message, callCountIn(m, target, countMethod, spanCondition), expected);
2552     }
2553 
callCountIn(UnicodeSetSpanner m, final String ab, CountMethod countMethod, SpanCondition spanCondition)2554     public int callCountIn(UnicodeSetSpanner m, final String ab, CountMethod countMethod, SpanCondition spanCondition) {
2555         return spanCondition != SpanCondition.SIMPLE ? m.countIn(ab, countMethod, spanCondition)
2556                 : countMethod != CountMethod.MIN_ELEMENTS ? m.countIn(ab, countMethod)
2557                         : m.countIn(ab);
2558     }
2559 
2560     @Test
testForSpanGaps()2561     public void testForSpanGaps() {
2562         String[] items = {"a", "b", "c", "{ab}", "{bc}", "{cd}", "{abc}", "{bcd}"};
2563         final int limit = 1<<items.length;
2564         // build long string for testing
2565         StringBuilder longBuffer = new StringBuilder();
2566         for (int i = 1; i < limit; ++i) {
2567             longBuffer.append("x");
2568             longBuffer.append(getCombinations(items, i));
2569         }
2570         String longString = longBuffer.toString();
2571         longString = longString.replace("{","").replace("}","");
2572 
2573         long start = System.nanoTime();
2574         for (int i = 1; i < limit; ++i) {
2575             UnicodeSet us = new UnicodeSet("[" + getCombinations(items, i) + "]");
2576             int problemFound = checkSpan(longString, us, SpanCondition.SIMPLE);
2577             if (problemFound >= 0) {
2578                 assertEquals("Testing " + longString + ", found gap at", -1, problemFound);
2579                 break;
2580             }
2581         }
2582         long end = System.nanoTime();
2583         logln("Time for SIMPLE   :\t" + (end-start));
2584         start = System.nanoTime();
2585         for (int i = 1; i < limit; ++i) {
2586             UnicodeSet us = new UnicodeSet("[" + getCombinations(items, i) + "]");
2587             int problemFound = checkSpan(longString, us, SpanCondition.CONTAINED);
2588             if (problemFound >= 0) {
2589                 assertEquals("Testing " + longString + ", found gap at", -1, problemFound);
2590                 break;
2591             }
2592         }
2593         end = System.nanoTime();
2594         logln("Time for CONTAINED:\t" + (end-start));
2595     }
2596 
2597     /**
2598      * Check that there are no gaps, when we alternate spanning. That is, there
2599      * should only be a zero length span at the very start.
2600      */
checkSpan(String longString, UnicodeSet us, SpanCondition spanCondition)2601     private int checkSpan(String longString, UnicodeSet us, SpanCondition spanCondition) {
2602         int start = 0;
2603         while (start < longString.length()) {
2604             int limit = us.span(longString, start, spanCondition);
2605             if (limit == longString.length()) {
2606                 break;
2607             } else if (limit == start && start != 0) {
2608                 return start;
2609             }
2610             start = limit;
2611             limit = us.span(longString, start, SpanCondition.NOT_CONTAINED);
2612             if (limit == start) {
2613                 return start;
2614             }
2615             start = limit;
2616         }
2617         return -1; // all ok
2618     }
2619 
getCombinations(String[] items, int bitset)2620     private String getCombinations(String[] items, int bitset) {
2621         StringBuilder result = new StringBuilder();
2622         for (int i = 0; bitset != 0; ++i) {
2623             int other = bitset & (1 << i);
2624             if (other != 0) {
2625                 bitset ^= other;
2626                 result.append(items[i]);
2627             }
2628         }
2629         return result.toString();
2630     }
2631 
2632     @Test
TestCharSequenceArgs()2633     public void TestCharSequenceArgs() {
2634         // statics
2635         assertEquals("CharSequence from", new UnicodeSet("[{abc}]"), UnicodeSet.from(new StringBuilder("abc")));
2636         assertEquals("CharSequence fromAll", new UnicodeSet("[a-c]"), UnicodeSet.fromAll(new StringBuilder("abc")));
2637         assertEquals("CharSequence compare", 1.0f, Math.signum(UnicodeSet.compare(new StringBuilder("abc"), 0x61)));
2638         assertEquals("CharSequence compare", -1.0f, Math.signum(UnicodeSet.compare(0x61, new StringBuilder("abc"))));
2639         assertEquals("CharSequence compare", 0.0f, Math.signum(UnicodeSet.compare(new StringBuilder("a"), 0x61)));
2640         assertEquals("CharSequence compare", 0.0f, Math.signum(UnicodeSet.compare(0x61, new StringBuilder("a"))));
2641         assertEquals("CharSequence getSingleCodePoint", 0x1F466, UnicodeSet.getSingleCodePoint(new StringBuilder("��")));
2642 
2643         // iterables/arrays
2644         Iterable<StringBuilder> iterable = Arrays.asList(new StringBuilder("A"), new StringBuilder("B"));
2645         assertEquals("CharSequence containsAll", true, new UnicodeSet("[AB]").containsAll(iterable));
2646         assertEquals("CharSequence containsAll", false, new UnicodeSet("[a-cA]").containsAll(iterable));
2647         assertEquals("CharSequence containsNone", true, new UnicodeSet("[a-c]").containsNone(iterable) );
2648         assertEquals("CharSequence containsNone", false, new UnicodeSet("[a-cA]").containsNone(iterable) );
2649         assertEquals("CharSequence containsSome", true, new UnicodeSet("[a-cA]").containsSome(iterable) );
2650         assertEquals("CharSequence containsSome", false, new UnicodeSet("[a-c]").containsSome(iterable) );
2651         assertEquals("CharSequence addAll", new UnicodeSet("[a-cAB]"), new UnicodeSet("[a-cA]").addAll(new StringBuilder("A"), new StringBuilder("B")) );
2652         assertEquals("CharSequence removeAll", new UnicodeSet("[a-c]"), new UnicodeSet("[a-cA]").removeAll( iterable) );
2653         assertEquals("CharSequence retainAll", new UnicodeSet("[A]"), new UnicodeSet("[a-cA]").retainAll( iterable) );
2654 
2655         // UnicodeSet results
2656         assertEquals("CharSequence add", new UnicodeSet("[Aa-c{abc}{qr}]"), new UnicodeSet("[a-cA{qr}]").add(new StringBuilder("abc")) );
2657         assertEquals("CharSequence retain", new UnicodeSet("[{abc}]"), new UnicodeSet("[a-cA{abc}{qr}]").retain(new StringBuilder("abc")) );
2658         assertEquals("CharSequence remove", new UnicodeSet("[Aa-c{qr}]"), new UnicodeSet("[a-cA{abc}{qr}]").remove(new StringBuilder("abc")) );
2659         assertEquals("CharSequence complement", new UnicodeSet("[Aa-c{qr}]"), new UnicodeSet("[a-cA{abc}{qr}]").complement(new StringBuilder("abc")) );
2660         assertEquals("CharSequence complement", new UnicodeSet("[Aa-c{abc}{qr}]"), new UnicodeSet("[a-cA{qr}]").complement(new StringBuilder("abc")) );
2661 
2662         assertEquals("CharSequence addAll", new UnicodeSet("[a-cABC]"), new UnicodeSet("[a-cA]").addAll(new StringBuilder("ABC")) );
2663         assertEquals("CharSequence retainAll", new UnicodeSet("[a-c]"), new UnicodeSet("[a-cA]").retainAll(new StringBuilder("abcB")) );
2664         assertEquals("CharSequence removeAll", new UnicodeSet("[Aab]"), new UnicodeSet("[a-cA]").removeAll(new StringBuilder("cC")) );
2665         assertEquals("CharSequence complementAll", new UnicodeSet("[ABbc]"), new UnicodeSet("[a-cA]").complementAll(new StringBuilder("aB")) );
2666 
2667         // containment
2668         assertEquals("CharSequence contains", true, new UnicodeSet("[a-cA{ab}]"). contains(new StringBuilder("ab")) );
2669         assertEquals("CharSequence containsNone", false, new UnicodeSet("[a-cA]"). containsNone(new StringBuilder("ab"))  );
2670         assertEquals("CharSequence containsSome", true, new UnicodeSet("[a-cA{ab}]"). containsSome(new StringBuilder("ab"))  );
2671 
2672         // spanning
2673         assertEquals("CharSequence span", 3, new UnicodeSet("[a-cA]"). span(new StringBuilder("abc"), SpanCondition.SIMPLE) );
2674         assertEquals("CharSequence span", 3, new UnicodeSet("[a-cA]"). span(new StringBuilder("abc"), 1, SpanCondition.SIMPLE) );
2675         assertEquals("CharSequence spanBack", 0, new UnicodeSet("[a-cA]"). spanBack(new StringBuilder("abc"), SpanCondition.SIMPLE) );
2676         assertEquals("CharSequence spanBack", 0, new UnicodeSet("[a-cA]"). spanBack(new StringBuilder("abc"), 1, SpanCondition.SIMPLE) );
2677 
2678         // internal
2679         OutputInt outCount = new OutputInt();
2680         assertEquals("CharSequence matchesAt", 2, new UnicodeSet("[a-cA]"). matchesAt(new StringBuilder("abc"), 1) );
2681         assertEquals("CharSequence spanAndCount", 3, new UnicodeSet("[a-cA]"). spanAndCount(new StringBuilder("abc"), 1, SpanCondition.SIMPLE, outCount ) );
2682         assertEquals("CharSequence findIn", 3, new UnicodeSet("[a-cA]"). findIn(new StringBuilder("abc"), 1, true) );
2683         assertEquals("CharSequence findLastIn", -1, new UnicodeSet("[a-cA]"). findLastIn(new StringBuilder("abc"), 1, true) );
2684         assertEquals("CharSequence add", "c", new UnicodeSet("[abA]"). stripFrom(new StringBuilder("abc"), true));
2685     }
2686 
2687     @Test
TestAStringRange()2688     public void TestAStringRange() {
2689         String[][] tests = {
2690                 {"[{ax}-{bz}]", "[{ax}{ay}{az}{bx}{by}{bz}]"},
2691                 {"[{a}-{c}]", "[a-c]"},
2692                 //{"[a-{c}]", "[a-c]"}, // don't handle these yet: enable once we do
2693                 //{"[{a}-c]", "[a-c]"}, // don't handle these yet: enable once we do
2694                 {"[{ax}-{by}-{cz}]", "Error: '-' not after char, string, or set at \"[{ax}-{by}-{|cz}]\""},
2695                 {"[{a}-{bz}]", "Error: Range must have equal-length strings at \"[{a}-{bz}|]\""},
2696                 {"[{ax}-{b}]", "Error: Range must have equal-length strings at \"[{ax}-{b}|]\""},
2697                 {"[{ax}-bz]", "Error: Invalid range at \"[{ax}-b|z]\""},
2698                 {"[ax-{bz}]", "Error: Range must have 2 valid strings at \"[ax-{bz}|]\""},
2699                 {"[{bx}-{az}]", "Error: Range must have xᵢ ≤ yᵢ for each index i at \"[{bx}-{az}|]\""},
2700         };
2701         int i = 0;
2702         for (String[] test : tests) {
2703             String expected = test[1];
2704             if (test[1].startsWith("[")) {
2705                 expected = new UnicodeSet(expected).toPattern(false);
2706             }
2707             String actual;
2708             try {
2709                 actual = new UnicodeSet(test[0]).toPattern(false);
2710             } catch (Exception e) {
2711                 actual = e.getMessage();
2712             }
2713             assertEquals("StringRange " + i, expected, actual);
2714             ++i;
2715         }
2716     }
2717 
2718     @Test
testAddAll_CharacterSequences()2719     public void testAddAll_CharacterSequences() {
2720         UnicodeSet unicodeSet = new UnicodeSet();
2721         unicodeSet.addAll("a", "b");
2722         assertEquals("Wrong UnicodeSet pattern", "[ab]", unicodeSet.toPattern(true));
2723         unicodeSet.addAll("b", "x");
2724         assertEquals("Wrong UnicodeSet pattern", "[abx]", unicodeSet.toPattern(true));
2725         unicodeSet.addAll(new CharSequence[]{new StringBuilder("foo"), new StringBuffer("bar")});
2726         assertEquals("Wrong UnicodeSet pattern", "[abx{bar}{foo}]", unicodeSet.toPattern(true));
2727     }
2728 
2729     @Test
testCompareTo()2730     public void testCompareTo() {
2731         Set<String> test_set = Collections.emptySet();
2732         assertEquals("UnicodeSet not empty", 0, UnicodeSet.EMPTY.compareTo(test_set));
2733         assertEquals("UnicodeSet comparison wrong",
2734                 0, UnicodeSet.fromAll("a").compareTo(Collections.singleton("a")));
2735 
2736         // Longer is bigger
2737         assertTrue("UnicodeSet is empty",
2738                 UnicodeSet.ALL_CODE_POINTS.compareTo(test_set) > 0);
2739         assertTrue("UnicodeSet not empty",
2740                 UnicodeSet.EMPTY.compareTo(Collections.singleton("a")) < 0);
2741 
2742         // Equal length compares on first difference.
2743         assertTrue("UnicodeSet comparison wrong",
2744                 UnicodeSet.fromAll("a").compareTo(Collections.singleton("b")) < 0);
2745         assertTrue("UnicodeSet comparison wrong",
2746                 UnicodeSet.fromAll("ab").compareTo(Arrays.asList("a", "c")) < 0);
2747         assertTrue("UnicodeSet comparison wrong",
2748                 UnicodeSet.fromAll("b").compareTo(Collections.singleton("a")) > 0);
2749     }
2750 
2751     @Test
TestUnusedCcc()2752     public void TestUnusedCcc() {
2753         // All numeric ccc values 0..255 are valid, but many are unused.
2754         UnicodeSet ccc2 = new UnicodeSet("[:ccc=2:]");
2755         assertTrue("[:ccc=2:] -> empty set", ccc2.isEmpty());
2756 
2757         UnicodeSet ccc255 = new UnicodeSet("[:ccc=255:]");
2758         assertTrue("[:ccc=255:] -> empty set", ccc255.isEmpty());
2759 
2760         // Non-integer values and values outside 0..255 are invalid.
2761         try {
2762             new UnicodeSet("[:ccc=-1:]");
2763             fail("[:ccc=-1:] -> illegal argument");
2764         } catch (IllegalArgumentException expected) {
2765         }
2766 
2767         try {
2768             new UnicodeSet("[:ccc=256:]");
2769             fail("[:ccc=256:] -> illegal argument");
2770         } catch (IllegalArgumentException expected) {
2771         }
2772 
2773         try {
2774             new UnicodeSet("[:ccc=1.1:]");
2775             fail("[:ccc=1.1:] -> illegal argument");
2776         } catch (IllegalArgumentException expected) {
2777         }
2778     }
2779 
2780     @Test
TestDeepPattern()2781     public void TestDeepPattern() {
2782         // Nested ranges are parsed via recursion which can use a lot of stack space.
2783         // After a reasonable limit, we should get an error.
2784         final int DEPTH = 20000;
2785         StringBuilder pattern = new StringBuilder();
2786         StringBuilder suffix = new StringBuilder();
2787         for (int i = 0; i < DEPTH; ++i) {
2788             pattern.append("[a");
2789             suffix.append(']');
2790         }
2791         pattern.append(suffix);
2792         try {
2793             new UnicodeSet(pattern.toString());
2794             fail("[a[a[a...1000s...]]] did not throw an exception");
2795         } catch(RuntimeException expected) {
2796         }
2797     }
2798 }
2799