• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1diff --git a/source/common/characterproperties.cpp b/source/common/characterproperties.cpp
2index 3aff85b3..b416ef52 100644
3--- a/source/common/characterproperties.cpp
4+++ b/source/common/characterproperties.cpp
5@@ -23,6 +23,9 @@
6 #include "umutex.h"
7 #include "uprops.h"
8
9+using icu::LocalPointer;
10+using icu::Normalizer2Factory;
11+using icu::Normalizer2Impl;
12 using icu::UInitOnce;
13 using icu::UnicodeSet;
14
15@@ -30,11 +33,13 @@ namespace {
16
17 UBool U_CALLCONV characterproperties_cleanup();
18
19+constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
20+
21 struct Inclusion {
22     UnicodeSet  *fSet;
23     UInitOnce    fInitOnce;
24 };
25-Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
26+Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
27
28 UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
29
30@@ -80,35 +85,22 @@ UBool U_CALLCONV characterproperties_cleanup() {
31     return TRUE;
32 }
33
34-}  // namespace
35-
36-U_NAMESPACE_BEGIN
37-
38-/*
39-Reduce excessive reallocation, and make it easier to detect initialization problems.
40-Usually you don't see smaller sets than this for Unicode 5.0.
41-*/
42-constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;
43-
44-void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
45+void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
46     // This function is invoked only via umtx_initOnce().
47-    // This function is a friend of class UnicodeSet.
48-
49     U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
50     if (src == UPROPS_SRC_NONE) {
51         errorCode = U_INTERNAL_PROGRAM_ERROR;
52         return;
53     }
54-    UnicodeSet * &incl = gInclusions[src].fSet;
55-    U_ASSERT(incl == nullptr);
56+    U_ASSERT(gInclusions[src].fSet == nullptr);
57
58-    incl = new UnicodeSet();
59-    if (incl == nullptr) {
60+    LocalPointer<UnicodeSet> incl(new UnicodeSet());
61+    if (incl.isNull()) {
62         errorCode = U_MEMORY_ALLOCATION_ERROR;
63         return;
64     }
65     USetAdder sa = {
66-        (USet *)incl,
67+        (USet *)incl.getAlias(),
68         _set_add,
69         _set_addRange,
70         _set_addString,
71@@ -116,7 +108,6 @@ void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCo
72         nullptr // don't need removeRange()
73     };
74
75-    incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);
76     switch(src) {
77     case UPROPS_SRC_CHAR:
78         uchar_addPropertyStarts(&sa, &errorCode);
79@@ -183,12 +174,15 @@ void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCo
80     }
81
82     if (U_FAILURE(errorCode)) {
83-        delete incl;
84-        incl = nullptr;
85         return;
86     }
87-    // Compact for caching
88+    if (incl->isBogus()) {
89+        errorCode = U_MEMORY_ALLOCATION_ERROR;
90+        return;
91+    }
92+    // Compact for caching.
93     incl->compact();
94+    gInclusions[src].fSet = incl.orphan();
95     ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
96 }
97
98@@ -199,15 +193,66 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
99         return nullptr;
100     }
101     Inclusion &i = gInclusions[src];
102-    umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
103+    umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode);
104     return i.fSet;
105 }
106
107+void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
108+    // This function is invoked only via umtx_initOnce().
109+    U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
110+    int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
111+    U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
112+    UPropertySource src = uprops_getSource(prop);
113+    const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
114+    if (U_FAILURE(errorCode)) {
115+        return;
116+    }
117+
118+    LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0));
119+    if (intPropIncl.isNull()) {
120+        errorCode = U_MEMORY_ALLOCATION_ERROR;
121+        return;
122+    }
123+    int32_t numRanges = incl->getRangeCount();
124+    int32_t prevValue = 0;
125+    for (int32_t i = 0; i < numRanges; ++i) {
126+        UChar32 rangeEnd = incl->getRangeEnd(i);
127+        for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) {
128+            // TODO: Fetch the IntProperty for prop once (ICU4J: UCharacterProperty.IntProperty) instead of dispatching on prop in u_getIntPropertyValue() for every code point.
129+            int32_t value = u_getIntPropertyValue(c, prop);
130+            if (value != prevValue) {
131+                intPropIncl->add(c);
132+                prevValue = value;
133+            }
134+        }
135+    }
136+
137+    if (intPropIncl->isBogus()) {
138+        errorCode = U_MEMORY_ALLOCATION_ERROR;
139+        return;
140+    }
141+    // Compact for caching.
142+    intPropIncl->compact();
143+    gInclusions[inclIndex].fSet = intPropIncl.orphan();
144+    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
145+}
146+
147+}  // namespace
148+
149+U_NAMESPACE_BEGIN
150+
151 const UnicodeSet *CharacterProperties::getInclusionsForProperty(
152         UProperty prop, UErrorCode &errorCode) {
153     if (U_FAILURE(errorCode)) { return nullptr; }
154-    UPropertySource src = uprops_getSource(prop);
155-    return getInclusionsForSource(src, errorCode);
156+    if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
157+        int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
158+        Inclusion &i = gInclusions[inclIndex];
159+        umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
160+        return i.fSet;
161+    } else {
162+        UPropertySource src = uprops_getSource(prop);
163+        return getInclusionsForSource(src, errorCode);
164+    }
165 }
166
167 U_NAMESPACE_END
168@@ -216,7 +261,7 @@ namespace {
169
170 UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
171     if (U_FAILURE(errorCode)) { return nullptr; }
172-    icu::LocalPointer<UnicodeSet> set(new UnicodeSet());
173+    LocalPointer<UnicodeSet> set(new UnicodeSet());
174     if (set.isNull()) {
175         errorCode = U_MEMORY_ALLOCATION_ERROR;
176         return nullptr;
177diff --git a/source/common/ucptrie.cpp b/source/common/ucptrie.cpp
178index 13496ad5..b72e3183 100644
179--- a/source/common/ucptrie.cpp
180+++ b/source/common/ucptrie.cpp
181@@ -280,7 +280,7 @@ UChar32 getRange(const void *t, UChar32 start,
182     int32_t prevI3Block = -1;
183     int32_t prevBlock = -1;
184     UChar32 c = start;
185-    uint32_t value;
186+    uint32_t trieValue, value;
187     bool haveValue = false;
188     do {
189         int32_t i3Block;
190@@ -319,6 +319,7 @@ UChar32 getRange(const void *t, UChar32 start,
191                         return c - 1;
192                     }
193                 } else {
194+                    trieValue = trie->nullValue;
195                     value = nullValue;
196                     if (pValue != nullptr) { *pValue = nullValue; }
197                     haveValue = true;
198@@ -357,6 +358,7 @@ UChar32 getRange(const void *t, UChar32 start,
199                             return c - 1;
200                         }
201                     } else {
202+                        trieValue = trie->nullValue;
203                         value = nullValue;
204                         if (pValue != nullptr) { *pValue = nullValue; }
205                         haveValue = true;
206@@ -364,23 +366,32 @@ UChar32 getRange(const void *t, UChar32 start,
207                     c = (c + dataBlockLength) & ~dataMask;
208                 } else {
209                     int32_t di = block + (c & dataMask);
210-                    uint32_t value2 = getValue(trie->data, valueWidth, di);
211-                    value2 = maybeFilterValue(value2, trie->nullValue, nullValue,
212-                                              filter, context);
213+                    uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
214                     if (haveValue) {
215-                        if (value2 != value) {
216-                            return c - 1;
217+                        if (trieValue2 != trieValue) {
218+                            if (filter == nullptr ||
219+                                    maybeFilterValue(trieValue2, trie->nullValue, nullValue,
220+                                                     filter, context) != value) {
221+                                return c - 1;
222+                            }
223+                            trieValue = trieValue2;  // may or may not help
224                         }
225                     } else {
226-                        value = value2;
227+                        trieValue = trieValue2;
228+                        value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
229+                                                 filter, context);
230                         if (pValue != nullptr) { *pValue = value; }
231                         haveValue = true;
232                     }
233                     while ((++c & dataMask) != 0) {
234-                        if (maybeFilterValue(getValue(trie->data, valueWidth, ++di),
235-                                             trie->nullValue, nullValue,
236-                                             filter, context) != value) {
237-                            return c - 1;
238+                        trieValue2 = getValue(trie->data, valueWidth, ++di);
239+                        if (trieValue2 != trieValue) {
240+                            if (filter == nullptr ||
241+                                    maybeFilterValue(trieValue2, trie->nullValue, nullValue,
242+                                                     filter, context) != value) {
243+                                return c - 1;
244+                            }
245+                            trieValue = trieValue2;  // may or may not help
246                         }
247                     }
248                 }
249diff --git a/source/common/umutablecptrie.cpp b/source/common/umutablecptrie.cpp
250index 44af8309..926be468 100644
251--- a/source/common/umutablecptrie.cpp
252+++ b/source/common/umutablecptrie.cpp
253@@ -304,41 +304,56 @@ UChar32 MutableCodePointTrie::getRange(
254     uint32_t nullValue = initialValue;
255     if (filter != nullptr) { nullValue = filter(context, nullValue); }
256     UChar32 c = start;
257-    uint32_t value;
258+    uint32_t trieValue, value;
259     bool haveValue = false;
260     int32_t i = c >> UCPTRIE_SHIFT_3;
261     do {
262         if (flags[i] == ALL_SAME) {
263-            uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue,
264-                                               filter, context);
265+            uint32_t trieValue2 = index[i];
266             if (haveValue) {
267-                if (value2 != value) {
268-                    return c - 1;
269+                if (trieValue2 != trieValue) {
270+                    if (filter == nullptr ||
271+                            maybeFilterValue(trieValue2, initialValue, nullValue,
272+                                             filter, context) != value) {
273+                        return c - 1;
274+                    }
275+                    trieValue = trieValue2;  // may or may not help
276                 }
277             } else {
278-                value = value2;
279+                trieValue = trieValue2;
280+                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
281                 if (pValue != nullptr) { *pValue = value; }
282                 haveValue = true;
283             }
284             c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
285         } else /* MIXED */ {
286             int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
287-            uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue,
288-                                               filter, context);
289+            uint32_t trieValue2 = data[di];
290             if (haveValue) {
291-                if (value2 != value) {
292-                    return c - 1;
293+                if (trieValue2 != trieValue) {
294+                    if (filter == nullptr ||
295+                            maybeFilterValue(trieValue2, initialValue, nullValue,
296+                                             filter, context) != value) {
297+                        return c - 1;
298+                    }
299+                    trieValue = trieValue2;  // may or may not help
300                 }
301             } else {
302-                value = value2;
303+                trieValue = trieValue2;
304+                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
305                 if (pValue != nullptr) { *pValue = value; }
306                 haveValue = true;
307             }
308             while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
309-                if (maybeFilterValue(data[++di], initialValue, nullValue,
310-                                     filter, context) != value) {
311-                    return c - 1;
312+                trieValue2 = data[++di];
313+                if (trieValue2 != trieValue) {
314+                    if (filter == nullptr ||
315+                            maybeFilterValue(trieValue2, initialValue, nullValue,
316+                                             filter, context) != value) {
317+                        return c - 1;
318+                    }
319                 }
320+                trieValue = trieValue2;  // may or may not help
321             }
322         }
323         ++i;
324diff --git a/source/common/unicode/uniset.h b/source/common/unicode/uniset.h
325index 0abc7542..af56b872 100644
326--- a/source/common/unicode/uniset.h
327+++ b/source/common/unicode/uniset.h
328@@ -27,7 +27,6 @@ U_NAMESPACE_BEGIN
329
330 // Forward Declarations.
331 class BMPSet;
332-class CharacterProperties;
333 class ParsePosition;
334 class RBBIRuleScanner;
335 class SymbolTable;
336@@ -276,14 +275,23 @@ class RuleCharacterIterator;
337  * @stable ICU 2.0
338  */
339 class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
340+private:
341+    /**
342+     * Enough for sets with few ranges.
343+     * For example, White_Space has 10 ranges, list length 21.
344+     */
345+    static constexpr int32_t INITIAL_CAPACITY = 25;
346+    // fFlags constant
347+    static constexpr uint8_t kIsBogus = 1;  // This set is bogus (i.e. not valid)
348+
349+    UChar32* list = stackList; // MUST be terminated with HIGH
350+    int32_t capacity = INITIAL_CAPACITY; // capacity of list
351+    int32_t len = 1; // length of list used; 1 <= len <= capacity
352+    uint8_t fFlags = 0;         // Bit flag (see constants above)
353
354-    int32_t len; // length of list used; 0 <= len <= capacity
355-    int32_t capacity; // capacity of list
356-    UChar32* list; // MUST be terminated with HIGH
357-    BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
358-    UChar32* buffer; // internal buffer, may be NULL
359-    int32_t bufferCapacity; // capacity of buffer
360-    int32_t patLen;
361+    BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not NULL.
362+    UChar32* buffer = nullptr; // internal buffer, may be NULL
363+    int32_t bufferCapacity = 0; // capacity of buffer
364
365     /**
366      * The pattern representation of this set.  This may not be the
367@@ -294,15 +302,19 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
368      * indicating that toPattern() must generate a pattern
369      * representation from the inversion list.
370      */
371-    char16_t *pat;
372-    UVector* strings; // maintained in sorted order
373-    UnicodeSetStringSpan *stringSpan;
374+    char16_t *pat = nullptr;
375+    int32_t patLen = 0;
376+
377+    UVector* strings = nullptr; // maintained in sorted order
378+    UnicodeSetStringSpan *stringSpan = nullptr;
379+
380+    /**
381+     * Initial list array.
382+     * Avoids some heap allocations, and list is never nullptr.
383+     * Increases the object size a bit.
384+     */
385+    UChar32 stackList[INITIAL_CAPACITY];
386
387-private:
388-    enum { // constants
389-        kIsBogus = 1       // This set is bogus (i.e. not valid)
390-    };
391-    uint8_t fFlags;         // Bit flag (see constants above)
392 public:
393     /**
394      * Determine if this object contains a valid set.
395@@ -1480,8 +1492,6 @@ private:
396
397     friend class USetAccess;
398
399-    int32_t getStringCount() const;
400-
401     const UnicodeString* getString(int32_t index) const;
402
403     //----------------------------------------------------------------
404@@ -1528,13 +1538,18 @@ private:
405     // Implementation: Utility methods
406     //----------------------------------------------------------------
407
408-    void ensureCapacity(int32_t newLen, UErrorCode& ec);
409+    static int32_t nextCapacity(int32_t minCapacity);
410+
411+    bool ensureCapacity(int32_t newLen);
412
413-    void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
414+    bool ensureBufferCapacity(int32_t newLen);
415
416     void swapBuffers(void);
417
418     UBool allocateStrings(UErrorCode &status);
419+    UBool hasStrings() const;
420+    int32_t stringsSize() const;
421+    UBool stringsContains(const UnicodeString &s) const;
422
423     UnicodeString& _toPattern(UnicodeString& result,
424                               UBool escapeUnprintable) const;
425@@ -1614,7 +1629,6 @@ private:
426                               UnicodeString& rebuiltPat,
427                               UErrorCode& ec);
428
429-    friend class CharacterProperties;
430     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
431
432     /**
433@@ -1646,7 +1660,10 @@ private:
434     /**
435      * Set the new pattern to cache.
436      */
437-    void setPattern(const UnicodeString& newPat);
438+    void setPattern(const UnicodeString& newPat) {
439+        setPattern(newPat.getBuffer(), newPat.length());
440+    }
441+    void setPattern(const char16_t *newPat, int32_t newPatLen);
442     /**
443      * Release existing cached pattern.
444      */
445diff --git a/source/common/uniset.cpp b/source/common/uniset.cpp
446index e8378e0a..20242776 100644
447--- a/source/common/uniset.cpp
448+++ b/source/common/uniset.cpp
449@@ -14,6 +14,7 @@
450 #include "unicode/parsepos.h"
451 #include "unicode/symtable.h"
452 #include "unicode/uniset.h"
453+#include "unicode/ustring.h"
454 #include "unicode/utf8.h"
455 #include "unicode/utf16.h"
456 #include "ruleiter.h"
457@@ -53,11 +54,8 @@
458 // LOW <= all valid values. ZERO for codepoints
459 #define UNICODESET_LOW 0x000000
460
461-// initial storage. Must be >= 0
462-#define START_EXTRA 16
463-
464-// extra amount for growth. Must be >= 0
465-#define GROW_EXTRA START_EXTRA
466+/** Max list [0, 1, 2, ..., max code point, HIGH] */
467+constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1;
468
469 U_NAMESPACE_BEGIN
470
471@@ -137,6 +135,18 @@ static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
472     return a.compare(b);
473 }
474
475+UBool UnicodeSet::hasStrings() const {
476+    return strings != nullptr && !strings->isEmpty();
477+}
478+
479+int32_t UnicodeSet::stringsSize() const {
480+    return strings == nullptr ? 0 : strings->size();
481+}
482+
483+UBool UnicodeSet::stringsContains(const UnicodeString &s) const {
484+    return strings != nullptr && strings->contains((void*) &s);
485+}
486+
487 //----------------------------------------------------------------
488 // Constructors &c
489 //----------------------------------------------------------------
490@@ -144,24 +154,8 @@ static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
491 /**
492  * Constructs an empty set.
493  */
494-UnicodeSet::UnicodeSet() :
495-    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
496-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
497-    fFlags(0)
498-{
499-    UErrorCode status = U_ZERO_ERROR;
500-    allocateStrings(status);
501-    if (U_FAILURE(status)) {
502-        setToBogus(); // If memory allocation failed, set to bogus state.
503-        return;
504-    }
505-    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
506-    if(list!=NULL){
507-        list[0] = UNICODESET_HIGH;
508-    } else { // If memory allocation failed, set to bogus state.
509-        setToBogus();
510-        return;
511-    }
512+UnicodeSet::UnicodeSet() {
513+    list[0] = UNICODESET_HIGH;
514     _dbgct(this);
515 }
516
517@@ -172,89 +166,39 @@ UnicodeSet::UnicodeSet() :
518  * @param start first character, inclusive, of range
519  * @param end last character, inclusive, of range
520  */
521-UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
522-    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
523-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
524-    fFlags(0)
525-{
526-    UErrorCode status = U_ZERO_ERROR;
527-    allocateStrings(status);
528-    if (U_FAILURE(status)) {
529-        setToBogus(); // If memory allocation failed, set to bogus state.
530-        return;
531-    }
532-    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
533-    if(list!=NULL){
534-        list[0] = UNICODESET_HIGH;
535-        complement(start, end);
536-    } else { // If memory allocation failed, set to bogus state.
537-        setToBogus();
538-        return;
539-    }
540+UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) {
541+    list[0] = UNICODESET_HIGH;
542+    add(start, end);
543     _dbgct(this);
544 }
545
546 /**
547  * Constructs a set that is identical to the given UnicodeSet.
548  */
549-UnicodeSet::UnicodeSet(const UnicodeSet& o) :
550-    UnicodeFilter(o),
551-    len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0),
552-    bmpSet(0),
553-    buffer(0), bufferCapacity(0),
554-    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
555-    fFlags(0)
556-{
557-    UErrorCode status = U_ZERO_ERROR;
558-    allocateStrings(status);
559-    if (U_FAILURE(status)) {
560-        setToBogus(); // If memory allocation failed, set to bogus state.
561-        return;
562-    }
563-    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
564-    if(list!=NULL){
565-        *this = o;
566-    } else { // If memory allocation failed, set to bogus state.
567-        setToBogus();
568-        return;
569-    }
570+UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) {
571+    *this = o;
572     _dbgct(this);
573 }
574
575 // Copy-construct as thawed.
576-UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
577-    UnicodeFilter(o),
578-    len(0), capacity(o.len + GROW_EXTRA), list(0),
579-    bmpSet(0),
580-    buffer(0), bufferCapacity(0),
581-    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
582-    fFlags(0)
583-{
584-    UErrorCode status = U_ZERO_ERROR;
585-    allocateStrings(status);
586-    if (U_FAILURE(status)) {
587-        setToBogus(); // If memory allocation failed, set to bogus state.
588-        return;
589-    }
590-    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
591-    if(list!=NULL){
592+UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) {
593+    if (ensureCapacity(o.len)) {
594         // *this = o except for bmpSet and stringSpan
595         len = o.len;
596         uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
597-        if (strings != NULL && o.strings != NULL) {
598-            strings->assign(*o.strings, cloneUnicodeString, status);
599-        } else { // Invalid strings.
600-            setToBogus();
601-            return;
602+        if (o.hasStrings()) {
603+            UErrorCode status = U_ZERO_ERROR;
604+            if (!allocateStrings(status) ||
605+                    (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
606+                setToBogus();
607+                return;
608+            }
609         }
610         if (o.pat) {
611-            setPattern(UnicodeString(o.pat, o.patLen));
612+            setPattern(o.pat, o.patLen);
613         }
614-    } else { // If memory allocation failed, set to bogus state.
615-        setToBogus();
616-        return;
617+        _dbgct(this);
618     }
619-    _dbgct(this);
620 }
621
622 /**
623@@ -262,9 +206,11 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
624  */
625 UnicodeSet::~UnicodeSet() {
626     _dbgdt(this); // first!
627-    uprv_free(list);
628+    if (list != stackList) {
629+        uprv_free(list);
630+    }
631     delete bmpSet;
632-    if (buffer) {
633+    if (buffer != stackList) {
634         uprv_free(buffer);
635     }
636     delete strings;
637@@ -290,32 +236,30 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
638         setToBogus();
639         return *this;
640     }
641-    UErrorCode ec = U_ZERO_ERROR;
642-    ensureCapacity(o.len, ec);
643-    if (U_FAILURE(ec)) {
644+    if (!ensureCapacity(o.len)) {
645         // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens.
646         return *this;
647     }
648     len = o.len;
649     uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32));
650-    if (o.bmpSet == NULL || asThawed) {
651-        bmpSet = NULL;
652-    } else {
653+    if (o.bmpSet != nullptr && !asThawed) {
654         bmpSet = new BMPSet(*o.bmpSet, list, len);
655         if (bmpSet == NULL) { // Check for memory allocation error.
656             setToBogus();
657             return *this;
658         }
659     }
660-    if (strings != NULL && o.strings != NULL) {
661-        strings->assign(*o.strings, cloneUnicodeString, ec);
662-    } else { // Invalid strings.
663-        setToBogus();
664-        return *this;
665+    if (o.hasStrings()) {
666+        UErrorCode status = U_ZERO_ERROR;
667+        if ((strings == nullptr && !allocateStrings(status)) ||
668+                (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
669+            setToBogus();
670+            return *this;
671+        }
672+    } else if (hasStrings()) {
673+        strings->removeAllElements();
674     }
675-    if (o.stringSpan == NULL || asThawed) {
676-        stringSpan = NULL;
677-    } else {
678+    if (o.stringSpan != nullptr && !asThawed) {
679         stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
680         if (stringSpan == NULL) { // Check for memory allocation error.
681             setToBogus();
682@@ -324,7 +268,7 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
683     }
684     releasePattern();
685     if (o.pat) {
686-        setPattern(UnicodeString(o.pat, o.patLen));
687+        setPattern(o.pat, o.patLen);
688     }
689     return *this;
690 }
691@@ -357,7 +301,8 @@ UBool UnicodeSet::operator==(const UnicodeSet& o) const {
692     for (int32_t i = 0; i < len; ++i) {
693         if (list[i] != o.list[i]) return FALSE;
694     }
695-    if (*strings != *o.strings) return FALSE;
696+    if (hasStrings() != o.hasStrings()) { return FALSE; }
697+    if (hasStrings() && *strings != *o.strings) return FALSE;
698     return TRUE;
699 }
700
701@@ -393,7 +338,7 @@ int32_t UnicodeSet::size(void) const {
702     for (int32_t i = 0; i < count; ++i) {
703         n += getRangeEnd(i) - getRangeStart(i) + 1;
704     }
705-    return n + strings->size();
706+    return n + stringsSize();
707 }
708
709 /**
710@@ -402,7 +347,7 @@ int32_t UnicodeSet::size(void) const {
711  * @return <tt>true</tt> if this set contains no elements.
712  */
713 UBool UnicodeSet::isEmpty(void) const {
714-    return len == 1 && strings->size() == 0;
715+    return len == 1 && !hasStrings();
716 }
717
718 /**
719@@ -502,7 +447,7 @@ UBool UnicodeSet::contains(const UnicodeString& s) const {
720     if (s.length() == 0) return FALSE;
721     int32_t cp = getSingleCP(s);
722     if (cp < 0) {
723-        return strings->contains((void*) &s);
724+        return stringsContains(s);
725     } else {
726         return contains((UChar32) cp);
727     }
728@@ -524,8 +469,7 @@ UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
729             return FALSE;
730         }
731     }
732-    if (!strings->containsAll(*c.strings)) return FALSE;
733-    return TRUE;
734+    return !c.hasStrings() || (strings != nullptr && strings->containsAll(*c.strings));
735 }
736
737 /**
738@@ -571,8 +515,7 @@ UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
739             return FALSE;
740         }
741     }
742-    if (!strings->containsNone(*c.strings)) return FALSE;
743-    return TRUE;
744+    return strings == nullptr || !c.hasStrings() || strings->containsNone(*c.strings);
745 }
746
747 /**
748@@ -613,7 +556,7 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
749             return TRUE;
750         }
751     }
752-    if (strings->size() != 0) {
753+    if (hasStrings()) {
754         for (i=0; i<strings->size(); ++i) {
755             const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
756             //if (s.length() == 0) {
757@@ -648,7 +591,7 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
758             return U_MISMATCH;
759         }
760     } else {
761-        if (strings->size() != 0) { // try strings first
762+        if (hasStrings()) { // try strings first
763
764             // might separate forward and backward loops later
765             // for now they are combined
766@@ -849,7 +792,39 @@ UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
767  */
768 UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
769     if (pinCodePoint(start) < pinCodePoint(end)) {
770-        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
771+        UChar32 limit = end + 1;
772+        // Fast path for adding a new range after the last one.
773+        // Odd list length: [..., lastStart, lastLimit, HIGH]
774+        if ((len & 1) != 0) {
775+            // If the list is empty (len == 1), use -2 as lastLimit: for any start >= 0 it passes lastLimit <= start but never equals start, so the range is appended rather than merged.
776+            UChar32 lastLimit = len == 1 ? -2 : list[len - 2];
777+            if (lastLimit <= start && !isFrozen() && !isBogus()) {
778+                if (lastLimit == start) {
779+                    // Extend the last range.
780+                    list[len - 2] = limit;
781+                    if (limit == UNICODESET_HIGH) {
782+                        --len;
783+                    }
784+                } else {
785+                    list[len - 1] = start;
786+                    if (limit < UNICODESET_HIGH) {
787+                        if (ensureCapacity(len + 2)) {
788+                            list[len++] = limit;
789+                            list[len++] = UNICODESET_HIGH;
790+                        }
791+                    } else {  // limit == UNICODESET_HIGH
792+                        if (ensureCapacity(len + 1)) {
793+                            list[len++] = UNICODESET_HIGH;
794+                        }
795+                    }
796+                }
797+                releasePattern();
798+                return *this;
799+            }
800+        }
801+        // This is slow. Could be much faster using findCodePoint(start)
802+        // and modifying the list, dealing with adjacent & overlapping ranges.
803+        UChar32 range[3] = { start, limit, UNICODESET_HIGH };
804         add(range, 2, 0);
805     } else if (start == end) {
806         add(start);
807@@ -918,9 +893,7 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
808         list[i] = c;
809         // if we touched the HIGH mark, then add a new one
810         if (c == (UNICODESET_HIGH - 1)) {
811-            UErrorCode status = U_ZERO_ERROR;
812-            ensureCapacity(len+1, status);
813-            if (U_FAILURE(status)) {
814+            if (!ensureCapacity(len+1)) {
815                 // ensureCapacity will mark the object as Bogus if OOM failure happens.
816                 return *this;
817             }
818@@ -964,21 +937,13 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
819         //                             ^
820         //                             list[i]
821
822-        UErrorCode status = U_ZERO_ERROR;
823-        ensureCapacity(len+2, status);
824-        if (U_FAILURE(status)) {
825+        if (!ensureCapacity(len+2)) {
826             // ensureCapacity will mark the object as Bogus if OOM failure happens.
827             return *this;
828         }
829
830-        //for (int32_t k=len-1; k>=i; --k) {
831-        //    list[k+2] = list[k];
832-        //}
833-        UChar32* src = list + len;
834-        UChar32* dst = src + 2;
835-        UChar32* srclimit = list + i;
836-        while (src > srclimit) *(--dst) = *(--src);
837-
838+        UChar32 *p = list + i;
839+        uprv_memmove(p + 2, p, (len - i) * sizeof(*p));
840         list[i] = c;
841         list[i+1] = c+1;
842         len += 2;
843@@ -1014,7 +979,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
844     if (s.length() == 0 || isFrozen() || isBogus()) return *this;
845     int32_t cp = getSingleCP(s);
846     if (cp < 0) {
847-        if (!strings->contains((void*) &s)) {
848+        if (!stringsContains(s)) {
849             _add(s);
850             releasePattern();
851         }
852@@ -1033,12 +998,16 @@ void UnicodeSet::_add(const UnicodeString& s) {
853     if (isFrozen() || isBogus()) {
854         return;
855     }
856+    UErrorCode ec = U_ZERO_ERROR;
857+    if (strings == nullptr && !allocateStrings(ec)) {
858+        setToBogus();
859+        return;
860+    }
861     UnicodeString* t = new UnicodeString(s);
862     if (t == NULL) { // Check for memory allocation error.
863         setToBogus();
864         return;
865     }
866-    UErrorCode ec = U_ZERO_ERROR;
867     strings->sortedInsert(t, compareUnicodeString, ec);
868     if (U_FAILURE(ec)) {
869         setToBogus();
870@@ -1121,7 +1090,10 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
871 }
872
873 UnicodeSet& UnicodeSet::removeAllStrings() {
874-    strings->removeAllElements();
875+    if (!isFrozen() && hasStrings()) {
876+        strings->removeAllElements();
877+        releasePattern();
878+    }
879     return *this;
880 }
881
882@@ -1217,8 +1189,9 @@ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
883     if (s.length() == 0 || isFrozen() || isBogus()) return *this;
884     int32_t cp = getSingleCP(s);
885     if (cp < 0) {
886-        strings->removeElement((void*) &s);
887-        releasePattern();
888+        if (strings != nullptr && strings->removeElement((void*) &s)) {
889+            releasePattern();
890+        }
891     } else {
892         remove((UChar32)cp, (UChar32)cp);
893     }
894@@ -1260,24 +1233,17 @@ UnicodeSet& UnicodeSet::complement(void) {
895     if (isFrozen() || isBogus()) {
896         return *this;
897     }
898-    UErrorCode status = U_ZERO_ERROR;
899     if (list[0] == UNICODESET_LOW) {
900-        ensureBufferCapacity(len-1, status);
901-        if (U_FAILURE(status)) {
902-            return *this;
903-        }
904-        uprv_memcpy(buffer, list + 1, (size_t)(len-1)*sizeof(UChar32));
905+        uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32));
906         --len;
907     } else {
908-        ensureBufferCapacity(len+1, status);
909-        if (U_FAILURE(status)) {
910+        if (!ensureCapacity(len+1)) {
911             return *this;
912         }
913-        uprv_memcpy(buffer + 1, list, (size_t)len*sizeof(UChar32));
914-        buffer[0] = UNICODESET_LOW;
915+        uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32));
916+        list[0] = UNICODESET_LOW;
917         ++len;
918     }
919-    swapBuffers();
920     releasePattern();
921     return *this;
922 }
923@@ -1294,7 +1260,7 @@ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
924     if (s.length() == 0 || isFrozen() || isBogus()) return *this;
925     int32_t cp = getSingleCP(s);
926     if (cp < 0) {
927-        if (strings->contains((void*) &s)) {
928+        if (stringsContains(s)) {
929             strings->removeElement((void*) &s);
930         } else {
931             _add(s);
932@@ -1325,7 +1291,7 @@ UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
933     if ( c.strings!=NULL ) {
934         for (int32_t i=0; i<c.strings->size(); ++i) {
935             const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
936-            if (!strings->contains((void*) s)) {
937+            if (!stringsContains(*s)) {
938                 _add(*s);
939             }
940         }
941@@ -1347,7 +1313,13 @@ UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
942         return *this;
943     }
944     retain(c.list, c.len, 0);
945-    strings->retainAll(*c.strings);
946+    if (hasStrings()) {
947+        if (!c.hasStrings()) {
948+            strings->removeAllElements();
949+        } else {
950+            strings->retainAll(*c.strings);
951+        }
952+    }
953     return *this;
954 }
955
956@@ -1365,7 +1337,9 @@ UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
957         return *this;
958     }
959     retain(c.list, c.len, 2);
960-    strings->removeAll(*c.strings);
961+    if (hasStrings() && c.hasStrings()) {
962+        strings->removeAll(*c.strings);
963+    }
964     return *this;
965 }
966
967@@ -1383,10 +1357,12 @@ UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
968     }
969     exclusiveOr(c.list, c.len, 0);
970
971-    for (int32_t i=0; i<c.strings->size(); ++i) {
972-        void* e = c.strings->elementAt(i);
973-        if (!strings->removeElement(e)) {
974-            _add(*(const UnicodeString*)e);
975+    if (c.strings != nullptr) {
976+        for (int32_t i=0; i<c.strings->size(); ++i) {
977+            void* e = c.strings->elementAt(i);
978+            if (strings == nullptr || !strings->removeElement(e)) {
979+                _add(*(const UnicodeString*)e);
980+            }
981         }
982     }
983     return *this;
984@@ -1400,18 +1376,14 @@ UnicodeSet& UnicodeSet::clear(void) {
985     if (isFrozen()) {
986         return *this;
987     }
988-    if (list != NULL) {
989-        list[0] = UNICODESET_HIGH;
990-    }
991+    list[0] = UNICODESET_HIGH;
992     len = 1;
993     releasePattern();
994     if (strings != NULL) {
995         strings->removeAllElements();
996     }
997-    if (list != NULL && strings != NULL) {
998-        // Remove bogus
999-        fFlags = 0;
1000-    }
1001+    // Remove bogus
1002+    fFlags = 0;
1003     return *this;
1004 }
1005
1006@@ -1445,10 +1417,6 @@ UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
1007     return list[index*2 + 1] - 1;
1008 }
1009
1010-int32_t UnicodeSet::getStringCount() const {
1011-    return strings->size();
1012-}
1013-
1014 const UnicodeString* UnicodeSet::getString(int32_t index) const {
1015     return (const UnicodeString*) strings->elementAt(index);
1016 }
1017@@ -1462,22 +1430,32 @@ UnicodeSet& UnicodeSet::compact() {
1018         return *this;
1019     }
1020     // Delete buffer first to defragment memory less.
1021-    if (buffer != NULL) {
1022+    if (buffer != stackList) {
1023         uprv_free(buffer);
1024         buffer = NULL;
1025-    }
1026-    if (len < capacity) {
1027-        // Make the capacity equal to len or 1.
1028-        // We don't want to realloc of 0 size.
1029-        int32_t newCapacity = len + (len == 0);
1030-        UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity);
1031+        bufferCapacity = 0;
1032+    }
1033+    if (list == stackList) {
1034+        // pass
1035+    } else if (len <= INITIAL_CAPACITY) {
1036+        uprv_memcpy(stackList, list, len * sizeof(UChar32));
1037+        uprv_free(list);
1038+        list = stackList;
1039+        capacity = INITIAL_CAPACITY;
1040+    } else if ((len + 7) < capacity) {
1041+        // If we have more than a little unused capacity, shrink it to len.
1042+        UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len);
1043         if (temp) {
1044             list = temp;
1045-            capacity = newCapacity;
1046+            capacity = len;
1047         }
1048         // else what the heck happened?! We allocated less memory!
1049         // Oh well. We'll keep our original array.
1050     }
1051+    if (strings != nullptr && strings->isEmpty()) {
1052+        delete strings;
1053+        strings = nullptr;
1054+    }
1055     return *this;
1056 }
1057
1058@@ -1488,10 +1466,8 @@ UnicodeSet& UnicodeSet::compact() {
1059 /**
1060  * Deserialize constructor.
1061  */
1062-UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, UErrorCode &ec)
1063-  : len(1), capacity(1+START_EXTRA), list(0), bmpSet(0), buffer(0),
1064-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
1065-    fFlags(0) {
1066+UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization,
1067+                       UErrorCode &ec) {
1068
1069   if(U_FAILURE(ec)) {
1070     setToBogus();
1071@@ -1506,24 +1482,15 @@ UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization se
1072     return;
1073   }
1074
1075-  allocateStrings(ec);
1076-  if (U_FAILURE(ec)) {
1077-    setToBogus();
1078-    return;
1079-  }
1080-
1081   // bmp?
1082   int32_t headerSize = ((data[0]&0x8000)) ?2:1;
1083   int32_t bmpLength = (headerSize==1)?data[0]:data[1];
1084
1085-  len = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
1086+  int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
1087 #ifdef DEBUG_SERIALIZE
1088-  printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,len, data[0],data[1],data[2],data[3]);
1089+  printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]);
1090 #endif
1091-  capacity = len+1;
1092-  list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
1093-  if(!list || U_FAILURE(ec)) {
1094-    setToBogus();
1095+  if(!ensureCapacity(newLength + 1)) {  // +1 for HIGH
1096     return;
1097   }
1098   // copy bmp
1099@@ -1535,15 +1502,18 @@ UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization se
1100 #endif
1101   }
1102   // copy smp
1103-  for(i=bmpLength;i<len;i++) {
1104+  for(i=bmpLength;i<newLength;i++) {
1105     list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
1106               ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
1107 #ifdef DEBUG_SERIALIZE
1108     printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
1109 #endif
1110   }
1111-  // terminator
1112-  list[len++]=UNICODESET_HIGH;
1113+  U_ASSERT(i == newLength);
1114+  if (i == 0 || list[i - 1] != UNICODESET_HIGH) {
1115+    list[i++] = UNICODESET_HIGH;
1116+  }
1117+  len = i;
1118 }
1119
1120
1121@@ -1664,33 +1634,65 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) {
1122     return TRUE;
1123 }
1124
1125-void UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) {
1126+int32_t UnicodeSet::nextCapacity(int32_t minCapacity) {
1127+    // Grow exponentially to reduce the frequency of allocations.
1128+    if (minCapacity < INITIAL_CAPACITY) {
1129+        return minCapacity + INITIAL_CAPACITY;
1130+    } else if (minCapacity <= 2500) {
1131+        return 5 * minCapacity;
1132+    } else {
1133+        int32_t newCapacity = 2 * minCapacity;
1134+        if (newCapacity > MAX_LENGTH) {
1135+            newCapacity = MAX_LENGTH;
1136+        }
1137+        return newCapacity;
1138+    }
1139+}
1140+
1141+bool UnicodeSet::ensureCapacity(int32_t newLen) {
1142+    if (newLen > MAX_LENGTH) {
1143+        newLen = MAX_LENGTH;
1144+    }
1145     if (newLen <= capacity) {
1146-        return;
1147+        return true;
1148     }
1149-    UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA));
1150+    int32_t newCapacity = nextCapacity(newLen);
1151+    UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
1152     if (temp == NULL) {
1153-        ec = U_MEMORY_ALLOCATION_ERROR;
1154         setToBogus(); // set the object to bogus state if an OOM failure occurred.
1155-        return;
1156+        return false;
1157+    }
1158+    // Copy only the actual contents.
1159+    uprv_memcpy(temp, list, len * sizeof(UChar32));
1160+    if (list != stackList) {
1161+        uprv_free(list);
1162     }
1163     list = temp;
1164-    capacity = newLen + GROW_EXTRA;
1165-    // else we keep the original contents on the memory failure.
1166+    capacity = newCapacity;
1167+    return true;
1168 }
1169
1170-void UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) {
1171-    if (buffer != NULL && newLen <= bufferCapacity)
1172-        return;
1173-    UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA));
1174+bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
1175+    if (newLen > MAX_LENGTH) {
1176+        newLen = MAX_LENGTH;
1177+    }
1178+    if (newLen <= bufferCapacity) {
1179+        return true;
1180+    }
1181+    int32_t newCapacity = nextCapacity(newLen);
1182+    UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32));
1183     if (temp == NULL) {
1184-        ec = U_MEMORY_ALLOCATION_ERROR;
1185         setToBogus();
1186-        return;
1187+        return false;
1188+    }
1189+    // The buffer has no contents to be copied.
1190+    // It is always filled from scratch after this call.
1191+    if (buffer != stackList) {
1192+        uprv_free(buffer);
1193     }
1194     buffer = temp;
1195-    bufferCapacity = newLen + GROW_EXTRA;
1196-    // else we keep the original contents on the memory failure.
1197+    bufferCapacity = newCapacity;
1198+    return true;
1199 }
1200
1201 /**
1202@@ -1727,9 +1729,7 @@ void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t pola
1203     if (isFrozen() || isBogus()) {
1204         return;
1205     }
1206-    UErrorCode status = U_ZERO_ERROR;
1207-    ensureBufferCapacity(len + otherLen, status);
1208-    if (U_FAILURE(status)) {
1209+    if (!ensureBufferCapacity(len + otherLen)) {
1210         return;
1211     }
1212
1213@@ -1777,9 +1777,7 @@ void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
1214     if (isFrozen() || isBogus() || other==NULL) {
1215         return;
1216     }
1217-    UErrorCode status = U_ZERO_ERROR;
1218-    ensureBufferCapacity(len + otherLen, status);
1219-    if (U_FAILURE(status)) {
1220+    if (!ensureBufferCapacity(len + otherLen)) {
1221         return;
1222     }
1223
1224@@ -1890,9 +1888,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
1225     if (isFrozen() || isBogus()) {
1226         return;
1227     }
1228-    UErrorCode status = U_ZERO_ERROR;
1229-    ensureBufferCapacity(len + otherLen, status);
1230-    if (U_FAILURE(status)) {
1231+    if (!ensureBufferCapacity(len + otherLen)) {
1232         return;
1233     }
1234
1235@@ -2138,12 +2134,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
1236         }
1237     }
1238
1239-    for (int32_t i = 0; i<strings->size(); ++i) {
1240-        result.append(OPEN_BRACE);
1241-        _appendToPat(result,
1242-                     *(const UnicodeString*) strings->elementAt(i),
1243-                     escapeUnprintable);
1244-        result.append(CLOSE_BRACE);
1245+    if (strings != nullptr) {
1246+        for (int32_t i = 0; i<strings->size(); ++i) {
1247+            result.append(OPEN_BRACE);
1248+            _appendToPat(result,
1249+                         *(const UnicodeString*) strings->elementAt(i),
1250+                         escapeUnprintable);
1251+            result.append(CLOSE_BRACE);
1252+        }
1253     }
1254     return result.append(SET_CLOSE);
1255 }
1256@@ -2162,13 +2160,12 @@ void UnicodeSet::releasePattern() {
1257 /**
1258 * Set the new pattern to cache.
1259 */
1260-void UnicodeSet::setPattern(const UnicodeString& newPat) {
1261+void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
1262     releasePattern();
1263-    int32_t newPatLen = newPat.length();
1264     pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
1265     if (pat) {
1266         patLen = newPatLen;
1267-        newPat.extractBetween(0, patLen, pat);
1268+        u_memcpy(pat, newPat, patLen);
1269         pat[patLen] = 0;
1270     }
1271     // else we don't care if malloc failed. This was just a nice cache.
1272@@ -2177,30 +2174,15 @@ void UnicodeSet::setPattern(const UnicodeString& newPat) {
1273
1274 UnicodeFunctor *UnicodeSet::freeze() {
1275     if(!isFrozen() && !isBogus()) {
1276-        // Do most of what compact() does before freezing because
1277-        // compact() will not work when the set is frozen.
1278-        // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
1279-
1280-        // Delete buffer first to defragment memory less.
1281-        if (buffer != NULL) {
1282-            uprv_free(buffer);
1283-            buffer = NULL;
1284-        }
1285-        if (capacity > (len + GROW_EXTRA)) {
1286-            // Make the capacity equal to len or 1.
1287-            // We don't want to realloc of 0 size.
1288-            capacity = len + (len == 0);
1289-            list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity);
1290-            if (list == NULL) { // Check for memory allocation error.
1291-                setToBogus();
1292-                return this;
1293-            }
1294-        }
1295+        compact();
1296
1297         // Optimize contains() and span() and similar functions.
1298-        if (!strings->isEmpty()) {
1299+        if (hasStrings()) {
1300             stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
1301-            if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) {
1302+            if (stringSpan == nullptr) {
1303+                setToBogus();
1304+                return this;
1305+            } else if (!stringSpan->needsStringSpanUTF16()) {
1306                 // All strings are irrelevant for span() etc. because
1307                 // all of each string's code points are contained in this set.
1308                 // Do not check needsStringSpanUTF8() because UTF-8 has at most as
1309@@ -2233,7 +2215,7 @@ int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanC
1310     }
1311     if(stringSpan!=NULL) {
1312         return stringSpan->span(s, length, spanCondition);
1313-    } else if(!strings->isEmpty()) {
1314+    } else if(hasStrings()) {
1315         uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
1316                             UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
1317                             UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
1318@@ -2270,7 +2252,7 @@ int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition s
1319     }
1320     if(stringSpan!=NULL) {
1321         return stringSpan->spanBack(s, length, spanCondition);
1322-    } else if(!strings->isEmpty()) {
1323+    } else if(hasStrings()) {
1324         uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
1325                             UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
1326                             UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
1327@@ -2308,7 +2290,7 @@ int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition sp
1328     }
1329     if(stringSpan!=NULL) {
1330         return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
1331-    } else if(!strings->isEmpty()) {
1332+    } else if(hasStrings()) {
1333         uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
1334                             UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
1335                             UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
1336@@ -2346,7 +2328,7 @@ int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanConditio
1337     }
1338     if(stringSpan!=NULL) {
1339         return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
1340-    } else if(!strings->isEmpty()) {
1341+    } else if(hasStrings()) {
1342         uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
1343                             UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
1344                             UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
1345diff --git a/source/common/uniset_closure.cpp b/source/common/uniset_closure.cpp
1346index 0b7da796..882231ba 100644
1347--- a/source/common/uniset_closure.cpp
1348+++ b/source/common/uniset_closure.cpp
1349@@ -31,10 +31,6 @@
1350 #include "util.h"
1351 #include "uvector.h"
1352
1353-// initial storage. Must be >= 0
1354-// *** same as in uniset.cpp ! ***
1355-#define START_EXTRA 16
1356-
1357 U_NAMESPACE_BEGIN
1358
1359 // TODO memory debugging provided inside uniset.cpp
1360@@ -49,42 +45,16 @@ U_NAMESPACE_BEGIN
1361 UnicodeSet::UnicodeSet(const UnicodeString& pattern,
1362                        uint32_t options,
1363                        const SymbolTable* symbols,
1364-                       UErrorCode& status) :
1365-    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
1366-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
1367-    fFlags(0)
1368-{
1369-    if(U_SUCCESS(status)){
1370-        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
1371-        /* test for NULL */
1372-        if(list == NULL) {
1373-            status = U_MEMORY_ALLOCATION_ERROR;
1374-        }else{
1375-            allocateStrings(status);
1376-            applyPattern(pattern, options, symbols, status);
1377-        }
1378-    }
1379+                       UErrorCode& status) {
1380+    applyPattern(pattern, options, symbols, status);
1381     _dbgct(this);
1382 }
1383
1384 UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
1385                        uint32_t options,
1386                        const SymbolTable* symbols,
1387-                       UErrorCode& status) :
1388-    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
1389-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
1390-    fFlags(0)
1391-{
1392-    if(U_SUCCESS(status)){
1393-        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
1394-        /* test for NULL */
1395-        if(list == NULL) {
1396-            status = U_MEMORY_ALLOCATION_ERROR;
1397-        }else{
1398-            allocateStrings(status);
1399-            applyPattern(pattern, pos, options, symbols, status);
1400-        }
1401-    }
1402+                       UErrorCode& status) {
1403+    applyPattern(pattern, pos, options, symbols, status);
1404     _dbgct(this);
1405 }
1406
1407@@ -199,7 +169,7 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
1408             // start with input set to guarantee inclusion
1409             // USET_CASE: remove strings because the strings will actually be reduced (folded);
1410             //            therefore, start with no strings and add only those needed
1411-            if (attribute & USET_CASE_INSENSITIVE) {
1412+            if ((attribute & USET_CASE_INSENSITIVE) && foldSet.hasStrings()) {
1413                 foldSet.strings->removeAllElements();
1414             }
1415
1416@@ -234,7 +204,7 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
1417                     }
1418                 }
1419             }
1420-            if (strings != NULL && strings->size() > 0) {
1421+            if (hasStrings()) {
1422                 if (attribute & USET_CASE_INSENSITIVE) {
1423                     for (int32_t j=0; j<strings->size(); ++j) {
1424                         str = *(const UnicodeString *) strings->elementAt(j);
1425diff --git a/source/common/uniset_props.cpp b/source/common/uniset_props.cpp
1426index 6cfd80a7..e98c175f 100644
1427--- a/source/common/uniset_props.cpp
1428+++ b/source/common/uniset_props.cpp
1429@@ -47,10 +47,6 @@
1430
1431 U_NAMESPACE_USE
1432
1433-// initial storage. Must be >= 0
1434-// *** same as in uniset.cpp ! ***
1435-#define START_EXTRA 16
1436-
1437 // Define UChar constants using hex for EBCDIC compatibility
1438 // Used #define to reduce private static exports and memory access time.
1439 #define SET_OPEN        ((UChar)0x005B) /*[*/
1440@@ -185,21 +181,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
1441  * @param pattern a string specifying what characters are in the set
1442  */
1443 UnicodeSet::UnicodeSet(const UnicodeString& pattern,
1444-                       UErrorCode& status) :
1445-    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
1446-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
1447-    fFlags(0)
1448-{
1449-    if(U_SUCCESS(status)){
1450-        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
1451-        /* test for NULL */
1452-        if(list == NULL) {
1453-            status = U_MEMORY_ALLOCATION_ERROR;
1454-        }else{
1455-            allocateStrings(status);
1456-            applyPattern(pattern, status);
1457-        }
1458-    }
1459+                       UErrorCode& status) {
1460+    applyPattern(pattern, status);
1461     _dbgct(this);
1462 }
1463
1464@@ -713,6 +696,11 @@ static UBool numericValueFilter(UChar32 ch, void* context) {
1465     return u_getNumericValue(ch) == *(double*)context;
1466 }
1467
1468+static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
1469+    int32_t value = *(int32_t*)context;
1470+    return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
1471+}
1472+
1473 static UBool versionFilter(UChar32 ch, void* context) {
1474     static const UVersionInfo none = { 0, 0, 0, 0 };
1475     UVersionInfo v;
1476@@ -721,6 +709,16 @@ static UBool versionFilter(UChar32 ch, void* context) {
1477     return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
1478 }
1479
1480+typedef struct {
1481+    UProperty prop;
1482+    int32_t value;
1483+} IntPropertyContext;
1484+
1485+static UBool intPropertyFilter(UChar32 ch, void* context) {
1486+    IntPropertyContext* c = (IntPropertyContext*)context;
1487+    return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
1488+}
1489+
1490 static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
1491     return uscript_hasScript(ch, *(UScriptCode*)context);
1492 }
1493@@ -781,43 +779,6 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
1494
1495 namespace {
1496
1497-/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */
1498-uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) {
1499-    uint32_t mask = *(const uint32_t *)context;
1500-    value = U_MASK(value) & mask;
1501-    if (value != 0) { value = 1; }
1502-    return value;
1503-}
1504-
1505-/** Maps one map value to 1, all others to 0. */
1506-uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) {
1507-    uint32_t v = *(const uint32_t *)context;
1508-    return value == v ? 1 : 0;
1509-}
1510-
1511-}  // namespace
1512-
1513-void UnicodeSet::applyIntPropertyValue(const UCPMap *map,
1514-                                       UCPMapValueFilter *filter, const void *context,
1515-                                       UErrorCode &errorCode) {
1516-    if (U_FAILURE(errorCode)) { return; }
1517-    clear();
1518-    UChar32 start = 0, end;
1519-    uint32_t value;
1520-    while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
1521-                                  filter, context, &value)) >= 0) {
1522-        if (value != 0) {
1523-            add(start, end);
1524-        }
1525-        start = end + 1;
1526-    }
1527-    if (isBogus()) {
1528-        errorCode = U_MEMORY_ALLOCATION_ERROR;
1529-    }
1530-}
1531-
1532-namespace {
1533-
1534 static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
1535     /* Note: we use ' ' in compiler code page */
1536     int32_t j = 0;
1537@@ -845,11 +806,10 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
1538
1539 UnicodeSet&
1540 UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
1541-    if (U_FAILURE(ec)) { return *this; }
1542-    // All of the following check isFrozen() before modifying this set.
1543+    if (U_FAILURE(ec) || isFrozen()) { return *this; }
1544     if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
1545-        const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec);
1546-        applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec);
1547+        const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
1548+        applyFilter(generalCategoryMaskFilter, &value, inclusions, ec);
1549     } else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
1550         const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
1551         UScriptCode script = (UScriptCode)value;
1552@@ -866,14 +826,11 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
1553             clear();
1554         }
1555     } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
1556-        const UCPMap *map = u_getIntPropertyMap(prop, &ec);
1557-        applyIntPropertyValue(map, intValueFilter, &value, ec);
1558+        const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
1559+        IntPropertyContext c = {prop, value};
1560+        applyFilter(intPropertyFilter, &c, inclusions, ec);
1561     } else {
1562-        // This code used to always call getInclusions(property source)
1563-        // which sets an error for an unsupported property.
1564         ec = U_ILLEGAL_ARGUMENT_ERROR;
1565-        // Otherwise we would just clear() this set because
1566-        // getIntPropertyValue(c, prop) returns 0 for all code points.
1567     }
1568     return *this;
1569 }
1570diff --git a/source/common/uprops.h b/source/common/uprops.h
1571index 1a8e4e84..34b3600b 100644
1572--- a/source/common/uprops.h
1573+++ b/source/common/uprops.h
1574@@ -462,7 +462,6 @@ class UnicodeSet;
1575 class CharacterProperties {
1576 public:
1577     CharacterProperties() = delete;
1578-    static void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode);
1579     static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode);
1580 };
1581
1582diff --git a/source/common/uset.cpp b/source/common/uset.cpp
1583index 39ad0a34..eae7981d 100644
1584--- a/source/common/uset.cpp
1585+++ b/source/common/uset.cpp
1586@@ -249,7 +249,7 @@ class USetAccess /* not : public UObject because all methods are static */ {
1587 public:
1588     /* Try to have the compiler inline these*/
1589     inline static int32_t getStringCount(const UnicodeSet& set) {
1590-        return set.getStringCount();
1591+        return set.stringsSize();
1592     }
1593     inline static const UnicodeString* getString(const UnicodeSet& set,
1594                                                  int32_t i) {
1595diff --git a/source/common/usetiter.cpp b/source/common/usetiter.cpp
1596index 93048ba2..79151690 100644
1597--- a/source/common/usetiter.cpp
1598+++ b/source/common/usetiter.cpp
1599@@ -116,7 +116,7 @@ void UnicodeSetIterator::reset() {
1600         stringCount = 0;
1601     } else {
1602         endRange = set->getRangeCount() - 1;
1603-        stringCount = set->strings->size();
1604+        stringCount = set->stringsSize();
1605     }
1606     range = 0;
1607     endElement = -1;
1608