• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2013, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *
7 * scriptset.cpp
8 *
9 * created on: 2013 Jan 7
10 * created by: Andy Heninger
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #include "unicode/uchar.h"
16 #include "unicode/unistr.h"
17 
18 #include "scriptset.h"
19 #include "uassert.h"
20 
21 U_NAMESPACE_BEGIN
22 
// Number of elements in a statically sized array, as an int32_t.
// The whole expansion is wrapped in parentheses so that the cast cannot
// combine with operators adjacent to the macro at the point of use.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
24 
25 //----------------------------------------------------------------------------
26 //
27 //  ScriptSet implementation
28 //
29 //----------------------------------------------------------------------------
ScriptSet()30 ScriptSet::ScriptSet() {
31     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
32         bits[i] = 0;
33     }
34 }
35 
~ScriptSet()36 ScriptSet::~ScriptSet() {
37 }
38 
// Copy constructor: delegates to the assignment operator, which copies
// every word of the bit array from other.
ScriptSet::ScriptSet(const ScriptSet &other) {
    this->operator=(other);
}
42 
43 
// Assignment: makes this set an exact copy of other by copying the bit
// array word by word. Returns *this so assignments can be chained.
ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    int32_t word = LENGTHOF(bits);
    while (word-- > 0) {
        bits[word] = other.bits[word];
    }
    return *this;
}
50 
51 
operator ==(const ScriptSet & other) const52 UBool ScriptSet::operator == (const ScriptSet &other) const {
53     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
54         if (bits[i] != other.bits[i]) {
55             return FALSE;
56         }
57     }
58     return TRUE;
59 }
60 
test(UScriptCode script,UErrorCode & status) const61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
62     if (U_FAILURE(status)) {
63         return FALSE;
64     }
65     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
66         status = U_ILLEGAL_ARGUMENT_ERROR;
67         return FALSE;
68     }
69     uint32_t index = script / 32;
70     uint32_t bit   = 1 << (script & 31);
71     return ((bits[index] & bit) != 0);
72 }
73 
74 
set(UScriptCode script,UErrorCode & status)75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
76     if (U_FAILURE(status)) {
77         return *this;
78     }
79     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
80         status = U_ILLEGAL_ARGUMENT_ERROR;
81         return *this;
82     }
83     uint32_t index = script / 32;
84     uint32_t bit   = 1 << (script & 31);
85     bits[index] |= bit;
86     return *this;
87 }
88 
reset(UScriptCode script,UErrorCode & status)89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
90     if (U_FAILURE(status)) {
91         return *this;
92     }
93     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
94         status = U_ILLEGAL_ARGUMENT_ERROR;
95         return *this;
96     }
97     uint32_t index = script / 32;
98     uint32_t bit   = 1 << (script & 31);
99     bits[index] &= ~bit;
100     return *this;
101 }
102 
103 
104 
// In-place union: after the call this set also holds every member of
// other (each word is OR-ed with the matching word of other).
// Returns *this.
ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    int32_t word = LENGTHOF(bits);
    while (word-- > 0) {
        bits[word] |= other.bits[word];
    }
    return *this;
}
111 
// In-place intersection: keeps only the members that are also present
// in other (each word is AND-ed with the matching word of other).
// Returns *this.
ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    int32_t word = LENGTHOF(bits);
    while (word-- > 0) {
        bits[word] &= other.bits[word];
    }
    return *this;
}
118 
intersect(UScriptCode script,UErrorCode & status)119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
120     ScriptSet t;
121     t.set(script, status);
122     if (U_SUCCESS(status)) {
123         this->intersect(t);
124     }
125     return *this;
126 }
127 
intersects(const ScriptSet & other) const128 UBool ScriptSet::intersects(const ScriptSet &other) const {
129     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
130         if ((bits[i] & other.bits[i]) != 0) {
131             return true;
132         }
133     }
134     return false;
135 }
136 
contains(const ScriptSet & other) const137 UBool ScriptSet::contains(const ScriptSet &other) const {
138     ScriptSet t(*this);
139     t.intersect(other);
140     return (t == other);
141 }
142 
143 
setAll()144 ScriptSet &ScriptSet::setAll() {
145     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
146         bits[i] = 0xffffffffu;
147     }
148     return *this;
149 }
150 
151 
resetAll()152 ScriptSet &ScriptSet::resetAll() {
153     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
154         bits[i] = 0;
155     }
156     return *this;
157 }
158 
countMembers() const159 int32_t ScriptSet::countMembers() const {
160     // This bit counter is good for sparse numbers of '1's, which is
161     //  very much the case that we will usually have.
162     int32_t count = 0;
163     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
164         uint32_t x = bits[i];
165         while (x > 0) {
166             count++;
167             x &= (x - 1);    // and off the least significant one bit.
168         }
169     }
170     return count;
171 }
172 
hashCode() const173 int32_t ScriptSet::hashCode() const {
174     int32_t hash = 0;
175     for (int32_t i=0; i<LENGTHOF(bits); i++) {
176         hash ^= bits[i];
177     }
178     return hash;
179 }
180 
nextSetBit(int32_t fromIndex) const181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
182     // TODO: Wants a better implementation.
183     if (fromIndex < 0) {
184         return -1;
185     }
186     UErrorCode status = U_ZERO_ERROR;
187     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
188         if (test((UScriptCode)scriptIndex, status)) {
189             return scriptIndex;
190         }
191     }
192     return -1;
193 }
194 
displayScripts(UnicodeString & dest) const195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
196     UBool firstTime = TRUE;
197     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
198         if (!firstTime) {
199             dest.append((UChar)0x20);
200         }
201         firstTime = FALSE;
202         const char *scriptName = uscript_getShortName((UScriptCode(i)));
203         dest.append(UnicodeString(scriptName, -1, US_INV));
204     }
205     return dest;
206 }
207 
parseScripts(const UnicodeString & scriptString,UErrorCode & status)208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
209     resetAll();
210     if (U_FAILURE(status)) {
211         return *this;
212     }
213     UnicodeString oneScriptName;
214     for (int32_t i=0; i<scriptString.length();) {
215         UChar32 c = scriptString.char32At(i);
216         i = scriptString.moveIndex32(i, 1);
217         if (!u_isUWhiteSpace(c)) {
218             oneScriptName.append(c);
219             if (i < scriptString.length()) {
220                 continue;
221             }
222         }
223         if (oneScriptName.length() > 0) {
224             char buf[40];
225             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
226             buf[sizeof(buf)-1] = 0;
227             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
228             if (sc == UCHAR_INVALID_CODE) {
229                 status = U_ILLEGAL_ARGUMENT_ERROR;
230             } else {
231                 this->set((UScriptCode)sc, status);
232             }
233             if (U_FAILURE(status)) {
234                 return *this;
235             }
236             oneScriptName.remove();
237         }
238     }
239     return *this;
240 }
241 
242 U_NAMESPACE_END
243 
244 U_CAPI UBool U_EXPORT2
uhash_equalsScriptSet(const UElement key1,const UElement key2)245 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
246     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
247     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
248     return (*s1 == *s2);
249 }
250 
251 U_CAPI int8_t U_EXPORT2
uhash_compareScriptSet(UElement key0,UElement key1)252 uhash_compareScriptSet(UElement key0, UElement key1) {
253     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
254     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
255     int32_t diff = s0->countMembers() - s1->countMembers();
256     if (diff != 0) return diff;
257     int32_t i0 = s0->nextSetBit(0);
258     int32_t i1 = s1->nextSetBit(0);
259     while ((diff = i0-i1) == 0 && i0 > 0) {
260         i0 = s0->nextSetBit(i0+1);
261         i1 = s1->nextSetBit(i1+1);
262     }
263     return (int8_t)diff;
264 }
265 
266 U_CAPI int32_t U_EXPORT2
uhash_hashScriptSet(const UElement key)267 uhash_hashScriptSet(const UElement key) {
268     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
269     return s->hashCode();
270 }
271 
272 U_CAPI void U_EXPORT2
uhash_deleteScriptSet(void * obj)273 uhash_deleteScriptSet(void *obj) {
274     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
275     delete s;
276 }
277