1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * scriptset.cpp
10 *
11 * created on: 2013 Jan 7
12 * created by: Andy Heninger
13 */
14
15 #include "unicode/utypes.h"
16
17 #include "unicode/uchar.h"
18 #include "unicode/unistr.h"
19
20 #include "scriptset.h"
21 #include "uassert.h"
22 #include "cmemory.h"
23
24 U_NAMESPACE_BEGIN
25
26 //----------------------------------------------------------------------------
27 //
28 // ScriptSet implementation
29 //
30 //----------------------------------------------------------------------------
ScriptSet()31 ScriptSet::ScriptSet() {
32 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
33 bits[i] = 0;
34 }
35 }
36
~ScriptSet()37 ScriptSet::~ScriptSet() {
38 }
39
// Copy constructor: delegates to the assignment operator so that the
// word-by-word copy logic lives in exactly one place.
ScriptSet::ScriptSet(const ScriptSet &other) {
    operator=(other);
}
43
44
// Assignment: copies every word of other's bit array into this set and
// returns a reference to this set.
ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    uint32_t word = 0;
    while (word < UPRV_LENGTHOF(bits)) {
        bits[word] = other.bits[word];
        ++word;
    }
    return *this;
}
51
52
operator ==(const ScriptSet & other) const53 UBool ScriptSet::operator == (const ScriptSet &other) const {
54 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
55 if (bits[i] != other.bits[i]) {
56 return FALSE;
57 }
58 }
59 return TRUE;
60 }
61
test(UScriptCode script,UErrorCode & status) const62 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
63 if (U_FAILURE(status)) {
64 return FALSE;
65 }
66 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
67 status = U_ILLEGAL_ARGUMENT_ERROR;
68 return FALSE;
69 }
70 uint32_t index = script / 32;
71 uint32_t bit = 1 << (script & 31);
72 return ((bits[index] & bit) != 0);
73 }
74
75
set(UScriptCode script,UErrorCode & status)76 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
77 if (U_FAILURE(status)) {
78 return *this;
79 }
80 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
81 status = U_ILLEGAL_ARGUMENT_ERROR;
82 return *this;
83 }
84 uint32_t index = script / 32;
85 uint32_t bit = 1 << (script & 31);
86 bits[index] |= bit;
87 return *this;
88 }
89
reset(UScriptCode script,UErrorCode & status)90 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
91 if (U_FAILURE(status)) {
92 return *this;
93 }
94 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
95 status = U_ILLEGAL_ARGUMENT_ERROR;
96 return *this;
97 }
98 uint32_t index = script / 32;
99 uint32_t bit = 1 << (script & 31);
100 bits[index] &= ~bit;
101 return *this;
102 }
103
104
105
// In-place union: after the call this set also contains every member of
// other. Returns a reference to this set.
ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    uint32_t word = 0;
    while (word < UPRV_LENGTHOF(bits)) {
        bits[word] |= other.bits[word];
        ++word;
    }
    return *this;
}
112
// In-place intersection: after the call this set keeps only the members
// that are also present in other. Returns a reference to this set.
ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    uint32_t word = 0;
    while (word < UPRV_LENGTHOF(bits)) {
        bits[word] &= other.bits[word];
        ++word;
    }
    return *this;
}
119
intersect(UScriptCode script,UErrorCode & status)120 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
121 ScriptSet t;
122 t.set(script, status);
123 if (U_SUCCESS(status)) {
124 this->intersect(t);
125 }
126 return *this;
127 }
128
intersects(const ScriptSet & other) const129 UBool ScriptSet::intersects(const ScriptSet &other) const {
130 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
131 if ((bits[i] & other.bits[i]) != 0) {
132 return true;
133 }
134 }
135 return false;
136 }
137
contains(const ScriptSet & other) const138 UBool ScriptSet::contains(const ScriptSet &other) const {
139 ScriptSet t(*this);
140 t.intersect(other);
141 return (t == other);
142 }
143
144
setAll()145 ScriptSet &ScriptSet::setAll() {
146 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
147 bits[i] = 0xffffffffu;
148 }
149 return *this;
150 }
151
152
resetAll()153 ScriptSet &ScriptSet::resetAll() {
154 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
155 bits[i] = 0;
156 }
157 return *this;
158 }
159
countMembers() const160 int32_t ScriptSet::countMembers() const {
161 // This bit counter is good for sparse numbers of '1's, which is
162 // very much the case that we will usually have.
163 int32_t count = 0;
164 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
165 uint32_t x = bits[i];
166 while (x > 0) {
167 count++;
168 x &= (x - 1); // and off the least significant one bit.
169 }
170 }
171 return count;
172 }
173
hashCode() const174 int32_t ScriptSet::hashCode() const {
175 int32_t hash = 0;
176 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
177 hash ^= bits[i];
178 }
179 return hash;
180 }
181
nextSetBit(int32_t fromIndex) const182 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
183 // TODO: Wants a better implementation.
184 if (fromIndex < 0) {
185 return -1;
186 }
187 UErrorCode status = U_ZERO_ERROR;
188 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
189 if (test((UScriptCode)scriptIndex, status)) {
190 return scriptIndex;
191 }
192 }
193 return -1;
194 }
195
isEmpty() const196 UBool ScriptSet::isEmpty() const {
197 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
198 if (bits[i] != 0) {
199 return FALSE;
200 }
201 }
202 return TRUE;
203 }
204
displayScripts(UnicodeString & dest) const205 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
206 UBool firstTime = TRUE;
207 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
208 if (!firstTime) {
209 dest.append((UChar)0x20);
210 }
211 firstTime = FALSE;
212 const char *scriptName = uscript_getShortName((UScriptCode(i)));
213 dest.append(UnicodeString(scriptName, -1, US_INV));
214 }
215 return dest;
216 }
217
parseScripts(const UnicodeString & scriptString,UErrorCode & status)218 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
219 resetAll();
220 if (U_FAILURE(status)) {
221 return *this;
222 }
223 UnicodeString oneScriptName;
224 for (int32_t i=0; i<scriptString.length();) {
225 UChar32 c = scriptString.char32At(i);
226 i = scriptString.moveIndex32(i, 1);
227 if (!u_isUWhiteSpace(c)) {
228 oneScriptName.append(c);
229 if (i < scriptString.length()) {
230 continue;
231 }
232 }
233 if (oneScriptName.length() > 0) {
234 char buf[40];
235 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
236 buf[sizeof(buf)-1] = 0;
237 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
238 if (sc == UCHAR_INVALID_CODE) {
239 status = U_ILLEGAL_ARGUMENT_ERROR;
240 } else {
241 this->set((UScriptCode)sc, status);
242 }
243 if (U_FAILURE(status)) {
244 return *this;
245 }
246 oneScriptName.remove();
247 }
248 }
249 return *this;
250 }
251
setScriptExtensions(UChar32 codePoint,UErrorCode & status)252 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
253 if (U_FAILURE(status)) { return; }
254 static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5;
255 MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
256 UErrorCode internalStatus = U_ZERO_ERROR;
257 int32_t script_count = -1;
258
259 while (TRUE) {
260 script_count = uscript_getScriptExtensions(
261 codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
262 if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
263 // Need to allocate more space
264 if (scripts.resize(script_count) == NULL) {
265 status = U_MEMORY_ALLOCATION_ERROR;
266 return;
267 }
268 internalStatus = U_ZERO_ERROR;
269 } else {
270 break;
271 }
272 }
273
274 // Check if we failed for some reason other than buffer overflow
275 if (U_FAILURE(internalStatus)) {
276 status = internalStatus;
277 return;
278 }
279
280 // Load the scripts into the ScriptSet and return
281 for (int32_t i = 0; i < script_count; i++) {
282 this->set(scripts[i], status);
283 if (U_FAILURE(status)) { return; }
284 }
285 }
286
287 U_NAMESPACE_END
288
289 U_CAPI UBool U_EXPORT2
uhash_equalsScriptSet(const UElement key1,const UElement key2)290 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
291 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
292 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
293 return (*s1 == *s2);
294 }
295
296 U_CAPI int8_t U_EXPORT2
uhash_compareScriptSet(UElement key0,UElement key1)297 uhash_compareScriptSet(UElement key0, UElement key1) {
298 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
299 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
300 int32_t diff = s0->countMembers() - s1->countMembers();
301 if (diff != 0) return diff;
302 int32_t i0 = s0->nextSetBit(0);
303 int32_t i1 = s1->nextSetBit(0);
304 while ((diff = i0-i1) == 0 && i0 > 0) {
305 i0 = s0->nextSetBit(i0+1);
306 i1 = s1->nextSetBit(i1+1);
307 }
308 return (int8_t)diff;
309 }
310
311 U_CAPI int32_t U_EXPORT2
uhash_hashScriptSet(const UElement key)312 uhash_hashScriptSet(const UElement key) {
313 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
314 return s->hashCode();
315 }
316
317 U_CAPI void U_EXPORT2
uhash_deleteScriptSet(void * obj)318 uhash_deleteScriptSet(void *obj) {
319 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
320 delete s;
321 }
322