1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7 //===============================================================================
8 //
9 // File sortkey.cpp
10 //
11 //
12 //
13 // Created by: Helena Shih
14 //
15 // Modification History:
16 //
17 // Date Name Description
18 //
19 // 6/20/97 helena Java class name change.
20 // 6/23/97 helena Added comments to make code more readable.
21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString
22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
23 // Cleaned up operator=
24 // 07/12/99 helena HPUX 11 CC port.
25 // 03/06/01 synwee Modified compareTo, to handle the result of
26 // 2 string similar in contents, but one is longer
27 // than the other
28 //===============================================================================
29
30 #include "unicode/utypes.h"
31
32 #if !UCONFIG_NO_COLLATION
33
34 #include "unicode/sortkey.h"
35 #include "cmemory.h"
36 #include "uhash.h"
37
38 U_NAMESPACE_BEGIN
39
40 // A hash code of kInvalidHashCode indicates that the has code needs
41 // to be computed. A hash code of kEmptyHashCode is used for empty keys
42 // and for any key whose computed hash code is kInvalidHashCode.
43 #define kInvalidHashCode ((int32_t)0)
44 #define kEmptyHashCode ((int32_t)1)
45
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47
48 CollationKey::CollationKey()
49 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50 fHashCode(kEmptyHashCode), fBytes(NULL)
51 {
52 }
53
54 // Create a collation key from a bit array.
CollationKey(const uint8_t * newValues,int32_t count)55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57 fHashCode(kInvalidHashCode)
58 {
59 fBytes = (uint8_t *)uprv_malloc(count);
60
61 if (fBytes == NULL)
62 {
63 setToBogus();
64 return;
65 }
66
67 uprv_memcpy(fBytes, newValues, fCount);
68 }
69
CollationKey(const CollationKey & other)70 CollationKey::CollationKey(const CollationKey& other)
71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72 fHashCode(other.fHashCode), fBytes(NULL)
73 {
74 if (other.fBogus)
75 {
76 setToBogus();
77 return;
78 }
79
80 fBytes = (uint8_t *)uprv_malloc(fCapacity);
81
82 if (fBytes == NULL)
83 {
84 setToBogus();
85 return;
86 }
87
88 uprv_memcpy(fBytes, other.fBytes, other.fCount);
89 if(fCapacity>fCount) {
90 uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91 }
92 }
93
~CollationKey()94 CollationKey::~CollationKey()
95 {
96 uprv_free(fBytes);
97 }
98
adopt(uint8_t * values,int32_t capacity,int32_t count)99 void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
100 if(fBytes != NULL) {
101 uprv_free(fBytes);
102 }
103 fBytes = values;
104 fCapacity = capacity;
105 setLength(count);
106 }
107
setLength(int32_t newLength)108 void CollationKey::setLength(int32_t newLength) {
109 fBogus = FALSE;
110 fCount = newLength;
111 fHashCode = kInvalidHashCode;
112 }
113
114 // set the key to an empty state
115 CollationKey&
reset()116 CollationKey::reset()
117 {
118 fCount = 0;
119 fBogus = FALSE;
120 fHashCode = kEmptyHashCode;
121
122 return *this;
123 }
124
125 // set the key to a "bogus" or invalid state
126 CollationKey&
setToBogus()127 CollationKey::setToBogus()
128 {
129 uprv_free(fBytes);
130 fBytes = NULL;
131
132 fCapacity = 0;
133 fCount = 0;
134 fHashCode = kInvalidHashCode;
135
136 return *this;
137 }
138
139 UBool
operator ==(const CollationKey & source) const140 CollationKey::operator==(const CollationKey& source) const
141 {
142 return (this->fCount == source.fCount &&
143 (this->fBytes == source.fBytes ||
144 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
145 }
146
147 const CollationKey&
operator =(const CollationKey & other)148 CollationKey::operator=(const CollationKey& other)
149 {
150 if (this != &other)
151 {
152 if (other.isBogus())
153 {
154 return setToBogus();
155 }
156
157 if (other.fBytes != NULL)
158 {
159 ensureCapacity(other.fCount);
160
161 if (isBogus())
162 {
163 return *this;
164 }
165
166 fHashCode = other.fHashCode;
167 uprv_memcpy(fBytes, other.fBytes, fCount);
168 }
169 else
170 {
171 fCount = 0;
172 fBogus = FALSE;
173 fHashCode = kEmptyHashCode;
174 }
175 }
176
177 return *this;
178 }
179
180 // Bitwise comparison for the collation keys.
181 // NOTE: this is somewhat messy 'cause we can't count
182 // on memcmp returning the exact values which match
183 // Collator::EComparisonResult
184 Collator::EComparisonResult
compareTo(const CollationKey & target) const185 CollationKey::compareTo(const CollationKey& target) const
186 {
187 uint8_t *src = this->fBytes;
188 uint8_t *tgt = target.fBytes;
189
190 // are we comparing the same string
191 if (src == tgt)
192 return Collator::EQUAL;
193
194 /*
195 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
196 if (count == 0)
197 {
198 // If count is 0, at least one of the keys is empty.
199 // An empty key is always LESS than a non-empty one
200 // and EQUAL to another empty
201 if (this->fCount < target.fCount)
202 {
203 return Collator::LESS;
204 }
205
206 if (this->fCount > target.fCount)
207 {
208 return Collator::GREATER;
209 }
210 return Collator::EQUAL;
211 }
212 */
213
214 int minLength;
215 Collator::EComparisonResult result;
216
217 // are we comparing different lengths?
218 if (this->fCount != target.fCount) {
219 if (this->fCount < target.fCount) {
220 minLength = this->fCount;
221 result = Collator::LESS;
222 }
223 else {
224 minLength = target.fCount;
225 result = Collator::GREATER;
226 }
227 }
228 else {
229 minLength = target.fCount;
230 result = Collator::EQUAL;
231 }
232
233 if (minLength > 0) {
234 int diff = uprv_memcmp(src, tgt, minLength);
235 if (diff > 0) {
236 return Collator::GREATER;
237 }
238 else
239 if (diff < 0) {
240 return Collator::LESS;
241 }
242 }
243
244 return result;
245 /*
246 if (result < 0)
247 {
248 return Collator::LESS;
249 }
250
251 if (result > 0)
252 {
253 return Collator::GREATER;
254 }
255 return Collator::EQUAL;
256 */
257 }
258
259 // Bitwise comparison for the collation keys.
260 UCollationResult
compareTo(const CollationKey & target,UErrorCode & status) const261 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
262 {
263 if(U_SUCCESS(status)) {
264 uint8_t *src = this->fBytes;
265 uint8_t *tgt = target.fBytes;
266
267 // are we comparing the same string
268 if (src == tgt)
269 return UCOL_EQUAL;
270
271 int minLength;
272 UCollationResult result;
273
274 // are we comparing different lengths?
275 if (this->fCount != target.fCount) {
276 if (this->fCount < target.fCount) {
277 minLength = this->fCount;
278 result = UCOL_LESS;
279 }
280 else {
281 minLength = target.fCount;
282 result = UCOL_GREATER;
283 }
284 }
285 else {
286 minLength = target.fCount;
287 result = UCOL_EQUAL;
288 }
289
290 if (minLength > 0) {
291 int diff = uprv_memcmp(src, tgt, minLength);
292 if (diff > 0) {
293 return UCOL_GREATER;
294 }
295 else
296 if (diff < 0) {
297 return UCOL_LESS;
298 }
299 }
300
301 return result;
302 } else {
303 return UCOL_EQUAL;
304 }
305 }
306
307 CollationKey&
ensureCapacity(int32_t newSize)308 CollationKey::ensureCapacity(int32_t newSize)
309 {
310 if (fCapacity < newSize)
311 {
312 uprv_free(fBytes);
313
314 fBytes = (uint8_t *)uprv_malloc(newSize);
315
316 if (fBytes == NULL)
317 {
318 return setToBogus();
319 }
320
321 uprv_memset(fBytes, 0, fCapacity);
322 fCapacity = newSize;
323 }
324
325 fBogus = FALSE;
326 fCount = newSize;
327 fHashCode = kInvalidHashCode;
328
329 return *this;
330 }
331
332 #ifdef U_USE_COLLATION_KEY_DEPRECATES
333 // Create a copy of the byte array.
334 uint8_t*
toByteArray(int32_t & count) const335 CollationKey::toByteArray(int32_t& count) const
336 {
337 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
338
339 if (result == NULL)
340 {
341 count = 0;
342 }
343 else
344 {
345 count = fCount;
346 uprv_memcpy(result, fBytes, fCount);
347 }
348
349 return result;
350 }
351 #endif
352
353 int32_t
hashCode() const354 CollationKey::hashCode() const
355 {
356 // (Cribbed from UnicodeString)
357 // We cache the hashCode; when it becomes invalid, due to any change to the
358 // string, we note this by setting it to kInvalidHashCode. [LIU]
359
360 // Note: This method is semantically const, but physically non-const.
361
362 if (fHashCode == kInvalidHashCode)
363 {
364 UHashTok key;
365 key.pointer = fBytes;
366 ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
367 #if 0
368 // We compute the hash by iterating sparsely over 64 (at most) characters
369 // spaced evenly through the string. For each character, we multiply the
370 // previous hash value by a prime number and add the new character in,
371 // in the manner of a additive linear congruential random number generator,
372 // thus producing a pseudorandom deterministic value which should be well
373 // distributed over the output range. [LIU]
374 const uint8_t *p = fBytes, *limit = fBytes + fCount;
375 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
376 int32_t hash = 0;
377
378 while (p < limit)
379 {
380 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
381 p += inc;
382 }
383
384 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385 if (hash == kInvalidHashCode)
386 {
387 hash = kEmptyHashCode;
388 }
389
390 ((CollationKey *)this)->fHashCode = hash; // cast away const
391 #endif
392 }
393
394 return fHashCode;
395 }
396
397 U_NAMESPACE_END
398
399 U_CAPI int32_t U_EXPORT2
ucol_keyHashCode(const uint8_t * key,int32_t length)400 ucol_keyHashCode(const uint8_t *key,
401 int32_t length)
402 {
403 U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
404 return newKey.hashCode();
405 }
406
407 #endif /* #if !UCONFIG_NO_COLLATION */
408