• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7 //===============================================================================
8 //
9 // File sortkey.cpp
10 //
11 //
12 //
13 // Created by: Helena Shih
14 //
15 // Modification History:
16 //
17 //  Date         Name          Description
18 //
19 //  6/20/97      helena        Java class name change.
20 //  6/23/97      helena        Added comments to make code more readable.
21 //  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23 //                             Cleaned up operator=
24 // 07/12/99      helena        HPUX 11 CC port.
25 // 03/06/01      synwee        Modified compareTo, to handle the result of
26 //                             2 string similar in contents, but one is longer
27 //                             than the other
28 //===============================================================================
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_COLLATION
33 
34 #include "unicode/sortkey.h"
35 #include "cmemory.h"
36 #include "uhash.h"
37 
38 U_NAMESPACE_BEGIN
39 
40 // A hash code of kInvalidHashCode indicates that the has code needs
41 // to be computed. A hash code of kEmptyHashCode is used for empty keys
42 // and for any key whose computed hash code is kInvalidHashCode.
43 #define kInvalidHashCode ((int32_t)0)
44 #define kEmptyHashCode ((int32_t)1)
45 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47 
48 CollationKey::CollationKey()
49     : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50       fHashCode(kEmptyHashCode), fBytes(NULL)
51 {
52 }
53 
54 // Create a collation key from a bit array.
CollationKey(const uint8_t * newValues,int32_t count)55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56     : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57       fHashCode(kInvalidHashCode)
58 {
59     fBytes = (uint8_t *)uprv_malloc(count);
60 
61     if (fBytes == NULL)
62     {
63         setToBogus();
64         return;
65     }
66 
67     uprv_memcpy(fBytes, newValues, fCount);
68 }
69 
CollationKey(const CollationKey & other)70 CollationKey::CollationKey(const CollationKey& other)
71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72   fHashCode(other.fHashCode), fBytes(NULL)
73 {
74     if (other.fBogus)
75     {
76         setToBogus();
77         return;
78     }
79 
80     fBytes = (uint8_t *)uprv_malloc(fCapacity);
81 
82     if (fBytes == NULL)
83     {
84         setToBogus();
85         return;
86     }
87 
88     uprv_memcpy(fBytes, other.fBytes, other.fCount);
89     if(fCapacity>fCount) {
90         uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91     }
92 }
93 
~CollationKey()94 CollationKey::~CollationKey()
95 {
96         uprv_free(fBytes);
97 }
98 
adopt(uint8_t * values,int32_t count)99 void CollationKey::adopt(uint8_t *values, int32_t count) {
100     if(fBytes != NULL) {
101         uprv_free(fBytes);
102     }
103     fBogus = FALSE;
104     fBytes = values;
105     fCount = count;
106     fCapacity = count;
107     fHashCode = kInvalidHashCode;
108 }
109 
110 // set the key to an empty state
111 CollationKey&
reset()112 CollationKey::reset()
113 {
114     fCount = 0;
115     fBogus = FALSE;
116     fHashCode = kEmptyHashCode;
117 
118     return *this;
119 }
120 
121 // set the key to a "bogus" or invalid state
122 CollationKey&
setToBogus()123 CollationKey::setToBogus()
124 {
125     uprv_free(fBytes);
126     fBytes = NULL;
127 
128     fCapacity = 0;
129     fCount = 0;
130     fHashCode = kInvalidHashCode;
131 
132     return *this;
133 }
134 
135 UBool
operator ==(const CollationKey & source) const136 CollationKey::operator==(const CollationKey& source) const
137 {
138     return (this->fCount == source.fCount &&
139             (this->fBytes == source.fBytes ||
140              uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
141 }
142 
143 const CollationKey&
operator =(const CollationKey & other)144 CollationKey::operator=(const CollationKey& other)
145 {
146     if (this != &other)
147     {
148         if (other.isBogus())
149         {
150             return setToBogus();
151         }
152 
153         if (other.fBytes != NULL)
154         {
155             ensureCapacity(other.fCount);
156 
157             if (isBogus())
158             {
159                 return *this;
160             }
161 
162             fHashCode = other.fHashCode;
163             uprv_memcpy(fBytes, other.fBytes, fCount);
164         }
165         else
166         {
167             fCount = 0;
168             fBogus = FALSE;
169             fHashCode = kEmptyHashCode;
170         }
171     }
172 
173     return *this;
174 }
175 
176 // Bitwise comparison for the collation keys.
177 // NOTE: this is somewhat messy 'cause we can't count
178 // on memcmp returning the exact values which match
179 // Collator::EComparisonResult
180 Collator::EComparisonResult
compareTo(const CollationKey & target) const181 CollationKey::compareTo(const CollationKey& target) const
182 {
183     uint8_t *src = this->fBytes;
184     uint8_t *tgt = target.fBytes;
185 
186     // are we comparing the same string
187     if (src == tgt)
188         return  Collator::EQUAL;
189 
190         /*
191         int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
192         if (count == 0)
193         {
194         // If count is 0, at least one of the keys is empty.
195         // An empty key is always LESS than a non-empty one
196         // and EQUAL to another empty
197         if (this->fCount < target.fCount)
198         {
199         return Collator::LESS;
200         }
201 
202           if (this->fCount > target.fCount)
203           {
204           return Collator::GREATER;
205           }
206           return Collator::EQUAL;
207           }
208     */
209 
210     int                         minLength;
211     Collator::EComparisonResult result;
212 
213     // are we comparing different lengths?
214     if (this->fCount != target.fCount) {
215         if (this->fCount < target.fCount) {
216             minLength = this->fCount;
217             result    =  Collator::LESS;
218         }
219         else {
220             minLength = target.fCount;
221             result    =  Collator::GREATER;
222         }
223     }
224     else {
225         minLength = target.fCount;
226         result    =  Collator::EQUAL;
227     }
228 
229     if (minLength > 0) {
230         int diff = uprv_memcmp(src, tgt, minLength);
231         if (diff > 0) {
232             return Collator::GREATER;
233         }
234         else
235             if (diff < 0) {
236                 return Collator::LESS;
237             }
238     }
239 
240     return result;
241     /*
242     if (result < 0)
243     {
244     return Collator::LESS;
245     }
246 
247       if (result > 0)
248       {
249       return Collator::GREATER;
250       }
251       return Collator::EQUAL;
252     */
253 }
254 
255 // Bitwise comparison for the collation keys.
256 UCollationResult
compareTo(const CollationKey & target,UErrorCode & status) const257 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
258 {
259   if(U_SUCCESS(status)) {
260     uint8_t *src = this->fBytes;
261     uint8_t *tgt = target.fBytes;
262 
263     // are we comparing the same string
264     if (src == tgt)
265         return  UCOL_EQUAL;
266 
267     int                         minLength;
268     UCollationResult result;
269 
270     // are we comparing different lengths?
271     if (this->fCount != target.fCount) {
272         if (this->fCount < target.fCount) {
273             minLength = this->fCount;
274             result    =  UCOL_LESS;
275         }
276         else {
277             minLength = target.fCount;
278             result    =  UCOL_GREATER;
279         }
280     }
281     else {
282         minLength = target.fCount;
283         result    =  UCOL_EQUAL;
284     }
285 
286     if (minLength > 0) {
287         int diff = uprv_memcmp(src, tgt, minLength);
288         if (diff > 0) {
289             return UCOL_GREATER;
290         }
291         else
292             if (diff < 0) {
293                 return UCOL_LESS;
294             }
295     }
296 
297     return result;
298   } else {
299     return UCOL_EQUAL;
300   }
301 }
302 
303 CollationKey&
ensureCapacity(int32_t newSize)304 CollationKey::ensureCapacity(int32_t newSize)
305 {
306     if (fCapacity < newSize)
307     {
308         uprv_free(fBytes);
309 
310         fBytes = (uint8_t *)uprv_malloc(newSize);
311 
312         if (fBytes == NULL)
313         {
314             return setToBogus();
315         }
316 
317         uprv_memset(fBytes, 0, fCapacity);
318         fCapacity = newSize;
319     }
320 
321     fBogus = FALSE;
322     fCount = newSize;
323     fHashCode = kInvalidHashCode;
324 
325     return *this;
326 }
327 
328 #ifdef U_USE_COLLATION_KEY_DEPRECATES
329 // Create a copy of the byte array.
330 uint8_t*
toByteArray(int32_t & count) const331 CollationKey::toByteArray(int32_t& count) const
332 {
333     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
334 
335     if (result == NULL)
336     {
337         count = 0;
338     }
339     else
340     {
341         count = fCount;
342         uprv_memcpy(result, fBytes, fCount);
343     }
344 
345     return result;
346 }
347 #endif
348 
349 int32_t
hashCode() const350 CollationKey::hashCode() const
351 {
352     // (Cribbed from UnicodeString)
353     // We cache the hashCode; when it becomes invalid, due to any change to the
354     // string, we note this by setting it to kInvalidHashCode. [LIU]
355 
356     // Note: This method is semantically const, but physically non-const.
357 
358     if (fHashCode == kInvalidHashCode)
359     {
360         UHashTok key;
361         key.pointer = fBytes;
362         ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
363 #if 0
364         // We compute the hash by iterating sparsely over 64 (at most) characters
365         // spaced evenly through the string.  For each character, we multiply the
366         // previous hash value by a prime number and add the new character in,
367         // in the manner of a additive linear congruential random number generator,
368         // thus producing a pseudorandom deterministic value which should be well
369         // distributed over the output range. [LIU]
370         const uint8_t   *p = fBytes, *limit = fBytes + fCount;
371         int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
372         int32_t         hash = 0;
373 
374         while (p < limit)
375         {
376             hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
377             p += inc;
378         }
379 
380         // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
381         if (hash == kInvalidHashCode)
382         {
383             hash = kEmptyHashCode;
384         }
385 
386         ((CollationKey *)this)->fHashCode = hash; // cast away const
387 #endif
388     }
389 
390     return fHashCode;
391 }
392 
393 U_NAMESPACE_END
394 
395 U_CAPI int32_t U_EXPORT2
ucol_keyHashCode(const uint8_t * key,int32_t length)396 ucol_keyHashCode(const uint8_t *key,
397                        int32_t  length)
398 {
399     U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
400     return newKey.hashCode();
401 }
402 
403 #endif /* #if !UCONFIG_NO_COLLATION */
404