• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7 //===============================================================================
8 //
9 // File sortkey.cpp
10 //
11 //
12 //
13 // Created by: Helena Shih
14 //
15 // Modification History:
16 //
17 //  Date         Name          Description
18 //
19 //  6/20/97      helena        Java class name change.
20 //  6/23/97      helena        Added comments to make code more readable.
21 //  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23 //                             Cleaned up operator=
24 // 07/12/99      helena        HPUX 11 CC port.
25 // 03/06/01      synwee        Modified compareTo, to handle the result of
26 //                             2 string similar in contents, but one is longer
27 //                             than the other
28 //===============================================================================
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_COLLATION
33 
34 #include "unicode/sortkey.h"
35 #include "cmemory.h"
36 #include "uhash.h"
37 
38 U_NAMESPACE_BEGIN
39 
40 // A hash code of kInvalidHashCode indicates that the has code needs
41 // to be computed. A hash code of kEmptyHashCode is used for empty keys
42 // and for any key whose computed hash code is kInvalidHashCode.
43 #define kInvalidHashCode ((int32_t)0)
44 #define kEmptyHashCode ((int32_t)1)
45 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47 
48 CollationKey::CollationKey()
49     : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50       fHashCode(kEmptyHashCode), fBytes(NULL)
51 {
52 }
53 
54 // Create a collation key from a bit array.
CollationKey(const uint8_t * newValues,int32_t count)55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56     : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57       fHashCode(kInvalidHashCode)
58 {
59     fBytes = (uint8_t *)uprv_malloc(count);
60 
61     if (fBytes == NULL)
62     {
63         setToBogus();
64         return;
65     }
66 
67     uprv_memcpy(fBytes, newValues, fCount);
68 }
69 
CollationKey(const CollationKey & other)70 CollationKey::CollationKey(const CollationKey& other)
71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72   fHashCode(other.fHashCode), fBytes(NULL)
73 {
74     if (other.fBogus)
75     {
76         setToBogus();
77         return;
78     }
79 
80     fBytes = (uint8_t *)uprv_malloc(fCapacity);
81 
82     if (fBytes == NULL)
83     {
84         setToBogus();
85         return;
86     }
87 
88     uprv_memcpy(fBytes, other.fBytes, other.fCount);
89     if(fCapacity>fCount) {
90         uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91     }
92 }
93 
~CollationKey()94 CollationKey::~CollationKey()
95 {
96         uprv_free(fBytes);
97 }
98 
adopt(uint8_t * values,int32_t capacity,int32_t count)99 void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
100     if(fBytes != NULL) {
101         uprv_free(fBytes);
102     }
103     fBytes = values;
104     fCapacity = capacity;
105     setLength(count);
106 }
107 
setLength(int32_t newLength)108 void CollationKey::setLength(int32_t newLength) {
109     fBogus = FALSE;
110     fCount = newLength;
111     fHashCode = kInvalidHashCode;
112 }
113 
114 // set the key to an empty state
115 CollationKey&
reset()116 CollationKey::reset()
117 {
118     fCount = 0;
119     fBogus = FALSE;
120     fHashCode = kEmptyHashCode;
121 
122     return *this;
123 }
124 
125 // set the key to a "bogus" or invalid state
126 CollationKey&
setToBogus()127 CollationKey::setToBogus()
128 {
129     uprv_free(fBytes);
130     fBytes = NULL;
131 
132     fCapacity = 0;
133     fCount = 0;
134     fHashCode = kInvalidHashCode;
135 
136     return *this;
137 }
138 
139 UBool
operator ==(const CollationKey & source) const140 CollationKey::operator==(const CollationKey& source) const
141 {
142     return (this->fCount == source.fCount &&
143             (this->fBytes == source.fBytes ||
144              uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
145 }
146 
147 const CollationKey&
operator =(const CollationKey & other)148 CollationKey::operator=(const CollationKey& other)
149 {
150     if (this != &other)
151     {
152         if (other.isBogus())
153         {
154             return setToBogus();
155         }
156 
157         if (other.fBytes != NULL)
158         {
159             ensureCapacity(other.fCount);
160 
161             if (isBogus())
162             {
163                 return *this;
164             }
165 
166             fHashCode = other.fHashCode;
167             uprv_memcpy(fBytes, other.fBytes, fCount);
168         }
169         else
170         {
171             fCount = 0;
172             fBogus = FALSE;
173             fHashCode = kEmptyHashCode;
174         }
175     }
176 
177     return *this;
178 }
179 
180 // Bitwise comparison for the collation keys.
181 // NOTE: this is somewhat messy 'cause we can't count
182 // on memcmp returning the exact values which match
183 // Collator::EComparisonResult
184 Collator::EComparisonResult
compareTo(const CollationKey & target) const185 CollationKey::compareTo(const CollationKey& target) const
186 {
187     uint8_t *src = this->fBytes;
188     uint8_t *tgt = target.fBytes;
189 
190     // are we comparing the same string
191     if (src == tgt)
192         return  Collator::EQUAL;
193 
194         /*
195         int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
196         if (count == 0)
197         {
198         // If count is 0, at least one of the keys is empty.
199         // An empty key is always LESS than a non-empty one
200         // and EQUAL to another empty
201         if (this->fCount < target.fCount)
202         {
203         return Collator::LESS;
204         }
205 
206           if (this->fCount > target.fCount)
207           {
208           return Collator::GREATER;
209           }
210           return Collator::EQUAL;
211           }
212     */
213 
214     int                         minLength;
215     Collator::EComparisonResult result;
216 
217     // are we comparing different lengths?
218     if (this->fCount != target.fCount) {
219         if (this->fCount < target.fCount) {
220             minLength = this->fCount;
221             result    =  Collator::LESS;
222         }
223         else {
224             minLength = target.fCount;
225             result    =  Collator::GREATER;
226         }
227     }
228     else {
229         minLength = target.fCount;
230         result    =  Collator::EQUAL;
231     }
232 
233     if (minLength > 0) {
234         int diff = uprv_memcmp(src, tgt, minLength);
235         if (diff > 0) {
236             return Collator::GREATER;
237         }
238         else
239             if (diff < 0) {
240                 return Collator::LESS;
241             }
242     }
243 
244     return result;
245     /*
246     if (result < 0)
247     {
248     return Collator::LESS;
249     }
250 
251       if (result > 0)
252       {
253       return Collator::GREATER;
254       }
255       return Collator::EQUAL;
256     */
257 }
258 
259 // Bitwise comparison for the collation keys.
260 UCollationResult
compareTo(const CollationKey & target,UErrorCode & status) const261 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
262 {
263   if(U_SUCCESS(status)) {
264     uint8_t *src = this->fBytes;
265     uint8_t *tgt = target.fBytes;
266 
267     // are we comparing the same string
268     if (src == tgt)
269         return  UCOL_EQUAL;
270 
271     int                         minLength;
272     UCollationResult result;
273 
274     // are we comparing different lengths?
275     if (this->fCount != target.fCount) {
276         if (this->fCount < target.fCount) {
277             minLength = this->fCount;
278             result    =  UCOL_LESS;
279         }
280         else {
281             minLength = target.fCount;
282             result    =  UCOL_GREATER;
283         }
284     }
285     else {
286         minLength = target.fCount;
287         result    =  UCOL_EQUAL;
288     }
289 
290     if (minLength > 0) {
291         int diff = uprv_memcmp(src, tgt, minLength);
292         if (diff > 0) {
293             return UCOL_GREATER;
294         }
295         else
296             if (diff < 0) {
297                 return UCOL_LESS;
298             }
299     }
300 
301     return result;
302   } else {
303     return UCOL_EQUAL;
304   }
305 }
306 
307 CollationKey&
ensureCapacity(int32_t newSize)308 CollationKey::ensureCapacity(int32_t newSize)
309 {
310     if (fCapacity < newSize)
311     {
312         uprv_free(fBytes);
313 
314         fBytes = (uint8_t *)uprv_malloc(newSize);
315 
316         if (fBytes == NULL)
317         {
318             return setToBogus();
319         }
320 
321         uprv_memset(fBytes, 0, fCapacity);
322         fCapacity = newSize;
323     }
324 
325     fBogus = FALSE;
326     fCount = newSize;
327     fHashCode = kInvalidHashCode;
328 
329     return *this;
330 }
331 
332 #ifdef U_USE_COLLATION_KEY_DEPRECATES
333 // Create a copy of the byte array.
334 uint8_t*
toByteArray(int32_t & count) const335 CollationKey::toByteArray(int32_t& count) const
336 {
337     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
338 
339     if (result == NULL)
340     {
341         count = 0;
342     }
343     else
344     {
345         count = fCount;
346         uprv_memcpy(result, fBytes, fCount);
347     }
348 
349     return result;
350 }
351 #endif
352 
353 int32_t
hashCode() const354 CollationKey::hashCode() const
355 {
356     // (Cribbed from UnicodeString)
357     // We cache the hashCode; when it becomes invalid, due to any change to the
358     // string, we note this by setting it to kInvalidHashCode. [LIU]
359 
360     // Note: This method is semantically const, but physically non-const.
361 
362     if (fHashCode == kInvalidHashCode)
363     {
364         UHashTok key;
365         key.pointer = fBytes;
366         ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
367 #if 0
368         // We compute the hash by iterating sparsely over 64 (at most) characters
369         // spaced evenly through the string.  For each character, we multiply the
370         // previous hash value by a prime number and add the new character in,
371         // in the manner of a additive linear congruential random number generator,
372         // thus producing a pseudorandom deterministic value which should be well
373         // distributed over the output range. [LIU]
374         const uint8_t   *p = fBytes, *limit = fBytes + fCount;
375         int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
376         int32_t         hash = 0;
377 
378         while (p < limit)
379         {
380             hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
381             p += inc;
382         }
383 
384         // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385         if (hash == kInvalidHashCode)
386         {
387             hash = kEmptyHashCode;
388         }
389 
390         ((CollationKey *)this)->fHashCode = hash; // cast away const
391 #endif
392     }
393 
394     return fHashCode;
395 }
396 
397 U_NAMESPACE_END
398 
399 U_CAPI int32_t U_EXPORT2
ucol_keyHashCode(const uint8_t * key,int32_t length)400 ucol_keyHashCode(const uint8_t *key,
401                        int32_t  length)
402 {
403     U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
404     return newKey.hashCode();
405 }
406 
407 #endif /* #if !UCONFIG_NO_COLLATION */
408