• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7 //===============================================================================
8 //
9 // File sortkey.cpp
10 //
11 //
12 //
13 // Created by: Helena Shih
14 //
15 // Modification History:
16 //
17 //  Date         Name          Description
18 //
19 //  6/20/97      helena        Java class name change.
20 //  6/23/97      helena        Added comments to make code more readable.
21 //  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23 //                             Cleaned up operator=
24 // 07/12/99      helena        HPUX 11 CC port.
25 // 03/06/01      synwee        Modified compareTo, to handle the result of
26 //                             2 string similar in contents, but one is longer
27 //                             than the other
28 //===============================================================================
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_COLLATION
33 
34 #include "unicode/sortkey.h"
35 #include "cmemory.h"
36 #include "uelement.h"
37 #include "ustr_imp.h"
38 
39 U_NAMESPACE_BEGIN
40 
41 // A hash code of kInvalidHashCode indicates that the hash code needs
42 // to be computed. A hash code of kEmptyHashCode is used for empty keys
43 // and for any key whose computed hash code is kInvalidHashCode.
44 static const int32_t kInvalidHashCode = 0;
45 static const int32_t kEmptyHashCode = 1;
46 // The "bogus hash code" replaces a separate fBogus flag.
47 static const int32_t kBogusHashCode = 2;
48 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
50 
51 CollationKey::CollationKey()
52     : UObject(), fFlagAndLength(0),
53       fHashCode(kEmptyHashCode)
54 {
55 }
56 
57 // Create a collation key from a bit array.
CollationKey(const uint8_t * newValues,int32_t count)58 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
59     : UObject(), fFlagAndLength(count),
60       fHashCode(kInvalidHashCode)
61 {
62     if (count < 0 || (newValues == NULL && count != 0) ||
63             (count > getCapacity() && reallocate(count, 0) == NULL)) {
64         setToBogus();
65         return;
66     }
67 
68     if (count > 0) {
69         uprv_memcpy(getBytes(), newValues, count);
70     }
71 }
72 
CollationKey(const CollationKey & other)73 CollationKey::CollationKey(const CollationKey& other)
74     : UObject(other), fFlagAndLength(other.getLength()),
75       fHashCode(other.fHashCode)
76 {
77     if (other.isBogus())
78     {
79         setToBogus();
80         return;
81     }
82 
83     int32_t length = fFlagAndLength;
84     if (length > getCapacity() && reallocate(length, 0) == NULL) {
85         setToBogus();
86         return;
87     }
88 
89     if (length > 0) {
90         uprv_memcpy(getBytes(), other.getBytes(), length);
91     }
92 }
93 
~CollationKey()94 CollationKey::~CollationKey()
95 {
96     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
97 }
98 
reallocate(int32_t newCapacity,int32_t length)99 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
100     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
101     if(newBytes == NULL) { return NULL; }
102     if(length > 0) {
103         uprv_memcpy(newBytes, getBytes(), length);
104     }
105     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
106     fUnion.fFields.fBytes = newBytes;
107     fUnion.fFields.fCapacity = newCapacity;
108     fFlagAndLength |= 0x80000000;
109     return newBytes;
110 }
111 
setLength(int32_t newLength)112 void CollationKey::setLength(int32_t newLength) {
113     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
114     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
115     fHashCode = kInvalidHashCode;
116 }
117 
118 // set the key to an empty state
119 CollationKey&
reset()120 CollationKey::reset()
121 {
122     fFlagAndLength &= 0x80000000;
123     fHashCode = kEmptyHashCode;
124 
125     return *this;
126 }
127 
128 // set the key to a "bogus" or invalid state
129 CollationKey&
setToBogus()130 CollationKey::setToBogus()
131 {
132     fFlagAndLength &= 0x80000000;
133     fHashCode = kBogusHashCode;
134 
135     return *this;
136 }
137 
138 UBool
operator ==(const CollationKey & source) const139 CollationKey::operator==(const CollationKey& source) const
140 {
141     return getLength() == source.getLength() &&
142             (this == &source ||
143              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
144 }
145 
146 const CollationKey&
operator =(const CollationKey & other)147 CollationKey::operator=(const CollationKey& other)
148 {
149     if (this != &other)
150     {
151         if (other.isBogus())
152         {
153             return setToBogus();
154         }
155 
156         int32_t length = other.getLength();
157         if (length > getCapacity() && reallocate(length, 0) == NULL) {
158             return setToBogus();
159         }
160         if (length > 0) {
161             uprv_memcpy(getBytes(), other.getBytes(), length);
162         }
163         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
164         fHashCode = other.fHashCode;
165     }
166 
167     return *this;
168 }
169 
170 // Bitwise comparison for the collation keys.
171 Collator::EComparisonResult
compareTo(const CollationKey & target) const172 CollationKey::compareTo(const CollationKey& target) const
173 {
174     UErrorCode errorCode = U_ZERO_ERROR;
175     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
176 }
177 
178 // Bitwise comparison for the collation keys.
179 UCollationResult
compareTo(const CollationKey & target,UErrorCode & status) const180 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
181 {
182   if(U_SUCCESS(status)) {
183     const uint8_t *src = getBytes();
184     const uint8_t *tgt = target.getBytes();
185 
186     // are we comparing the same string
187     if (src == tgt)
188         return  UCOL_EQUAL;
189 
190     UCollationResult result;
191 
192     // are we comparing different lengths?
193     int32_t minLength = getLength();
194     int32_t targetLength = target.getLength();
195     if (minLength < targetLength) {
196         result = UCOL_LESS;
197     } else if (minLength == targetLength) {
198         result = UCOL_EQUAL;
199     } else {
200         minLength = targetLength;
201         result = UCOL_GREATER;
202     }
203 
204     if (minLength > 0) {
205         int diff = uprv_memcmp(src, tgt, minLength);
206         if (diff > 0) {
207             return UCOL_GREATER;
208         }
209         else
210             if (diff < 0) {
211                 return UCOL_LESS;
212             }
213     }
214 
215     return result;
216   } else {
217     return UCOL_EQUAL;
218   }
219 }
220 
221 #ifdef U_USE_COLLATION_KEY_DEPRECATES
222 // Create a copy of the byte array.
223 uint8_t*
toByteArray(int32_t & count) const224 CollationKey::toByteArray(int32_t& count) const
225 {
226     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
227 
228     if (result == NULL)
229     {
230         count = 0;
231     }
232     else
233     {
234         count = fCount;
235         if (count > 0) {
236             uprv_memcpy(result, fBytes, fCount);
237         }
238     }
239 
240     return result;
241 }
242 #endif
243 
244 static int32_t
computeHashCode(const uint8_t * key,int32_t length)245 computeHashCode(const uint8_t *key, int32_t  length) {
246     const char *s = reinterpret_cast<const char *>(key);
247     int32_t hash;
248     if (s == NULL || length == 0) {
249         hash = kEmptyHashCode;
250     } else {
251         hash = ustr_hashCharsN(s, length);
252         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
253             hash = kEmptyHashCode;
254         }
255     }
256     return hash;
257 }
258 
259 int32_t
hashCode() const260 CollationKey::hashCode() const
261 {
262     // (Cribbed from UnicodeString)
263     // We cache the hashCode; when it becomes invalid, due to any change to the
264     // string, we note this by setting it to kInvalidHashCode. [LIU]
265 
266     // Note: This method is semantically const, but physically non-const.
267 
268     if (fHashCode == kInvalidHashCode)
269     {
270         fHashCode = computeHashCode(getBytes(), getLength());
271     }
272 
273     return fHashCode;
274 }
275 
276 U_NAMESPACE_END
277 
278 U_CAPI int32_t U_EXPORT2
ucol_keyHashCode(const uint8_t * key,int32_t length)279 ucol_keyHashCode(const uint8_t *key,
280                        int32_t  length)
281 {
282     return icu::computeHashCode(key, length);
283 }
284 
285 #endif /* #if !UCONFIG_NO_COLLATION */
286