1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *****************************************************************************
5 * Copyright (C) 1996-2014, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 *****************************************************************************
8 *
9 * File sortkey.h
10 *
11 * Created by: Helena Shih
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 *
17 * 6/20/97 helena Java class name change.
18 * 8/18/97 helena Added internal API documentation.
19 * 6/26/98 erm Changed to use byte arrays and memcmp.
20 *****************************************************************************
21 */
22
23 #ifndef SORTKEY_H
24 #define SORTKEY_H
25
26 #include "unicode/utypes.h"
27
28 #if U_SHOW_CPLUSPLUS_API
29
30 /**
31 * \file
32 * \brief C++ API: Keys for comparing strings multiple times.
33 */
34
35 #if !UCONFIG_NO_COLLATION
36
37 #include "unicode/uobject.h"
38 #include "unicode/unistr.h"
39 #include "unicode/coll.h"
40
41 U_NAMESPACE_BEGIN
42
43 /* forward declaration */
44 class RuleBasedCollator;
45 class CollationKeyByteSink;
46
47 /**
48 *
49 * Collation keys are generated by the Collator class. Use the CollationKey objects
50 * instead of Collator to compare strings multiple times. A CollationKey
51 * preprocesses the comparison information from the Collator object to
52 * make the comparison faster. If you are not going to comparing strings
53 * multiple times, then using the Collator object is generally faster,
54 * since it only processes as much of the string as needed to make a
55 * comparison.
56 * <p> For example (with strength == tertiary)
57 * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
58 * only needs to process a couple of characters, while a comparison
59 * with CollationKeys will process all of the characters. On the other hand,
60 * if you are doing a sort of a number of fields, it is much faster to use
61 * CollationKeys, since you will be comparing strings multiple times.
62 * <p>Typical use of CollationKeys are in databases, where you store a CollationKey
63 * in a hidden field, and use it for sorting or indexing.
64 *
65 * <p>Example of use:
66 * <pre>
67 * \code
68 * UErrorCode success = U_ZERO_ERROR;
69 * Collator* myCollator = Collator::createInstance(success);
70 * CollationKey* keys = new CollationKey [3];
71 * myCollator->getCollationKey("Tom", keys[0], success );
72 * myCollator->getCollationKey("Dick", keys[1], success );
73 * myCollator->getCollationKey("Harry", keys[2], success );
74 *
75 * // Inside body of sort routine, compare keys this way:
76 * CollationKey tmp;
77 * if(keys[0].compareTo( keys[1] ) > 0 ) {
78 * tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
79 * }
80 * //...
81 * \endcode
82 * </pre>
83 * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
84 * long lists of words by retrieving collation keys with Collator::getCollationKey().
85 * You can then cache the collation keys and compare them using CollationKey::compareTo().
86 * <p>
87 * <strong>Note:</strong> <code>Collator</code>s with different Locale,
88 * CollationStrength and DecompositionMode settings will return different
89 * CollationKeys for the same set of strings. Locales have specific
90 * collation rules, and the way in which secondary and tertiary differences
91 * are taken into account, for example, will result in different CollationKeys
92 * for same strings.
93 * <p>
94
95 * @see Collator
96 * @see RuleBasedCollator
97 * @version 1.3 12/18/96
98 * @author Helena Shih
99 * @stable ICU 2.0
100 */
101 class U_I18N_API CollationKey : public UObject {
102 public:
103 /**
104 * This creates an empty collation key based on the null string. An empty
105 * collation key contains no sorting information. When comparing two empty
106 * collation keys, the result is Collator::EQUAL. Comparing empty collation key
107 * with non-empty collation key is always Collator::LESS.
108 * @stable ICU 2.0
109 */
110 CollationKey();
111
112
113 /**
114 * Creates a collation key based on the collation key values.
115 * @param values the collation key values
116 * @param count number of collation key values, including trailing nulls.
117 * @stable ICU 2.0
118 */
119 CollationKey(const uint8_t* values,
120 int32_t count);
121
122 /**
123 * Copy constructor.
124 * @param other the object to be copied.
125 * @stable ICU 2.0
126 */
127 CollationKey(const CollationKey& other);
128
129 /**
130 * Sort key destructor.
131 * @stable ICU 2.0
132 */
133 virtual ~CollationKey();
134
135 /**
136 * Assignment operator
137 * @param other the object to be copied.
138 * @stable ICU 2.0
139 */
140 const CollationKey& operator=(const CollationKey& other);
141
142 /**
143 * Compare if two collation keys are the same.
144 * @param source the collation key to compare to.
145 * @return Returns true if two collation keys are equal, false otherwise.
146 * @stable ICU 2.0
147 */
148 UBool operator==(const CollationKey& source) const;
149
150 /**
151 * Compare if two collation keys are not the same.
152 * @param source the collation key to compare to.
153 * @return Returns TRUE if two collation keys are different, FALSE otherwise.
154 * @stable ICU 2.0
155 */
156 UBool operator!=(const CollationKey& source) const;
157
158
159 /**
160 * Test to see if the key is in an invalid state. The key will be in an
161 * invalid state if it couldn't allocate memory for some operation.
162 * @return Returns TRUE if the key is in an invalid, FALSE otherwise.
163 * @stable ICU 2.0
164 */
165 UBool isBogus(void) const;
166
167 /**
168 * Returns a pointer to the collation key values. The storage is owned
169 * by the collation key and the pointer will become invalid if the key
170 * is deleted.
171 * @param count the output parameter of number of collation key values,
172 * including any trailing nulls.
173 * @return a pointer to the collation key values.
174 * @stable ICU 2.0
175 */
176 const uint8_t* getByteArray(int32_t& count) const;
177
178 #ifdef U_USE_COLLATION_KEY_DEPRECATES
179 /**
180 * Extracts the collation key values into a new array. The caller owns
181 * this storage and should free it.
182 * @param count the output parameter of number of collation key values,
183 * including any trailing nulls.
184 * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
185 */
186 uint8_t* toByteArray(int32_t& count) const;
187 #endif
188
189 #ifndef U_HIDE_DEPRECATED_API
190 /**
191 * Convenience method which does a string(bit-wise) comparison of the
192 * two collation keys.
193 * @param target target collation key to be compared with
194 * @return Returns Collator::LESS if sourceKey < targetKey,
195 * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL
196 * otherwise.
197 * @deprecated ICU 2.6 use the overload with error code
198 */
199 Collator::EComparisonResult compareTo(const CollationKey& target) const;
200 #endif /* U_HIDE_DEPRECATED_API */
201
202 /**
203 * Convenience method which does a string(bit-wise) comparison of the
204 * two collation keys.
205 * @param target target collation key to be compared with
206 * @param status error code
207 * @return Returns UCOL_LESS if sourceKey < targetKey,
208 * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL
209 * otherwise.
210 * @stable ICU 2.6
211 */
212 UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
213
214 /**
215 * Creates an integer that is unique to the collation key. NOTE: this
216 * is not the same as String.hashCode.
217 * <p>Example of use:
218 * <pre>
219 * . UErrorCode status = U_ZERO_ERROR;
220 * . Collator *myCollation = Collator::createInstance(Locale::US, status);
221 * . if (U_FAILURE(status)) return;
222 * . CollationKey key1, key2;
223 * . UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
224 * . myCollation->getCollationKey("abc", key1, status1);
225 * . if (U_FAILURE(status1)) { delete myCollation; return; }
226 * . myCollation->getCollationKey("ABC", key2, status2);
227 * . if (U_FAILURE(status2)) { delete myCollation; return; }
228 * . // key1.hashCode() != key2.hashCode()
229 * </pre>
230 * @return the hash value based on the string's collation order.
231 * @see UnicodeString#hashCode
232 * @stable ICU 2.0
233 */
234 int32_t hashCode(void) const;
235
236 /**
237 * ICU "poor man's RTTI", returns a UClassID for the actual class.
238 * @stable ICU 2.2
239 */
240 virtual UClassID getDynamicClassID() const;
241
242 /**
243 * ICU "poor man's RTTI", returns a UClassID for this class.
244 * @stable ICU 2.2
245 */
246 static UClassID U_EXPORT2 getStaticClassID();
247
248 private:
249 /**
250 * Replaces the current bytes buffer with a new one of newCapacity
251 * and copies length bytes from the old buffer to the new one.
252 * @return the new buffer, or NULL if the allocation failed
253 */
254 uint8_t *reallocate(int32_t newCapacity, int32_t length);
255 /**
256 * Set a new length for a new sort key in the existing fBytes.
257 */
258 void setLength(int32_t newLength);
259
getBytes()260 uint8_t *getBytes() {
261 return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
262 }
getBytes()263 const uint8_t *getBytes() const {
264 return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
265 }
getCapacity()266 int32_t getCapacity() const {
267 return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity;
268 }
getLength()269 int32_t getLength() const { return fFlagAndLength & 0x7fffffff; }
270
271 /**
272 * Set the CollationKey to a "bogus" or invalid state
273 * @return this CollationKey
274 */
275 CollationKey& setToBogus(void);
276 /**
277 * Resets this CollationKey to an empty state
278 * @return this CollationKey
279 */
280 CollationKey& reset(void);
281
282 /**
283 * Allow private access to RuleBasedCollator
284 */
285 friend class RuleBasedCollator;
286 friend class CollationKeyByteSink;
287
288 // Class fields. sizeof(CollationKey) is intended to be 48 bytes
289 // on a machine with 64-bit pointers.
290 // We use a union to maximize the size of the internal buffer,
291 // similar to UnicodeString but not as tight and complex.
292
293 // (implicit) *vtable;
294 /**
295 * Sort key length and flag.
296 * Bit 31 is set if the buffer is heap-allocated.
297 * Bits 30..0 contain the sort key length.
298 */
299 int32_t fFlagAndLength;
300 /**
301 * Unique hash value of this CollationKey.
302 * Special value 2 if the key is bogus.
303 */
304 mutable int32_t fHashCode;
305 /**
306 * fUnion provides 32 bytes for the internal buffer or for
307 * pointer+capacity.
308 */
309 union StackBufferOrFields {
310 /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */
311 uint8_t fStackBuffer[32];
312 struct {
313 uint8_t *fBytes;
314 int32_t fCapacity;
315 } fFields;
316 } fUnion;
317 };
318
319 inline UBool
320 CollationKey::operator!=(const CollationKey& other) const
321 {
322 return !(*this == other);
323 }
324
325 inline UBool
isBogus()326 CollationKey::isBogus() const
327 {
328 return fHashCode == 2; // kBogusHashCode
329 }
330
331 inline const uint8_t*
getByteArray(int32_t & count)332 CollationKey::getByteArray(int32_t &count) const
333 {
334 count = getLength();
335 return getBytes();
336 }
337
338 U_NAMESPACE_END
339
340 #endif /* #if !UCONFIG_NO_COLLATION */
341
342 #endif /* U_SHOW_CPLUSPLUS_API */
343
344 #endif
345