• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
8 /**
9  * \file
10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
11  */
12 
13 /**
14 * File tblcoll.h
15 *
16 * Created by: Helena Shih
17 *
18 * Modification History:
19 *
20 *  Date        Name        Description
21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
22 *                          constructor which reads RuleBasedCollator object from
23 *                          a binary file.  Added writeToFile method which streams
24 *                          RuleBasedCollator out to a binary file.  The streamIn
25 *                          and streamOut methods use istream and ostream objects
26 *                          in binary mode.
27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
28 *                          hold invariant data.
29 *  2/13/97     aliu        Moved several methods into this class from Collation.
30 *                          Added a private RuleBasedCollator(Locale&) constructor,
31 *                          to be used by Collator::createDefault().  General
32 *                          clean up.
33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
34 *                          constructor and getDynamicClassID.
35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
36 *                          specifying whether or not binary loading is to be
37 *                          attempted.  This is required for dynamic rule loading.
38 * 05/07/97     helena      Added memory allocation error detection.
39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
40 *                          use MergeCollation::getPattern.
41 *  6/20/97     helena      Java class name change.
42 *  8/18/97     helena      Added internal API documentation.
43 * 09/03/97     helena      Added createCollationKeyValues().
44 * 02/10/98     damiba      Added compare with "length" parameter
45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
47 *                          Normalizer::EMode
48 * 06/14/99     stephen     Removed kResourceBundleSuffix
49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
50 *                          UnicodeString construction and special case for NO_OP.
51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
52 *                          internal state management.
53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
54 *                          to implementation file.
55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
56 *                          (ucol.h)
57 */
58 
59 #ifndef TBLCOLL_H
60 #define TBLCOLL_H
61 
62 #include "unicode/utypes.h"
63 
64 
65 #if !UCONFIG_NO_COLLATION
66 
67 #include "unicode/coll.h"
68 #include "unicode/ucol.h"
69 #include "unicode/sortkey.h"
70 #include "unicode/normlzr.h"
71 
72 U_NAMESPACE_BEGIN
73 
74 /**
75 * @stable ICU 2.0
76 */
77 class StringSearch;
78 /**
79 * @stable ICU 2.0
80 */
81 class CollationElementIterator;
82 
83 /**
84  * The RuleBasedCollator class provides the simple implementation of
85  * Collator, using data-driven tables. The user can create a customized
86  * table-based collation.
87  * <P>
88  * <em>Important: </em>The ICU collation service has been reimplemented
89  * in order to achieve better performance and UCA compliance.
90  * For details, see the
91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
92  * collation design document</a>.
93  * <p>
94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
95  * <p>
96  * For more information about the collation service see
97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
98  * <p>
99  * Collation service provides correct sorting orders for most locales supported in ICU.
100  * If specific data for a locale is not available, the order eventually falls back
101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
102  * <p>
103  * Sort ordering may be customized by providing your own set of rules. For more on
104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
105  * Collation customization</a> section of the users guide.
106  * <p>
107  * Note, RuleBasedCollator is not to be subclassed.
108  * @see        Collator
109  * @version    2.0 11/15/2001
110  */
111 class U_I18N_API RuleBasedCollator : public Collator
112 {
113 public:
114 
115   // constructor -------------------------------------------------------------
116 
117     /**
118      * RuleBasedCollator constructor. This takes the table rules and builds a
119      * collation table out of them. Please see RuleBasedCollator class
120      * description for more details on the collation rule syntax.
121      * @param rules the collation rules to build the collation table from.
122      * @param status reporting a success or an error.
123      * @see Locale
124      * @stable ICU 2.0
125      */
126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
127 
128     /**
129      * RuleBasedCollator constructor. This takes the table rules and builds a
130      * collation table out of them. Please see RuleBasedCollator class
131      * description for more details on the collation rule syntax.
132      * @param rules the collation rules to build the collation table from.
133      * @param collationStrength default strength for comparison
134      * @param status reporting a success or an error.
135      * @see Locale
136      * @stable ICU 2.0
137      */
138     RuleBasedCollator(const UnicodeString& rules,
139                        ECollationStrength collationStrength,
140                        UErrorCode& status);
141 
142     /**
143      * RuleBasedCollator constructor. This takes the table rules and builds a
144      * collation table out of them. Please see RuleBasedCollator class
145      * description for more details on the collation rule syntax.
146      * @param rules the collation rules to build the collation table from.
147      * @param decompositionMode the normalisation mode
148      * @param status reporting a success or an error.
149      * @see Locale
150      * @stable ICU 2.0
151      */
152     RuleBasedCollator(const UnicodeString& rules,
153                     UColAttributeValue decompositionMode,
154                     UErrorCode& status);
155 
156     /**
157      * RuleBasedCollator constructor. This takes the table rules and builds a
158      * collation table out of them. Please see RuleBasedCollator class
159      * description for more details on the collation rule syntax.
160      * @param rules the collation rules to build the collation table from.
161      * @param collationStrength default strength for comparison
162      * @param decompositionMode the normalisation mode
163      * @param status reporting a success or an error.
164      * @see Locale
165      * @stable ICU 2.0
166      */
167     RuleBasedCollator(const UnicodeString& rules,
168                     ECollationStrength collationStrength,
169                     UColAttributeValue decompositionMode,
170                     UErrorCode& status);
171 
172     /**
173      * Copy constructor.
174      * @param other the RuleBasedCollator object to be copied
175      * @see Locale
176      * @stable ICU 2.0
177      */
178     RuleBasedCollator(const RuleBasedCollator& other);
179 
180 
181     /** Opens a collator from a collator binary image created using
182     *  cloneBinary. Binary image used in instantiation of the
183     *  collator remains owned by the user and should stay around for
184     *  the lifetime of the collator. The API also takes a base collator
185     *  which usually should be UCA.
186     *  @param bin binary image owned by the user and required through the
187     *             lifetime of the collator
188     *  @param length size of the image. If negative, the API will try to
189     *                figure out the length of the image
190     *  @param base fallback collator, usually UCA. Base is required to be
191     *              present through the lifetime of the collator. Currently
192     *              it cannot be NULL.
193     *  @param status for catching errors
194     *  @return newly created collator
195     *  @see cloneBinary
196     *  @stable ICU 3.4
197     */
198     RuleBasedCollator(const uint8_t *bin, int32_t length,
199                     const RuleBasedCollator *base,
200                     UErrorCode &status);
201     // destructor --------------------------------------------------------------
202 
203     /**
204      * Destructor.
205      * @stable ICU 2.0
206      */
207     virtual ~RuleBasedCollator();
208 
209     // public methods ----------------------------------------------------------
210 
211     /**
212      * Assignment operator.
213      * @param other the RuleBasedCollator object to be assigned from.
214      * @stable ICU 2.0
215      */
216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
217 
218     /**
219      * Returns true if argument is the same as this object.
220      * @param other Collator object to be compared.
221      * @return true if argument is the same as this object.
222      * @stable ICU 2.0
223      */
224     virtual UBool operator==(const Collator& other) const;
225 
226     /**
227      * Returns true if argument is not the same as this object.
228      * @param other Collator object to be compared
229      * @return returns true if argument is not the same as this object.
230      * @stable ICU 2.0
231      */
232     virtual UBool operator!=(const Collator& other) const;
233 
234     /**
235      * Makes a deep copy of the object.
236      * The caller owns the returned object.
237      * @return the cloned object.
238      * @stable ICU 2.0
239      */
240     virtual Collator* clone(void) const;
241 
242     /**
243      * Creates a collation element iterator for the source string. The caller of
244      * this method is responsible for the memory management of the return
245      * pointer.
246      * @param source the string over which the CollationElementIterator will
247      *        iterate.
248      * @return the collation element iterator of the source string using this as
249      *         the base Collator.
250      * @stable ICU 2.2
251      */
252     virtual CollationElementIterator* createCollationElementIterator(
253                                            const UnicodeString& source) const;
254 
255     /**
256      * Creates a collation element iterator for the source. The caller of this
257      * method is responsible for the memory management of the returned pointer.
258      * @param source the CharacterIterator which produces the characters over
259      *        which the CollationElementIterator will iterate.
260      * @return the collation element iterator of the source using this as the
261      *         base Collator.
262      * @stable ICU 2.2
263      */
264     virtual CollationElementIterator* createCollationElementIterator(
265                                          const CharacterIterator& source) const;
266 
267     /**
268      * Compares a range of character data stored in two different strings based
269      * on the collation rules. Returns information about whether a string is
270      * less than, greater than or equal to another string in a language.
271      * This can be overridden in a subclass.
272      * @param source the source string.
273      * @param target the target string to be compared with the source string.
274      * @return the comparison result. GREATER if the source string is greater
275      *         than the target string, LESS if the source is less than the
276      *         target. Otherwise, returns EQUAL.
277      * @deprecated ICU 2.6 Use overload with UErrorCode&
278      */
279     virtual EComparisonResult compare(const UnicodeString& source,
280                                       const UnicodeString& target) const;
281 
282 
283     /**
284     * The comparison function compares the character data stored in two
285     * different strings. Returns information about whether a string is less
286     * than, greater than or equal to another string.
287     * @param source the source string to be compared with.
288     * @param target the string that is to be compared with the source string.
289     * @param status possible error code
290     * @return Returns an enum value. UCOL_GREATER if source is greater
291     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
292     * than target
293     * @stable ICU 2.6
294     **/
295     virtual UCollationResult compare(const UnicodeString& source,
296                                       const UnicodeString& target,
297                                       UErrorCode &status) const;
298 
299     /**
300      * Compares a range of character data stored in two different strings based
301      * on the collation rules up to the specified length. Returns information
302      * about whether a string is less than, greater than or equal to another
303      * string in a language. This can be overridden in a subclass.
304      * @param source the source string.
305      * @param target the target string to be compared with the source string.
306      * @param length compares up to the specified length
307      * @return the comparison result. GREATER if the source string is greater
308      *         than the target string, LESS if the source is less than the
309      *         target. Otherwise, returns EQUAL.
310      * @deprecated ICU 2.6 Use overload with UErrorCode&
311      */
312     virtual EComparisonResult compare(const UnicodeString& source,
313                                       const UnicodeString&  target,
314                                       int32_t length) const;
315 
316     /**
317     * Does the same thing as compare but limits the comparison to a specified
318     * length
319     * @param source the source string to be compared with.
320     * @param target the string that is to be compared with the source string.
321     * @param length the length the comparison is limited to
322     * @param status possible error code
323     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
324     *         length) is greater than target; UCOL_EQUAL if source (up to specified
325     *         length) is equal to target; UCOL_LESS if source (up to the specified
326     *         length) is less  than target.
327     * @stable ICU 2.6
328     */
329     virtual UCollationResult compare(const UnicodeString& source,
330                                       const UnicodeString& target,
331                                       int32_t length,
332                                       UErrorCode &status) const;
333 
334     /**
335      * The comparison function compares the character data stored in two
336      * different string arrays. Returns information about whether a string array
337      * is less than, greater than or equal to another string array.
338      * <p>Example of use:
339      * <pre>
340      * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
341      * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
342      * .       UErrorCode status = U_ZERO_ERROR;
343      * .       Collator *myCollation =
344      * .                         Collator::createInstance(Locale::US, status);
345      * .       if (U_FAILURE(status)) return;
346      * .       myCollation->setStrength(Collator::PRIMARY);
347      * .       // result would be Collator::EQUAL ("abc" == "ABC")
348      * .       // (no primary difference between "abc" and "ABC")
349      * .       Collator::EComparisonResult result =
350      * .                             myCollation->compare(abc, 3, ABC, 3);
351      * .       myCollation->setStrength(Collator::TERTIARY);
352      * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
353      * .       // (with tertiary difference between "abc" and "ABC")
354      * .       result =  myCollation->compare(abc, 3, ABC, 3);
355      * </pre>
356      * @param source the source string array to be compared with.
357      * @param sourceLength the length of the source string array. If this value
358      *        is equal to -1, the string array is null-terminated.
359      * @param target the string that is to be compared with the source string.
360      * @param targetLength the length of the target string array. If this value
361      *        is equal to -1, the string array is null-terminated.
362      * @return Returns an enum value. GREATER if source is greater than target;
363      *         EQUAL if source is equal to target; LESS if source is less than
364      *         target
365      * @deprecated ICU 2.6 Use overload with UErrorCode&
366      */
367     virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
368                                       const UChar* target, int32_t targetLength)
369                                       const;
370 
371     /**
372     * The comparison function compares the character data stored in two
373     * different string arrays. Returns information about whether a string array
374     * is less than, greater than or equal to another string array.
375     * @param source the source string array to be compared with.
376     * @param sourceLength the length of the source string array.  If this value
377     *        is equal to -1, the string array is null-terminated.
378     * @param target the string that is to be compared with the source string.
379     * @param targetLength the length of the target string array.  If this value
380     *        is equal to -1, the string array is null-terminated.
381     * @param status possible error code
382     * @return Returns an enum value. UCOL_GREATER if source is greater
383     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
384     * than target
385     * @stable ICU 2.6
386     */
387     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
388                                       const UChar* target, int32_t targetLength,
389                                       UErrorCode &status) const;
390 
391     /**
392      * Compares two strings using the Collator.
393      * Returns whether the first one compares less than/equal to/greater than
394      * the second one.
395      * This version takes UCharIterator input.
396      * @param sIter the first ("source") string iterator
397      * @param tIter the second ("target") string iterator
398      * @param status ICU status
399      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
400      * @stable ICU 4.2
401      */
402     virtual UCollationResult compare(UCharIterator &sIter,
403                                      UCharIterator &tIter,
404                                      UErrorCode &status) const;
405 
406     /**
407     * Transforms a specified region of the string into a series of characters
408     * that can be compared with CollationKey.compare. Use a CollationKey when
409     * you need to do repeated comparisons on the same string. For a single
410     * comparison the compare method will be faster.
411     * @param source the source string.
412     * @param key the transformed key of the source string.
413     * @param status the error code status.
414     * @return the transformed key.
415     * @see CollationKey
416     * @deprecated ICU 2.8 Use getSortKey(...) instead
417     */
418     virtual CollationKey& getCollationKey(const UnicodeString& source,
419                                           CollationKey& key,
420                                           UErrorCode& status) const;
421 
422     /**
423     * Transforms a specified region of the string into a series of characters
424     * that can be compared with CollationKey.compare. Use a CollationKey when
425     * you need to do repeated comparisons on the same string. For a single
426     * comparison the compare method will be faster.
427     * @param source the source string.
428     * @param sourceLength the length of the source string.
429     * @param key the transformed key of the source string.
430     * @param status the error code status.
431     * @return the transformed key.
432     * @see CollationKey
433     * @deprecated ICU 2.8 Use getSortKey(...) instead
434     */
435     virtual CollationKey& getCollationKey(const UChar *source,
436                                           int32_t sourceLength,
437                                           CollationKey& key,
438                                           UErrorCode& status) const;
439 
440     /**
441      * Generates the hash code for the rule-based collation object.
442      * @return the hash code.
443      * @stable ICU 2.0
444      */
445     virtual int32_t hashCode(void) const;
446 
447     /**
448     * Gets the locale of the Collator
449     * @param type can be either requested, valid or actual locale. For more
450     *             information see the definition of ULocDataLocaleType in
451     *             uloc.h
452     * @param status the error code status.
453     * @return locale where the collation data lives. If the collator
454     *         was instantiated from rules, locale is empty.
455     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
456     */
457     virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
458 
459     /**
460      * Gets the table-based rules for the collation object.
461      * @return returns the collation rules that the table collation object was
462      *         created from.
463      * @stable ICU 2.0
464      */
465     const UnicodeString& getRules(void) const;
466 
467     /**
468      * Gets the version information for a Collator.
469      * @param info the version # information, the result will be filled in
470      * @stable ICU 2.0
471      */
472     virtual void getVersion(UVersionInfo info) const;
473 
474     /**
475      * Return the maximum length of any expansion sequences that end with the
476      * specified comparison order.
477      * @param order a collation order returned by previous or next.
478      * @return maximum size of the expansion sequences ending with the collation
479      *         element or 1 if collation element does not occur at the end of
480      *         any expansion sequence
481      * @see CollationElementIterator#getMaxExpansion
482      * @stable ICU 2.0
483      */
484     int32_t getMaxExpansion(int32_t order) const;
485 
486     /**
487      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
488      * method is to implement a simple version of RTTI, since not all C++
489      * compilers support genuine RTTI. Polymorphic operator==() and clone()
490      * methods call this method.
491      * @return The class ID for this object. All objects of a given class have
492      *         the same class ID. Objects of other classes have different class
493      *         IDs.
494      * @stable ICU 2.0
495      */
496     virtual UClassID getDynamicClassID(void) const;
497 
498     /**
499      * Returns the class ID for this class. This is useful only for comparing to
500      * a return value from getDynamicClassID(). For example:
501      * <pre>
502      * Base* polymorphic_pointer = createPolymorphicObject();
503      * if (polymorphic_pointer->getDynamicClassID() ==
504      *                                          Derived::getStaticClassID()) ...
505      * </pre>
506      * @return The class ID for all objects of this class.
507      * @stable ICU 2.0
508      */
509     static UClassID U_EXPORT2 getStaticClassID(void);
510 
511     /**
512      * Returns the binary format of the class's rules. The format is that of
513      * .col files.
514      * @param length Returns the length of the data, in bytes
515      * @param status the error code status.
516      * @return memory, owned by the caller, of size 'length' bytes.
517      * @stable ICU 2.2
518      */
519     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
520 
521 
522     /** Creates a binary image of a collator. This binary image can be stored and
523     *  later used to instantiate a collator using ucol_openBinary.
524     *  This API supports preflighting.
525     *  @param buffer a fill-in buffer to receive the binary image
526     *  @param capacity capacity of the destination buffer
527     *  @param status for catching errors
528     *  @return size of the image
529     *  @see ucol_openBinary
530     *  @stable ICU 3.4
531     */
532     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
533 
534     /**
535      * Returns current rules. Delta defines whether full rules are returned or
536      * just the tailoring.
537      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
538      * @param buffer UnicodeString to store the result rules
539      * @stable ICU 2.2
540      */
541     void getRules(UColRuleOption delta, UnicodeString &buffer);
542 
543     /**
544      * Universal attribute setter
545      * @param attr attribute type
546      * @param value attribute value
547      * @param status to indicate whether the operation went on smoothly or there were errors
548      * @stable ICU 2.2
549      */
550     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
551                               UErrorCode &status);
552 
553     /**
554      * Universal attribute getter.
555      * @param attr attribute type
556      * @param status to indicate whether the operation went on smoothly or there were errors
557      * @return attribute value
558      * @stable ICU 2.2
559      */
560     virtual UColAttributeValue getAttribute(UColAttribute attr,
561                                             UErrorCode &status);
562 
563     /**
564      * Sets the variable top to a collation element value of a string supplied.
565      * @param varTop one or more (if contraction) UChars to which the variable top should be set
566      * @param len length of variable top string. If -1 it is considered to be zero terminated.
567      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
568      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
569      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
570      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
571      * @stable ICU 2.0
572      */
573     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
574 
575     /**
576      * Sets the variable top to a collation element value of a string supplied.
577      * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set
578      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
579      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
580      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
581      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
582      * @stable ICU 2.0
583      */
584     virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
585 
586     /**
587      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
588      * Lower 16 bits are ignored.
589      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
590      * @param status error code (not changed by function)
591      * @stable ICU 2.0
592      */
593     virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
594 
595     /**
596      * Gets the variable top value of a Collator.
597      * Lower 16 bits are undefined and should be ignored.
598      * @param status error code (not changed by function). If error code is set, the return value is undefined.
599      * @stable ICU 2.0
600      */
601     virtual uint32_t getVariableTop(UErrorCode &status) const;
602 
603     /**
604      * Get a UnicodeSet that contains all the characters and sequences tailored in
605      * this collator.
606      * @param status      error code of the operation
607      * @return a pointer to a UnicodeSet object containing all the
608      *         code points and sequences that may sort differently than
609      *         in the UCA. The object must be disposed of by using delete
610      * @stable ICU 2.4
611      */
612     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
613 
614     /**
615      * Thread safe cloning operation.
616      * @return pointer to the new clone, user should remove it.
617      * @stable ICU 2.2
618      */
619     virtual Collator* safeClone(void);
620 
621     /**
622      * Get the sort key as an array of bytes from a UnicodeString.
623      * @param source string to be processed.
624      * @param result buffer to store result in. If NULL, number of bytes needed
625      *        will be returned.
626      * @param resultLength length of the result buffer. If it is not enough the
627      *        buffer will be filled to capacity.
628      * @return Number of bytes needed for storing the sort key
629      * @stable ICU 2.0
630      */
631     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
632                                int32_t resultLength) const;
633 
634     /**
635      * Get the sort key as an array of bytes from a UChar buffer.
636      * @param source string to be processed.
637      * @param sourceLength length of string to be processed. If -1, the string
638      *        is 0 terminated and length will be decided by the function.
639      * @param result buffer to store result in. If NULL, number of bytes needed
640      *        will be returned.
641      * @param resultLength length of the result buffer. If it is not enough the
642      *        buffer will be filled to capacity.
643      * @return Number of bytes needed for storing the sort key
644      * @stable ICU 2.2
645      */
646     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
647                                uint8_t *result, int32_t resultLength) const;
648 
649     /**
650     * Determines the minimum strength that will be used in comparison or
651     * transformation.
652     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
653     * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
654     * are ignored.
655     * @return the current comparison level.
656     * @see RuleBasedCollator#setStrength
657     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
658     */
659     virtual ECollationStrength getStrength(void) const;
660 
661     /**
662     * Sets the minimum strength to be used in comparison or transformation.
663     * @see RuleBasedCollator#getStrength
664     * @param newStrength the new comparison level.
665     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
666     */
667     virtual void setStrength(ECollationStrength newStrength);
668 
669     /**
670      * Retrieves the reordering codes for this collator.
671      * @param dest The array to fill with the script ordering.
672      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
673      *  will only return the length of the result without writing any of the result string (pre-flighting).
674      * @param status A reference to an error code value, which must not indicate
675      * a failure before the function call.
676      * @return The length of the script ordering array.
677      * @see ucol_setReorderCodes
678      * @see Collator#getEquivalentReorderCodes
679      * @see Collator#setReorderCodes
680      * @draft ICU 4.8
681      */
682      virtual int32_t U_EXPORT2 getReorderCodes(int32_t *dest,
683                                     int32_t destCapacity,
684                                     UErrorCode& status) const;
685 
686     /**
687      * Sets the ordering of scripts for this collator.
688      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
689      * length is also set to 0. An empty array will clear any reordering codes on the collator.
690      * @param reorderCodesLength The length of reorderCodes.
691      * @param status error code
692      * @see Collator#getReorderCodes
693      * @see Collator#getEquivalentReorderCodes
694      * @draft ICU 4.8
695      */
696      virtual void U_EXPORT2 setReorderCodes(const int32_t* reorderCodes,
697                                 int32_t reorderCodesLength,
698                                 UErrorCode& status) ;
699 
700     /**
701      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
702      * codes will be grouped and must reorder together.
703      * @param reorderCode The reorder code to determine equivalence for.
704      * @param dest The array to fill with the script equivalence reordering codes.
705      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
706      * function will only return the length of the result without writing any of the result
707      * string (pre-flighting).
708      * @param status A reference to an error code value, which must not indicate
709      * a failure before the function call.
710      * @return The length of the reordering code equivalence array.
711      * @see ucol_setReorderCodes
712      * @see Collator#getReorderCodes
713      * @see Collator#setReorderCodes
714      * @draft ICU 4.8
715      */
716     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
717                                 int32_t* dest,
718                                 int32_t destCapacity,
719                                 UErrorCode& status);
720 
721 
722 private:
723 
724     // private static constants -----------------------------------------------
725 
726     enum { /* private constants: masks, shifts, increments and marker values for 32-bit collation orders */
727         /* need look up in .commit() (NOTE(review): no commit() is visible in this part of the header - confirm) */
728         CHARINDEX = 0x70000000,
729         /* Expand index follows */
730         EXPANDCHARINDEX = 0x7E000000,
731         /* Contract index follows */
732         CONTRACTCHARINDEX = 0x7F000000,
733         /* unmapped character values (all 32 bits set) */
734         UNMAPPED = 0xFFFFFFFF,
735         /* primary strength increment: 1 << PRIMARYORDERSHIFT */
736         PRIMARYORDERINCREMENT = 0x00010000,
737         /* secondary strength increment: 1 << SECONDARYORDERSHIFT */
738         SECONDARYORDERINCREMENT = 0x00000100,
739         /* tertiary strength increment: lowest bit */
740         TERTIARYORDERINCREMENT = 0x00000001,
741         /* mask off anything but primary order (upper 16 bits) */
742         PRIMARYORDERMASK = 0xffff0000,
743         /* mask off anything but secondary order (bits 8..15) */
744         SECONDARYORDERMASK = 0x0000ff00,
745         /* mask off anything but tertiary order (low 8 bits) */
746         TERTIARYORDERMASK = 0x000000ff,
747         /* mask off ignorable char order (low 16 bits, i.e. secondary and tertiary fields) */
748         IGNORABLEMASK = 0x0000ffff,
749         /* use only the primary difference (same value as PRIMARYORDERMASK) */
750         PRIMARYDIFFERENCEONLY = 0xffff0000,
751         /* use only the primary and secondary difference (PRIMARYORDERMASK | SECONDARYORDERMASK) */
752         SECONDARYDIFFERENCEONLY = 0xffffff00,
753         /* primary order shift: bit position of the primary field */
754         PRIMARYORDERSHIFT = 16,
755         /* secondary order shift: bit position of the secondary field */
756         SECONDARYORDERSHIFT = 8,
757         /* starting value for collation elements */
758         COLELEMENTSTART = 0x02020202,
759         /* testing mask for primary low element (bits 16..23) */
760         PRIMARYLOWZEROMASK = 0x00FF0000,
761         /* resetting value for secondaries and tertiaries */
762         RESETSECONDARYTERTIARY = 0x00000202,
763         /* resetting value for tertiaries */
764         RESETTERTIARY = 0x00000002,
765 
766         PRIMIGNORABLE = 0x0202 /* NOTE(review): undocumented; numerically equal to RESETSECONDARYTERTIARY - confirm intended meaning */
767     };
768 
769     // private data members ---------------------------------------------------
770 
771     UBool dataIsOwned;
772 
773     UBool isWriteThroughAlias;
774 
775     /**
776     * c struct for collation. All initialisation for it has to be done through
777     * setUCollator().
778     */
779     UCollator *ucollator;
780 
781     /**
782     * Rule UnicodeString
783     */
784     UnicodeString urulestring;
785 
786     // friend classes --------------------------------------------------------
787 
788     /**
789     * Used to iterate over collation elements in a character source.
790     */
791     friend class CollationElementIterator;
792 
793     /**
794     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
795     *                                                       UErrorCode&)
796     */
797     friend class Collator;
798 
799     /**
800     * Searching over collation elements in a character source
801     */
802     friend class StringSearch;
803 
804     // private constructors --------------------------------------------------
805 
806     /**
807      * Default constructor
808      */
809     RuleBasedCollator();
810 
811     /**
812      * RuleBasedCollator constructor. This constructor takes a locale. The
813      * only caller of this constructor should be Collator::createInstance(). If
814      * createInstance() happens to know that the requested locale's collation is
815      * implemented as a RuleBasedCollator, it can then call this constructor.
816      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
817      * COLLATION TABLE. It does this by falling back to defaults.
818      * @param desiredLocale locale used
819      * @param status error code status
820      */
821     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
822 
823     /**
824      * common constructor implementation
825      *
826      * @param rules the collation rules to build the collation table from.
827      * @param collationStrength default strength for comparison
828      * @param decompositionMode the normalisation mode
829      * @param status reporting a success or an error.
830      */
831     void
832     construct(const UnicodeString& rules,
833               UColAttributeValue collationStrength,
834               UColAttributeValue decompositionMode,
835               UErrorCode& status);
836 
837     // private methods -------------------------------------------------------
838 
839     /**
840     * Creates the c struct for ucollator
841     * @param locale desired locale
842     * @param status error status
843     */
844     void setUCollator(const Locale& locale, UErrorCode& status);
845 
846     /**
847     * Creates the c struct for ucollator
848     * @param locale desired locale name
849     * @param status error status
850     */
851     void setUCollator(const char* locale, UErrorCode& status);
852 
853     /**
854     * Creates the c struct for ucollator. This is used internally by StringSearch.
855     * Hence the responsibility of cleaning up the ucollator is not done by
856     * this RuleBasedCollator. The dataIsOwned flag is set to FALSE.
857     * @param collator new ucollator data
858     */
859     void setUCollator(UCollator *collator);
860 
861 public:
862     /**
863     * Get UCollator data struct. Used only by StringSearch & intltest.
864     * @return UCollator data struct
865     * @internal
866     */
867     const UCollator * getUCollator();
868 
869 protected:
870    /**
871     * Used internally by registration to define the requested and valid locales.
872     * @param requestedLocale the requested locale
873     * @param validLocale the valid locale
874     * @param actualLocale the actual locale
875     * @internal
876     */
877     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
878 
879 private:
880 
881     // if not owned and not a write through alias, copy the ucollator
882     void checkOwned(void);
883 
884     // utility to init rule string used by checkOwned and construct
885     void setRuleStringFromCollator();
886 
887     /**
888     * Converts C's UCollationResult to EComparisonResult
889     * @param result member of the enum UCollationResult
890     * @return EComparisonResult equivalent of UCollationResult
891     * @deprecated ICU 2.6. We will not need it.
892     */
893     Collator::EComparisonResult getEComparisonResult(
894                                             const UCollationResult &result) const;
895 
896     /**
897     * Converts C's UCollationStrength to ECollationStrength
898     * @param strength member of the enum UCollationStrength
899     * @return ECollationStrength equivalent of UCollationStrength
900     */
901     Collator::ECollationStrength getECollationStrength(
902                                         const UCollationStrength &strength) const;
903 
904     /**
905     * Converts C++'s ECollationStrength to UCollationStrength
906     * @param strength member of the enum ECollationStrength
907     * @return UCollationStrength equivalent of ECollationStrength
908     */
909     UCollationStrength getUCollationStrength(
910       const Collator::ECollationStrength &strength) const;
911 };
912 
913 // inline method implementation ---------------------------------------------
914 
setUCollator(const Locale & locale,UErrorCode & status)915 inline void RuleBasedCollator::setUCollator(const Locale &locale,
916                                                UErrorCode &status)
917 {
918     setUCollator(locale.getName(), status);
919 }
920 
921 
setUCollator(UCollator * collator)922 inline void RuleBasedCollator::setUCollator(UCollator     *collator)
923 {
924 
925     if (ucollator && dataIsOwned) {
926         ucol_close(ucollator);
927     }
928     ucollator   = collator;
929     dataIsOwned = FALSE;
930     isWriteThroughAlias = TRUE;
931     setRuleStringFromCollator();
932 }
933 
getUCollator()934 inline const UCollator * RuleBasedCollator::getUCollator()
935 {
936     return ucollator;
937 }
938 
getEComparisonResult(const UCollationResult & result)939 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
940                                            const UCollationResult &result) const
941 {
942     switch (result)
943     {
944     case UCOL_LESS :
945         return Collator::LESS;
946     case UCOL_EQUAL :
947         return Collator::EQUAL;
948     default :
949         return Collator::GREATER;
950     }
951 }
952 
getECollationStrength(const UCollationStrength & strength)953 inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
954                                        const UCollationStrength &strength) const
955 {
956     switch (strength)
957     {
958     case UCOL_PRIMARY :
959         return Collator::PRIMARY;
960     case UCOL_SECONDARY :
961         return Collator::SECONDARY;
962     case UCOL_TERTIARY :
963         return Collator::TERTIARY;
964     case UCOL_QUATERNARY :
965         return Collator::QUATERNARY;
966     default :
967         return Collator::IDENTICAL;
968     }
969 }
970 
getUCollationStrength(const Collator::ECollationStrength & strength)971 inline UCollationStrength RuleBasedCollator::getUCollationStrength(
972                              const Collator::ECollationStrength &strength) const
973 {
974     switch (strength)
975     {
976     case Collator::PRIMARY :
977         return UCOL_PRIMARY;
978     case Collator::SECONDARY :
979         return UCOL_SECONDARY;
980     case Collator::TERTIARY :
981         return UCOL_TERTIARY;
982     case Collator::QUATERNARY :
983         return UCOL_QUATERNARY;
984     default :
985         return UCOL_IDENTICAL;
986     }
987 }
988 
989 U_NAMESPACE_END
990 
991 #endif /* #if !UCONFIG_NO_COLLATION */
992 
993 #endif
994