• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2007, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
8 /**
9 * File tblcoll.h
10 *
11 * Created by: Helena Shih
12 *
13 * Modification History:
14 *
15 *  Date        Name        Description
16 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
17 *                          constructor which reads RuleBasedCollator object from
18 *                          a binary file.  Added writeToFile method which streams
19 *                          RuleBasedCollator out to a binary file.  The streamIn
20 *                          and streamOut methods use istream and ostream objects
21 *                          in binary mode.
22 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
23 *                          hold invariant data.
24 *  2/13/97     aliu        Moved several methods into this class from Collation.
25 *                          Added a private RuleBasedCollator(Locale&) constructor,
26 *                          to be used by Collator::createDefault().  General
27 *                          clean up.
28 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
29 *                          constructor and getDynamicClassID.
30 *  3/5/97      aliu        Modified constructFromFile() to add parameter
31 *                          specifying whether or not binary loading is to be
32 *                          attempted.  This is required for dynamic rule loading.
33 * 05/07/97     helena      Added memory allocation error detection.
34 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
35 *                          use MergeCollation::getPattern.
36 *  6/20/97     helena      Java class name change.
37 *  8/18/97     helena      Added internal API documentation.
38 * 09/03/97     helena      Added createCollationKeyValues().
39 * 02/10/98     damiba      Added compare with "length" parameter
40 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
41 * 04/23/99     stephen     Removed EDecompositionMode, merged with
42 *                          Normalizer::EMode
43 * 06/14/99     stephen     Removed kResourceBundleSuffix
44 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
45 *                          UnicodeString construction and special case for NO_OP.
46 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
47 *                          internal state management.
48 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
49 *                          to implementation file.
50 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
51 *                          (ucol.h)
52 */
53 
54 #ifndef TBLCOLL_H
55 #define TBLCOLL_H
56 
57 #include "unicode/utypes.h"
58 
59 /**
60  * \file
61  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
62  */
63 
64 #if !UCONFIG_NO_COLLATION
65 
66 #include "unicode/coll.h"
67 #include "unicode/ucol.h"
68 #include "unicode/sortkey.h"
69 #include "unicode/normlzr.h"
70 
71 U_NAMESPACE_BEGIN
72 
73 /**
74 * @stable ICU 2.0
75 */
76 class StringSearch;
77 /**
78 * @stable ICU 2.0
79 */
80 class CollationElementIterator;
81 
82 /**
83  * The RuleBasedCollator class provides the simple implementation of
84  * Collator, using data-driven tables. The user can create a customized
85  * table-based collation.
86  * <P>
87  * <em>Important: </em>The ICU collation service has been reimplemented
88  * in order to achieve better performance and UCA compliance.
89  * For details, see the
90  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
91  * collation design document</a>.
92  * <p>
93  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
94  * <p>
95  * For more information about the collation service see
96  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
97  * <p>
98  * Collation service provides correct sorting orders for most locales supported in ICU.
99  * If specific data for a locale is not available, the order eventually falls back
100  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
101  * <p>
102  * Sort ordering may be customized by providing your own set of rules. For more on
103  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
104  * Collation customization</a> section of the users guide.
105  * <p>
106  * Note, RuleBasedCollator is not to be subclassed.
107  * @see        Collator
108  * @version    2.0 11/15/2001
109  */
110 class U_I18N_API RuleBasedCollator : public Collator
111 {
112 public:
113 
114   // constructor -------------------------------------------------------------
115 
116     /**
117      * RuleBasedCollator constructor. This takes the table rules and builds a
118      * collation table out of them. Please see RuleBasedCollator class
119      * description for more details on the collation rule syntax.
120      * @param rules the collation rules to build the collation table from.
121      * @param status reporting a success or an error.
122      * @see Locale
123      * @stable ICU 2.0
124      */
125     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
126 
127     /**
128      * RuleBasedCollator constructor. This takes the table rules and builds a
129      * collation table out of them. Please see RuleBasedCollator class
130      * description for more details on the collation rule syntax.
131      * @param rules the collation rules to build the collation table from.
132      * @param collationStrength default strength for comparison
133      * @param status reporting a success or an error.
134      * @see Locale
135      * @stable ICU 2.0
136      */
137     RuleBasedCollator(const UnicodeString& rules,
138                        ECollationStrength collationStrength,
139                        UErrorCode& status);
140 
141     /**
142      * RuleBasedCollator constructor. This takes the table rules and builds a
143      * collation table out of them. Please see RuleBasedCollator class
144      * description for more details on the collation rule syntax.
145      * @param rules the collation rules to build the collation table from.
146      * @param decompositionMode the normalisation mode
147      * @param status reporting a success or an error.
148      * @see Locale
149      * @stable ICU 2.0
150      */
151     RuleBasedCollator(const UnicodeString& rules,
152                     UColAttributeValue decompositionMode,
153                     UErrorCode& status);
154 
155     /**
156      * RuleBasedCollator constructor. This takes the table rules and builds a
157      * collation table out of them. Please see RuleBasedCollator class
158      * description for more details on the collation rule syntax.
159      * @param rules the collation rules to build the collation table from.
160      * @param collationStrength default strength for comparison
161      * @param decompositionMode the normalisation mode
162      * @param status reporting a success or an error.
163      * @see Locale
164      * @stable ICU 2.0
165      */
166     RuleBasedCollator(const UnicodeString& rules,
167                     ECollationStrength collationStrength,
168                     UColAttributeValue decompositionMode,
169                     UErrorCode& status);
170 
171     /**
172      * Copy constructor.
173      * @param other the RuleBasedCollator object to be copied
174      * @see Locale
175      * @stable ICU 2.0
176      */
177     RuleBasedCollator(const RuleBasedCollator& other);
178 
179 
180     /** Opens a collator from a collator binary image created using
181     *  cloneBinary. Binary image used in instantiation of the
182     *  collator remains owned by the user and should stay around for
183     *  the lifetime of the collator. The API also takes a base collator
184     *  which usually should be UCA.
185     *  @param bin binary image owned by the user and required through the
186     *             lifetime of the collator
187     *  @param length size of the image. If negative, the API will try to
188     *                figure out the length of the image
189     *  @param base fallback collator, usually UCA. Base is required to be
190     *              present through the lifetime of the collator. Currently
191     *              it cannot be NULL.
192     *  @param status for catching errors
193     *  @return newly created collator
194     *  @see cloneBinary
195     *  @stable ICU 3.4
196     */
197     RuleBasedCollator(const uint8_t *bin, int32_t length,
198                     const RuleBasedCollator *base,
199                     UErrorCode &status);
200     // destructor --------------------------------------------------------------
201 
202     /**
203      * Destructor.
204      * @stable ICU 2.0
205      */
206     virtual ~RuleBasedCollator();
207 
208     // public methods ----------------------------------------------------------
209 
210     /**
211      * Assignment operator.
212      * @param other the RuleBasedCollator object to assign from.
213      * @stable ICU 2.0
214      */
215     RuleBasedCollator& operator=(const RuleBasedCollator& other);
216 
217     /**
218      * Returns true if argument is the same as this object.
219      * @param other Collator object to be compared.
220      * @return true if the argument is the same as this object.
221      * @stable ICU 2.0
222      */
223     virtual UBool operator==(const Collator& other) const;
224 
225     /**
226      * Returns true if argument is not the same as this object.
227      * @param other Collator object to be compared
228      * @return returns true if argument is not the same as this object.
229      * @stable ICU 2.0
230      */
231     virtual UBool operator!=(const Collator& other) const;
232 
233     /**
234      * Makes a deep copy of the object.
235      * The caller owns the returned object.
236      * @return the cloned object.
237      * @stable ICU 2.0
238      */
239     virtual Collator* clone(void) const;
240 
241     /**
242      * Creates a collation element iterator for the source string. The caller of
243      * this method is responsible for the memory management of the return
244      * pointer.
245      * @param source the string over which the CollationElementIterator will
246      *        iterate.
247      * @return the collation element iterator of the source string using this as
248      *         the based Collator.
249      * @stable ICU 2.2
250      */
251     virtual CollationElementIterator* createCollationElementIterator(
252                                            const UnicodeString& source) const;
253 
254     /**
255      * Creates a collation element iterator for the source. The caller of this
256      * method is responsible for the memory management of the returned pointer.
257      * @param source the CharacterIterator which produces the characters over
258      *        which the CollationElementIterator will iterate.
259      * @return the collation element iterator of the source using this as the
260      *         based Collator.
261      * @stable ICU 2.2
262      */
263     virtual CollationElementIterator* createCollationElementIterator(
264                                          const CharacterIterator& source) const;
265 
266     /**
267      * Compares a range of character data stored in two different strings based
268      * on the collation rules. Returns information about whether a string is
269      * less than, greater than or equal to another string in a language.
270      * This can be overridden in a subclass.
271      * @param source the source string.
272      * @param target the target string to be compared with the source string.
273      * @return the comparison result. GREATER if the source string is greater
274      *         than the target string, LESS if the source is less than the
275      *         target. Otherwise, returns EQUAL.
276      * @deprecated ICU 2.6 Use overload with UErrorCode&
277      */
278     virtual EComparisonResult compare(const UnicodeString& source,
279                                       const UnicodeString& target) const;
280 
281 
282     /**
283     * The comparison function compares the character data stored in two
284     * different strings. Returns information about whether a string is less
285     * than, greater than or equal to another string.
286     * @param source the source string to be compared with.
287     * @param target the string that is to be compared with the source string.
288     * @param status possible error code
289     * @return Returns an enum value. UCOL_GREATER if source is greater
290     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
291     * than target
292     * @stable ICU 2.6
293     **/
294     virtual UCollationResult compare(const UnicodeString& source,
295                                       const UnicodeString& target,
296                                       UErrorCode &status) const;
297 
298     /**
299      * Compares a range of character data stored in two different strings based
300      * on the collation rules up to the specified length. Returns information
301      * about whether a string is less than, greater than or equal to another
302      * string in a language. This can be overridden in a subclass.
303      * @param source the source string.
304      * @param target the target string to be compared with the source string.
305      * @param length compares up to the specified length
306      * @return the comparison result. GREATER if the source string is greater
307      *         than the target string, LESS if the source is less than the
308      *         target. Otherwise, returns EQUAL.
309      * @deprecated ICU 2.6 Use overload with UErrorCode&
310      */
311     virtual EComparisonResult compare(const UnicodeString& source,
312                                       const UnicodeString&  target,
313                                       int32_t length) const;
314 
315     /**
316     * Does the same thing as compare but limits the comparison to a specified
317     * length
318     * @param source the source string to be compared with.
319     * @param target the string that is to be compared with the source string.
320     * @param length the length the comparison is limited to
321     * @param status possible error code
322     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
323     *         length) is greater than target; UCOL_EQUAL if source (up to specified
324     *         length) is equal to target; UCOL_LESS if source (up to the specified
325     *         length) is less  than target.
326     * @stable ICU 2.6
327     */
328     virtual UCollationResult compare(const UnicodeString& source,
329                                       const UnicodeString& target,
330                                       int32_t length,
331                                       UErrorCode &status) const;
332 
333     /**
334      * The comparison function compares the character data stored in two
335      * different string arrays. Returns information about whether a string array
336      * is less than, greater than or equal to another string array.
337      * <p>Example of use:
338      * <pre>
339      * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
340      * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
341      * .       UErrorCode status = U_ZERO_ERROR;
342      * .       Collator *myCollation =
343      * .                         Collator::createInstance(Locale::US, status);
344      * .       if (U_FAILURE(status)) return;
345      * .       myCollation->setStrength(Collator::PRIMARY);
346      * .       // result would be Collator::EQUAL ("abc" == "ABC")
347      * .       // (no primary difference between "abc" and "ABC")
348      * .       Collator::EComparisonResult result =
349      * .                             myCollation->compare(abc, 3, ABC, 3);
350      * .       myCollation->setStrength(Collator::TERTIARY);
351      * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
352      * .       // (with tertiary difference between "abc" and "ABC")
353      * .       result =  myCollation->compare(abc, 3, ABC, 3);
354      * </pre>
355      * @param source the source string array to be compared with.
356      * @param sourceLength the length of the source string array. If this value
357      *        is equal to -1, the string array is null-terminated.
358      * @param target the string that is to be compared with the source string.
359      * @param targetLength the length of the target string array. If this value
360      *        is equal to -1, the string array is null-terminated.
361      * @return Returns a byte value. GREATER if source is greater than target;
362      *         EQUAL if source is equal to target; LESS if source is less than
363      *         target
364      * @deprecated ICU 2.6 Use overload with UErrorCode&
365      */
366     virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
367                                       const UChar* target, int32_t targetLength)
368                                       const;
369 
370     /**
371     * The comparison function compares the character data stored in two
372     * different string arrays. Returns information about whether a string array
373     * is less than, greater than or equal to another string array.
374     * @param source the source string array to be compared with.
375     * @param sourceLength the length of the source string array.  If this value
376     *        is equal to -1, the string array is null-terminated.
377     * @param target the string that is to be compared with the source string.
378     * @param targetLength the length of the target string array.  If this value
379     *        is equal to -1, the string array is null-terminated.
380     * @param status possible error code
381     * @return Returns an enum value. UCOL_GREATER if source is greater
382     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
383     * than target
384     * @stable ICU 2.6
385     */
386     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
387                                       const UChar* target, int32_t targetLength,
388                                       UErrorCode &status) const;
389 
390     /**
391     * Transforms a specified region of the string into a series of characters
392     * that can be compared with CollationKey.compare. Use a CollationKey when
393     * you need to do repeated comparisons on the same string. For a single
394     * comparison the compare method will be faster.
395     * @param source the source string.
396     * @param key the transformed key of the source string.
397     * @param status the error code status.
398     * @return the transformed key.
399     * @see CollationKey
400     * @deprecated ICU 2.8 Use getSortKey(...) instead
401     */
402     virtual CollationKey& getCollationKey(const UnicodeString& source,
403                                           CollationKey& key,
404                                           UErrorCode& status) const;
405 
406     /**
407     * Transforms a specified region of the string into a series of characters
408     * that can be compared with CollationKey.compare. Use a CollationKey when
409     * you need to do repeated comparisons on the same string. For a single
410     * comparison the compare method will be faster.
411     * @param source the source string.
412     * @param sourceLength the length of the source string.
413     * @param key the transformed key of the source string.
414     * @param status the error code status.
415     * @return the transformed key.
416     * @see CollationKey
417     * @deprecated ICU 2.8 Use getSortKey(...) instead
418     */
419     virtual CollationKey& getCollationKey(const UChar *source,
420                                           int32_t sourceLength,
421                                           CollationKey& key,
422                                           UErrorCode& status) const;
423 
424     /**
425      * Generates the hash code for the rule-based collation object.
426      * @return the hash code.
427      * @stable ICU 2.0
428      */
429     virtual int32_t hashCode(void) const;
430 
431     /**
432     * Gets the locale of the Collator
433     * @param type can be either requested, valid or actual locale. For more
434     *             information see the definition of ULocDataLocaleType in
435     *             uloc.h
436     * @param status the error code status.
437     * @return locale where the collation data lives. If the collator
438     *         was instantiated from rules, locale is empty.
439     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
440     */
441     virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
442 
443     /**
444      * Gets the table-based rules for the collation object.
445      * @return returns the collation rules that the table collation object was
446      *         created from.
447      * @stable ICU 2.0
448      */
449     const UnicodeString& getRules(void) const;
450 
451     /**
452      * Gets the version information for a Collator.
453      * @param info the version # information, the result will be filled in
454      * @stable ICU 2.0
455      */
456     virtual void getVersion(UVersionInfo info) const;
457 
458     /**
459      * Return the maximum length of any expansion sequences that end with the
460      * specified comparison order.
461      * @param order a collation order returned by previous or next.
462      * @return maximum size of the expansion sequences ending with the collation
463      *         element or 1 if collation element does not occur at the end of
464      *         any expansion sequence
465      * @see CollationElementIterator#getMaxExpansion
466      * @stable ICU 2.0
467      */
468     int32_t getMaxExpansion(int32_t order) const;
469 
470     /**
471      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
472      * method is to implement a simple version of RTTI, since not all C++
473      * compilers support genuine RTTI. Polymorphic operator==() and clone()
474      * methods call this method.
475      * @return The class ID for this object. All objects of a given class have
476      *         the same class ID. Objects of other classes have different class
477      *         IDs.
478      * @stable ICU 2.0
479      */
480     virtual UClassID getDynamicClassID(void) const;
481 
482     /**
483      * Returns the class ID for this class. This is useful only for comparing to
484      * a return value from getDynamicClassID(). For example:
485      * <pre>
486      * Base* polymorphic_pointer = createPolymorphicObject();
487      * if (polymorphic_pointer->getDynamicClassID() ==
488      *                                          Derived::getStaticClassID()) ...
489      * </pre>
490      * @return The class ID for all objects of this class.
491      * @stable ICU 2.0
492      */
493     static UClassID U_EXPORT2 getStaticClassID(void);
494 
495     /**
496      * Returns the binary format of the class's rules. The format is that of
497      * .col files.
498      * @param length Returns the length of the data, in bytes
499      * @param status the error code status.
500      * @return memory, owned by the caller, of size 'length' bytes.
501      * @stable ICU 2.2
502      */
503     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
504 
505 
506     /** Creates a binary image of a collator. This binary image can be stored and
507     *  later used to instantiate a collator using ucol_openBinary.
508     *  This API supports preflighting.
509     *  @param buffer a fill-in buffer to receive the binary image
510     *  @param capacity capacity of the destination buffer
511     *  @param status for catching errors
512     *  @return size of the image
513     *  @see ucol_openBinary
514     *  @stable ICU 3.4
515     */
516     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
517 
518     /**
519      * Returns current rules. Delta defines whether full rules are returned or
520      * just the tailoring.
521      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
522      * @param buffer UnicodeString to store the result rules
523      * @stable ICU 2.2
524      */
525     void getRules(UColRuleOption delta, UnicodeString &buffer);
526 
527     /**
528      * Universal attribute setter
529      * @param attr attribute type
530      * @param value attribute value
531      * @param status to indicate whether the operation went on smoothly or there were errors
532      * @stable ICU 2.2
533      */
534     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
535                               UErrorCode &status);
536 
537     /**
538      * Universal attribute getter.
539      * @param attr attribute type
540      * @param status to indicate whether the operation went on smoothly or there were errors
541      * @return attribute value
542      * @stable ICU 2.2
543      */
544     virtual UColAttributeValue getAttribute(UColAttribute attr,
545                                             UErrorCode &status);
546 
547     /**
548      * Sets the variable top to a collation element value of a string supplied.
549      * @param varTop one or more (if contraction) UChars to which the variable top should be set
550      * @param len length of variable top string. If -1 it is considered to be zero terminated.
551      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
552      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
553      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
554      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
555      * @stable ICU 2.0
556      */
557     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
558 
559     /**
560      * Sets the variable top to a collation element value of a string supplied.
561      * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
562      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
563      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
564      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
565      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
566      * @stable ICU 2.0
567      */
568     virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
569 
570     /**
571      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
572      * Lower 16 bits are ignored.
573      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
574      * @param status error code (not changed by function)
575      * @stable ICU 2.0
576      */
577     virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
578 
579     /**
580      * Gets the variable top value of a Collator.
581      * Lower 16 bits are undefined and should be ignored.
582      * @param status error code (not changed by function). If error code is set, the return value is undefined.
583      * @stable ICU 2.0
584      */
585     virtual uint32_t getVariableTop(UErrorCode &status) const;
586 
587     /**
588      * Get an UnicodeSet that contains all the characters and sequences tailored in
589      * this collator.
590      * @param status      error code of the operation
591      * @return a pointer to a UnicodeSet object containing all the
592      *         code points and sequences that may sort differently than
593      *         in the UCA. The object must be disposed of by using delete
594      * @stable ICU 2.4
595      */
596     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
597 
598     /**
599      * Thread safe cloning operation.
600      * @return pointer to the new clone, user should remove it.
601      * @stable ICU 2.2
602      */
603     virtual Collator* safeClone(void);
604 
605     /**
606      * Get the sort key as an array of bytes from an UnicodeString.
607      * @param source string to be processed.
608      * @param result buffer to store result in. If NULL, number of bytes needed
609      *        will be returned.
610      * @param resultLength length of the result buffer. If it is not enough, the
611      *        buffer will be filled to capacity.
612      * @return Number of bytes needed for storing the sort key
613      * @stable ICU 2.0
614      */
615     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
616                                int32_t resultLength) const;
617 
618     /**
619      * Get the sort key as an array of bytes from an UChar buffer.
620      * @param source string to be processed.
621      * @param sourceLength length of string to be processed. If -1, the string
622      *        is 0 terminated and length will be decided by the function.
623      * @param result buffer to store result in. If NULL, number of bytes needed
624      *        will be returned.
625      * @param resultLength length of the result buffer. If it is not enough, the
626      *        buffer will be filled to capacity.
627      * @return Number of bytes needed for storing the sort key
628      * @stable ICU 2.2
629      */
630     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
631                                uint8_t *result, int32_t resultLength) const;
632 
633     /**
634     * Determines the minimum strength that will be used in comparison or
635     * transformation.
636     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
637     * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
638     * are ignored.
639     * @return the current comparison level.
640     * @see RuleBasedCollator#setStrength
641     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
642     */
643     virtual ECollationStrength getStrength(void) const;
644 
645     /**
646     * Sets the minimum strength to be used in comparison or transformation.
647     * @see RuleBasedCollator#getStrength
648     * @param newStrength the new comparison level.
649     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
650     */
651     virtual void setStrength(ECollationStrength newStrength);
652 
653 private:
654 
655     // private static constants -----------------------------------------------
656 
657     enum {
658         /* need look up in .commit() */
659         CHARINDEX = 0x70000000,
660         /* Expand index follows */
661         EXPANDCHARINDEX = 0x7E000000,
662         /* contract indexes follow */
663         CONTRACTCHARINDEX = 0x7F000000,
664         /* unmapped character values (all 32 bits set) */
665         UNMAPPED = 0xFFFFFFFF,
666         /* primary strength increment (one unit at bit 16) */
667         PRIMARYORDERINCREMENT = 0x00010000,
668         /* secondary strength increment (one unit at bit 8) */
669         SECONDARYORDERINCREMENT = 0x00000100,
670         /* tertiary strength increment (one unit at bit 0) */
671         TERTIARYORDERINCREMENT = 0x00000001,
672         /* mask off anything but primary order (upper 16 bits) */
673         PRIMARYORDERMASK = 0xffff0000,
674         /* mask off anything but secondary order (bits 8-15) */
675         SECONDARYORDERMASK = 0x0000ff00,
676         /* mask off anything but tertiary order (low 8 bits) */
677         TERTIARYORDERMASK = 0x000000ff,
678         /* mask off ignorable char order (low 16 bits) */
679         IGNORABLEMASK = 0x0000ffff,
680         /* use only the primary difference (same bits as PRIMARYORDERMASK) */
681         PRIMARYDIFFERENCEONLY = 0xffff0000,
682         /* use only the primary and secondary difference (upper 24 bits) */
683         SECONDARYDIFFERENCEONLY = 0xffffff00,
684         /* primary order shift (bit position of the primary field) */
685         PRIMARYORDERSHIFT = 16,
686         /* secondary order shift (bit position of the secondary field) */
687         SECONDARYORDERSHIFT = 8,
688         /* starting value for collation elements */
689         COLELEMENTSTART = 0x02020202,
690         /* testing mask for primary low element (bits 16-23) */
691         PRIMARYLOWZEROMASK = 0x00FF0000,
692         /* resetting value for secondaries and tertiaries */
693         RESETSECONDARYTERTIARY = 0x00000202,
694         /* resetting value for tertiaries */
695         RESETTERTIARY = 0x00000002,
696         /* NOTE(review): presumably the value marking a primary-ignorable element -- TODO confirm */
697         PRIMIGNORABLE = 0x0202
698     };
699 
700     // private data members ---------------------------------------------------
701 
702     UBool dataIsOwned;
703 
704     UBool isWriteThroughAlias;
705 
706     /**
707     * c struct for collation. All initialisation for it has to be done through
708     * setUCollator().
709     */
710     UCollator *ucollator;
711 
712     /**
713     * Rule UnicodeString
714     */
715     UnicodeString urulestring;
716 
717     // friend classes --------------------------------------------------------
718 
719     /**
720     * Used to iterate over collation elements in a character source.
721     */
722     friend class CollationElementIterator;
723 
724     /**
725     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
726     *                                                       UErrorCode&)
727     */
728     friend class Collator;
729 
730     /**
731     * Searching over collation elements in a character source
732     */
733     friend class StringSearch;
734 
735     // private constructors --------------------------------------------------
736 
737     /**
738      * Default constructor
739      */
740     RuleBasedCollator();
741 
742     /**
743      * RuleBasedCollator constructor. This constructor takes a locale. The
744      * only caller of this class should be Collator::createInstance(). If
745      * createInstance() happens to know that the requested locale's collation is
746      * implemented as a RuleBasedCollator, it can then call this constructor.
747      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
748      * COLLATION TABLE. It does this by falling back to defaults.
749      * @param desiredLocale locale used
750      * @param status error code status
751      */
752     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
753 
754     /**
755      * common constructor implementation
756      *
757      * @param rules the collation rules to build the collation table from.
758      * @param collationStrength default strength for comparison
759      * @param decompositionMode the normalisation mode
760      * @param status reporting a success or an error.
761      */
762     void
763     construct(const UnicodeString& rules,
764               UColAttributeValue collationStrength,
765               UColAttributeValue decompositionMode,
766               UErrorCode& status);
767 
768     // private methods -------------------------------------------------------
769 
770     /**
771     * Creates the c struct for ucollator
772     * @param locale desired locale
773     * @param status error status
774     */
775     void setUCollator(const Locale& locale, UErrorCode& status);
776 
777     /**
778     * Creates the c struct for ucollator
779     * @param locale desired locale name
780     * @param status error status
781     */
782     void setUCollator(const char* locale, UErrorCode& status);
783 
784     /**
785     * Creates the c struct for ucollator. This is used internally by StringSearch.
786     * Hence the responsibility of cleaning up the ucollator is not done by
787     * this RuleBasedCollator. The dataIsOwned flag is set to FALSE.
788     * @param collator new ucollator data
789     * @param rules corresponding collation rules
790     */
791     void setUCollator(UCollator *collator);
792 
793 public:
794     /**
795     * Get UCollator data struct. Used only by StringSearch & intltest.
796     * @return UCollator data struct
797     * @internal
798     */
799     const UCollator * getUCollator();
800 
801 protected:
802    /**
803     * Used internally by registration to define the requested and valid locales.
804     * @param requestedLocale the requested locale
805     * @param validLocale the valid locale
806     * @internal
807     */
808     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale);
809 
810 private:
811 
812     // if not owned and not a write through alias, copy the ucollator
813     void checkOwned(void);
814 
815     // utility to init rule string used by checkOwned and construct
816     void setRuleStringFromCollator();
817 
818     /**
819     * Converts C's UCollationResult to EComparisonResult
820     * @param result member of the enum UCollationResult
821     * @return EComparisonResult equivalent of UCollationResult
822     * @deprecated ICU 2.6. We will not need it.
823     */
824     Collator::EComparisonResult getEComparisonResult(
825                                             const UCollationResult &result) const;
826 
827     /**
828     * Converts C's UCollationStrength to ECollationStrength
829     * @param strength member of the enum UCollationStrength
830     * @return ECollationStrength equivalent of UCollationStrength
831     */
832     Collator::ECollationStrength getECollationStrength(
833                                         const UCollationStrength &strength) const;
834 
835     /**
836     * Converts C++'s ECollationStrength to UCollationStrength
837     * @param strength member of the enum ECollationStrength
838     * @return UCollationStrength equivalent of ECollationStrength
839     */
840     UCollationStrength getUCollationStrength(
841       const Collator::ECollationStrength &strength) const;
842 };
843 
844 // inline method implementation ---------------------------------------------
845 
setUCollator(const Locale & locale,UErrorCode & status)846 inline void RuleBasedCollator::setUCollator(const Locale &locale,
847                                                UErrorCode &status)
848 {
849     setUCollator(locale.getName(), status);
850 }
851 
852 
setUCollator(UCollator * collator)853 inline void RuleBasedCollator::setUCollator(UCollator     *collator)
854 {
855 
856     if (ucollator && dataIsOwned) {
857         ucol_close(ucollator);
858     }
859     ucollator   = collator;
860     dataIsOwned = FALSE;
861     isWriteThroughAlias = TRUE;
862     setRuleStringFromCollator();
863 }
864 
getUCollator()865 inline const UCollator * RuleBasedCollator::getUCollator()
866 {
867     return ucollator;
868 }
869 
getEComparisonResult(const UCollationResult & result)870 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
871                                            const UCollationResult &result) const
872 {
873     switch (result)
874     {
875     case UCOL_LESS :
876         return Collator::LESS;
877     case UCOL_EQUAL :
878         return Collator::EQUAL;
879     default :
880         return Collator::GREATER;
881     }
882 }
883 
getECollationStrength(const UCollationStrength & strength)884 inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
885                                        const UCollationStrength &strength) const
886 {
887     switch (strength)
888     {
889     case UCOL_PRIMARY :
890         return Collator::PRIMARY;
891     case UCOL_SECONDARY :
892         return Collator::SECONDARY;
893     case UCOL_TERTIARY :
894         return Collator::TERTIARY;
895     case UCOL_QUATERNARY :
896         return Collator::QUATERNARY;
897     default :
898         return Collator::IDENTICAL;
899     }
900 }
901 
getUCollationStrength(const Collator::ECollationStrength & strength)902 inline UCollationStrength RuleBasedCollator::getUCollationStrength(
903                              const Collator::ECollationStrength &strength) const
904 {
905     switch (strength)
906     {
907     case Collator::PRIMARY :
908         return UCOL_PRIMARY;
909     case Collator::SECONDARY :
910         return UCOL_SECONDARY;
911     case Collator::TERTIARY :
912         return UCOL_TERTIARY;
913     case Collator::QUATERNARY :
914         return UCOL_QUATERNARY;
915     default :
916         return UCOL_IDENTICAL;
917     }
918 }
919 
920 U_NAMESPACE_END
921 
922 #endif /* #if !UCONFIG_NO_COLLATION */
923 
924 #endif
925