• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
8 /**
9  * \file
10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
11  */
12 
13 /**
14 * File tblcoll.h
15 *
16 * Created by: Helena Shih
17 *
18 * Modification History:
19 *
20 *  Date        Name        Description
21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
22 *                          constructor which reads RuleBasedCollator object from
23 *                          a binary file.  Added writeToFile method which streams
24 *                          RuleBasedCollator out to a binary file.  The streamIn
25 *                          and streamOut methods use istream and ostream objects
26 *                          in binary mode.
27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
28 *                          hold invariant data.
29 *  2/13/97     aliu        Moved several methods into this class from Collation.
30 *                          Added a private RuleBasedCollator(Locale&) constructor,
31 *                          to be used by Collator::createDefault().  General
32 *                          clean up.
33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
34 *                          constructor and getDynamicClassID.
35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
36 *                          specifying whether or not binary loading is to be
37 *                          attempted.  This is required for dynamic rule loading.
38 * 05/07/97     helena      Added memory allocation error detection.
39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
40 *                          use MergeCollation::getPattern.
41 *  6/20/97     helena      Java class name change.
42 *  8/18/97     helena      Added internal API documentation.
43 * 09/03/97     helena      Added createCollationKeyValues().
44 * 02/10/98     damiba      Added compare with "length" parameter
45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
47 *                          Normalizer::EMode
48 * 06/14/99     stephen     Removed kResourceBundleSuffix
49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
50 *                          UnicodeString construction and special case for NO_OP.
51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
52 *                          internal state management.
53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
54 *                          to implementation file.
55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
56 *                          (ucol.h)
57 */
58 
59 #ifndef TBLCOLL_H
60 #define TBLCOLL_H
61 
62 #include "unicode/utypes.h"
63 
64 
65 #if !UCONFIG_NO_COLLATION
66 
67 #include "unicode/coll.h"
68 #include "unicode/ucol.h"
69 #include "unicode/sortkey.h"
70 #include "unicode/normlzr.h"
71 
72 U_NAMESPACE_BEGIN
73 
74 /**
75 * @stable ICU 2.0
76 */
77 class StringSearch;
78 /**
79 * @stable ICU 2.0
80 */
81 class CollationElementIterator;
82 
83 /**
84  * The RuleBasedCollator class provides the simple implementation of
85  * Collator, using data-driven tables. The user can create a customized
86  * table-based collation.
87  * <P>
88  * <em>Important: </em>The ICU collation service has been reimplemented
89  * in order to achieve better performance and UCA compliance.
90  * For details, see the
91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
92  * collation design document</a>.
93  * <p>
94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
95  * <p>
96  * For more information about the collation service see
97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
98  * <p>
99  * Collation service provides correct sorting orders for most locales supported in ICU.
100  * If specific data for a locale is not available, the order eventually falls back
101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
102  * <p>
103  * Sort ordering may be customized by providing your own set of rules. For more on
104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
105  * Collation customization</a> section of the users guide.
106  * <p>
107  * Note, RuleBasedCollator is not to be subclassed.
108  * @see        Collator
109  * @version    2.0 11/15/2001
110  */
111 class U_I18N_API RuleBasedCollator : public Collator
112 {
113 public:
114 
115   // constructor -------------------------------------------------------------
116 
117     /**
118      * RuleBasedCollator constructor. This takes the table rules and builds a
119      * collation table out of them. Please see RuleBasedCollator class
120      * description for more details on the collation rule syntax.
121      * @param rules the collation rules to build the collation table from.
122      * @param status reporting a success or an error.
123      * @see Locale
124      * @stable ICU 2.0
125      */
126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
127 
128     /**
129      * RuleBasedCollator constructor. This takes the table rules and builds a
130      * collation table out of them. Please see RuleBasedCollator class
131      * description for more details on the collation rule syntax.
132      * @param rules the collation rules to build the collation table from.
133      * @param collationStrength default strength for comparison
134      * @param status reporting a success or an error.
135      * @see Locale
136      * @stable ICU 2.0
137      */
138     RuleBasedCollator(const UnicodeString& rules,
139                        ECollationStrength collationStrength,
140                        UErrorCode& status);
141 
142     /**
143      * RuleBasedCollator constructor. This takes the table rules and builds a
144      * collation table out of them. Please see RuleBasedCollator class
145      * description for more details on the collation rule syntax.
146      * @param rules the collation rules to build the collation table from.
147      * @param decompositionMode the normalisation mode
148      * @param status reporting a success or an error.
149      * @see Locale
150      * @stable ICU 2.0
151      */
152     RuleBasedCollator(const UnicodeString& rules,
153                     UColAttributeValue decompositionMode,
154                     UErrorCode& status);
155 
156     /**
157      * RuleBasedCollator constructor. This takes the table rules and builds a
158      * collation table out of them. Please see RuleBasedCollator class
159      * description for more details on the collation rule syntax.
160      * @param rules the collation rules to build the collation table from.
161      * @param collationStrength default strength for comparison
162      * @param decompositionMode the normalisation mode
163      * @param status reporting a success or an error.
164      * @see Locale
165      * @stable ICU 2.0
166      */
167     RuleBasedCollator(const UnicodeString& rules,
168                     ECollationStrength collationStrength,
169                     UColAttributeValue decompositionMode,
170                     UErrorCode& status);
171 
172     /**
173      * Copy constructor.
174      * @param other the RuleBasedCollator object to be copied
175      * @see Locale
176      * @stable ICU 2.0
177      */
178     RuleBasedCollator(const RuleBasedCollator& other);
179 
180 
181     /** Opens a collator from a collator binary image created using
182     *  cloneBinary. Binary image used in instantiation of the
183     *  collator remains owned by the user and should stay around for
184     *  the lifetime of the collator. The API also takes a base collator
185     *  which usually should be UCA.
186     *  @param bin binary image owned by the user and required through the
187     *             lifetime of the collator
188     *  @param length size of the image. If negative, the API will try to
189     *                figure out the length of the image
190     *  @param base fallback collator, usually UCA. Base is required to be
191     *              present through the lifetime of the collator. Currently
192     *              it cannot be NULL.
193     *  @param status for catching errors
194     *  @return newly created collator
195     *  @see cloneBinary
196     *  @stable ICU 3.4
197     */
198     RuleBasedCollator(const uint8_t *bin, int32_t length,
199                     const RuleBasedCollator *base,
200                     UErrorCode &status);
201     // destructor --------------------------------------------------------------
202 
203     /**
204      * Destructor.
205      * @stable ICU 2.0
206      */
207     virtual ~RuleBasedCollator();
208 
209     // public methods ----------------------------------------------------------
210 
211     /**
212      * Assignment operator.
213      * @param other the RuleBasedCollator object to be copied.
214      * @stable ICU 2.0
215      */
216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
217 
218     /**
219      * Returns true if argument is the same as this object.
220      * @param other Collator object to be compared.
221      * @return true if the argument is the same as this object.
222      * @stable ICU 2.0
223      */
224     virtual UBool operator==(const Collator& other) const;
225 
226     /**
227      * Makes a copy of this object.
228      * @return a copy of this object, owned by the caller
229      * @stable ICU 2.0
230      */
231     virtual Collator* clone(void) const;
232 
233     /**
234      * Creates a collation element iterator for the source string. The caller of
235      * this method is responsible for the memory management of the return
236      * pointer.
237      * @param source the string over which the CollationElementIterator will
238      *        iterate.
239      * @return the collation element iterator of the source string using this as
240      *         the base Collator.
241      * @stable ICU 2.2
242      */
243     virtual CollationElementIterator* createCollationElementIterator(
244                                            const UnicodeString& source) const;
245 
246     /**
247      * Creates a collation element iterator for the source. The caller of this
248      * method is responsible for the memory management of the returned pointer.
249      * @param source the CharacterIterator which produces the characters over
250      *        which the CollationElementIterator will iterate.
251      * @return the collation element iterator of the source using this as the
252      *         base Collator.
253      * @stable ICU 2.2
254      */
255     virtual CollationElementIterator* createCollationElementIterator(
256                                          const CharacterIterator& source) const;
257 
258     // Make deprecated versions of Collator::compare() visible.
259     using Collator::compare;
260 
261     /**
262     * The comparison function compares the character data stored in two
263     * different strings. Returns information about whether a string is less
264     * than, greater than or equal to another string.
265     * @param source the source string to be compared with.
266     * @param target the string that is to be compared with the source string.
267     * @param status possible error code
268     * @return Returns an enum value. UCOL_GREATER if source is greater
269     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
270     * than target
271     * @stable ICU 2.6
272     **/
273     virtual UCollationResult compare(const UnicodeString& source,
274                                       const UnicodeString& target,
275                                       UErrorCode &status) const;
276 
277     /**
278     * Does the same thing as compare but limits the comparison to a specified
279     * length
280     * @param source the source string to be compared with.
281     * @param target the string that is to be compared with the source string.
282     * @param length the length the comparison is limited to
283     * @param status possible error code
284     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
285     *         length) is greater than target; UCOL_EQUAL if source (up to specified
286     *         length) is equal to target; UCOL_LESS if source (up to the specified
287     *         length) is less  than target.
288     * @stable ICU 2.6
289     */
290     virtual UCollationResult compare(const UnicodeString& source,
291                                       const UnicodeString& target,
292                                       int32_t length,
293                                       UErrorCode &status) const;
294 
295     /**
296     * The comparison function compares the character data stored in two
297     * different string arrays. Returns information about whether a string array
298     * is less than, greater than or equal to another string array.
299     * @param source the source string array to be compared with.
300     * @param sourceLength the length of the source string array.  If this value
301     *        is equal to -1, the string array is null-terminated.
302     * @param target the string that is to be compared with the source string.
303     * @param targetLength the length of the target string array.  If this value
304     *        is equal to -1, the string array is null-terminated.
305     * @param status possible error code
306     * @return Returns an enum value. UCOL_GREATER if source is greater
307     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
308     * than target
309     * @stable ICU 2.6
310     */
311     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
312                                       const UChar* target, int32_t targetLength,
313                                       UErrorCode &status) const;
314 
315     /**
316      * Compares two strings using the Collator.
317      * Returns whether the first one compares less than/equal to/greater than
318      * the second one.
319      * This version takes UCharIterator input.
320      * @param sIter the first ("source") string iterator
321      * @param tIter the second ("target") string iterator
322      * @param status ICU status
323      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
324      * @stable ICU 4.2
325      */
326     virtual UCollationResult compare(UCharIterator &sIter,
327                                      UCharIterator &tIter,
328                                      UErrorCode &status) const;
329 
330     /**
331     * Transforms a specified region of the string into a series of characters
332     * that can be compared with CollationKey.compare. Use a CollationKey when
333     * you need to do repeated comparisons on the same string. For a single
334     * comparison the compare method will be faster.
335     * @param source the source string.
336     * @param key the transformed key of the source string.
337     * @param status the error code status.
338     * @return the transformed key.
339     * @see CollationKey
340     * @stable ICU 2.0
341     */
342     virtual CollationKey& getCollationKey(const UnicodeString& source,
343                                           CollationKey& key,
344                                           UErrorCode& status) const;
345 
346     /**
347     * Transforms a specified region of the string into a series of characters
348     * that can be compared with CollationKey.compare. Use a CollationKey when
349     * you need to do repeated comparisons on the same string. For a single
350     * comparison the compare method will be faster.
351     * @param source the source string.
352     * @param sourceLength the length of the source string.
353     * @param key the transformed key of the source string.
354     * @param status the error code status.
355     * @return the transformed key.
356     * @see CollationKey
357     * @stable ICU 2.0
358     */
359     virtual CollationKey& getCollationKey(const UChar *source,
360                                           int32_t sourceLength,
361                                           CollationKey& key,
362                                           UErrorCode& status) const;
363 
364     /**
365      * Generates the hash code for the rule-based collation object.
366      * @return the hash code.
367      * @stable ICU 2.0
368      */
369     virtual int32_t hashCode(void) const;
370 
371     /**
372     * Gets the locale of the Collator
373     * @param type can be either requested, valid or actual locale. For more
374     *             information see the definition of ULocDataLocaleType in
375     *             uloc.h
376     * @param status the error code status.
377     * @return locale where the collation data lives. If the collator
378     *         was instantiated from rules, locale is empty.
379     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
380     */
381     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
382 
383     /**
384      * Gets the tailoring rules for this collator.
385      * @return the collation tailoring from which this collator was created
386      * @stable ICU 2.0
387      */
388     const UnicodeString& getRules(void) const;
389 
390     /**
391      * Gets the version information for a Collator.
392      * @param info the version # information, the result will be filled in
393      * @stable ICU 2.0
394      */
395     virtual void getVersion(UVersionInfo info) const;
396 
397     /**
398      * Return the maximum length of any expansion sequences that end with the
399      * specified comparison order.
400      * @param order a collation order returned by previous or next.
401      * @return maximum size of the expansion sequences ending with the collation
402      *         element or 1 if collation element does not occur at the end of
403      *         any expansion sequence
404      * @see CollationElementIterator#getMaxExpansion
405      * @stable ICU 2.0
406      */
407     int32_t getMaxExpansion(int32_t order) const;
408 
409     /**
410      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
411      * method is to implement a simple version of RTTI, since not all C++
412      * compilers support genuine RTTI. Polymorphic operator==() and clone()
413      * methods call this method.
414      * @return The class ID for this object. All objects of a given class have
415      *         the same class ID. Objects of other classes have different class
416      *         IDs.
417      * @stable ICU 2.0
418      */
419     virtual UClassID getDynamicClassID(void) const;
420 
421     /**
422      * Returns the class ID for this class. This is useful only for comparing to
423      * a return value from getDynamicClassID(). For example:
424      * <pre>
425      * Base* polymorphic_pointer = createPolymorphicObject();
426      * if (polymorphic_pointer->getDynamicClassID() ==
427      *                                          Derived::getStaticClassID()) ...
428      * </pre>
429      * @return The class ID for all objects of this class.
430      * @stable ICU 2.0
431      */
432     static UClassID U_EXPORT2 getStaticClassID(void);
433 
434     /**
435      * Returns the binary format of the class's rules. The format is that of
436      * .col files.
437      * @param length Returns the length of the data, in bytes
438      * @param status the error code status.
439      * @return memory, owned by the caller, of size 'length' bytes.
440      * @stable ICU 2.2
441      */
442     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
443 
444 
445     /** Creates a binary image of a collator. This binary image can be stored and
446     *  later used to instantiate a collator using ucol_openBinary.
447     *  This API supports preflighting.
448     *  @param buffer a fill-in buffer to receive the binary image
449     *  @param capacity capacity of the destination buffer
450     *  @param status for catching errors
451     *  @return size of the image
452     *  @see ucol_openBinary
453     *  @stable ICU 3.4
454     */
455     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
456 
457     /**
458      * Returns current rules. Delta defines whether full rules are returned or
459      * just the tailoring.
460      *
461      * getRules(void) should normally be used instead.
462      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
463      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
464      * @param buffer UnicodeString to store the result rules
465      * @stable ICU 2.2
466      * @see UCOL_FULL_RULES
467      */
468     void getRules(UColRuleOption delta, UnicodeString &buffer);
469 
470     /**
471      * Universal attribute setter
472      * @param attr attribute type
473      * @param value attribute value
474      * @param status to indicate whether the operation went on smoothly or there were errors
475      * @stable ICU 2.2
476      */
477     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
478                               UErrorCode &status);
479 
480     /**
481      * Universal attribute getter.
482      * @param attr attribute type
483      * @param status to indicate whether the operation went on smoothly or there were errors
484      * @return attribute value
485      * @stable ICU 2.2
486      */
487     virtual UColAttributeValue getAttribute(UColAttribute attr,
488                                             UErrorCode &status) const;
489 
490     /**
491      * Sets the variable top to a collation element value of a string supplied.
492      * @param varTop one or more (if contraction) UChars to which the variable top should be set
493      * @param len length of variable top string. If -1 it is considered to be zero terminated.
494      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
495      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
496      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
497      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
498      * @stable ICU 2.0
499      */
500     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
501 
502     /**
503      * Sets the variable top to a collation element value of a string supplied.
504      * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set
505      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
506      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
507      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
508      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
509      * @stable ICU 2.0
510      */
511     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
512 
513     /**
514      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
515      * Lower 16 bits are ignored.
516      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
517      * @param status error code (not changed by function)
518      * @stable ICU 2.0
519      */
520     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
521 
522     /**
523      * Gets the variable top value of a Collator.
524      * Lower 16 bits are undefined and should be ignored.
525      * @param status error code (not changed by function). If error code is set, the return value is undefined.
526      * @stable ICU 2.0
527      */
528     virtual uint32_t getVariableTop(UErrorCode &status) const;
529 
530     /**
531      * Get an UnicodeSet that contains all the characters and sequences tailored in
532      * this collator.
533      * @param status      error code of the operation
534      * @return a pointer to a UnicodeSet object containing all the
535      *         code points and sequences that may sort differently than
536      *         in the UCA. The object must be disposed of by using delete
537      * @stable ICU 2.4
538      */
539     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
540 
541     /**
542      * Get the sort key as an array of bytes from an UnicodeString.
543      * @param source string to be processed.
544      * @param result buffer to store result in. If NULL, number of bytes needed
545      *        will be returned.
546      * @param resultLength length of the result buffer. If it is not large enough, the
547      *        buffer will be filled to capacity.
548      * @return Number of bytes needed for storing the sort key
549      * @stable ICU 2.0
550      */
551     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
552                                int32_t resultLength) const;
553 
554     /**
555      * Get the sort key as an array of bytes from an UChar buffer.
556      * @param source string to be processed.
557      * @param sourceLength length of string to be processed. If -1, the string
558      *        is 0 terminated and length will be decided by the function.
559      * @param result buffer to store result in. If NULL, number of bytes needed
560      *        will be returned.
561      * @param resultLength length of the result buffer. If it is not large enough, the
562      *        buffer will be filled to capacity.
563      * @return Number of bytes needed for storing the sort key
564      * @stable ICU 2.2
565      */
566     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
567                                uint8_t *result, int32_t resultLength) const;
568 
569     /**
570      * Retrieves the reordering codes for this collator.
571      * @param dest The array to fill with the script ordering.
572      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
573      *  will only return the length of the result without writing any of the result string (pre-flighting).
574      * @param status A reference to an error code value, which must not indicate
575      * a failure before the function call.
576      * @return The length of the script ordering array.
577      * @see ucol_setReorderCodes
578      * @see Collator#getEquivalentReorderCodes
579      * @see Collator#setReorderCodes
580      * @stable ICU 4.8
581      */
582      virtual int32_t getReorderCodes(int32_t *dest,
583                                      int32_t destCapacity,
584                                      UErrorCode& status) const;
585 
586     /**
587      * Sets the ordering of scripts for this collator.
588      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
589      * length is also set to 0. An empty array will clear any reordering codes on the collator.
590      * @param reorderCodesLength The length of reorderCodes.
591      * @param status error code
592      * @see Collator#getReorderCodes
593      * @see Collator#getEquivalentReorderCodes
594      * @stable ICU 4.8
595      */
596      virtual void setReorderCodes(const int32_t* reorderCodes,
597                                   int32_t reorderCodesLength,
598                                   UErrorCode& status) ;
599 
600     /**
601      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
602      * codes will be grouped and must reorder together.
603      * @param reorderCode The reorder code to determine equivalence for.
604      * @param dest The array to fill with the script equivalence reordering codes.
605      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
606      * function will only return the length of the result without writing any of the result
607      * string (pre-flighting).
608      * @param status A reference to an error code value, which must not indicate
609      * a failure before the function call.
610      * @return The length of the reordering code equivalence array.
611      * @see ucol_setReorderCodes
612      * @see Collator#getReorderCodes
613      * @see Collator#setReorderCodes
614      * @stable ICU 4.8
615      */
616     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
617                                 int32_t* dest,
618                                 int32_t destCapacity,
619                                 UErrorCode& status);
620 
621 private:
622 
623     // private static constants -----------------------------------------------
624 
625     enum {
626         /* needs a look-up in .commit() */
627         CHARINDEX = 0x70000000,
628         /* an expand index follows */
629         EXPANDCHARINDEX = 0x7E000000,
630         /* a contract index follows */
631         CONTRACTCHARINDEX = 0x7F000000,
632         /* value for unmapped characters */
633         UNMAPPED = 0xFFFFFFFF,
634         /* primary strength increment */
635         PRIMARYORDERINCREMENT = 0x00010000,
636         /* secondary strength increment */
637         SECONDARYORDERINCREMENT = 0x00000100,
638         /* tertiary strength increment */
639         TERTIARYORDERINCREMENT = 0x00000001,
640         /* masks off everything but the primary order (upper 16 bits) */
641         PRIMARYORDERMASK = 0xffff0000,
642         /* masks off everything but the secondary order (bits 8-15) */
643         SECONDARYORDERMASK = 0x0000ff00,
644         /* masks off everything but the tertiary order (low 8 bits) */
645         TERTIARYORDERMASK = 0x000000ff,
646         /* mask for the ignorable char order (low 16 bits) */
647         IGNORABLEMASK = 0x0000ffff,
648         /* use only the primary difference */
649         PRIMARYDIFFERENCEONLY = 0xffff0000,
650         /* use only the primary and secondary difference */
651         SECONDARYDIFFERENCEONLY = 0xffffff00,
652         /* primary order shift (in bits) */
653         PRIMARYORDERSHIFT = 16,
654         /* secondary order shift (in bits) */
655         SECONDARYORDERSHIFT = 8,
656         /* starting value for collation elements */
657         COLELEMENTSTART = 0x02020202,
658         /* testing mask for primary low element */
659         PRIMARYLOWZEROMASK = 0x00FF0000,
660         /* resetting value for secondaries and tertiaries */
661         RESETSECONDARYTERTIARY = 0x00000202,
662         /* resetting value for tertiaries */
663         RESETTERTIARY = 0x00000002,
664         /* NOTE(review): undocumented; presumably the primary-ignorable value - confirm */
665         PRIMIGNORABLE = 0x0202
666     };
667 
668     // private data members ---------------------------------------------------
669 
        /**
        * Ownership flag for ucollator. setUCollator(UCollator *) calls
        * ucol_close() on the current ucollator only while this flag is set,
        * and then resets the flag to FALSE.
        */
670     UBool dataIsOwned;
671 
        /**
        * Set to TRUE by setUCollator(UCollator *) when an externally supplied
        * collator is installed. According to the note on checkOwned(), a
        * ucollator that is neither owned nor a write-through alias is copied.
        */
672     UBool isWriteThroughAlias;
673 
674     /**
675     * C struct for collation. All initialisation for it has to be done through
676     * setUCollator().
677     */
678     UCollator *ucollator;
679 
680     /**
681     * Rule string, held as a UnicodeString.
682     */
683     UnicodeString urulestring;
684 
685     // friend classes --------------------------------------------------------
686 
687     /**
688     * Used to iterate over collation elements in a character source.
689     */
690     friend class CollationElementIterator;
691 
692     /**
693     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
694     *                                                       UErrorCode&)
695     */
696     friend class Collator;
697 
698     /**
699     * Searching over collation elements in a character source
700     */
701     friend class StringSearch;
702 
703     // private constructors --------------------------------------------------
704 
705     /**
706      * Private default constructor (listed under the private constructors).
707      */
708     RuleBasedCollator();
709 
710     /**
711      * RuleBasedCollator constructor. This constructor takes a locale. The
712      * only caller of this constructor should be Collator::createInstance(). If
713      * createInstance() happens to know that the requested locale's collation is
714      * implemented as a RuleBasedCollator, it can then call this constructor.
715      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
716      * COLLATION TABLE. It does this by falling back to defaults.
717      * @param desiredLocale locale used
718      * @param status error code status
719      */
720     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
721 
722     /**
723      * Common constructor implementation.
724      *
725      * @param rules the collation rules to build the collation table from.
726      * @param collationStrength default strength for comparison
727      * @param decompositionMode the normalisation mode
728      * @param status reports success or an error.
729      */
730     void
731     construct(const UnicodeString& rules,
732               UColAttributeValue collationStrength,
733               UColAttributeValue decompositionMode,
734               UErrorCode& status);
735 
736     // private methods -------------------------------------------------------
737 
738     /**
739     * Creates the C struct for ucollator. Defined inline after the class: it
        * forwards locale.getName() to setUCollator(const char*, UErrorCode&).
740     * @param locale desired locale
741     * @param status error status
742     */
743     void setUCollator(const Locale& locale, UErrorCode& status);
744 
745     /**
746     * Creates the C struct for ucollator from a locale name.
747     * @param locale desired locale name
748     * @param status error status
749     */
750     void setUCollator(const char* locale, UErrorCode& status);
751 
752     /**
753     * Installs an externally created ucollator. This is used internally by
754     * StringSearch. Hence the responsibility of cleaning up the new ucollator is
755     * not taken by this RuleBasedCollator: the dataIsOwned flag is set to FALSE
        * and isWriteThroughAlias is set to TRUE. A previously held ucollator is
        * closed first if it was owned, and the rule string is re-initialised via
        * setRuleStringFromCollator().
756     * @param collator new ucollator data
757     */
758     void setUCollator(UCollator *collator);
759 
760 public:
761 #ifndef U_HIDE_INTERNAL_API
762     /**
763     * Get UCollator data struct. Used only by StringSearch & intltest.
        * The pointer returned is the ucollator member itself, not a copy.
764     * @return UCollator data struct
765     * @internal
766     */
767     const UCollator * getUCollator();
768 #endif  /* U_HIDE_INTERNAL_API */
769 
770 protected:
771    /**
772     * Used internally by registration to define the requested and valid locales.
773     * @param requestedLocale the requested locale
774     * @param validLocale the valid locale
775     * @param actualLocale the actual locale
776     * @internal
777     */
778     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
779 
780 private:
781     // If the ucollator is neither owned nor a write-through alias, copy it.
782     void checkOwned(void);
783 
784     // Utility to init the rule string; used by checkOwned, construct and setUCollator(UCollator *).
785     void setRuleStringFromCollator();
786 
787 public:
788     /** Get the short definition string for a collator. This internal API harvests the collator's
789      *  locale and the attribute set and produces a string that can be used for opening
790      *  a collator with the same properties using the ucol_openFromShortString API.
791      *  This string will be normalized.
792      *  The structure and the syntax of the string are defined in the "Naming collators"
793      *  section of the user's guide:
794      *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
795      *  This function supports preflighting.
796      *
797      *  This is internal, and intended to be used with delegate converters.
798      *
799      *  @param locale a locale that will appear as the collator's locale in the resulting
800      *                short string definition. If NULL, the locale will be harvested
801      *                from the collator.
802      *  @param buffer space to hold the resulting string
803      *  @param capacity capacity of the buffer
804      *  @param status for returning errors, including all preflighting errors
805      *  @return length of the resulting string
806      *  @see ucol_openFromShortString
807      *  @see ucol_normalizeShortDefinitionString
808      *  @see ucol_getShortDefinitionString
809      *  @internal
810      */
811     virtual int32_t internalGetShortDefinitionString(const char *locale,
812                                                      char *buffer,
813                                                      int32_t capacity,
814                                                      UErrorCode &status) const;
815 };
816 
817 // inline method implementation ---------------------------------------------
818 
setUCollator(const Locale & locale,UErrorCode & status)819 inline void RuleBasedCollator::setUCollator(const Locale &locale,
820                                                UErrorCode &status)
821 {
822     setUCollator(locale.getName(), status);
823 }
824 
825 
setUCollator(UCollator * collator)826 inline void RuleBasedCollator::setUCollator(UCollator     *collator)
827 {
828 
829     if (ucollator && dataIsOwned) {
830         ucol_close(ucollator);
831     }
832     ucollator   = collator;
833     dataIsOwned = FALSE;
834     isWriteThroughAlias = TRUE;
835     setRuleStringFromCollator();
836 }
837 
838 #ifndef U_HIDE_INTERNAL_API
getUCollator()839 inline const UCollator * RuleBasedCollator::getUCollator()
840 {
841     return ucollator;
842 }
843 #endif
844 
845 U_NAMESPACE_END
846 
847 #endif /* #if !UCONFIG_NO_COLLATION */
848 
849 #endif
850