• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *   Copyright (C) 1996-2010, International Business Machines                 *
4 *   Corporation and others.  All Rights Reserved.                            *
5 ******************************************************************************
6 */
7 
8 /**
9  * \file
10  * \brief C++ API: Collation Service.
11  */
12 
13 /**
14 * File coll.h
15 *
16 * Created by: Helena Shih
17 *
18 * Modification History:
19 *
20 *  Date        Name        Description
21 * 02/5/97      aliu        Modified createDefault to load collation data from
22 *                          binary files when possible.  Added related methods
23 *                          createCollationFromFile, chopLocale, createPathName.
24 * 02/11/97     aliu        Added members addToCache, findInCache, and fgCache.
25 * 02/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
26 *                          Moved cache out of Collation class.
27 * 02/13/97     aliu        Moved several methods out of this class and into
28 *                          RuleBasedCollator, with modifications.  Modified
29 *                          createDefault() to call new RuleBasedCollator(Locale&)
30 *                          constructor.  General clean up and documentation.
31 * 02/20/97     helena      Added clone, operator==, operator!=, operator=, copy
32 *                          constructor and getDynamicClassID.
33 * 03/25/97     helena      Updated with platform independent data types.
34 * 05/06/97     helena      Added memory allocation error detection.
35 * 06/20/97     helena      Java class name change.
36 * 09/03/97     helena      Added createCollationKeyValues().
37 * 02/10/98     damiba      Added compare() with length as parameter.
38 * 04/23/99     stephen     Removed EDecompositionMode, merged with
39 *                          Normalizer::EMode.
40 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
41 *                          UnicodeString construction and special case for NO_OP.
42 * 11/23/99     srl         More performance enhancements. Inlining of
43 *                          critical accessors.
44 * 05/15/00     helena      Added version information API.
45 * 01/29/01     synwee      Modified into a C++ wrapper which calls C apis
46 *                          (ucoll.h).
47 */
48 
49 #ifndef COLL_H
50 #define COLL_H
51 
52 #include "unicode/utypes.h"
53 
54 #if !UCONFIG_NO_COLLATION
55 
56 #include "unicode/uobject.h"
57 #include "unicode/ucol.h"
58 #include "unicode/normlzr.h"
59 #include "unicode/locid.h"
60 #include "unicode/uniset.h"
61 #include "unicode/umisc.h"
62 #include "unicode/uiter.h"
63 #include "unicode/stringpiece.h"
64 
65 U_NAMESPACE_BEGIN
66 
67 class StringEnumeration;
68 
69 #if !UCONFIG_NO_SERVICE
70 /**
71  * @stable ICU 2.6
72  */
73 class CollatorFactory;
74 #endif
75 
76 /**
77 * @stable ICU 2.0
78 */
79 class CollationKey;
80 
81 /**
82 * The <code>Collator</code> class performs locale-sensitive string
83 * comparison.<br>
84 * You use this class to build searching and sorting routines for natural
85 * language text.<br>
86 * <em>Important: </em>The ICU collation service has been reimplemented
87 * in order to achieve better performance and UCA compliance.
88 * For details, see the
89 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
90 * collation design document</a>.
91 * <p>
92 * <code>Collator</code> is an abstract base class. Subclasses implement
93 * specific collation strategies. One subclass,
94 * <code>RuleBasedCollator</code>, is currently provided and is applicable
95 * to a wide set of languages. Other subclasses may be created to handle more
96 * specialized needs.
97 * <p>
98 * Like other locale-sensitive classes, you can use the static factory method,
99 * <code>createInstance</code>, to obtain the appropriate
100 * <code>Collator</code> object for a given locale. You will only need to
101 * look at the subclasses of <code>Collator</code> if you need to
102 * understand the details of a particular collation strategy or if you need to
103 * modify that strategy.
104 * <p>
105 * The following example shows how to compare two strings using the
106 * <code>Collator</code> for the default locale.
107 * \htmlonly<blockquote>\endhtmlonly
108 * <pre>
109 * \code
110 * // Compare two strings in the default locale
111 * UErrorCode success = U_ZERO_ERROR;
112 * Collator* myCollator = Collator::createInstance(success);
113 * if (myCollator->compare("abc", "ABC") < 0)
114 *   cout << "abc is less than ABC" << endl;
115 * else
116 *   cout << "abc is greater than or equal to ABC" << endl;
117 * \endcode
118 * </pre>
119 * \htmlonly</blockquote>\endhtmlonly
120 * <p>
121 * You can set a <code>Collator</code>'s <em>strength</em> property to
122 * determine the level of difference considered significant in comparisons.
123 * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
124 * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
125 * The exact assignment of strengths to language features is locale dependent.
126 * For example, in Czech, "e" and "f" are considered primary differences,
127 * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
128 * differences and "e" and "e" are identical. The following shows how both case
129 * and accents could be ignored for US English.
130 * \htmlonly<blockquote>\endhtmlonly
131 * <pre>
132 * \code
133 * //Get the Collator for US English and set its strength to PRIMARY
134 * UErrorCode success = U_ZERO_ERROR;
135 * Collator* usCollator = Collator::createInstance(Locale::US, success);
136 * usCollator->setStrength(Collator::PRIMARY);
137 * if (usCollator->compare("abc", "ABC") == 0)
138 *     cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
139 * \endcode
140 * </pre>
141 * \htmlonly</blockquote>\endhtmlonly
142 * <p>
143 * For comparing strings exactly once, the <code>compare</code> method
144 * provides the best performance. When sorting a list of strings however, it
145 * is generally necessary to compare each string multiple times. In this case,
146 * sort keys provide better performance. The <code>getSortKey</code> methods
147 * convert a string to a series of bytes that can be compared bitwise against
148 * other sort keys using <code>strcmp()</code>. Sort keys are written as
149 * zero-terminated byte strings. They consist of several substrings, one for
150 * each collation strength level, that are delimited by 0x01 bytes.
151 * If the string code points are appended for UCOL_IDENTICAL, then they are
152 * processed for correct code point order comparison and may contain 0x01
153 * bytes but not zero bytes.
154 * </p>
155 * <p>
156 * An older set of APIs returns a <code>CollationKey</code> object that wraps
157 * the sort key bytes instead of returning the bytes themselves.
158 * Its use is deprecated, but it is still available for compatibility with
159 * Java.
160 * </p>
161 * <p>
162 * <strong>Note:</strong> <code>Collator</code>s with different Locale,
163 * and CollationStrength settings will return different sort
164 * orders for the same set of strings. Locales have specific collation rules,
165 * and the way in which secondary and tertiary differences are taken into
166 * account, for example, will result in a different sorting order for same
167 * strings.
168 * </p>
169 * @see         RuleBasedCollator
170 * @see         CollationKey
171 * @see         CollationElementIterator
172 * @see         Locale
173 * @see         Normalizer
174 * @version     2.0 11/15/01
175 */
176 
177 class U_I18N_API Collator : public UObject {
178 public:
179 
180     // Collator public enums -----------------------------------------------
181 
182     /**
183      * Base letter represents a primary difference. Set comparison level to
184      * PRIMARY to ignore secondary and tertiary differences.<br>
185      * Use this to set the strength of a Collator object.<br>
186      * Example of primary difference, "abc" &lt; "abd"
187      *
188      * Diacritical differences on the same base letter represent a secondary
189      * difference. Set comparison level to SECONDARY to ignore tertiary
190      * differences. Use this to set the strength of a Collator object.<br>
191      * Example of secondary difference, "&auml;" &gt;&gt; "a".
192      *
193      * Uppercase and lowercase versions of the same character represent a
194      * tertiary difference.  Set comparison level to TERTIARY to include all
195      * comparison differences. Use this to set the strength of a Collator
196      * object.<br>
197      * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
198      *
199      * Two characters are considered "identical" when they have the same unicode
200      * spellings.<br>
201      * For example, "&auml;" == "&auml;".
202      *
203      * UCollationStrength is also used to determine the strength of sort keys
204      * generated from Collator objects.
205      * @stable ICU 2.0
206      */
207     enum ECollationStrength
208     {
209         PRIMARY    = 0,   // base-letter differences only; secondary and tertiary differences are ignored
210         SECONDARY  = 1,   // additionally distinguishes diacritical differences; tertiary differences are ignored
211         TERTIARY   = 2,   // additionally distinguishes uppercase/lowercase differences
212         QUATERNARY = 3,   // fourth strength level, listed between TERTIARY and IDENTICAL in the class overview
213         IDENTICAL  = 15   // strings match only when they have the same Unicode spellings
                              // NOTE(review): the non-contiguous value 15 presumably mirrors UCOL_IDENTICAL in ucol.h -- confirm
214     };
215 
216     /**
217      * LESS is returned if source string is compared to be less than target
218      * string in the compare() method.
219      * EQUAL is returned if source string is compared to be equal to target
220      * string in the compare() method.
221      * GREATER is returned if source string is compared to be greater than
222      * target string in the compare() method.
223      * @see Collator#compare
224      * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
225      */
226     enum EComparisonResult
227     {
228         LESS = -1,        // source string compares less than target string
229         EQUAL = 0,        // source string compares equal to target string
230         GREATER = 1       // source string compares greater than target string
231     };
232 
233     // Collator public destructor -----------------------------------------
234 
235     /**
236      * Destructor
237      * @stable ICU 2.0
238      */
239     virtual ~Collator();
240 
241     // Collator public methods --------------------------------------------
242 
243     /**
244      * Returns true if "other" is the same as "this"
245      * @param other Collator object to be compared
246      * @return true if other is the same as this.
247      * @stable ICU 2.0
248      */
249     virtual UBool operator==(const Collator& other) const;
250 
251     /**
252      * Returns true if "other" is not the same as "this".
253      * @param other Collator object to be compared
254      * @return true if other is not the same as this.
255      * @stable ICU 2.0
256      */
257     virtual UBool operator!=(const Collator& other) const;
258 
259     /**
260      * Makes a shallow copy of the current object.
261      * @return a copy of this object
262      * @stable ICU 2.0
263      */
264     virtual Collator* clone(void) const = 0;
265 
266     /**
267      * Creates the Collator object for the current default locale.
268      * The default locale is determined by Locale::getDefault.
269      * The UErrorCode& err parameter is used to return status information to the user.
270      * To check whether the construction succeeded or not, you should check the
271      * value of U_SUCCESS(err).  If you wish more detailed information, you can
272      * check for informational error results which still indicate success.
273      * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
274      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
275      * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
276      * used; neither the requested locale nor any of its fall back locales
277      * could be found.
278      * The caller owns the returned object and is responsible for deleting it.
279      *
280      * @param err    the error code status.
281      * @return       the collation object of the default locale (for example, en_US).
282      * @see Locale#getDefault
283      * @stable ICU 2.0
284      */
285     static Collator* U_EXPORT2 createInstance(UErrorCode&  err);
286 
287     /**
288      * Gets the table-based collation object for the desired locale. The
289      * resource of the desired locale will be loaded by ResourceLoader.
290      * Locale::ENGLISH is the base collation table and all other languages are
291      * built on top of it with additional language-specific modifications.
292      * The UErrorCode& err parameter is used to return status information to the user.
293      * To check whether the construction succeeded or not, you should check
294      * the value of U_SUCCESS(err).  If you wish more detailed information, you
295      * can check for informational error results which still indicate success.
296      * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
297      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
298      * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
299      * used; neither the requested locale nor any of its fall back locales
300      * could be found.
301      * The caller owns the returned object and is responsible for deleting it.
302      * @param loc    The locale ID for which to open a collator.
303      * @param err    the error code status.
304      * @return       the created table-based collation object based on the desired
305      *               locale.
306      * @see Locale
307      * @see ResourceLoader
308      * @stable ICU 2.2
309      */
310     static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
311 
312 #ifdef U_USE_COLLATION_OBSOLETE_2_6
313     /**
314      * Create a Collator with a specific version.
315      * This is the same as createInstance(loc, err) except that getVersion() of
316      * the returned object is guaranteed to be the same as the version
317      * parameter.
318      * This is designed to be used to open the same collator for a given
319      * locale even when ICU is updated.
320      * The same locale and version guarantees the same sort keys and
321      * comparison results.
322      * <p>
323      * Note: this API will be removed in a future release.  Use
324      * <tt>createInstance(const Locale&, UErrorCode&) instead.</tt></p>
325      *
326      * @param loc The locale ID for which to open a collator.
327      * @param version The requested collator version.
328      * @param err A reference to a UErrorCode,
329      *            must not indicate a failure before calling this function.
330      * @return A pointer to a Collator, or 0 if an error occurred
331      *         or a collator with the requested version is not available.
332      *
333      * @see getVersion
334      * @obsolete ICU 2.6
335      */
336     static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
337 #endif
338 
339     /**
340      * The comparison function compares the character data stored in two
341      * different strings. Returns information about whether a string is less
342      * than, greater than or equal to another string.
343      * @param source the source string to be compared with.
344      * @param target the string that is to be compared with the source string.
345      * @return Returns a byte value. GREATER if source is greater
346      * than target; EQUAL if source is equal to target; LESS if source is less
347      * than target
348      * @deprecated ICU 2.6 use the overload with UErrorCode &
349      */
350     virtual EComparisonResult compare(const UnicodeString& source,
351                                       const UnicodeString& target) const;
352 
353     /**
354      * The comparison function compares the character data stored in two
355      * different strings. Returns information about whether a string is less
356      * than, greater than or equal to another string.
357      * @param source the source string to be compared with.
358      * @param target the string that is to be compared with the source string.
359      * @param status possible error code
360      * @return Returns an enum value. UCOL_GREATER if source is greater
361      * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
362      * than target
363      * @stable ICU 2.6
364      */
365     virtual UCollationResult compare(const UnicodeString& source,
366                                       const UnicodeString& target,
367                                       UErrorCode &status) const = 0;
368 
369     /**
370      * Does the same thing as compare but limits the comparison to a specified
371      * length
372      * @param source the source string to be compared with.
373      * @param target the string that is to be compared with the source string.
374      * @param length the length the comparison is limited to
375      * @return Returns a byte value. GREATER if source (up to the specified
376      *         length) is greater than target; EQUAL if source (up to specified
377      *         length) is equal to target; LESS if source (up to the specified
378      *         length) is less  than target.
379      * @deprecated ICU 2.6 use the overload with UErrorCode &
380      */
381     virtual EComparisonResult compare(const UnicodeString& source,
382                                       const UnicodeString& target,
383                                       int32_t length) const;
384 
385     /**
386      * Does the same thing as compare but limits the comparison to a specified
387      * length
388      * @param source the source string to be compared with.
389      * @param target the string that is to be compared with the source string.
390      * @param length the length the comparison is limited to
391      * @param status possible error code
392      * @return Returns an enum value. UCOL_GREATER if source (up to the specified
393      *         length) is greater than target; UCOL_EQUAL if source (up to specified
394      *         length) is equal to target; UCOL_LESS if source (up to the specified
395      *         length) is less  than target.
396      * @stable ICU 2.6
397      */
398     virtual UCollationResult compare(const UnicodeString& source,
399                                       const UnicodeString& target,
400                                       int32_t length,
401                                       UErrorCode &status) const = 0;
402 
403     /**
404      * The comparison function compares the character data stored in two
405      * different string arrays. Returns information about whether a string array
406      * is less than, greater than or equal to another string array.
407      * @param source the source string array to be compared with.
408      * @param sourceLength the length of the source string array.  If this value
409      *        is equal to -1, the string array is null-terminated.
410      * @param target the string that is to be compared with the source string.
411      * @param targetLength the length of the target string array.  If this value
412      *        is equal to -1, the string array is null-terminated.
413      * @return Returns a byte value. GREATER if source is greater than target;
414      *         EQUAL if source is equal to target; LESS if source is less than
415      *         target
416      * @deprecated ICU 2.6 use the overload with UErrorCode &
417      */
418     virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
419                                       const UChar* target, int32_t targetLength)
420                                       const;
421 
422     /**
423      * The comparison function compares the character data stored in two
424      * different string arrays. Returns information about whether a string array
425      * is less than, greater than or equal to another string array.
426      * @param source the source string array to be compared with.
427      * @param sourceLength the length of the source string array.  If this value
428      *        is equal to -1, the string array is null-terminated.
429      * @param target the string that is to be compared with the source string.
430      * @param targetLength the length of the target string array.  If this value
431      *        is equal to -1, the string array is null-terminated.
432      * @param status possible error code
433      * @return Returns an enum value. UCOL_GREATER if source is greater
434      * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
435      * than target
436      * @stable ICU 2.6
437      */
438     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
439                                       const UChar* target, int32_t targetLength,
440                                       UErrorCode &status) const = 0;
441 
442     /**
443      * Compares two strings using the Collator.
444      * Returns whether the first one compares less than/equal to/greater than
445      * the second one.
446      * This version takes UCharIterator input.
447      * @param sIter the first ("source") string iterator
448      * @param tIter the second ("target") string iterator
449      * @param status ICU status
450      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
451      * @stable ICU 4.2
452      */
453     virtual UCollationResult compare(UCharIterator &sIter,
454                                      UCharIterator &tIter,
455                                      UErrorCode &status) const;
456 
457     /**
458      * Compares two UTF-8 strings using the Collator.
459      * Returns whether the first one compares less than/equal to/greater than
460      * the second one.
461      * This version takes UTF-8 input.
462      * Note that a StringPiece can be implicitly constructed
463      * from a std::string or a NUL-terminated const char * string.
464      * @param source the first UTF-8 string
465      * @param target the second UTF-8 string
466      * @param status ICU status
467      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
468      * @stable ICU 4.2
469      */
470     virtual UCollationResult compareUTF8(const StringPiece &source,
471                                          const StringPiece &target,
472                                          UErrorCode &status) const;
473 
474     /**
475      * Transforms the string into a series of characters that can be compared
476      * with CollationKey::compareTo. It is not possible to restore the original
477      * string from the chars in the sort key.  The generated sort key handles
478      * only a limited number of ignorable characters.
479      * <p>Use CollationKey::equals or CollationKey::compare to compare the
480      * generated sort keys.
481      * If the source string is null, a null collation key will be returned.
482      * @param source the source string to be transformed into a sort key.
483      * @param key the collation key to be filled in
484      * @param status the error code status.
485      * @return the collation key of the string based on the collation rules.
486      * @see CollationKey#compare
487      * @deprecated ICU 2.8 Use getSortKey(...) instead
488      */
489     virtual CollationKey& getCollationKey(const UnicodeString&  source,
490                                           CollationKey& key,
491                                           UErrorCode& status) const = 0;
492 
493     /**
494      * Transforms the string into a series of characters that can be compared
495      * with CollationKey::compareTo. It is not possible to restore the original
496      * string from the chars in the sort key.  The generated sort key handles
497      * only a limited number of ignorable characters.
498      * <p>Use CollationKey::equals or CollationKey::compare to compare the
499      * generated sort keys.
500      * <p>If the source string is null, a null collation key will be returned.
501      * @param source the source string to be transformed into a sort key.
502      * @param sourceLength length of the source string
503      * @param key the collation key to be filled in
504      * @param status the error code status.
505      * @return the collation key of the string based on the collation rules.
506      * @see CollationKey#compare
507      * @deprecated ICU 2.8 Use getSortKey(...) instead
508      */
509     virtual CollationKey& getCollationKey(const UChar*source,
510                                           int32_t sourceLength,
511                                           CollationKey& key,
512                                           UErrorCode& status) const = 0;
513     /**
514      * Generates the hash code for the collation object
515      * @stable ICU 2.0
516      */
517     virtual int32_t hashCode(void) const = 0;
518 
519     /**
520      * Gets the locale of the Collator
521      *
522      * @param type can be either requested, valid or actual locale. For more
523      *             information see the definition of ULocDataLocaleType in
524      *             uloc.h
525      * @param status the error code status.
526      * @return locale where the collation data lives. If the collator
527      *         was instantiated from rules, locale is empty.
528      * @deprecated ICU 2.8 This API is under consideration for revision
529      * in ICU 3.0.
530      */
531     virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
532 
533     /**
534      * Convenience method for comparing two strings based on the collation rules.
535      * @param source the source string to be compared with.
536      * @param target the target string to be compared with.
537      * @return true if the first string is greater than the second one,
538      *         according to the collation rules. false, otherwise.
539      * @see Collator#compare
540      * @stable ICU 2.0
541      */
542     UBool greater(const UnicodeString& source, const UnicodeString& target)
543                   const;
544 
545     /**
546      * Convenience method for comparing two strings based on the collation rules.
547      * @param source the source string to be compared with.
548      * @param target the target string to be compared with.
549      * @return true if the first string is greater than or equal to the second
550      *         one, according to the collation rules. false, otherwise.
551      * @see Collator#compare
552      * @stable ICU 2.0
553      */
554     UBool greaterOrEqual(const UnicodeString& source,
555                          const UnicodeString& target) const;
556 
557     /**
558      * Convenience method for comparing two strings based on the collation rules.
559      * @param source the source string to be compared with.
560      * @param target the target string to be compared with.
561      * @return true if the strings are equal according to the collation rules.
562      *         false, otherwise.
563      * @see Collator#compare
564      * @stable ICU 2.0
565      */
566     UBool equals(const UnicodeString& source, const UnicodeString& target) const;
567 
568     /**
569      * Determines the minimum strength that will be used in comparison or
570      * transformation.
571      * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
572      * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
573      * are ignored.
574      * @return the current comparison level.
575      * @see Collator#setStrength
576      * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
577      */
578     virtual ECollationStrength getStrength(void) const = 0;
579 
580     /**
581      * Sets the minimum strength to be used in comparison or transformation.
582      * <p>Example of use:
583      * <pre>
584      *  \code
585      *  UErrorCode status = U_ZERO_ERROR;
586      *  Collator*myCollation = Collator::createInstance(Locale::US, status);
587      *  if (U_FAILURE(status)) return;
588      *  myCollation->setStrength(Collator::PRIMARY);
589      *  // result will be "abc" == "ABC"
590      *  // tertiary differences will be ignored
591      *  Collator::EComparisonResult result = myCollation->compare("abc", "ABC");
592      * \endcode
593      * </pre>
594      * @see Collator#getStrength
595      * @param newStrength the new comparison level.
596      * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
597      */
598     virtual void setStrength(ECollationStrength newStrength) = 0;
599 
600     /**
601      * Get the current reordering of scripts (if one has been set).
602      * @param dest The array to fill with the script ordering.
603      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
604      * @param status Must be a valid reference to an error code value, which must not indicate a failure before the function call.
605      * @return The length of the array of the script ordering.
606      * @see ucol_getReorderCodes
607      * @internal
608      */
609     virtual int32_t getReorderCodes(int32_t *dest,
610                                     int32_t destCapacity,
611                                     UErrorCode& status) const;
612 
613     /**
614      * Set the ordering of scripts for this collator.
615      * @param reorderCodes An array of reorder codes in the new order.
616      * @param reorderCodesLength The length of reorderCodes.
617      * @see ucol_setReorderCodes
618      * @internal
619      */
620     virtual void setReorderCodes(const int32_t* reorderCodes,
621                                 int32_t reorderCodesLength,
622                                 UErrorCode& status) ;
623 
624     /**
625      * Get name of the object for the desired Locale, in the desired language
626      * @param objectLocale must be from getAvailableLocales
627      * @param displayLocale specifies the desired locale for output
628      * @param name the fill-in parameter of the return value
629      * @return display-able name of the object for the object locale in the
630      *         desired language
631      * @stable ICU 2.0
632      */
633     static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
634                                          const Locale& displayLocale,
635                                          UnicodeString& name);
636 
637     /**
638     * Get name of the object for the desired Locale, in the language of the
639     * default locale.
640     * @param objectLocale must be from getAvailableLocales
641     * @param name the fill-in parameter of the return value
642     * @return name of the object for the desired locale in the default language
643     * @stable ICU 2.0
644     */
645     static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
646                                          UnicodeString& name);
647 
648     /**
649      * Get the set of Locales for which Collations are installed.
650      *
651      * <p>Note this does not include locales supported by registered collators.
652      * If collators might have been registered, use the overload of getAvailableLocales
653      * that returns a StringEnumeration.</p>
654      *
655      * @param count the output parameter of number of elements in the locale list
656      * @return the list of available locales for which collations are installed
657      * @stable ICU 2.0
658      */
659     static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
660 
661     /**
662      * Return a StringEnumeration over the locales available at the time of the call,
663      * including registered locales.  If a severe error occurs (such as an out-of-memory
664      * condition), this will return null. If there is no locale data, an empty enumeration
665      * will be returned.
666      * @return a StringEnumeration over the locales available at the time of the call
667      * @stable ICU 2.6
668      */
669     static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
670 
671     /**
672      * Create a string enumerator of all possible keywords that are relevant to
673      * collation. At this point, the only recognized keyword for this
674      * service is "collation".
675      * @param status input-output error code
676      * @return a string enumeration over locale strings. The caller is
677      * responsible for deleting the result.
678      * @stable ICU 3.0
679      */
680     static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
681 
682     /**
683      * Given a keyword, create a string enumeration of all values
684      * for that keyword that are currently in use.
685      * @param keyword a particular keyword as enumerated by getKeywords /
686      * ucol_getKeywords. If any other keyword is passed in, status is set
687      * to U_ILLEGAL_ARGUMENT_ERROR.
688      * @param status input-output error code
689      * @return a string enumeration over collation keyword values, or NULL
690      * upon error. The caller is responsible for deleting the result.
691      * @stable ICU 3.0
692      */
693     static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
694 
695     /**
696      * Given a key and a locale, returns an array of string values in a preferred
697      * order that would make a difference. These are all and only those values where
698      * the open (creation) of the service with the locale formed from the input locale
699      * plus input keyword and that value has different behavior than creation with the
700      * input locale alone.
701      * @param keyword        one of the keys supported by this service.  For now, only
702      *                      "collation" is supported.
703      * @param locale        the locale
704      * @param commonlyUsed  if set to true, only commonly used values are returned
705      *                      for the given locale, in preferred order.  Otherwise,
706      *                      all the available values for the locale are returned.
707      * @param status        ICU error code
708      * @return a string enumeration over keyword values for the given key and the locale.
709      * @stable ICU 4.2
710      */
711     static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
712                                                                     UBool commonlyUsed, UErrorCode& status);
713 
714     /**
715      * Return the functionally equivalent locale for the given
716      * requested locale, with respect to given keyword, for the
717      * collation service.  If two locales return the same result, then
718      * collators instantiated for these locales will behave
719      * equivalently.  The converse is not always true; two collators
720      * may in fact be equivalent, but return different results, due to
721      * internal details.  The return result has no other meaning than
722      * that stated above, and implies nothing as to the relationship
723      * between the two locales.  This is intended for use by
724      * applications that wish to cache collators, or otherwise reuse
725      * collators when possible.  The functional equivalent may change
726      * over time.  For more information, please see the <a
727      * href="http://icu-project.org/userguide/locale.html#services">
728      * Locales and Services</a> section of the ICU User Guide.
729      * @param keyword a particular keyword as enumerated by
730      * ucol_getKeywords.
731      * @param locale the requested locale
732      * @param isAvailable reference to a fill-in parameter that
733      * indicates whether the requested locale was 'available' to the
734      * collation service. A locale is defined as 'available' if it
735      * physically exists within the collation locale data.
736      * @param status reference to input-output error code
737      * @return the functionally equivalent collation locale, or the root
738      * locale upon error.
739      * @stable ICU 3.0
740      */
741     static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
742                                           UBool& isAvailable, UErrorCode& status);
743 
744 #if !UCONFIG_NO_SERVICE
745     /**
746      * Register a new Collator.  The collator will be adopted.
747      * @param toAdopt the Collator instance to be adopted
748      * @param locale the locale with which the collator will be associated
749      * @param status the in/out status code, no special meanings are assigned
750      * @return a registry key that can be passed to unregister to unregister this collator
751      * @stable ICU 2.6
752      */
753     static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
754 
755     /**
756      * Register a new CollatorFactory.  The factory will be adopted.
757      * @param toAdopt the CollatorFactory instance to be adopted
758      * @param status the in/out status code, no special meanings are assigned
759      * @return a registry key that can be used to unregister this factory
760      * @stable ICU 2.6
761      */
762     static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
763 
764     /**
765      * Unregister a previously-registered Collator or CollatorFactory
766      * using the key returned from the register call.  Key becomes
767      * invalid after a successful call and should not be used again.
768      * The object corresponding to the key will be deleted.
769      * @param key the registry key returned by a previous call to registerInstance or registerFactory
770      * @param status the in/out status code, no special meanings are assigned
771      * @return TRUE if the collator or factory for the key was successfully unregistered
772      * @stable ICU 2.6
773      */
774     static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
775 #endif /* UCONFIG_NO_SERVICE */
776 
777     /**
778      * Gets the version information for a Collator.
779      * @param info the version number information; the result will be filled in
780      * @stable ICU 2.0
781      */
782     virtual void getVersion(UVersionInfo info) const = 0;
783 
784     /**
785      * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
786      * This method implements a simple version of RTTI, since not all C++
787      * compilers support genuine RTTI. Polymorphic operator==() and clone()
788      * methods call this method.
789      * @return The class ID for this object. All objects of a given class have
790      *         the same class ID.  Objects of other classes have different class
791      *         IDs.
792      * @stable ICU 2.0
793      */
794     virtual UClassID getDynamicClassID(void) const = 0;
795 
796     /**
797      * Universal attribute setter
798      * @param attr the attribute to set
799      * @param value the new value for the attribute
800      * @param status to indicate whether the operation went smoothly or
801      *        there were errors
802      * @stable ICU 2.2
803      */
804     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
805                               UErrorCode &status) = 0;
806 
807     /**
808      * Universal attribute getter
809      * @param attr the attribute to query
810      * @param status to indicate whether the operation went smoothly or
811      *        there were errors
812      * @return attribute value
813      * @stable ICU 2.2
814      */
815     virtual UColAttributeValue getAttribute(UColAttribute attr,
816                                             UErrorCode &status) = 0;
817 
818     /**
819      * Sets the variable top to a collation element value of a string supplied.
820      * @param varTop one or more (if contraction) UChars to which the variable top should be set
821      * @param len length of variable top string. If -1 it is considered to be zero terminated.
822      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
823      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
824      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
825      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
826      * @stable ICU 2.0
827      */
828     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
829 
830     /**
831      * Sets the variable top to a collation element value of a string supplied.
832      * @param varTop a UnicodeString of one or more (if contraction) UChars to which the variable top should be set
833      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
834      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
835      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
836      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
837      * @stable ICU 2.0
838      */
839     virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status) = 0;
840 
841     /**
842      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
843      * Lower 16 bits are ignored.
844      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
845      * @param status error code (not changed by function)
846      * @stable ICU 2.0
847      */
848     virtual void setVariableTop(const uint32_t varTop, UErrorCode &status) = 0;
849 
850     /**
851      * Gets the variable top value of a Collator.
852      * The lower 16 bits of the returned value are undefined and should be ignored.
853      * @param status error code (not changed by function). If error code is set, the return value is undefined.
854      * @stable ICU 2.0
855      */
856     virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
857 
858     /**
859      * Get a UnicodeSet that contains all the characters and sequences
860      * tailored in this collator.
861      * @param status      error code of the operation
862      * @return a pointer to a UnicodeSet object containing all the
863      *         code points and sequences that may sort differently than
864      *         in the UCA. The object must be disposed of by using delete.
865      * @stable ICU 2.4
866      */
867     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
868 
869 
870     /**
871      * Thread safe cloning operation
872      * @return pointer to the new clone; the caller is responsible for disposing of it.
873      * @stable ICU 2.2
874      */
875     virtual Collator* safeClone(void) = 0;
876 
877     /**
878      * Get the sort key as an array of bytes from a UnicodeString.
879      * Sort key byte arrays are zero-terminated and can be compared using
880      * strcmp().
881      * @param source string to be processed.
882      * @param result buffer to store result in. If NULL, number of bytes needed
883      *        will be returned.
884      * @param resultLength length of the result buffer. If it is not large enough, the
885      *        buffer will be filled to capacity.
886      * @return Number of bytes needed for storing the sort key
887      * @stable ICU 2.2
888      */
889     virtual int32_t getSortKey(const UnicodeString& source,
890                               uint8_t* result,
891                               int32_t resultLength) const = 0;
892 
893     /**
894      * Get the sort key as an array of bytes from a UChar buffer.
895      * Sort key byte arrays are zero-terminated and can be compared using
896      * strcmp().
897      * @param source string to be processed.
898      * @param sourceLength length of string to be processed.
899      *        If -1, the string is 0 terminated and length will be decided by the
900      *        function.
901      * @param result buffer to store result in. If NULL, number of bytes needed
902      *        will be returned.
903      * @param resultLength length of the result buffer. If it is not large enough, the
904      *        buffer will be filled to capacity.
905      * @return Number of bytes needed for storing the sort key
906      * @stable ICU 2.2
907      */
908     virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
909                                uint8_t*result, int32_t resultLength) const = 0;
910 
911     /**
912      * Produce a bound for a given sortkey and a number of levels.
913      * Return value is always the number of bytes needed, regardless of
914      * whether the result buffer was big enough or even valid.<br>
915      * Resulting bounds can be used to produce a range of strings that are
916      * between upper and lower bounds. For example, if bounds are produced
917      * for a sortkey of string "smith", strings between upper and lower
918      * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
919      * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
920      * is produced, strings matched would be as above. However, if the bound
921      * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
922      * also match "Smithsonian" and similar.<br>
923      * For more on usage, see example in cintltst/capitst.c in procedure
924      * TestBounds.
925      * Sort keys may be compared using <TT>strcmp</TT>.
926      * @param source The source sortkey.
927      * @param sourceLength The length of source, or -1 if null-terminated.
928      *                     (If an unmodified sortkey is passed, it is always null
929      *                      terminated).
930      * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
931      *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
932      *                  produces upper bound that matches strings of the same length
933      *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
934      *                  same starting substring as the source string.
935      * @param noOfLevels  Number of levels required in the resulting bound (for most
936      *                    uses, the recommended value is 1). See the User Guide for
937      *                    an explanation of the number of levels a sortkey can have.
938      * @param result A pointer to a buffer to receive the resulting sortkey.
939      * @param resultLength The maximum size of result.
940      * @param status Used for returning error code if something went wrong. If the
941      *               number of levels requested is higher than the number of levels
942      *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
943      *               issued.
944      * @return The size needed to fully store the bound.
945      * @see ucol_keyHashCode
946      * @stable ICU 2.1
947      */
948     static int32_t U_EXPORT2 getBound(const uint8_t       *source,
949             int32_t             sourceLength,
950             UColBoundMode       boundType,
951             uint32_t            noOfLevels,
952             uint8_t             *result,
953             int32_t             resultLength,
954             UErrorCode          &status);
955 
956 
957 protected:
958 
959     // Collator protected constructors -------------------------------------
960 
961     /**
962     * Default constructor.
963     * Constructor is different from the old default Collator constructor.
964     * The task for determining the default collation strength and normalization
965     * mode is left to the child class.
966     * @stable ICU 2.0
967     */
968     Collator();
969 
970     /**
971     * Constructor.
972     * Empty constructor, does not handle the arguments.
973     * This constructor is done for backward compatibility with 1.7 and 1.8.
974     * The task for handling the argument collation strength and normalization
975     * mode is left to the child class.
976     * @param collationStrength collation strength
977     * @param decompositionMode normalization (decomposition) mode
978     * @deprecated ICU 2.4. Subclasses should use the default constructor
979     * instead and handle the strength and normalization mode themselves.
980     */
981     Collator(UCollationStrength collationStrength,
982              UNormalizationMode decompositionMode);
983 
984     /**
985     * Copy constructor.
986     * @param other the Collator object to copy from
987     * @stable ICU 2.0
988     */
989     Collator(const Collator& other);
990 
991     // Collator protected methods -----------------------------------------
992 
993 
994    /**
995     * Used internally by registration to define the requested and valid locales.
996     * @param requestedLocale the requested locale
997     * @param validLocale the valid locale
998     * @param actualLocale the actual locale
999     * @internal
1000     */
1001     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
1002 
1003 public:
1004 #if !UCONFIG_NO_SERVICE
1005     /**
1006      * Used only by ucol_open; not for public use.
1007      * @internal
1008      */
1009     static UCollator* createUCollator(const char* loc, UErrorCode* status);
1010 #endif
1011 private:
1012     /**
1013      * Assignment operator. Declared private for now, so it is not part of the public API.
1014      * @internal
1015      */
1016     Collator& operator=(const Collator& other);
1017 
1018     friend class CFactory;
1019     friend class SimpleCFactory;
1020     friend class ICUCollatorFactory;
1021     friend class ICUCollatorService;
         // Private static member: callable only from Collator itself and the friend
         // classes declared above.
         // NOTE(review): presumably creates the Collator for desiredLocale and reports
         // failures through status -- confirm against the implementation.
1022     static Collator* makeInstance(const Locale& desiredLocale,
1023                                   UErrorCode& status);
1024 
1025     // Collator private data members ---------------------------------------
1026 
1027     /*
1028     synwee : removed as attributes to be handled by child class
1029     UCollationStrength  strength;
1030     Normalizer::EMode  decmp;
1031     */
1032     /* This is useless information */
1033 /*  static const UVersionInfo fVersion;*/
1034 };
1035 
1036 #if !UCONFIG_NO_SERVICE
1037 /**
1038  * A factory, used with registerFactory, that creates multiple collators and provides
1039  * display names for them.  A factory supports some number of locales-- these are the
1040  * locales for which it can create collators.  The factory can be visible, in which
1041  * case the supported locales will be enumerated by getAvailableLocales, or invisible,
1042  * in which case they are not.  Invisible locales are still supported, they are just not
1043  * listed by getAvailableLocales.
1044  * <p>
1045  * If standard locale display names are sufficient, Collator instances can
1046  * be registered using registerInstance instead.</p>
1047  * <p>
1048  * Note: if the collators are to be used from C APIs, they must be instances
1049  * of RuleBasedCollator.</p>
1050  *
1051  * @stable ICU 2.6
1052  */
1053 class U_I18N_API CollatorFactory : public UObject {
1054 public:
1055 
1056     /**
1057      * Destructor
1058      * @stable ICU 3.0
1059      */
1060     virtual ~CollatorFactory();
1061 
1062     /**
1063      * Return true if this factory is visible.  Default is true.
1064      * If not visible, the locales supported by this factory will not
1065      * be listed by getAvailableLocales.
1066      * @return true if the factory is visible.
1067      * @stable ICU 2.6
1068      */
1069     virtual UBool visible(void) const;
1070 
1071     /**
1072      * Return a collator for the provided locale.  If the locale
1073      * is not supported, return NULL.
1074      * @param loc the locale identifying the collator to be created.
1075      * @return a new collator if the locale is supported, otherwise NULL.
1076      * @stable ICU 2.6
1077      */
1078     virtual Collator* createCollator(const Locale& loc) = 0;
1079 
1080     /**
1081      * Return the name of the collator for the objectLocale, localized for the displayLocale.
1082      * If objectLocale is not supported, or the factory is not visible, set the result string
1083      * to bogus.
1084      * @param objectLocale the locale identifying the collator
1085      * @param displayLocale the locale for which the display name of the collator should be localized
1086      * @param result an output parameter for the display name, set to bogus if not supported.
1087      * @return the display name
1088      * @stable ICU 2.6
1089      */
1090     virtual  UnicodeString& getDisplayName(const Locale& objectLocale,
1091                                            const Locale& displayLocale,
1092                                            UnicodeString& result);
1093 
1094     /**
1095      * Return an array of all the locale names directly supported by this factory.
1096      * The number of names is returned in count.  This array is owned by the factory.
1097      * Its contents must never change.
1098      * @param count output parameter for the number of locales supported by the factory
1099      * @param status the in/out error code
1100      * @return a pointer to an array of count UnicodeStrings.
1101      * @stable ICU 2.6
1102      */
1103     virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
1104 };
1105 #endif /* UCONFIG_NO_SERVICE */
1106 
1107 // Collator inline methods -----------------------------------------------
1108 
1109 U_NAMESPACE_END
1110 
1111 #endif /* #if !UCONFIG_NO_COLLATION */
1112 
1113 #endif
1114