• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (c) 1996-2015, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 #ifndef UCOL_H
11 #define UCOL_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_COLLATION
16 
17 #include "unicode/unorm.h"
18 #include "unicode/parseerr.h"
19 #include "unicode/uloc.h"
20 #include "unicode/uset.h"
21 #include "unicode/uscript.h"
22 
23 #if U_SHOW_CPLUSPLUS_API
24 #include "unicode/localpointer.h"
25 #endif   // U_SHOW_CPLUSPLUS_API
26 
27 /**
28  * \file
29  * \brief C API: Collator
30  *
31  * <h2> Collator C API </h2>
32  *
33  * The C API for Collator performs locale-sensitive
34  * string comparison. You use this service to build
35  * searching and sorting routines for natural language text.
36  * <p>
37  * For more information about the collation service see
38  * <a href="https://unicode-org.github.io/icu/userguide/collation">the User Guide</a>.
39  * <p>
40  * Collation service provides correct sorting orders for most locales supported in ICU.
41  * If specific data for a locale is not available, the order eventually falls back
42  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
43  * <p>
44  * Sort ordering may be customized by providing your own set of rules. For more on
45  * this subject see the <a href="https://unicode-org.github.io/icu/userguide/collation/customization">
46  * Collation Customization</a> section of the User Guide.
47  * <p>
48  * @see         UCollationResult
49  * @see         UNormalizationMode
50  * @see         UCollationStrength
51  * @see         UCollationElements
52  */
53 
54 /** A collator.
55 *  For usage in C programs.
56 */
57 struct UCollator;
58 /** structure representing a collator object instance
59  * @stable ICU 2.0
60  */
61 typedef struct UCollator UCollator;
62 
63 
64 /**
65  * UCOL_LESS is returned if source string is compared to be less than target
66  * string in the ucol_strcoll() method.
67  * UCOL_EQUAL is returned if source string is compared to be equal to target
68  * string in the ucol_strcoll() method.
69  * UCOL_GREATER is returned if source string is compared to be greater than
70  * target string in the ucol_strcoll() method.
71  * @see ucol_strcoll()
72  * <p>
73  * Possible values for a comparison result
74  * @stable ICU 2.0
75  */
76 typedef enum {
77   /** source string compares equal to target string */
78   UCOL_EQUAL    = 0,
79   /** source string compares greater than target string */
80   UCOL_GREATER    = 1,
81   /** source string compares less than target string */
82   UCOL_LESS    = -1
83 } UCollationResult ;
84 
85 
86 /** Enum containing attribute values for controlling collation behavior.
87  * Here are all the allowable values. Not every attribute can take every value. The only
88  * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
89  * value for that locale
90  * @stable ICU 2.0
91  */
92 typedef enum {
93   /** resets an attribute to the predefined value for the locale; accepted by most attributes */
94   UCOL_DEFAULT = -1,
95 
96   /** Primary collation strength */
97   UCOL_PRIMARY = 0,
98   /** Secondary collation strength */
99   UCOL_SECONDARY = 1,
100   /** Tertiary collation strength */
101   UCOL_TERTIARY = 2,
102   /** Default collation strength */
103   UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
104   UCOL_CE_STRENGTH_LIMIT, /**< Implicit value UCOL_DEFAULT_STRENGTH + 1 (= 3, numerically equal to UCOL_QUATERNARY). */
105   /** Quaternary collation strength */
106   UCOL_QUATERNARY=3,
107   /** Identical collation strength */
108   UCOL_IDENTICAL=15,
109   UCOL_STRENGTH_LIMIT, /**< Implicit value UCOL_IDENTICAL + 1 (= 16, numerically equal to UCOL_OFF). */
110 
111   /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
112       UCOL_CASE_LEVEL, UCOL_CASE_FIRST, UCOL_HIRAGANA_QUATERNARY_MODE
113       & UCOL_DECOMPOSITION_MODE */
114   UCOL_OFF = 16,
115   /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
116       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
117       & UCOL_DECOMPOSITION_MODE */
118   UCOL_ON = 17,
119 
120   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
121   UCOL_SHIFTED = 20,
122   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
123   UCOL_NON_IGNORABLE = 21,
124 
125   /** Valid for UCOL_CASE_FIRST -
126       lower case sorts before upper case */
127   UCOL_LOWER_FIRST = 24,
128   /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
129   UCOL_UPPER_FIRST = 25,
130 
131 #ifndef U_HIDE_DEPRECATED_API
132     /**
133      * One more than the highest normal UColAttributeValue value.
134      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
135      */
136   UCOL_ATTRIBUTE_VALUE_COUNT
137 #endif  /* U_HIDE_DEPRECATED_API */
138 } UColAttributeValue;
139 
140 /**
141  * Enum containing the codes for reordering segments of the collation table that are not script
142  * codes. These reordering codes are to be used in conjunction with the script codes.
143  * @see ucol_getReorderCodes
144  * @see ucol_setReorderCodes
145  * @see ucol_getEquivalentReorderCodes
146  * @see UScriptCode
147  * @stable ICU 4.8
148  */
149  typedef enum {
150    /**
151     * A special reordering code that is used to specify the default
152     * reordering codes for a locale.
153     * @stable ICU 4.8
154     */
155     UCOL_REORDER_CODE_DEFAULT       = -1,
156    /**
157     * A special reordering code that is used to specify no reordering codes.
158     * @stable ICU 4.8
159     */
160     UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
161    /**
162     * A special reordering code that is used to specify all other codes used for
163     * reordering except for the codes listed as UColReorderCode values and those
164     * listed explicitly in a reordering.
165     * @stable ICU 4.8
166     */
167     UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN,
168    /**
169     * Characters with the space property.
170     * This is equivalent to the rule value "space".
171     * @stable ICU 4.8
172     */
173     UCOL_REORDER_CODE_SPACE         = 0x1000,
174    /**
175     * The first entry in the enumeration of reordering groups. This is intended for use in
176     * range checking and enumeration of the reorder codes.
177     * @stable ICU 4.8
178     */
179     UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
180    /**
181     * Characters with the punctuation property.
182     * This is equivalent to the rule value "punct".
183     * @stable ICU 4.8
184     */
185     UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
186    /**
187     * Characters with the symbol property.
188     * This is equivalent to the rule value "symbol".
189     * @stable ICU 4.8
190     */
191     UCOL_REORDER_CODE_SYMBOL        = 0x1002,
192    /**
193     * Characters with the currency property.
194     * This is equivalent to the rule value "currency".
195     * @stable ICU 4.8
196     */
197     UCOL_REORDER_CODE_CURRENCY      = 0x1003,
198    /**
199     * Characters with the digit property.
200     * This is equivalent to the rule value "digit".
201     * @stable ICU 4.8
202     */
203     UCOL_REORDER_CODE_DIGIT         = 0x1004,
204 #ifndef U_HIDE_DEPRECATED_API
205     /**
206      * One more than the highest normal UColReorderCode value.
207      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
208      */
209     UCOL_REORDER_CODE_LIMIT         = 0x1005
210 #endif  /* U_HIDE_DEPRECATED_API */
211 } UColReorderCode;
212 
213 /**
214  * Base letter represents a primary difference.  Set comparison
215  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
216  * Use this to set the strength of a Collator object.
217  * Example of primary difference, "abc" &lt; "abd"
218  *
219  * Diacritical differences on the same base letter represent a secondary
220  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
221  * differences. Use this to set the strength of a Collator object.
222  * Example of secondary difference, "&auml;" >> "a".
223  *
224  * Uppercase and lowercase versions of the same character represents a
225  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
226  * all comparison differences. Use this to set the strength of a Collator
227  * object.
228  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
229  *
230  * Two characters are considered "identical" when they have the same
231  * unicode spellings.  UCOL_IDENTICAL.
232  * For example, "&auml;" == "&auml;".
233  *
234  * UCollationStrength is also used to determine the strength of sort keys
235  * generated from UCollator objects
236  * These values can be now found in the UColAttributeValue enum.
237  * @stable ICU 2.0
238  **/
239 typedef UColAttributeValue UCollationStrength;
240 
241 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
242  * value, as well as the values specific to each one.
243  * @stable ICU 2.0
244  */
245 typedef enum {
246      /** Attribute for direction of secondary weights - used in Canadian French.
247       * Acceptable values are UCOL_ON, which results in secondary weights
248       * being considered backwards and UCOL_OFF which treats secondary
249       * weights in the order they appear.
250       * @stable ICU 2.0
251       */
252      UCOL_FRENCH_COLLATION,
253      /** Attribute for handling variable elements.
254       * Acceptable values are UCOL_NON_IGNORABLE (default)
255       * which treats all the codepoints with non-ignorable
256       * primary weights in the same way,
257       * and UCOL_SHIFTED which causes codepoints with primary
258       * weights that are equal or below the variable top value
259       * to be ignored on primary level and moved to the quaternary
260       * level.
261       * @stable ICU 2.0
262       */
263      UCOL_ALTERNATE_HANDLING,
264      /** Controls the ordering of upper and lower case letters.
265       * Acceptable values are UCOL_OFF (default), which orders
266       * upper and lower case letters in accordance to their tertiary
267       * weights, UCOL_UPPER_FIRST which forces upper case letters to
268       * sort before lower case letters, and UCOL_LOWER_FIRST which does
269       * the opposite.
270       * @stable ICU 2.0
271       */
272      UCOL_CASE_FIRST,
273      /** Controls whether an extra case level (positioned before the third
274       * level) is generated or not. Acceptable values are UCOL_OFF (default),
275       * when case level is not generated, and UCOL_ON which causes the case
276       * level to be generated. Contents of the case level are affected by
277       * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
278       * accent differences in a string is to set the strength to UCOL_PRIMARY
279       * and enable case level.
280       * @stable ICU 2.0
281       */
282      UCOL_CASE_LEVEL,
283      /** Controls whether the normalization check and necessary normalizations
284       * are performed. When set to UCOL_OFF (default) no normalization check
285       * is performed. The correctness of the result is guaranteed only if the
286       * input data is in so-called FCD form (see users manual for more info).
287       * When set to UCOL_ON, an incremental check is performed to see whether
288       * the input data is in the FCD form. If the data is not in the FCD form,
289       * incremental NFD normalization is performed.
290       * @stable ICU 2.0
291       */
292      UCOL_NORMALIZATION_MODE,
293      /** An alias for UCOL_NORMALIZATION_MODE attribute.
294       * @stable ICU 2.0
295       */
296      UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
297      /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
298       * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
299       * for most locales (except Japanese) is tertiary.
300       *
301       * Quaternary strength
302       * is useful when combined with shifted setting for alternate handling
303       * attribute and for JIS X 4061 collation, when it is used to distinguish
304       * between Katakana and Hiragana.
305       * Otherwise, quaternary level
306       * is affected only by the number of non-ignorable code points in
307       * the string.
308       *
309       * Identical strength is rarely useful, as it amounts
310       * to codepoints of the NFD form of the string.
311       * @stable ICU 2.0
312       */
313      UCOL_STRENGTH,
314 #ifndef U_HIDE_DEPRECATED_API
315      /** When turned on, this attribute positions Hiragana before all
316       * non-ignorables on quaternary level. This is a sneaky way to produce JIS
317       * sort order.
318       *
319       * This attribute was an implementation detail of the CLDR Japanese tailoring.
320       * Since ICU 50, this attribute is not settable any more via API functions.
321       * Since CLDR 25/ICU 53, explicit quaternary relations are used
322       * to achieve the same Japanese sort order.
323       *
324       * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
325       */
326      UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
327 #endif  /* U_HIDE_DEPRECATED_API */
328      /**
329       * When turned on, this attribute makes
330       * substrings of digits sort according to their numeric values.
331       *
332       * This is a way to get '100' to sort AFTER '2'. Note that the longest
333       * digit substring that can be treated as a single unit is
334       * 254 digits (not counting leading zeros). If a digit substring is
335       * longer than that, the digits beyond the limit will be treated as a
336       * separate digit substring.
337       *
338       * A "digit" in this sense is a code point with General_Category=Nd,
339       * which does not include circled numbers, roman numerals, etc.
340       * Only a contiguous digit substring is considered, that is,
341       * non-negative integers without separators.
342       * There is no support for plus/minus signs, decimals, exponents, etc.
343       *
344       * @stable ICU 2.8
345       */
346      UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
347 
348     /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
349      * it is needed for layout of RuleBasedCollator object. */
350 #ifndef U_FORCE_HIDE_DEPRECATED_API
351     /**
352      * One more than the highest normal UColAttribute value.
353      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
354      */
355      UCOL_ATTRIBUTE_COUNT
356 #endif  // U_FORCE_HIDE_DEPRECATED_API
357 } UColAttribute;
358 
359 /** Options for retrieving the rule string
360  *  @stable ICU 2.0
361  */
362 typedef enum {
363   /**
364    * Retrieves the tailoring rules only.
365    * Same as calling the version of getRules() without UColRuleOption.
366    * @stable ICU 2.0
367    */
368   UCOL_TAILORING_ONLY,
369   /**
370    * Retrieves the "UCA rules" concatenated with the tailoring rules.
371    * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
372    * They are almost never used or useful at runtime and can be removed from the data.
373    * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
374    * @stable ICU 2.0
375    */
376   UCOL_FULL_RULES
377 } UColRuleOption ;
378 
379 /**
380  * Open a UCollator for comparing strings.
381  *
382  * For some languages, multiple collation types are available;
383  * for example, "de@collation=phonebook".
384  * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
385  * in the old locale extension syntax ("el@colCaseFirst=upper")
386  * or in language tag syntax ("el-u-kf-upper").
387  * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>.
388  *
389  * The UCollator pointer is used in all the calls to the Collation
390  * service. After finished, collator must be disposed of by calling
391  * {@link #ucol_close }.
392  * @param loc The locale containing the required collation rules.
393  *            Special values for locales can be passed in -
394  *            if NULL is passed for the locale, the default locale
395  *            collation rules will be used. If empty string ("") or
396  *            "root" are passed, the root collator will be returned.
397  * @param status A pointer to a UErrorCode to receive any errors
398  * @return A pointer to a UCollator, or 0 if an error occurred.
399  * @see ucol_openRules
400  * @see ucol_safeClone
401  * @see ucol_close
402  * @stable ICU 2.0
403  */
404 U_CAPI UCollator* U_EXPORT2
405 ucol_open(const char *loc, UErrorCode *status);
406 
407 /**
408  * Produce a UCollator instance according to the rules supplied.
409  * The rules are used to change the default ordering, defined in the
410  * UCA in a process called tailoring. The resulting UCollator pointer
411  * can be used in the same way as the one obtained by {@link #ucol_open }.
412  * @param rules A string describing the collation rules. For the syntax
413  *              of the rules please see users guide.
414  * @param rulesLength The length of rules, or -1 if null-terminated.
415  * @param normalizationMode The normalization mode: One of
416  *             UCOL_OFF     (expect the text to not need normalization),
417  *             UCOL_ON      (normalize), or
418  *             UCOL_DEFAULT (set the mode according to the rules)
419  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
420  * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
421  * @param parseError  A pointer to UParseError to receive information about errors
422  *                    occurred during parsing. This argument can currently be set
423  *                    to NULL, but at users own risk. Please provide a real structure.
424  * @param status A pointer to a UErrorCode to receive any errors
425  * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
426  *         of error - please use status argument to check for errors.
427  * @see ucol_open
428  * @see ucol_safeClone
429  * @see ucol_close
430  * @stable ICU 2.0
431  */
432 U_CAPI UCollator* U_EXPORT2
433 ucol_openRules( const UChar        *rules,
434                 int32_t            rulesLength,
435                 UColAttributeValue normalizationMode,
436                 UCollationStrength strength,
437                 UParseError        *parseError,
438                 UErrorCode         *status);
439 
440 #ifndef U_HIDE_DEPRECATED_API
441 /**
442  * Open a collator defined by a short form string.
443  * The structure and the syntax of the string is defined in the "Naming collators"
444  * section of the users guide:
445  * https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme
446  * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
447  * strength will be 3. 3066bis locale overrides individual locale parts.
448  * The call to this function is equivalent to a call to ucol_open, followed by a
449  * series of calls to ucol_setAttribute and ucol_setVariableTop.
450  * @param definition A short string containing a locale and a set of attributes.
451  *                   Attributes not explicitly mentioned are left at the default
452  *                   state for a locale.
453  * @param parseError if not NULL, structure that will get filled with error's pre
454  *                   and post context in case of error.
455  * @param forceDefaults if false, the settings that are the same as the collator
456  *                   default settings will not be applied (for example, setting
457  *                   French secondary on a French collator would not be executed).
458  *                   If true, all the settings will be applied regardless of the
459  *                   collator default value. If the definition
460  *                   strings are to be cached, should be set to false.
461  * @param status     Error code. Apart from regular error conditions connected to
462  *                   instantiating collators (like out of memory or similar), this
463  *                   API will return an error if an invalid attribute or attribute/value
464  *                   combination is specified.
465  * @return           A pointer to a UCollator or 0 if an error occurred (including an
466  *                   invalid attribute).
467  * @see ucol_open
468  * @see ucol_setAttribute
469  * @see ucol_setVariableTop
470  * @see ucol_getShortDefinitionString
471  * @see ucol_normalizeShortDefinitionString
472  * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead.
473  */
474 U_DEPRECATED UCollator* U_EXPORT2
475 ucol_openFromShortString( const char *definition,
476                           UBool forceDefaults,
477                           UParseError *parseError,
478                           UErrorCode *status);
479 #endif  /* U_HIDE_DEPRECATED_API */
480 
481 #ifndef U_HIDE_DEPRECATED_API
482 /**
483  * Get a set containing the contractions defined by the collator. The set includes
484  * both the root collator's contractions and the contractions defined by the collator. This set
485  * will contain only strings. If a tailoring explicitly suppresses contractions from
486  * the root collator (like Russian), removed contractions will not be in the resulting set.
487  * @param coll collator
488  * @param conts the set to hold the result. It gets emptied before
489  *              contractions are added.
490  * @param status to hold the error code
491  * @return the size of the contraction set
492  *
493  * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
494  */
495 U_DEPRECATED int32_t U_EXPORT2
496 ucol_getContractions( const UCollator *coll,
497                   USet *conts,
498                   UErrorCode *status);
499 #endif  /* U_HIDE_DEPRECATED_API */
500 
501 /**
502  * Get the sets of contractions and expansions defined by the collator. The sets include
503  * both the root collator's entries and the entries defined by the tailoring.
504  * @param coll collator
505  * @param contractions if not NULL, the set to hold the contractions
506  * @param expansions if not NULL, the set to hold the expansions
507  * @param addPrefixes add the prefix contextual elements to contractions
508  * @param status to hold the error code
509  *
510  * @stable ICU 3.4
511  */
512 U_CAPI void U_EXPORT2
513 ucol_getContractionsAndExpansions( const UCollator *coll,
514                   USet *contractions, USet *expansions,
515                   UBool addPrefixes, UErrorCode *status);
516 
517 /**
518  * Close a UCollator.
519  * Once closed, a UCollator should not be used. Every open collator should
520  * be closed. Otherwise, a memory leak will result.
521  * @param coll The UCollator to close.
522  * @see ucol_open
523  * @see ucol_openRules
524  * @see ucol_safeClone
525  * @stable ICU 2.0
526  */
527 U_CAPI void U_EXPORT2
528 ucol_close(UCollator *coll);
529 
530 #if U_SHOW_CPLUSPLUS_API
531 
532 U_NAMESPACE_BEGIN
533 
534 /**
535  * \class LocalUCollatorPointer
536  * "Smart pointer" class, closes a UCollator via ucol_close().
537  * For most methods see the LocalPointerBase base class.
538  *
539  * @see LocalPointerBase
540  * @see LocalPointer
541  * @stable ICU 4.4
542  */
543 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
544 
545 U_NAMESPACE_END
546 
547 #endif
548 
549 /**
550  * Compare two strings.
551  * The strings will be compared using the options already specified.
552  * @param coll The UCollator containing the comparison rules.
553  * @param source The source string.
554  * @param sourceLength The length of source, or -1 if null-terminated.
555  * @param target The target string.
556  * @param targetLength The length of target, or -1 if null-terminated.
557  * @return The result of comparing the strings; one of UCOL_EQUAL,
558  * UCOL_GREATER, UCOL_LESS
559  * @see ucol_greater
560  * @see ucol_greaterOrEqual
561  * @see ucol_equal
562  * @stable ICU 2.0
563  */
564 U_CAPI UCollationResult U_EXPORT2
565 ucol_strcoll(    const    UCollator    *coll,
566         const    UChar        *source,
567         int32_t            sourceLength,
568         const    UChar        *target,
569         int32_t            targetLength);
570 
571 /**
572 * Compare two strings in UTF-8.
573 * The strings will be compared using the options already specified.
574 * Note: When the input string contains a malformed UTF-8 byte sequence,
575 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
576 * @param coll The UCollator containing the comparison rules.
577 * @param source The source UTF-8 string.
578 * @param sourceLength The length of source, or -1 if null-terminated.
579 * @param target The target UTF-8 string.
580 * @param targetLength The length of target, or -1 if null-terminated.
581 * @param status A pointer to a UErrorCode to receive any errors
582 * @return The result of comparing the strings; one of UCOL_EQUAL,
583 * UCOL_GREATER, UCOL_LESS
584 * @see ucol_greater
585 * @see ucol_greaterOrEqual
586 * @see ucol_equal
587 * @stable ICU 50
588 */
589 U_CAPI UCollationResult U_EXPORT2
590 ucol_strcollUTF8(
591         const UCollator *coll,
592         const char      *source,
593         int32_t         sourceLength,
594         const char      *target,
595         int32_t         targetLength,
596         UErrorCode      *status);
597 
598 /**
599  * Determine if one string is greater than another.
600  * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
601  * @param coll The UCollator containing the comparison rules.
602  * @param source The source string.
603  * @param sourceLength The length of source, or -1 if null-terminated.
604  * @param target The target string.
605  * @param targetLength The length of target, or -1 if null-terminated.
606  * @return true if source is greater than target, false otherwise.
607  * @see ucol_strcoll
608  * @see ucol_greaterOrEqual
609  * @see ucol_equal
610  * @stable ICU 2.0
611  */
612 U_CAPI UBool U_EXPORT2
613 ucol_greater(const UCollator *coll,
614              const UChar     *source, int32_t sourceLength,
615              const UChar     *target, int32_t targetLength);
616 
617 /**
618  * Determine if one string is greater than or equal to another.
619  * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
620  * @param coll The UCollator containing the comparison rules.
621  * @param source The source string.
622  * @param sourceLength The length of source, or -1 if null-terminated.
623  * @param target The target string.
624  * @param targetLength The length of target, or -1 if null-terminated.
625  * @return true if source is greater than or equal to target, false otherwise.
626  * @see ucol_strcoll
627  * @see ucol_greater
628  * @see ucol_equal
629  * @stable ICU 2.0
630  */
631 U_CAPI UBool U_EXPORT2
632 ucol_greaterOrEqual(const UCollator *coll,
633                     const UChar     *source, int32_t sourceLength,
634                     const UChar     *target, int32_t targetLength);
635 
636 /**
637  * Compare two strings for equality.
638  * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
639  * @param coll The UCollator containing the comparison rules.
640  * @param source The source string.
641  * @param sourceLength The length of source, or -1 if null-terminated.
642  * @param target The target string.
643  * @param targetLength The length of target, or -1 if null-terminated.
644  * @return true if source is equal to target, false otherwise
645  * @see ucol_strcoll
646  * @see ucol_greater
647  * @see ucol_greaterOrEqual
648  * @stable ICU 2.0
649  */
650 U_CAPI UBool U_EXPORT2
651 ucol_equal(const UCollator *coll,
652            const UChar     *source, int32_t sourceLength,
653            const UChar     *target, int32_t targetLength);
654 
655 /**
656  * Compare two strings that are accessed through UCharIterator objects.
657  * The strings will be compared using the options already specified.
658  * @param coll The UCollator containing the comparison rules.
659  * @param sIter The source string iterator.
660  * @param tIter The target string iterator.
661  * @param status A pointer to a UErrorCode to receive any errors
662  * @return The result of comparing the strings; one of UCOL_EQUAL,
663  * UCOL_GREATER, UCOL_LESS
664  * @see ucol_strcoll
665  * @stable ICU 2.6
666  */
667 U_CAPI UCollationResult U_EXPORT2
668 ucol_strcollIter(  const    UCollator    *coll,
669                   UCharIterator *sIter,
670                   UCharIterator *tIter,
671                   UErrorCode *status);
672 
673 /**
674  * Get the collation strength used in a UCollator.
675  * The strength influences how strings are compared.
676  * @param coll The UCollator to query.
677  * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
678  * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
679  * @see ucol_setStrength
680  * @stable ICU 2.0
681  */
682 U_CAPI UCollationStrength U_EXPORT2
683 ucol_getStrength(const UCollator *coll);
684 
685 /**
686  * Set the collation strength used in a UCollator.
687  * The strength influences how strings are compared.
688  * @param coll The UCollator to set.
689  * @param strength The desired collation strength; one of UCOL_PRIMARY,
690  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
691  * @see ucol_getStrength
692  * @stable ICU 2.0
693  */
694 U_CAPI void U_EXPORT2
695 ucol_setStrength(UCollator *coll,
696                  UCollationStrength strength);
697 
698 /**
699  * Retrieves the reordering codes for this collator.
700  * These reordering codes are a combination of UScript codes and UColReorderCode entries.
701  * @param coll The UCollator to query.
702  * @param dest The array to fill with the script ordering.
703  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
704  * will only return the length of the result without writing any codes (pre-flighting).
705  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
706  * failure before the function call.
707  * @return The number of reordering codes written to the dest array.
708  * @see ucol_setReorderCodes
709  * @see ucol_getEquivalentReorderCodes
710  * @see UScriptCode
711  * @see UColReorderCode
712  * @stable ICU 4.8
713  */
714 U_CAPI int32_t U_EXPORT2
715 ucol_getReorderCodes(const UCollator* coll,
716                     int32_t* dest,
717                     int32_t destCapacity,
718                     UErrorCode *pErrorCode);
719 /**
720  * Sets the reordering codes for this collator.
721  * Collation reordering allows scripts and some other groups of characters
722  * to be moved relative to each other. This reordering is done on top of
723  * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
724  * at the start and/or the end of the collation order. These groups are specified using
725  * UScript codes and UColReorderCode entries.
726  *
727  * <p>By default, reordering codes specified for the start of the order are placed in the
728  * order given after several special non-script blocks. These special groups of characters
729  * are space, punctuation, symbol, currency, and digit. These special groups are represented with
730  * UColReorderCode entries. Script groups can be intermingled with
731  * these special non-script groups if those special groups are explicitly specified in the reordering.
732  *
733  * <p>The special code OTHERS stands for any script that is not explicitly
734  * mentioned in the list of reordering codes given. Anything that is after OTHERS
735  * will go at the very end of the reordering in the order given.
736  *
737  * <p>The special reorder code DEFAULT will reset the reordering for this collator
738  * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
739  * was specified when this collator was created from resource data or from rules. The
740  * DEFAULT code <b>must</b> be the sole code supplied when it is used.
741  * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
742  *
743  * <p>The special reorder code NONE will remove any reordering for this collator.
744  * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
745  * NONE code <b>must</b> be the sole code supplied when it is used.
746  *
747  * @param coll The UCollator to set.
748  * @param reorderCodes An array of reordering codes (UScript codes and UColReorderCode entries) in the new order. This can be NULL if the
749  * length is also set to 0. An empty array will clear any reordering codes on the collator.
750  * @param reorderCodesLength The length of reorderCodes.
751  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
752  * failure before the function call.
753  * @see ucol_getReorderCodes
754  * @see ucol_getEquivalentReorderCodes
755  * @see UScriptCode
756  * @see UColReorderCode
757  * @stable ICU 4.8
758  */
759 U_CAPI void U_EXPORT2
760 ucol_setReorderCodes(UCollator* coll,
761                     const int32_t* reorderCodes,
762                     int32_t reorderCodesLength,
763                     UErrorCode *pErrorCode);
764 
765 /**
766  * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
767  * codes will be grouped and must reorder together.
768  * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
769  * for example Hiragana and Katakana.
770  *
771  * @param reorderCode The reorder code to determine equivalence for.
772  * @param dest The array to fill with the reorder codes that are grouped with reorderCode.
773  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
774  * will only return the length of the result without writing any codes (pre-flighting).
775  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
776  * a failure before the function call.
777  * @return The length of the result, i.e. the number of equivalent reorder codes; when pre-flighting (destCapacity is 0) nothing is written to dest.
778  * @see ucol_setReorderCodes
779  * @see ucol_getReorderCodes
780  * @see UScriptCode
781  * @see UColReorderCode
782  * @stable ICU 4.8
783  */
784 U_CAPI int32_t U_EXPORT2
785 ucol_getEquivalentReorderCodes(int32_t reorderCode,
786                     int32_t* dest,
787                     int32_t destCapacity,
788                     UErrorCode *pErrorCode);
789 
790 /**
791  * Get the display name for a UCollator.
792  * The display name is suitable for presentation to a user.
793  * @param objLoc The locale of the collator in question.
794  * @param dispLoc The locale for display.
795  * @param result A pointer to a buffer to receive the display name.
796  * @param resultLength The maximum size of result.
797  * @param status A pointer to a UErrorCode to receive any errors.
798  * @return The total buffer size needed; if greater than resultLength,
799  * the output was truncated.
800  * @stable ICU 2.0
801  */
802 U_CAPI int32_t U_EXPORT2
803 ucol_getDisplayName(    const    char        *objLoc,
804             const    char        *dispLoc,
805             UChar             *result,
806             int32_t         resultLength,
807             UErrorCode        *status);
808 
809 /**
810  * Get a locale for which collation rules are available.
811  * A UCollator in a locale returned by this function will perform the correct
812  * collation for the locale.
813  * @param localeIndex The index of the desired locale.
814  * @return A locale for which collation rules are available, or NULL (0) if none.
815  * @see ucol_countAvailable
816  * @stable ICU 2.0
817  */
818 U_CAPI const char* U_EXPORT2
819 ucol_getAvailable(int32_t localeIndex);
820 
821 /**
822  * Determine how many locales have collation rules available.
823  * This function is most useful for determining the loop ending condition for
824  * calls to {@link #ucol_getAvailable }.
825  * @return The number of locales for which collation rules are available.
826  * @see ucol_getAvailable
827  * @stable ICU 2.0
828  */
829 U_CAPI int32_t U_EXPORT2
830 ucol_countAvailable(void);
831 
832 #if !UCONFIG_NO_SERVICE
833 /**
834  * Create a string enumerator of all locales for which a valid
835  * collator may be opened. Only declared when UCONFIG_NO_SERVICE is 0.
836  * @param status input-output error code
837  * @return a string enumeration over locale strings. The caller is
838  * responsible for closing the result.
839  * @stable ICU 3.0
840  */
841 U_CAPI UEnumeration* U_EXPORT2
842 ucol_openAvailableLocales(UErrorCode *status);
843 #endif
844 
845 /**
846  * Create a string enumerator of all possible keywords that are relevant to
847  * collation. At this point, the only recognized keyword for this
848  * service is "collation".
849  * @param status input-output error code
850  * @return a string enumeration over keyword strings. The caller is
851  * responsible for closing the result.
852  * @stable ICU 3.0
853  */
854 U_CAPI UEnumeration* U_EXPORT2
855 ucol_getKeywords(UErrorCode *status);
856 
857 /**
858  * Given a keyword, create a string enumeration of all values
859  * for that keyword that are currently in use.
860  * @param keyword a particular keyword as enumerated by
861  * ucol_getKeywords. If any other keyword is passed in, *status is set
862  * to U_ILLEGAL_ARGUMENT_ERROR.
863  * @param status input-output error code; set to U_ILLEGAL_ARGUMENT_ERROR for an unrecognized keyword
864  * @return a string enumeration over collation keyword values, or NULL
865  * upon error. The caller is responsible for closing the result.
866  * @stable ICU 3.0
867  */
868 U_CAPI UEnumeration* U_EXPORT2
869 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
870 
871 /**
872  * Given a key and a locale, returns an enumeration of string values in a preferred
873  * order that would make a difference. These are all and only those values where
874  * the open (creation) of the service with the locale formed from the input locale
875  * plus input keyword and that value has different behavior than creation with the
876  * input locale alone.
877  * @param key           one of the keys supported by this service.  For now, only
878  *                      "collation" is supported.
879  * @param locale        the locale
880  * @param commonlyUsed  if set to true it will return only commonly used values
881  *                      with the given locale in preferred order.  Otherwise,
882  *                      it will return all the available values for the locale.
883  * @param status error status
884  * @return a string enumeration over keyword values for the given key and the locale.
885  * @stable ICU 4.2
886  */
887 U_CAPI UEnumeration* U_EXPORT2
888 ucol_getKeywordValuesForLocale(const char* key,
889                                const char* locale,
890                                UBool commonlyUsed,
891                                UErrorCode* status);
892 
893 /**
894  * Return the functionally equivalent locale for the specified
895  * input locale, with respect to given keyword, for the
896  * collation service. If two different input locale + keyword
897  * combinations produce the same result locale, then collators
898  * instantiated for these two different input locales will behave
899  * equivalently. The converse is not always true; two collators
900  * may in fact be equivalent, but return different results, due to
901  * internal details. The return result has no other meaning than
902  * that stated above, and implies nothing as to the relationship
903  * between the two locales. This is intended for use by
904  * applications that wish to cache collators, or otherwise reuse
905  * collators when possible. The functional equivalent may change
906  * over time. For more information, please see the <a
907  * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services">
908  * Locales and Services</a> section of the ICU User Guide.
909  * @param result fillin for the functionally equivalent result locale
910  * @param resultCapacity capacity of the fillin buffer
911  * @param keyword a particular keyword as enumerated by
912  * ucol_getKeywords.
913  * @param locale the specified input locale
914  * @param isAvailable if non-NULL, pointer to a fillin parameter that
915  * on return indicates whether the specified input locale was 'available'
916  * to the collation service. A locale is defined as 'available' if it
917  * physically exists within the collation locale data.
918  * @param status pointer to input-output error code
919  * @return the actual buffer size needed for the locale. If greater
920  * than resultCapacity, the returned full name will be truncated and
921  * an error code will be returned.
922  * @stable ICU 3.0
923  */
924 U_CAPI int32_t U_EXPORT2
925 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
926                              const char* keyword, const char* locale,
927                              UBool* isAvailable, UErrorCode* status);
928 
929 /**
930  * Get the collation tailoring rules from a UCollator.
931  * The rules will follow the rule syntax.
932  * @param coll The UCollator to query.
933  * @param length Output parameter; presumably receives the length of the returned rules (TODO confirm, undocumented here).
934  * @return The collation tailoring rules.
935  * @stable ICU 2.0
936  */
937 U_CAPI const UChar* U_EXPORT2
938 ucol_getRules(    const    UCollator    *coll,
939         int32_t            *length);
940 
941 #ifndef U_HIDE_DEPRECATED_API
942 /** Get the short definition string for a collator. This API harvests the collator's
943  *  locale and the attribute set and produces a string that can be used for opening
944  *  a collator with the same attributes using the ucol_openFromShortString API.
945  *  This string will be normalized.
946  *  The structure and the syntax of the string is defined in the "Naming collators"
947  *  section of the User Guide:
948  *  https://unicode-org.github.io/icu/userguide/collation/concepts#collator-naming-scheme
949  *  This API supports preflighting.
950  *  @param coll a collator
951  *  @param locale a locale that will appear as the collator's locale in the resulting
952  *                short string definition. If NULL, the locale will be harvested
953  *                from the collator.
954  *  @param buffer space to hold the resulting string
955  *  @param capacity capacity of the buffer
956  *  @param status for returning errors. Preflighting errors are also reported.
957  *  @return length of the resulting string
958  *  @see ucol_openFromShortString
959  *  @see ucol_normalizeShortDefinitionString
960  *  @deprecated ICU 54
961  */
962 U_DEPRECATED int32_t U_EXPORT2
963 ucol_getShortDefinitionString(const UCollator *coll,
964                               const char *locale,
965                               char *buffer,
966                               int32_t capacity,
967                               UErrorCode *status);
968 
969 /** Verifies and normalizes short definition string.
970  *  Normalized short definition string has all the options sorted by the argument name,
971  *  so that equivalent definition strings are the same.
972  *  This API supports preflighting.
973  *  @param source definition string
974  *  @param destination space to hold the resulting string
975  *  @param capacity capacity of the buffer
976  *  @param parseError if not NULL, structure that will get filled with error's pre
977  *                   and post context in case of error.
978  *  @param status     Error code. This API will return an error if an invalid attribute
979  *                    or attribute/value combination is specified. Preflighting
980  *                    errors are also reported.
981  *  @return length of the resulting normalized string.
982  *
983  *  @see ucol_openFromShortString
984  *  @see ucol_getShortDefinitionString
985  *
986  *  @deprecated ICU 54
987  */
988 
989 U_DEPRECATED int32_t U_EXPORT2
990 ucol_normalizeShortDefinitionString(const char *source,
991                                     char *destination,
992                                     int32_t capacity,
993                                     UParseError *parseError,
994                                     UErrorCode *status);
995 #endif  /* U_HIDE_DEPRECATED_API */
996 
997 
998 /**
999  * Get a sort key for a string from a UCollator.
1000  * Sort keys may be compared using <TT>strcmp</TT>.
1001  *
1002  * Note that sort keys are often less efficient than simply doing comparison.
1003  * For more details, see the ICU User Guide.
1004  *
1005  * Like ICU functions that write to an output buffer, the buffer contents
1006  * are undefined if the buffer capacity (resultLength parameter) is too small.
1007  * Unlike ICU functions that write a string to an output buffer,
1008  * the terminating zero byte is counted in the sort key length.
1009  * @param coll The UCollator containing the collation rules.
1010  * @param source The string to transform.
1011  * @param sourceLength The length of source, or -1 if null-terminated.
1012  * @param result A pointer to a buffer to receive the sort key.
1013  * @param resultLength The maximum size of result.
1014  * @return The size needed to fully store the sort key.
1015  *      If there was an internal error generating the sort key,
1016  *      a zero value is returned.
1017  * @see ucol_keyHashCode
1018  * @stable ICU 2.0
1019  */
1020 U_CAPI int32_t U_EXPORT2
1021 ucol_getSortKey(const    UCollator    *coll,
1022         const    UChar        *source,
1023         int32_t        sourceLength,
1024         uint8_t        *result,
1025         int32_t        resultLength);
1026 
1027 
1028 /** Gets the next count bytes of a sort key. Caller needs
1029  *  to preserve state array between calls and to provide
1030  *  the same type of UCharIterator set with the same string.
1031  *  The destination buffer provided must be big enough to store
1032  *  the number of requested bytes.
1033  *
1034  *  The generated sort key may or may not be compatible with
1035  *  sort keys generated using ucol_getSortKey().
1036  *  @param coll The UCollator containing the collation rules.
1037  *  @param iter UCharIterator containing the string we need
1038  *              the sort key to be calculated for.
1039  *  @param state Opaque state of sortkey iteration (two uint32_t values); the caller must preserve it between calls.
1040  *  @param dest Buffer to hold the resulting sortkey part; must be big enough to store count bytes.
1041  *  @param count number of sort key bytes required.
1042  *  @param status error code indicator.
1043  *  @return the actual number of bytes of a sortkey. It can be
1044  *          smaller than count if we have reached the end of
1045  *          the sort key.
1046  *  @stable ICU 2.6
1047  */
1048 U_CAPI int32_t U_EXPORT2
1049 ucol_nextSortKeyPart(const UCollator *coll,
1050                      UCharIterator *iter,
1051                      uint32_t state[2],
1052                      uint8_t *dest, int32_t count,
1053                      UErrorCode *status);
1054 
1055 /** Enum that is taken by the ucol_getBound API.
1056  * See below for explanation.
1057  * Do not change the values assigned to the
1058  * members of this enum: underlying code
1059  * depends on them having these numbers.
1060  * @stable ICU 2.0
1061  */
1062 typedef enum {
1063   /** lower bound */
1064   UCOL_BOUND_LOWER = 0,
1065   /** upper bound that will match strings of exact size */
1066   UCOL_BOUND_UPPER = 1,
1067   /** upper bound that will match all the strings that have the same initial substring as the given string */
1068   UCOL_BOUND_UPPER_LONG = 2,
1069 #ifndef U_HIDE_DEPRECATED_API
1070     /**
1071      * One more than the highest normal UColBoundMode value.
1072      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1073      */
1074     UCOL_BOUND_VALUE_COUNT
1075 #endif  /* U_HIDE_DEPRECATED_API */
1076 } UColBoundMode;
1077 
1078 /**
1079  * Produce a bound for a given sortkey and a number of levels.
1080  * Return value is always the number of bytes needed, regardless of
1081  * whether the result buffer was big enough or even valid.<br>
1082  * Resulting bounds can be used to produce a range of strings that are
1083  * between upper and lower bounds. For example, if bounds are produced
1084  * for a sortkey of string "smith", strings between upper and lower
1085  * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
1086  * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
1087  * is produced, strings matched would be as above. However, if bound
1088  * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
1089  * also match "Smithsonian" and similar.<br>
1090  * For more on usage, see example in cintltst/capitst.c in procedure
1091  * TestBounds.
1092  * Sort keys may be compared using <TT>strcmp</TT>.
1093  * @param source The source sortkey.
1094  * @param sourceLength The length of source, or -1 if null-terminated.
1095  *                     (If an unmodified sortkey is passed, it is always null
1096  *                      terminated).
1097  * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
1098  *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
1099  *                  produces upper bound that matches strings of the same length
1100  *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
1101  *                  same starting substring as the source string.
1102  * @param noOfLevels  Number of levels required in the resulting bound (for most
1103  *                    uses, the recommended value is 1). See the User Guide for an
1104  *                    explanation of the number of levels a sortkey can have.
1105  * @param result A pointer to a buffer to receive the resulting sortkey.
1106  * @param resultLength The maximum size of result.
1107  * @param status Used for returning error code if something went wrong. If the
1108  *               number of levels requested is higher than the number of levels
1109  *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
1110  *               issued.
1111  * @return The size needed to fully store the bound.
1112  * @see ucol_keyHashCode
1113  * @stable ICU 2.1
1114  */
1115 U_CAPI int32_t U_EXPORT2
1116 ucol_getBound(const uint8_t       *source,
1117         int32_t             sourceLength,
1118         UColBoundMode       boundType,
1119         uint32_t            noOfLevels,
1120         uint8_t             *result,
1121         int32_t             resultLength,
1122         UErrorCode          *status);
1123 
1124 /**
1125  * Gets the version information for a Collator. Version is currently
1126  * an opaque 32-bit number which depends, among other things, on major
1127  * versions of the collator tailoring and UCA.
1128  * @param coll The UCollator to query.
1129  * @param info Receives the version information; filled in by this function.
1130  * @stable ICU 2.0
1131  */
1132 U_CAPI void U_EXPORT2
1133 ucol_getVersion(const UCollator* coll, UVersionInfo info);
1134 
1135 /**
1136  * Gets the UCA version information for a Collator. Version is the
1137  * UCA version number (3.1.1, 4.0).
1138  * @param coll The UCollator to query.
1139  * @param info Receives the UCA version information; filled in by this function.
1140  * @stable ICU 2.8
1141  */
1142 U_CAPI void U_EXPORT2
1143 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
1144 
1145 /**
1146  * Merges two sort keys. The levels are merged with their corresponding counterparts
1147  * (primaries with primaries, secondaries with secondaries etc.). Between the values
1148  * from the same level a separator is inserted.
1149  *
1150  * This is useful, for example, for combining sort keys from first and last names
1151  * to sort such pairs.
1152  * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
1153  *
1154  * The recommended way to achieve "merged" sorting is by
1155  * concatenating strings with U+FFFE between them.
1156  * The concatenation has the same sort order as the merged sort keys,
1157  * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2).
1158  * Using strings with U+FFFE may yield shorter sort keys.
1159  *
1160  * For details about Sort Key Features see
1161  * https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features
1162  *
1163  * It is possible to merge multiple sort keys by consecutively merging
1164  * another one with the intermediate result.
1165  *
1166  * The length of the merge result is the sum of the lengths of the input sort keys.
1167  *
1168  * Example (uncompressed):
1169  * <pre>191B1D 01 050505 01 910505 00
1170  * 1F2123 01 050505 01 910505 00</pre>
1171  * will be merged as
1172  * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
1173  *
1174  * If the destination buffer is not big enough, then its contents are undefined.
1175  * If any of the source lengths are zero or any of the source pointers are NULL/undefined,
1176  * the result is of size zero.
1177  *
1178  * @param src1 the first sort key
1179  * @param src1Length the length of the first sort key, including the zero byte at the end;
1180  *        can be -1 if the function is to find the length
1181  * @param src2 the second sort key
1182  * @param src2Length the length of the second sort key, including the zero byte at the end;
1183  *        can be -1 if the function is to find the length
1184  * @param dest the buffer where the merged sort key is written,
1185  *        can be NULL if destCapacity==0
1186  * @param destCapacity the number of bytes in the dest buffer
1187  * @return the length of the merged sort key, src1Length+src2Length;
1188  *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
1189  *         in which cases the contents of dest are undefined
1190  * @stable ICU 2.0
1191  */
1192 U_CAPI int32_t U_EXPORT2
1193 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
1194                    const uint8_t *src2, int32_t src2Length,
1195                    uint8_t *dest, int32_t destCapacity);
1196 
1197 /**
1198  * Universal attribute setter.
1199  * @param coll collator whose attributes are to be changed
1200  * @param attr attribute type
1201  * @param value attribute value
1202  * @param status to indicate whether the operation went on smoothly or there were errors
1203  * @see UColAttribute
1204  * @see UColAttributeValue
1205  * @see ucol_getAttribute
1206  * @stable ICU 2.0
1207  */
1208 U_CAPI void U_EXPORT2
1209 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
1210 
1211 /**
1212  * Universal attribute getter.
1213  * @param coll collator whose attributes are to be queried
1214  * @param attr attribute type
1215  * @param status to indicate whether the operation went on smoothly or there were errors
1216  * @return attribute value
1217  * @see UColAttribute
1218  * @see UColAttributeValue
1219  * @see ucol_setAttribute
1220  * @stable ICU 2.0
1221  */
1222 U_CAPI UColAttributeValue  U_EXPORT2
1223 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
1224 
1225 /**
1226  * Sets the variable top to the top of the specified reordering group.
1227  * The variable top determines the highest-sorting character
1228  * which is affected by UCOL_ALTERNATE_HANDLING.
1229  * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1230  * @param coll the collator to modify
1231  * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
1232  *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
1233  *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
1234  * @param pErrorCode Standard ICU error code. Its input value must
1235  *                   pass the U_SUCCESS() test, or else the function returns
1236  *                   immediately. Check for U_FAILURE() on output or use with
1237  *                   function chaining. (See User Guide for details.)
1238  * @see ucol_getMaxVariable
1239  * @stable ICU 53
1240  */
1241 U_CAPI void U_EXPORT2
1242 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
1243 
1244 /**
1245  * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
1246  * @param coll the collator to query
1247  * @return the maximum variable reordering group.
1248  * @see ucol_setMaxVariable
1249  * @stable ICU 53
1250  */
1251 U_CAPI UColReorderCode U_EXPORT2
1252 ucol_getMaxVariable(const UCollator *coll);
1253 
1254 #ifndef U_HIDE_DEPRECATED_API
1255 /**
1256  * Sets the variable top to the primary weight of the specified string.
1257  *
1258  * Beginning with ICU 53, the variable top is pinned to
1259  * the top of one of the supported reordering groups,
1260  * and it must not be beyond the last of those groups.
1261  * See ucol_setMaxVariable().
1262  * @param coll the collator
1263  * @param varTop one or more (if contraction) UChars to which the variable top should be set
1264  * @param len length of the variable top string. If -1, the string is considered to be zero-terminated.
1265  * @param status error code. If error code is set, the return value is undefined.
1266  *               Errors set by this function are:<br>
1267  *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
1268  *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
1269  *    the last reordering group supported by ucol_setMaxVariable()
1270  * @return variable top primary weight
1271  * @see ucol_getVariableTop
1272  * @see ucol_restoreVariableTop
1273  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1274  */
1275 U_DEPRECATED uint32_t U_EXPORT2
1276 ucol_setVariableTop(UCollator *coll,
1277                     const UChar *varTop, int32_t len,
1278                     UErrorCode *status);
1279 #endif  /* U_HIDE_DEPRECATED_API */
1280 
1281 /**
1282  * Gets the variable top value of a Collator.
1283  * @param coll collator whose variable top needs to be retrieved
1284  * @param status error code (not changed by function). If error code is set,
1285  *               the return value is undefined.
1286  * @return the variable top primary weight
1287  * @see ucol_getMaxVariable
1288  * @see ucol_setVariableTop
1289  * @see ucol_restoreVariableTop
1290  * @stable ICU 2.0
1291  */
1292 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
1293 
1294 #ifndef U_HIDE_DEPRECATED_API
1295 /**
1296  * Sets the variable top to the specified primary weight.
1297  *
1298  * Beginning with ICU 53, the variable top is pinned to
1299  * the top of one of the supported reordering groups,
1300  * and it must not be beyond the last of those groups.
1301  * See ucol_setMaxVariable().
1302  * @param coll collator whose variable top is to be set
1303  * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop
1304  * @param status error code
1305  * @see ucol_getVariableTop
1306  * @see ucol_setVariableTop
1307  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1308  */
1309 U_DEPRECATED void U_EXPORT2
1310 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
1311 #endif  /* U_HIDE_DEPRECATED_API */
1312 
1313 /**
1314  * Thread safe cloning operation. The result is a clone of a given collator.
1315  * @param coll collator to be cloned
1316  * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
1317  * user allocated space for the new clone.
1318  * If NULL new memory will be allocated.
1319  *  If buffer is not large enough, new memory will be allocated.
1320  *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
1321  * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
1322  *  pointer to size of allocated space.
1323  *  If *pBufferSize == 0, a sufficient size for use in cloning will
1324  *  be returned ('pre-flighting')
1325  *  If *pBufferSize is not enough for a stack-based safe clone,
1326  *  new memory will be allocated.
1327  * @param status to indicate whether the operation went on smoothly or there were errors.
1328  *    An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
1329  * allocations were necessary. NOTE(review): the name is likely U_SAFECLONE_ALLOCATED_WARNING; confirm against utypes.h.
1330  * @return pointer to the new clone
1331  * @see ucol_open
1332  * @see ucol_openRules
1333  * @see ucol_close
1334  * @stable ICU 2.0
1335  */
1336 U_CAPI UCollator* U_EXPORT2
1337 ucol_safeClone(const UCollator *coll,
1338                void            *stackBuffer,
1339                int32_t         *pBufferSize,
1340                UErrorCode      *status);
1341 
1342 #ifndef U_HIDE_DEPRECATED_API
1343 
1344 /** Default memory size for the new clone created by ucol_safeClone().
1345  * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
1346  */
1347 #define U_COL_SAFECLONE_BUFFERSIZE 1
1348 
1349 #endif /* U_HIDE_DEPRECATED_API */
1350 
1351 /**
1352  * Returns current rules. Delta defines whether full rules are returned or just the tailoring.
1353  * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
1354  * to store rules, will store up to available space.
1355  *
1356  * ucol_getRules() should normally be used instead.
1357  * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
1358  * @param coll collator to get the rules from
1359  * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
1360  * @param buffer buffer to store the result in. If NULL, you'll get no rules.
1361  * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
1362  * @return number of UChars needed to store the rules
1363  * @stable ICU 2.0
1364  * @see UCOL_FULL_RULES
1365  */
1366 U_CAPI int32_t U_EXPORT2
1367 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
1368 
1369 #ifndef U_HIDE_DEPRECATED_API
1370 /**
1371  * Gets the locale name of the collator. If the collator
1372  * is instantiated from the rules, then this function returns
1373  * NULL.
1374  * @param coll The UCollator for which the locale is needed
1375  * @param type You can choose between requested, valid and actual
1376  *             locale. For description see the definition of
1377  *             ULocDataLocaleType in uloc.h
1378  * @param status error code of the operation
1379  * @return real locale name from which the collation data comes.
1380  *         If the collator was instantiated from rules, returns
1381  *         NULL.
1382  * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
1383  */
1384 U_DEPRECATED const char * U_EXPORT2
1385 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
1386 #endif  /* U_HIDE_DEPRECATED_API */
1387 
1388 /**
1389  * Gets the locale name of the collator. If the collator
1390  * is instantiated from the rules, then this function returns
1391  * NULL.
1392  * @param coll The UCollator for which the locale is needed
1393  * @param type You can choose between requested, valid and actual
1394  *             locale. For description see the definition of
1395  *             ULocDataLocaleType in uloc.h
1396  * @param status error code of the operation
1397  * @return real locale name from which the collation data comes.
1398  *         If the collator was instantiated from rules, returns
1399  *         NULL.
1400  * @stable ICU 2.8
1401  */
1402 U_CAPI const char * U_EXPORT2
1403 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
1404 
1405 /**
1406  * Gets a Unicode set that contains all the characters and sequences tailored in
1407  * this collator. The result must be disposed of by using uset_close.
1408  * @param coll        The UCollator for which we want to get tailored chars
1409  * @param status      error code of the operation
1410  * @return a pointer to a newly created USet. Must be disposed of by using uset_close
1411  * @see ucol_openRules
1412  * @see uset_close
1413  * @stable ICU 2.4
1414  */
1415 U_CAPI USet * U_EXPORT2
1416 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
1417 
1418 #ifndef U_HIDE_INTERNAL_API
1419 /** Calculates the set of unsafe code points, given a collator.
1420  *   A character is unsafe if appending a character to it could cause the ordering to change significantly.
1421  *   Collation sorts in normalized order, so anything that is rearranged by normalization can cause this.
1422  *   Thus if you have a character like a_umlaut, and you add a lower_dot to it,
1423  *   then it normalizes to a_lower_dot + umlaut, and sorts differently.
1424  *  @param coll collator for which the unsafe set is calculated
1425  *  @param unsafe a fill-in set to receive the unsafe code points
1426  *  @param status for catching errors
1427  *  @return number of elements in the set
1428  *  @internal ICU 3.0
1429  */
1430 U_CAPI int32_t U_EXPORT2
1431 ucol_getUnsafeSet( const UCollator *coll,
1432                   USet *unsafe,
1433                   UErrorCode *status);
1434 
1435 /** Touches all resources needed for instantiating a collator from a short string definition,
1436  *  thus filling up the cache.
1437  * @param definition A short string containing a locale and a set of attributes.
1438  *                   Attributes not explicitly mentioned are left at the default
1439  *                   state for a locale.
1440  * @param forceDefaults if false, the settings that are the same as the collator
1441  *                   default settings will not be applied (for example, setting
1442  *                   French secondary on a French collator would not be executed).
1443  *                   If true, all the settings will be applied regardless of the
1444  *                   collator default value. If the definition
1445  *                   strings are to be cached, should be set to false.
1446  * @param parseError if not NULL, structure that will get filled with error's pre
1447  *                   and post context in case of error.
1448  * @param status     Error code. Apart from regular error conditions connected to
1449  *                   instantiating collators (like out of memory or similar), this
1450  *                   API will return an error if an invalid attribute or attribute/value
1451  *                   combination is specified.
1452  * @see ucol_openFromShortString
1453  * @internal ICU 3.2.1
1454  */
1455 U_CAPI void U_EXPORT2
1456 ucol_prepareShortStringOpen( const char *definition,
1457                           UBool forceDefaults,
1458                           UParseError *parseError,
1459                           UErrorCode *status);
1460 #endif  /* U_HIDE_INTERNAL_API */
1461 
1462 /** Creates a binary image of a collator. This binary image can be stored and
1463  *  later used to instantiate a collator using ucol_openBinary.
1464  *  This API supports preflighting.
1465  *  @param coll collator to create the binary image of
1466  *  @param buffer a fill-in buffer to receive the binary image
1467  *  @param capacity capacity of the destination buffer
1468  *  @param status for catching errors
1469  *  @return size of the image
1470  *  @see ucol_openBinary
1471  *  @stable ICU 3.2
1472  */
1473 U_CAPI int32_t U_EXPORT2
1474 ucol_cloneBinary(const UCollator *coll,
1475                  uint8_t *buffer, int32_t capacity,
1476                  UErrorCode *status);
1477 
1478 /** Opens a collator from a collator binary image created using
1479  *  ucol_cloneBinary. The binary image used to instantiate the
1480  *  collator remains owned by the user and must stay around for
1481  *  the lifetime of the collator. The API also takes a base collator
1482  *  which must be the root collator.
1483  *  @param bin binary image owned by the user and required through the
1484  *             lifetime of the collator
1485  *  @param length size of the image. If negative, the API will try to
1486  *                figure out the length of the image
1487  *  @param base Base collator, for lookup of untailored characters.
1488  *              Must be the root collator, must not be NULL.
1489  *              The base is required to be present through the lifetime of the collator.
1490  *  @param status for catching errors
1491  *  @return newly created collator
1492  *  @see ucol_cloneBinary
1493  *  @stable ICU 3.2
1494  */
1495 U_CAPI UCollator* U_EXPORT2
1496 ucol_openBinary(const uint8_t *bin, int32_t length,
1497                 const UCollator *base,
1498                 UErrorCode *status);
1499 
1500 
1501 #endif /* #if !UCONFIG_NO_COLLATION */
1502 
1503 #endif
1504