// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
3 /**
4 *******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 package com.ibm.icu.text;
10 
11 import java.util.Comparator;
12 import java.util.LinkedList;
13 import java.util.Locale;
14 import java.util.MissingResourceException;
15 import java.util.Set;
16 
17 import com.ibm.icu.impl.ICUData;
18 import com.ibm.icu.impl.ICUDebug;
19 import com.ibm.icu.impl.ICUResourceBundle;
20 import com.ibm.icu.impl.UResource;
21 import com.ibm.icu.impl.coll.CollationData;
22 import com.ibm.icu.impl.coll.CollationRoot;
23 import com.ibm.icu.lang.UCharacter;
24 import com.ibm.icu.lang.UProperty;
25 import com.ibm.icu.lang.UScript;
26 import com.ibm.icu.util.Freezable;
27 import com.ibm.icu.util.ICUException;
28 import com.ibm.icu.util.ULocale;
29 import com.ibm.icu.util.ULocale.Category;
30 import com.ibm.icu.util.UResourceBundle;
31 import com.ibm.icu.util.VersionInfo;
32 
33 /**
34 * {@icuenhanced java.text.Collator}.{@icu _usage_}
35 *
36 * <p>Collator performs locale-sensitive string comparison. A concrete
37 * subclass, RuleBasedCollator, allows customization of the collation
38 * ordering by the use of rule sets.
39 *
40 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link Freezable}.
41 *
* <p>Following the <a href="http://www.unicode.org">Unicode
43 * Consortium</a>'s specifications for the
44 * <a href="https://www.unicode.org/reports/tr10/">Unicode Collation
45 * Algorithm (UCA)</a>, there are 5 different levels of strength used
46 * in comparisons:
47 *
48 * <ul>
49 * <li>PRIMARY strength: Typically, this is used to denote differences between
50 *     base characters (for example, "a" &lt; "b").
51 *     It is the strongest difference. For example, dictionaries are divided
52 *     into different sections by base character.
53 * <li>SECONDARY strength: Accents in the characters are considered secondary
54 *     differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other
55 *     differences
56 *     between letters can also be considered secondary differences, depending
57 *     on the language. A secondary difference is ignored when there is a
58 *     primary difference anywhere in the strings.
59 * <li>TERTIARY strength: Upper and lower case differences in characters are
60 *     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
61 *     "a&ograve;"). In addition, a variant of a letter differs from the base
62 *     form on the tertiary strength (such as "A" and "Ⓐ"). Another
63 *     example is the
64 *     difference between large and small Kana. A tertiary difference is ignored
65 *     when there is a primary or secondary difference anywhere in the strings.
66 * <li>QUATERNARY strength: When punctuation is ignored
67 *     (see <a href="https://unicode-org.github.io/icu/userguide/collation/concepts#ignoring-punctuation">
*     Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
69 *     strength, an additional strength level can
70 *     be used to distinguish words with and without punctuation (for example,
71 *     "ab" &lt; "a-b" &lt; "aB").
72 *     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
73 *     difference. The QUATERNARY strength should only be used if ignoring
74 *     punctuation is required.
75 * <li>IDENTICAL strength:
76 *     When all other strengths are equal, the IDENTICAL strength is used as a
77 *     tiebreaker. The Unicode code point values of the NFD form of each string
78 *     are compared, just in case there is no difference.
79 *     For example, Hebrew cantillation marks are only distinguished at this
*     strength. This strength should be used sparingly, as differences only in
*     code point values between two strings are an extremely rare occurrence.
82 *     Using this strength substantially decreases the performance for both
83 *     comparison and collation key generation APIs. This strength also
84 *     increases the size of the collation key.
85 * </ul>
86 *
87 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
88 * the canonical decomposition mode and one that does not use any decomposition.
89 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
90 * is not supported here. If the canonical
91 * decomposition mode is set, the Collator handles un-normalized text properly,
92 * producing the same results as if the text were normalized in NFD. If
93 * canonical decomposition is turned off, it is the user's responsibility to
94 * ensure that all text is already in the appropriate form before performing
95 * a comparison or before getting a CollationKey.
96 *
97 * <p>For more information about the collation service see the
98 * <a href="https://unicode-org.github.io/icu/userguide/collation">User Guide</a>.
99 *
100 * <p>Examples of use
101 * <pre>
102 * // Get the Collator for US English and set its strength to PRIMARY
103 * Collator usCollator = Collator.getInstance(Locale.US);
104 * usCollator.setStrength(Collator.PRIMARY);
105 * if (usCollator.compare("abc", "ABC") == 0) {
106 *     System.out.println("Strings are equivalent");
107 * }
108 *
109 * The following example shows how to compare two strings using the
110 * Collator for the default locale.
111 *
112 * // Compare two strings in the default locale
113 * Collator myCollator = Collator.getInstance();
114 * myCollator.setDecomposition(NO_DECOMPOSITION);
115 * if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
116 *     System.out.println("&agrave;&#92;u0325 is not equals to a&#92;u0325&#768; without decomposition");
117 *     myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
118 *     if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
119 *         System.out.println("Error: &agrave;&#92;u0325 should be equals to a&#92;u0325&#768; with decomposition");
120 *     }
121 *     else {
122 *         System.out.println("&agrave;&#92;u0325 is equals to a&#92;u0325&#768; with decomposition");
123 *     }
124 * }
125 * else {
126 *     System.out.println("Error: &agrave;&#92;u0325 should be not equals to a&#92;u0325&#768; without decomposition");
127 * }
128 * </pre>
129 *
130 * @see RuleBasedCollator
131 * @see CollationKey
132 * @author Syn Wee Quek
133 * @stable ICU 2.8
134 */
135 public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable
136 {
137     // public data members ---------------------------------------------------
138 
139     /**
140      * Strongest collator strength value. Typically used to denote differences
141      * between base characters. See class documentation for more explanation.
142      * @see #setStrength
143      * @see #getStrength
144      * @stable ICU 2.8
145      */
146     public final static int PRIMARY = 0;
147 
148     /**
149      * Second level collator strength value.
150      * Accents in the characters are considered secondary differences.
151      * Other differences between letters can also be considered secondary
152      * differences, depending on the language.
153      * See class documentation for more explanation.
154      * @see #setStrength
155      * @see #getStrength
156      * @stable ICU 2.8
157      */
158     public final static int SECONDARY = 1;
159 
160     /**
161      * Third level collator strength value.
162      * Upper and lower case differences in characters are distinguished at this
163      * strength level. In addition, a variant of a letter differs from the base
164      * form on the tertiary level.
165      * See class documentation for more explanation.
166      * @see #setStrength
167      * @see #getStrength
168      * @stable ICU 2.8
169      */
170     public final static int TERTIARY = 2;
171 
172     /**
173      * {@icu} Fourth level collator strength value.
174      * When punctuation is ignored
175      * (see <a href="https://unicode-org.github.io/icu/userguide/collation/concepts#ignoring-punctuation">
176      * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
177      * strength, an additional strength level can
178      * be used to distinguish words with and without punctuation.
179      * See class documentation for more explanation.
180      * @see #setStrength
181      * @see #getStrength
182      * @stable ICU 2.8
183      */
184     public final static int QUATERNARY = 3;
185 
186     /**
187      * Smallest Collator strength value. When all other strengths are equal,
188      * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
189      * values of the NFD form of each string are compared, just in case there
190      * is no difference.
191      * See class documentation for more explanation.
192      * <p>
193      * Note this value is different from JDK's
194      * @stable ICU 2.8
195      */
196     public final static int IDENTICAL = 15;
197 
198     /**
199      * {@icunote} This is for backwards compatibility with Java APIs only.  It
200      * should not be used, IDENTICAL should be used instead.  ICU's
201      * collation does not support Java's FULL_DECOMPOSITION mode.
202      * @stable ICU 3.4
203      */
204     public final static int FULL_DECOMPOSITION = IDENTICAL;
205 
206     /**
207      * Decomposition mode value. With NO_DECOMPOSITION set, Strings
208      * will not be decomposed for collation. This is the default
209      * decomposition setting unless otherwise specified by the locale
210      * used to create the Collator.
211      *
212      * <p><strong>Note</strong> this value is different from the JDK's.
213      * @see #CANONICAL_DECOMPOSITION
214      * @see #getDecomposition
215      * @see #setDecomposition
216      * @stable ICU 2.8
217      */
218     public final static int NO_DECOMPOSITION = 16;
219 
220     /**
221      * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
222      * characters that are canonical variants according to the Unicode standard
223      * will be decomposed for collation.
224      *
225      * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
226      * described in <a href="https://www.unicode.org/reports/tr15/">
227      * Unicode Technical Report #15</a>.
228      *
229      * @see #NO_DECOMPOSITION
230      * @see #getDecomposition
231      * @see #setDecomposition
232      * @stable ICU 2.8
233      */
234     public final static int CANONICAL_DECOMPOSITION = 17;
235 
236     /**
237      * Reordering codes for non-script groups that can be reordered under collation.
238      *
239      * @see #getReorderCodes
240      * @see #setReorderCodes
241      * @see #getEquivalentReorderCodes
242      * @stable ICU 4.8
243      */
244     public static interface ReorderCodes {
245         /**
246          * A special reordering code that is used to specify the default reordering codes for a locale.
247          * @stable ICU 4.8
248          */
249         public final static int DEFAULT          = -1;  // == UScript.INVALID_CODE
250         /**
251          * A special reordering code that is used to specify no reordering codes.
252          * @stable ICU 4.8
253          */
254         public final static int NONE          = UScript.UNKNOWN;
255         /**
256          * A special reordering code that is used to specify all other codes used for reordering except
257          * for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
258          * @stable ICU 4.8
259          */
260         public final static int OTHERS          = UScript.UNKNOWN;
261         /**
262          * Characters with the space property.
263          * This is equivalent to the rule value "space".
264          * @stable ICU 4.8
265          */
266         public final static int SPACE          = 0x1000;
267         /**
268          * The first entry in the enumeration of reordering groups. This is intended for use in
269          * range checking and enumeration of the reorder codes.
270          * @stable ICU 4.8
271          */
272         public final static int FIRST          = SPACE;
273         /**
274          * Characters with the punctuation property.
275          * This is equivalent to the rule value "punct".
276          * @stable ICU 4.8
277          */
278         public final static int PUNCTUATION    = 0x1001;
279         /**
280          * Characters with the symbol property.
281          * This is equivalent to the rule value "symbol".
282          * @stable ICU 4.8
283          */
284         public final static int SYMBOL         = 0x1002;
285         /**
286          * Characters with the currency property.
287          * This is equivalent to the rule value "currency".
288          * @stable ICU 4.8
289          */
290         public final static int CURRENCY       = 0x1003;
291         /**
292          * Characters with the digit property.
293          * This is equivalent to the rule value "digit".
294          * @stable ICU 4.8
295          */
296         public final static int DIGIT          = 0x1004;
297         /**
298          * One more than the highest normal ReorderCodes value.
299          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
300          */
301         @Deprecated
302         public final static int LIMIT          = 0x1005;
303     }
304 
305     // public methods --------------------------------------------------------
306 
307     /**
308      * Compares the equality of two Collator objects. Collator objects are equal if they have the same
309      * collation (sorting &amp; searching) behavior.
310      *
311      * <p>The base class checks for null and for equal types.
312      * Subclasses should override.
313      *
314      * @param obj the Collator to compare to.
315      * @return true if this Collator has exactly the same collation behavior as obj, false otherwise.
316      * @stable ICU 2.8
317      */
318     @Override
equals(Object obj)319     public boolean equals(Object obj) {
320         // Subclasses: Call this method and then add more specific checks.
321         return this == obj || (obj != null && getClass() == obj.getClass());
322     }
323 
324     /**
325      * Generates a hash code for this Collator object.
326      *
327      * <p>The implementation exists just for consistency with {@link #equals(Object)}
328      * implementation in this class and does not generate a useful hash code.
329      * Subclasses should override this implementation.
330      *
331      * @return a hash code value.
332      * @stable ICU 2.8
333      */
334     @Override
hashCode()335     public int hashCode() {
336         // Dummy return to prevent compile warnings.
337         return 0;
338     }
339 
340     // public setters --------------------------------------------------------
341 
checkNotFrozen()342     private void checkNotFrozen() {
343         if (isFrozen()) {
344             throw new UnsupportedOperationException("Attempt to modify frozen Collator");
345         }
346     }
347 
348     /**
349      * Sets this Collator's strength attribute. The strength attribute
350      * determines the minimum level of difference considered significant
351      * during comparison.
352      *
353      * <p>The base class method does nothing. Subclasses should override it if appropriate.
354      *
355      * <p>See the Collator class description for an example of use.
356      * @param newStrength the new strength value.
357      * @see #getStrength
358      * @see #PRIMARY
359      * @see #SECONDARY
360      * @see #TERTIARY
361      * @see #QUATERNARY
362      * @see #IDENTICAL
363      * @throws IllegalArgumentException if the new strength value is not valid.
364      * @stable ICU 2.8
365      */
setStrength(int newStrength)366     public void setStrength(int newStrength)
367     {
368         checkNotFrozen();
369     }
370 
371     /**
372      * @return this, for chaining
373      * @internal Used in UnicodeTools
374      * @deprecated This API is ICU internal only.
375      */
376     @Deprecated
setStrength2(int newStrength)377     public Collator setStrength2(int newStrength)
378     {
379         setStrength(newStrength);
380         return this;
381     }
382 
383     /**
384      * Sets the decomposition mode of this Collator.  Setting this
385      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
386      * Collator to handle un-normalized text properly, producing the
387      * same results as if the text were normalized. If
388      * NO_DECOMPOSITION is set, it is the user's responsibility to
389      * insure that all text is already in the appropriate form before
390      * a comparison or before getting a CollationKey. Adjusting
391      * decomposition mode allows the user to select between faster and
392      * more complete collation behavior.
393      *
394      * <p>Since a great many of the world's languages do not require
395      * text normalization, most locales set NO_DECOMPOSITION as the
396      * default decomposition mode.
397      *
398      * <p>The base class method does nothing. Subclasses should override it if appropriate.
399      *
400      * <p>See getDecomposition for a description of decomposition
401      * mode.
402      *
403      * @param decomposition the new decomposition mode
404      * @see #getDecomposition
405      * @see #NO_DECOMPOSITION
406      * @see #CANONICAL_DECOMPOSITION
407      * @throws IllegalArgumentException If the given value is not a valid
408      *            decomposition mode.
409      * @stable ICU 2.8
410      */
setDecomposition(int decomposition)411     public void setDecomposition(int decomposition)
412     {
413         checkNotFrozen();
414     }
415 
416     /**
417      * Sets the reordering codes for this collator.
418      * Collation reordering allows scripts and some other groups of characters
419      * to be moved relative to each other. This reordering is done on top of
420      * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
421      * at the start and/or the end of the collation order. These groups are specified using
422      * UScript codes and {@link Collator.ReorderCodes} entries.
423      *
424      * <p>By default, reordering codes specified for the start of the order are placed in the
425      * order given after several special non-script blocks. These special groups of characters
426      * are space, punctuation, symbol, currency, and digit. These special groups are represented with
427      * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
428      * these special non-script groups if those special groups are explicitly specified in the reordering.
429      *
430      * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS}
431      * stands for any script that is not explicitly
432      * mentioned in the list of reordering codes given. Anything that is after OTHERS
433      * will go at the very end of the reordering in the order given.
434      *
435      * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT}
436      * will reset the reordering for this collator
437      * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
438      * was specified when this collator was created from resource data or from rules. The
439      * DEFAULT code <b>must</b> be the sole code supplied when it is used.
440      * If not, then an {@link IllegalArgumentException} will be thrown.
441      *
442      * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE}
443      * will remove any reordering for this collator.
444      * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
445      * NONE code <b>must</b> be the sole code supplied when it is used.
446      *
447      * @param order the reordering codes to apply to this collator; if this is null or an empty array
448      * then this clears any existing reordering
449      * @see #getReorderCodes
450      * @see #getEquivalentReorderCodes
451      * @see Collator.ReorderCodes
452      * @see UScript
453      * @stable ICU 4.8
454      */
setReorderCodes(int... order)455     public void setReorderCodes(int... order)
456     {
457         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
458     }
459 
460     // public getters --------------------------------------------------------
461 
462     /**
463      * Returns the Collator for the current default locale.
464      * The default locale is determined by java.util.Locale.getDefault().
465      * @return the Collator for the default locale (for example, en_US) if it
466      *         is created successfully. Otherwise if there is no Collator
467      *         associated with the current locale, the root collator
468      *         will be returned.
469      * @see java.util.Locale#getDefault()
470      * @see #getInstance(Locale)
471      * @stable ICU 2.8
472      */
getInstance()473     public static final Collator getInstance()
474     {
475         return getInstance(ULocale.getDefault());
476     }
477 
478     /**
479      * Clones the collator.
480      * @stable ICU 2.8
481      * @return a clone of this collator.
482      */
483     @Override
clone()484     public Object clone() throws CloneNotSupportedException {
485         return super.clone();
486     }
487 
488     // begin registry stuff
489 
490     /**
491      * A factory used with registerFactory to register multiple collators and provide
492      * display names for them.  If standard locale display names are sufficient,
493      * Collator instances may be registered instead.
494      * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
495      * ULocale instead of Locale.  Instead of overriding createCollator(Locale),
496      * new implementations should override createCollator(ULocale).  Note that
497      * one of these two methods <b>MUST</b> be overridden or else an infinite
498      * loop will occur.
499      * @stable ICU 2.6
500      */
501     public static abstract class CollatorFactory {
502         /**
503          * Return true if this factory will be visible.  Default is true.
504          * If not visible, the locales supported by this factory will not
505          * be listed by getAvailableLocales.
506          *
507          * @return true if this factory is visible
508          * @stable ICU 2.6
509          */
visible()510         public boolean visible() {
511             return true;
512         }
513 
514         /**
515          * Return an instance of the appropriate collator.  If the locale
516          * is not supported, return null.
517          * <b>Note:</b> as of ICU4J 3.2, implementations should override
518          * this method instead of createCollator(Locale).
519          * @param loc the locale for which this collator is to be created.
520          * @return the newly created collator.
521          * @stable ICU 3.2
522          */
createCollator(ULocale loc)523         public Collator createCollator(ULocale loc) {
524             return createCollator(loc.toLocale());
525         }
526 
527         /**
528          * Return an instance of the appropriate collator.  If the locale
529          * is not supported, return null.
530          * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
531          * createCollator(ULocale) instead of this method, and inherit this
532          * method's implementation.  This method is no longer abstract
533          * and instead delegates to createCollator(ULocale).
534          * @param loc the locale for which this collator is to be created.
535          * @return the newly created collator.
536          * @stable ICU 2.6
537          */
createCollator(Locale loc)538          public Collator createCollator(Locale loc) {
539             return createCollator(ULocale.forLocale(loc));
540         }
541 
542         /**
543          * Return the name of the collator for the objectLocale, localized for the displayLocale.
544          * If objectLocale is not visible or not defined by the factory, return null.
545          * @param objectLocale the locale identifying the collator
546          * @param displayLocale the locale for which the display name of the collator should be localized
547          * @return the display name
548          * @stable ICU 2.6
549          */
getDisplayName(Locale objectLocale, Locale displayLocale)550         public String getDisplayName(Locale objectLocale, Locale displayLocale) {
551             return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
552         }
553 
554         /**
555          * Return the name of the collator for the objectLocale, localized for the displayLocale.
556          * If objectLocale is not visible or not defined by the factory, return null.
557          * @param objectLocale the locale identifying the collator
558          * @param displayLocale the locale for which the display name of the collator should be localized
559          * @return the display name
560          * @stable ICU 3.2
561          */
getDisplayName(ULocale objectLocale, ULocale displayLocale)562         public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
563             if (visible()) {
564                 Set<String> supported = getSupportedLocaleIDs();
565                 String name = objectLocale.getBaseName();
566                 if (supported.contains(name)) {
567                     return objectLocale.getDisplayName(displayLocale);
568                 }
569             }
570             return null;
571         }
572 
573         /**
574          * Return an unmodifiable collection of the locale names directly
575          * supported by this factory.
576          *
577          * @return the set of supported locale IDs.
578          * @stable ICU 2.6
579          */
getSupportedLocaleIDs()580         public abstract Set<String> getSupportedLocaleIDs();
581 
582         /**
583          * Empty default constructor.
584          * @stable ICU 2.6
585          */
CollatorFactory()586         protected CollatorFactory() {
587         }
588     }
589 
590     static abstract class ServiceShim {
getInstance(ULocale l)591         abstract Collator getInstance(ULocale l);
registerInstance(Collator c, ULocale l)592         abstract Object registerInstance(Collator c, ULocale l);
registerFactory(CollatorFactory f)593         abstract Object registerFactory(CollatorFactory f);
unregister(Object k)594         abstract boolean unregister(Object k);
getAvailableLocales()595         abstract Locale[] getAvailableLocales(); // TODO remove
getAvailableULocales()596         abstract ULocale[] getAvailableULocales();
getDisplayName(ULocale ol, ULocale dl)597         abstract String getDisplayName(ULocale ol, ULocale dl);
598     }
599 
600     private static ServiceShim shim;
getShim()601     private static ServiceShim getShim() {
602         // Note: this instantiation is safe on loose-memory-model configurations
603         // despite lack of synchronization, since the shim instance has no state--
604         // it's all in the class init.  The worst problem is we might instantiate
605         // two shim instances, but they'll share the same state so that's ok.
606         if (shim == null) {
607             try {
608                 Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
609                 shim = (ServiceShim)cls.newInstance();
610             }
611             catch (MissingResourceException e)
612             {
613                 ///CLOVER:OFF
614                 throw e;
615                 ///CLOVER:ON
616             }
617             catch (Exception e) {
618                 ///CLOVER:OFF
619                 if(DEBUG){
620                     e.printStackTrace();
621                 }
622                 throw new ICUException(e);
623                 ///CLOVER:ON
624             }
625         }
626         return shim;
627     }
628 
629     /**
630      * Simpler/faster methods for ASCII than ones based on Unicode data.
631      * TODO: There should be code like this somewhere already??
632      */
633     private static final class ASCII {
equalIgnoreCase(CharSequence left, CharSequence right)634         static boolean equalIgnoreCase(CharSequence left, CharSequence right) {
635             int length = left.length();
636             if (length != right.length()) { return false; }
637             for (int i = 0; i < length; ++i) {
638                 char lc = left.charAt(i);
639                 char rc = right.charAt(i);
640                 if (lc == rc) { continue; }
641                 if ('A' <= lc && lc <= 'Z') {
642                     if ((lc + 0x20) == rc) { continue; }
643                 } else if ('A' <= rc && rc <= 'Z') {
644                     if ((rc + 0x20) == lc) { continue; }
645                 }
646                 return false;
647             }
648             return true;
649         }
650     }
651 
getYesOrNo(String keyword, String s)652     private static final boolean getYesOrNo(String keyword, String s) {
653         if (ASCII.equalIgnoreCase(s, "yes")) {
654             return true;
655         }
656         if (ASCII.equalIgnoreCase(s, "no")) {
657             return false;
658         }
659         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
660     }
661 
getIntValue(String keyword, String s, String... values)662     private static final int getIntValue(String keyword, String s, String... values) {
663         for (int i = 0; i < values.length; ++i) {
664             if (ASCII.equalIgnoreCase(s, values[i])) {
665                 return i;
666             }
667         }
668         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
669     }
670 
getReorderCode(String keyword, String s)671     private static final int getReorderCode(String keyword, String s) {
672         return Collator.ReorderCodes.FIRST +
673                 getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit");
674         // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
675         // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
676         // Avoid introducing synonyms/aliases.
677     }
678 
679     /**
680      * Sets collation attributes according to locale keywords. See
681      * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
682      *
683      * Using "alias" keywords and values where defined:
684      * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
685      * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
686      */
setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc)687     private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) {
688         // Check for collation keywords that were already deprecated
689         // before any were supported in createInstance() (except for "collation").
690         String value = loc.getKeywordValue("colHiraganaQuaternary");
691         if (value != null) {
692             throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary");
693         }
694         value = loc.getKeywordValue("variableTop");
695         if (value != null) {
696             throw new UnsupportedOperationException("locale keyword vt/variableTop");
697         }
698         // Parse known collation keywords, ignore others.
699         value = loc.getKeywordValue("colStrength");
700         if (value != null) {
701             // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
702             int strength = getIntValue("colStrength", value,
703                     "primary", "secondary", "tertiary", "quaternary", "identical");
704             coll.setStrength(strength <= Collator.QUATERNARY ? strength : Collator.IDENTICAL);
705         }
706         value = loc.getKeywordValue("colBackwards");
707         if (value != null) {
708             if (rbc != null) {
709                 rbc.setFrenchCollation(getYesOrNo("colBackwards", value));
710             } else {
711                 throw new UnsupportedOperationException(
712                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
713             }
714         }
715         value = loc.getKeywordValue("colCaseLevel");
716         if (value != null) {
717             if (rbc != null) {
718                 rbc.setCaseLevel(getYesOrNo("colCaseLevel", value));
719             } else {
720                 throw new UnsupportedOperationException(
721                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
722             }
723         }
724         value = loc.getKeywordValue("colCaseFirst");
725         if (value != null) {
726             if (rbc != null) {
727                 int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper");
728                 if (cf == 0) {
729                     rbc.setLowerCaseFirst(false);
730                     rbc.setUpperCaseFirst(false);
731                 } else if (cf == 1) {
732                     rbc.setLowerCaseFirst(true);
733                 } else /* cf == 2 */ {
734                     rbc.setUpperCaseFirst(true);
735                 }
736             } else {
737                 throw new UnsupportedOperationException(
738                         "locale keyword kf/colCaseFirst only settable for RuleBasedCollator");
739             }
740         }
741         value = loc.getKeywordValue("colAlternate");
742         if (value != null) {
743             if (rbc != null) {
744                 rbc.setAlternateHandlingShifted(
745                         getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0);
746             } else {
747                 throw new UnsupportedOperationException(
748                         "locale keyword ka/colAlternate only settable for RuleBasedCollator");
749             }
750         }
751         value = loc.getKeywordValue("colNormalization");
752         if (value != null) {
753             coll.setDecomposition(getYesOrNo("colNormalization", value) ?
754                     Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION);
755         }
756         value = loc.getKeywordValue("colNumeric");
757         if (value != null) {
758             if (rbc != null) {
759                 rbc.setNumericCollation(getYesOrNo("colNumeric", value));
760             } else {
761                 throw new UnsupportedOperationException(
762                         "locale keyword kn/colNumeric only settable for RuleBasedCollator");
763             }
764         }
765         value = loc.getKeywordValue("colReorder");
766         if (value != null) {
767             int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST];
768             int codesLength = 0;
769             int scriptNameStart = 0;
770             for (;;) {
771                 if (codesLength == codes.length) {
772                     throw new IllegalArgumentException(
773                             "too many script codes for colReorder locale keyword: " + value);
774                 }
775                 int limit = scriptNameStart;
776                 while (limit < value.length() && value.charAt(limit) != '-') { ++limit; }
777                 String scriptName = value.substring(scriptNameStart, limit);
778                 int code;
779                 if (scriptName.length() == 4) {
780                     // Strict parsing, accept only 4-letter script codes, not long names.
781                     code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName);
782                 } else {
783                     code = getReorderCode("colReorder", scriptName);
784                 }
785                 codes[codesLength++] = code;
786                 if (limit == value.length()) { break; }
787                 scriptNameStart = limit + 1;
788             }
789             if (codesLength == 0) {
790                 throw new IllegalArgumentException("no script codes for colReorder locale keyword");
791             }
792             int[] args = new int[codesLength];
793             System.arraycopy(codes, 0, args, 0, codesLength);
794             coll.setReorderCodes(args);
795         }
796         value = loc.getKeywordValue("kv");
797         if (value != null) {
798             coll.setMaxVariable(getReorderCode("kv", value));
799         }
800     }
801 
802     /**
803      * {@icu} Returns the Collator for the desired locale.
804      *
805      * <p>For some languages, multiple collation types are available;
806      * for example, "de@collation=phonebook".
807      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
808      * in the old locale extension syntax ("el@colCaseFirst=upper")
809      * or in language tag syntax ("el-u-kf-upper").
810      * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>.
811      *
812      * @param locale the desired locale.
813      * @return Collator for the desired locale if it is created successfully.
814      *         Otherwise if there is no Collator
815      *         associated with the current locale, the root collator will
816      *         be returned.
817      * @see java.util.Locale
818      * @see java.util.ResourceBundle
819      * @see #getInstance(Locale)
820      * @see #getInstance()
821      * @stable ICU 3.0
822      */
getInstance(ULocale locale)823     public static final Collator getInstance(ULocale locale) {
824         // fetching from service cache is faster than instantiation
825         if (locale == null) {
826             locale = ULocale.getDefault();
827         }
828         Collator coll = getShim().getInstance(locale);
829         if (!locale.getName().equals(locale.getBaseName())) {  // any keywords?
830             setAttributesFromKeywords(locale, coll,
831                     (coll instanceof RuleBasedCollator) ? (RuleBasedCollator)coll : null);
832         }
833         return coll;
834     }
835 
836     /**
837      * Returns the Collator for the desired locale.
838      *
839      * <p>For some languages, multiple collation types are available;
840      * for example, "de-u-co-phonebk".
841      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
842      * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale})
843      * or in language tag syntax ("el-u-kf-upper").
844      * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>.
845      *
846      * @param locale the desired locale.
847      * @return Collator for the desired locale if it is created successfully.
848      *         Otherwise if there is no Collator
849      *         associated with the current locale, the root collator will
850      *         be returned.
851      * @see java.util.Locale
852      * @see java.util.ResourceBundle
853      * @see #getInstance(ULocale)
854      * @see #getInstance()
855      * @stable ICU 2.8
856      */
getInstance(Locale locale)857     public static final Collator getInstance(Locale locale) {
858         return getInstance(ULocale.forLocale(locale));
859     }
860 
861     /**
862      * {@icu} Registers a collator as the default collator for the provided locale.  The
863      * collator should not be modified after it is registered.
864      *
865      * <p>Because ICU may choose to cache Collator objects internally, this must
866      * be called at application startup, prior to any calls to
867      * Collator.getInstance to avoid undefined behavior.
868      *
869      * @param collator the collator to register
870      * @param locale the locale for which this is the default collator
871      * @return an object that can be used to unregister the registered collator.
872      *
873      * @stable ICU 3.2
874      */
registerInstance(Collator collator, ULocale locale)875     public static final Object registerInstance(Collator collator, ULocale locale) {
876         return getShim().registerInstance(collator, locale);
877     }
878 
879     /**
880      * {@icu} Registers a collator factory.
881      *
882      * <p>Because ICU may choose to cache Collator objects internally, this must
883      * be called at application startup, prior to any calls to
884      * Collator.getInstance to avoid undefined behavior.
885      *
886      * @param factory the factory to register
887      * @return an object that can be used to unregister the registered factory.
888      *
889      * @stable ICU 2.6
890      */
registerFactory(CollatorFactory factory)891     public static final Object registerFactory(CollatorFactory factory) {
892         return getShim().registerFactory(factory);
893     }
894 
895     /**
896      * {@icu} Unregisters a collator previously registered using registerInstance.
897      * @param registryKey the object previously returned by registerInstance.
898      * @return true if the collator was successfully unregistered.
899      * @stable ICU 2.6
900      */
unregister(Object registryKey)901     public static final boolean unregister(Object registryKey) {
902         if (shim == null) {
903             return false;
904         }
905         return shim.unregister(registryKey);
906     }
907 
908     /**
909      * Returns the set of locales, as Locale objects, for which collators
910      * are installed.  Note that Locale objects do not support RFC 3066.
911      * @return the list of locales in which collators are installed.
912      * This list includes any that have been registered, in addition to
913      * those that are installed with ICU4J.
914      * @stable ICU 2.4
915      */
getAvailableLocales()916     public static Locale[] getAvailableLocales() {
917         // TODO make this wrap getAvailableULocales later
918         if (shim == null) {
919             return ICUResourceBundle.getAvailableLocales(
920                 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
921         }
922         return shim.getAvailableLocales();
923     }
924 
925     /**
926      * {@icu} Returns the set of locales, as ULocale objects, for which collators
927      * are installed.  ULocale objects support RFC 3066.
928      * @return the list of locales in which collators are installed.
929      * This list includes any that have been registered, in addition to
930      * those that are installed with ICU4J.
931      * @stable ICU 3.0
932      */
getAvailableULocales()933     public static final ULocale[] getAvailableULocales() {
934         if (shim == null) {
935             return ICUResourceBundle.getAvailableULocales(
936                     ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
937         }
938         return shim.getAvailableULocales();
939     }
940 
/**
 * The list of keywords for this service.  This must be kept in sync with
 * the resource data.
 * Exposed through getKeywords(); getKeywordValues(String) accepts only the
 * first entry.
 * @since ICU 3.0
 */
private static final String[] KEYWORDS = { "collation" };
947 
/**
 * The resource name for this service.  Note that this is not the same as
 * the keyword for this service.
 * Passed to ICUResourceBundle.getKeywordValues and
 * ICUResourceBundle.getFunctionalEquivalent by this class.
 * @since ICU 3.0
 */
private static final String RESOURCE = "collations";
954 
/**
 * The resource bundle base name for this service.
 * @since ICU 3.0
 */
private static final String BASE = ICUData.ICU_COLLATION_BASE_NAME;
961 
962     /**
963      * {@icu} Returns an array of all possible keywords that are relevant to
964      * collation. At this point, the only recognized keyword for this
965      * service is "collation".
966      * @return an array of valid collation keywords.
967      * @see #getKeywordValues
968      * @stable ICU 3.0
969      */
getKeywords()970     public static final String[] getKeywords() {
971         return KEYWORDS;
972     }
973 
974     /**
975      * {@icu} Given a keyword, returns an array of all values for
976      * that keyword that are currently in use.
977      * @param keyword one of the keywords returned by getKeywords.
978      * @see #getKeywords
979      * @stable ICU 3.0
980      */
getKeywordValues(String keyword)981     public static final String[] getKeywordValues(String keyword) {
982         if (!keyword.equals(KEYWORDS[0])) {
983             throw new IllegalArgumentException("Invalid keyword: " + keyword);
984         }
985         return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
986     }
987 
988     /**
989      * {@icu} Given a key and a locale, returns an array of string values in a preferred
990      * order that would make a difference. These are all and only those values where
991      * the open (creation) of the service with the locale formed from the input locale
992      * plus input keyword and that value has different behavior than creation with the
993      * input locale alone.
994      * @param key           one of the keys supported by this service.  For now, only
995      *                      "collation" is supported.
996      * @param locale        the locale
997      * @param commonlyUsed  if set to true it will return only commonly used values
998      *                      with the given locale in preferred order.  Otherwise,
999      *                      it will return all the available values for the locale.
1000      * @return an array of string values for the given key and the locale.
1001      * @stable ICU 4.2
1002      */
getKeywordValuesForLocale(String key, ULocale locale, boolean commonlyUsed)1003     public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
1004                                                            boolean commonlyUsed) {
1005         // Note: The parameter commonlyUsed is not used.
1006         // The switch is in the method signature for consistency
1007         // with other locale services.
1008 
1009         // Read available collation values from collation bundles.
1010         ICUResourceBundle bundle = (ICUResourceBundle)
1011                 UResourceBundle.getBundleInstance(
1012                         ICUData.ICU_COLLATION_BASE_NAME, locale);
1013         KeywordsSink sink = new KeywordsSink();
1014         bundle.getAllItemsWithFallback("collations", sink);
1015         return sink.values.toArray(new String[sink.values.size()]);
1016     }
1017 
1018     private static final class KeywordsSink extends UResource.Sink {
1019         LinkedList<String> values = new LinkedList<>();
1020         boolean hasDefault = false;
1021 
1022         @Override
put(UResource.Key key, UResource.Value value, boolean noFallback)1023         public void put(UResource.Key key, UResource.Value value, boolean noFallback) {
1024             UResource.Table collations = value.getTable();
1025             for (int i = 0; collations.getKeyAndValue(i, key, value); ++i) {
1026                 int type = value.getType();
1027                 if (type == UResourceBundle.STRING) {
1028                     if (!hasDefault && key.contentEquals("default")) {
1029                         String defcoll = value.getString();
1030                         if (!defcoll.isEmpty()) {
1031                             values.remove(defcoll);
1032                             values.addFirst(defcoll);
1033                             hasDefault = true;
1034                         }
1035                     }
1036                 } else if (type == UResourceBundle.TABLE && !key.startsWith("private-")) {
1037                     String collkey = key.toString();
1038                     if (!values.contains(collkey)) {
1039                         values.add(collkey);
1040                     }
1041                 }
1042             }
1043         }
1044     }
1045 
1046     /**
1047      * {@icu} Returns the functionally equivalent locale for the given
1048      * requested locale, with respect to given keyword, for the
1049      * collation service.  If two locales return the same result, then
1050      * collators instantiated for these locales will behave
1051      * equivalently.  The converse is not always true; two collators
1052      * may in fact be equivalent, but return different results, due to
1053      * internal details.  The return result has no other meaning than
1054      * that stated above, and implies nothing as to the relationship
1055      * between the two locales.  This is intended for use by
1056      * applications who wish to cache collators, or otherwise reuse
1057      * collators when possible.  The functional equivalent may change
1058      * over time.  For more information, please see the <a
1059      * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services">
1060      * Locales and Services</a> section of the ICU User Guide.
1061      * @param keyword a particular keyword as enumerated by
1062      * getKeywords.
1063      * @param locID The requested locale
1064      * @param isAvailable If non-null, isAvailable[0] will receive an
1065      * output boolean that indicates whether the requested locale was
1066      * 'available' to the collation service. If non-null, isAvailable
1067      * must have length &gt;= 1.
1068      * @return the locale
1069      * @stable ICU 3.0
1070      */
getFunctionalEquivalent(String keyword, ULocale locID, boolean isAvailable[])1071     public static final ULocale getFunctionalEquivalent(String keyword,
1072                                                         ULocale locID,
1073                                                         boolean isAvailable[]) {
1074         return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE,
1075                                                          keyword, locID, isAvailable, true);
1076     }
1077 
1078     /**
1079      * {@icu} Returns the functionally equivalent locale for the given
1080      * requested locale, with respect to given keyword, for the
1081      * collation service.
1082      * @param keyword a particular keyword as enumerated by
1083      * getKeywords.
1084      * @param locID The requested locale
1085      * @return the locale
1086      * @see #getFunctionalEquivalent(String,ULocale,boolean[])
1087      * @stable ICU 3.0
1088      */
getFunctionalEquivalent(String keyword, ULocale locID)1089     public static final ULocale getFunctionalEquivalent(String keyword,
1090                                                         ULocale locID) {
1091         return getFunctionalEquivalent(keyword, locID, null);
1092     }
1093 
1094     /**
1095      * {@icu} Returns the name of the collator for the objectLocale, localized for the
1096      * displayLocale.
1097      * @param objectLocale the locale of the collator
1098      * @param displayLocale the locale for the collator's display name
1099      * @return the display name
1100      * @stable ICU 2.6
1101      */
getDisplayName(Locale objectLocale, Locale displayLocale)1102     static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
1103         return getShim().getDisplayName(ULocale.forLocale(objectLocale),
1104                                         ULocale.forLocale(displayLocale));
1105     }
1106 
1107     /**
1108      * {@icu} Returns the name of the collator for the objectLocale, localized for the
1109      * displayLocale.
1110      * @param objectLocale the locale of the collator
1111      * @param displayLocale the locale for the collator's display name
1112      * @return the display name
1113      * @stable ICU 3.2
1114      */
getDisplayName(ULocale objectLocale, ULocale displayLocale)1115     static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
1116         return getShim().getDisplayName(objectLocale, displayLocale);
1117     }
1118 
1119     /**
1120      * {@icu} Returns the name of the collator for the objectLocale, localized for the
1121      * default <code>DISPLAY</code> locale.
1122      * @param objectLocale the locale of the collator
1123      * @return the display name
1124      * @see com.ibm.icu.util.ULocale.Category#DISPLAY
1125      * @stable ICU 2.6
1126      */
getDisplayName(Locale objectLocale)1127     static public String getDisplayName(Locale objectLocale) {
1128         return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY));
1129     }
1130 
1131     /**
1132      * {@icu} Returns the name of the collator for the objectLocale, localized for the
1133      * default <code>DISPLAY</code> locale.
1134      * @param objectLocale the locale of the collator
1135      * @return the display name
1136      * @see com.ibm.icu.util.ULocale.Category#DISPLAY
1137      * @stable ICU 3.2
1138      */
getDisplayName(ULocale objectLocale)1139     static public String getDisplayName(ULocale objectLocale) {
1140         return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY));
1141     }
1142 
1143     /**
1144      * Returns this Collator's strength attribute. The strength attribute
1145      * determines the minimum level of difference considered significant.
1146      * {@icunote} This can return QUATERNARY strength, which is not supported by the
1147      * JDK version.
1148      * <p>
1149      * See the Collator class description for more details.
1150      * <p>The base class method always returns {@link #TERTIARY}.
1151      * Subclasses should override it if appropriate.
1152      *
1153      * @return this Collator's current strength attribute.
1154      * @see #setStrength
1155      * @see #PRIMARY
1156      * @see #SECONDARY
1157      * @see #TERTIARY
1158      * @see #QUATERNARY
1159      * @see #IDENTICAL
1160      * @stable ICU 2.8
1161      */
getStrength()1162     public int getStrength()
1163     {
1164         return TERTIARY;
1165     }
1166 
1167     /**
1168      * Returns the decomposition mode of this Collator. The decomposition mode
1169      * determines how Unicode composed characters are handled.
1170      * <p>
1171      * See the Collator class description for more details.
1172      * <p>The base class method always returns {@link #NO_DECOMPOSITION}.
1173      * Subclasses should override it if appropriate.
1174      *
1175      * @return the decomposition mode
1176      * @see #setDecomposition
1177      * @see #NO_DECOMPOSITION
1178      * @see #CANONICAL_DECOMPOSITION
1179      * @stable ICU 2.8
1180      */
getDecomposition()1181     public int getDecomposition()
1182     {
1183         return NO_DECOMPOSITION;
1184     }
1185 
1186     // public other methods -------------------------------------------------
1187 
1188     /**
1189      * Compares the equality of two text Strings using
1190      * this Collator's rules, strength and decomposition mode.  Convenience method.
1191      * @param source the source string to be compared.
1192      * @param target the target string to be compared.
1193      * @return true if the strings are equal according to the collation
1194      *         rules, otherwise false.
1195      * @see #compare
1196      * @throws NullPointerException thrown if either argument is null.
1197      * @stable ICU 2.8
1198      */
equals(String source, String target)1199     public boolean equals(String source, String target)
1200     {
1201         return (compare(source, target) == 0);
1202     }
1203 
1204     /**
1205      * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
1206      * in this collator.
1207      * @return a UnicodeSet object containing all the
1208      *         code points and sequences that may sort differently than
1209      *         in the root collator.
1210      * @stable ICU 2.4
1211      */
getTailoredSet()1212     public UnicodeSet getTailoredSet()
1213     {
1214         return new UnicodeSet(0, 0x10FFFF);
1215     }
1216 
/**
 * Compares the source text String to the target text String according to
 * this Collator's rules, strength and decomposition mode.
 * Returns an integer less than, equal to or greater than zero depending on
 * whether the source String is less than, equal to or greater than the
 * target String. See the Collator class description for an example of use.
 *
 * @param source the source String.
 * @param target the target String.
 * @return a value less than zero if source is less than target, zero if
 *         source and target are equal, and a value greater than zero if
 *         source is greater than target.
 * @see CollationKey
 * @see #getCollationKey
 * @throws NullPointerException thrown if either argument is null.
 * @stable ICU 2.8
 */
public abstract int compare(String source, String target);
1236 
1237     /**
1238      * Compares the source Object to the target Object.
1239      *
1240      * @param source the source Object.
1241      * @param target the target Object.
1242      * @return Returns an integer value. Value is less than zero if source is
1243      *         less than target, value is zero if source and target are equal,
1244      *         value is greater than zero if source is greater than target.
1245      * @throws ClassCastException thrown if either argument cannot be cast to CharSequence.
1246      * @stable ICU 4.2
1247      */
1248     @Override
compare(Object source, Object target)1249     public int compare(Object source, Object target) {
1250         return doCompare((CharSequence)source, (CharSequence)target);
1251     }
1252 
1253     /**
1254      * Compares two CharSequences.
1255      * The base class just calls compare(left.toString(), right.toString()).
1256      * Subclasses should instead implement this method and have the String API call this method.
1257      * @internal
1258      * @deprecated This API is ICU internal only.
1259      */
1260     @Deprecated
doCompare(CharSequence left, CharSequence right)1261     protected int doCompare(CharSequence left, CharSequence right) {
1262         return compare(left.toString(), right.toString());
1263     }
1264 
/**
 * Transforms the String into a CollationKey suitable for efficient
 * repeated comparison.  The resulting key depends on the collator's
 * rules, strength and decomposition mode.
 *
 * <p>Note that collation keys are often less efficient than simply doing comparison.
 * For more details, see the ICU User Guide.
 *
 * <p>See the CollationKey class documentation for more information.
 * @param source the string to be transformed into a CollationKey.
 * @return the CollationKey for the given String based on this Collator's
 *         collation rules. If the source String is null, a null
 *         CollationKey is returned.
 * @see CollationKey
 * @see #compare(String, String)
 * @see #getRawCollationKey
 * @stable ICU 2.8
 */
public abstract CollationKey getCollationKey(String source);
1285 
/**
 * {@icu} Returns the simpler form of a CollationKey for the String source following
 * the rules of this Collator and stores the result into the user provided argument
 * key.  If key has an internal byte array of length that's too small for the result,
 * the internal byte array will be grown to the exact required size.
 *
 * <p>Note that collation keys are often less efficient than simply doing comparison.
 * For more details, see the ICU User Guide.
 *
 * @param source the text String to be transformed into a RawCollationKey
 * @param key the RawCollationKey that receives the result; may be null, in which
 *            case a new instance is created
 * @return If key is null, a new instance of RawCollationKey will be
 *         created and returned, otherwise the user provided key will be
 *         returned.
 * @see #compare(String, String)
 * @see #getCollationKey
 * @see RawCollationKey
 * @stable ICU 2.8
 */
public abstract RawCollationKey getRawCollationKey(String source,
                                                   RawCollationKey key);
1306 
1307     /**
1308      * {@icu} Sets the variable top to the top of the specified reordering group.
1309      * The variable top determines the highest-sorting character
1310      * which is affected by the alternate handling behavior.
1311      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1312      *
1313      * <p>The base class implementation throws an UnsupportedOperationException.
1314      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
1315      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
1316      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
1317      * @return this
1318      * @see #getMaxVariable
1319      * @stable ICU 53
1320      */
setMaxVariable(int group)1321     public Collator setMaxVariable(int group) {
1322         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1323     }
1324 
1325     /**
1326      * {@icu} Returns the maximum reordering group whose characters are affected by
1327      * the alternate handling behavior.
1328      *
1329      * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION.
1330      * @return the maximum variable reordering group.
1331      * @see #setMaxVariable
1332      * @stable ICU 53
1333      */
getMaxVariable()1334     public int getMaxVariable() {
1335         return Collator.ReorderCodes.PUNCTUATION;
1336     }
1337 
/**
 * {@icu} Sets the variable top to the primary weight of the specified string.
 *
 * <p>Beginning with ICU 53, the variable top is pinned to
 * the top of one of the supported reordering groups,
 * and it must not be beyond the last of those groups.
 * See {@link #setMaxVariable(int)}.
 *
 * @param varTop one or more (if contraction) characters to which the
 *               variable top should be set
 * @return variable top primary weight
 * @exception IllegalArgumentException
 *                is thrown if the varTop argument is not a valid variable top element.
 *                A variable top element is invalid when
 *                <ul>
 *                <li>it is a contraction that does not exist in the Collation order
 *                <li>the variable top is beyond
 *                    the last reordering group supported by setMaxVariable()
 *                <li>the varTop argument is null or zero in length.
 *                </ul>
 * @see #getVariableTop
 * @see RuleBasedCollator#setAlternateHandlingShifted
 * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
 */
@Deprecated
public abstract int setVariableTop(String varTop);
1364 
1365     /**
1366      * {@icu} Gets the variable top value of a Collator.
1367      *
1368      * @return the variable top primary weight
1369      * @see #getMaxVariable
1370      * @stable ICU 2.6
1371      */
getVariableTop()1372     public abstract int getVariableTop();
1373 
1374     /**
1375      * {@icu} Sets the variable top to the specified primary weight.
1376      *
1377      * <p>Beginning with ICU 53, the variable top is pinned to
1378      * the top of one of the supported reordering groups,
1379      * and it must not be beyond the last of those groups.
1380      * See {@link #setMaxVariable(int)}.
1381      *
1382      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
1383      * @see #getVariableTop
1384      * @see #setVariableTop(String)
1385      * @deprecated ICU 53 Call setMaxVariable() instead.
1386      */
1387     @Deprecated
setVariableTop(int varTop)1388     public abstract void setVariableTop(int varTop);
1389 
1390     /**
1391      * {@icu} Returns the version of this collator object.
1392      * @return the version object associated with this collator
1393      * @stable ICU 2.8
1394      */
getVersion()1395     public abstract VersionInfo getVersion();
1396 
1397     /**
1398      * {@icu} Returns the UCA version of this collator object.
1399      * @return the version object associated with this collator
1400      * @stable ICU 2.8
1401      */
getUCAVersion()1402     public abstract VersionInfo getUCAVersion();
1403 
1404     /**
1405      * Retrieves the reordering codes for this collator.
1406      * These reordering codes are a combination of UScript codes and ReorderCodes.
1407      * @return a copy of the reordering codes for this collator;
1408      * if none are set then returns an empty array
1409      * @see #setReorderCodes
1410      * @see #getEquivalentReorderCodes
1411      * @see Collator.ReorderCodes
1412      * @see UScript
1413      * @stable ICU 4.8
1414      */
getReorderCodes()1415     public int[] getReorderCodes()
1416     {
1417         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1418     }
1419 
1420     /**
1421      * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
1422      * codes are grouped and must reorder together.
1423      * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
1424      * for example Hiragana and Katakana.
1425      *
1426      * @param reorderCode The reorder code to determine equivalence for.
1427      * @return the set of all reorder codes in the same group as the given reorder code.
1428      * @see #setReorderCodes
1429      * @see #getReorderCodes
1430      * @see Collator.ReorderCodes
1431      * @see UScript
1432      * @stable ICU 4.8
1433      */
getEquivalentReorderCodes(int reorderCode)1434     public static int[] getEquivalentReorderCodes(int reorderCode) {
1435         CollationData baseData = CollationRoot.getData();
1436         return baseData.getEquivalentScripts(reorderCode);
1437     }
1438 
1439 
1440     // Freezable interface implementation -------------------------------------------------
1441 
1442     /**
1443      * Determines whether the object has been frozen or not.
1444      *
1445      * <p>An unfrozen Collator is mutable and not thread-safe.
1446      * A frozen Collator is immutable and thread-safe.
1447      *
1448      * @stable ICU 4.8
1449      */
1450     @Override
isFrozen()1451     public boolean isFrozen() {
1452         return false;
1453     }
1454 
1455     /**
1456      * Freezes the collator.
1457      * @return the collator itself.
1458      * @stable ICU 4.8
1459      */
1460     @Override
freeze()1461     public Collator freeze() {
1462         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1463     }
1464 
1465     /**
1466      * Provides for the clone operation. Any clone is initially unfrozen.
1467      * @stable ICU 4.8
1468      */
1469     @Override
cloneAsThawed()1470     public Collator cloneAsThawed() {
1471         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1472     }
1473 
1474     /**
1475      * Empty default constructor to make javadocs happy
1476      * @stable ICU 2.4
1477      */
Collator()1478     protected Collator()
1479     {
1480     }
1481 
1482     private static final boolean DEBUG = ICUDebug.enabled("collator");
1483 
1484     // -------- BEGIN ULocale boilerplate --------
1485 
1486     /**
1487      * {@icu} Returns the locale that was used to create this object, or null.
1488      * This may may differ from the locale requested at the time of
1489      * this object's creation.  For example, if an object is created
1490      * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
1491      * drawn from <tt>en</tt> (the <i>actual</i> locale), and
1492      * <tt>en_US</tt> may be the most specific locale that exists (the
1493      * <i>valid</i> locale).
1494      *
1495      * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
1496      * contains a partial preview implementation.  The * <i>actual</i>
1497      * locale is returned correctly, but the <i>valid</i> locale is
1498      * not, in most cases.
1499      *
1500      * <p>The base class method always returns {@link ULocale#ROOT}.
1501      * Subclasses should override it if appropriate.
1502      *
1503      * @param type type of information requested, either {@link
1504      * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
1505      * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
1506      * @return the information specified by <i>type</i>, or null if
1507      * this object was not constructed from locale data.
1508      * @see com.ibm.icu.util.ULocale
1509      * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1510      * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1511      * @draft ICU 2.8 (retain)
1512      */
getLocale(ULocale.Type type)1513     public ULocale getLocale(ULocale.Type type) {
1514         return ULocale.ROOT;
1515     }
1516 
1517     /**
1518      * Set information about the locales that were used to create this
1519      * object.  If the object was not constructed from locale data,
1520      * both arguments should be set to null.  Otherwise, neither
1521      * should be null.  The actual locale must be at the same level or
1522      * less specific than the valid locale.  This method is intended
1523      * for use by factories or other entities that create objects of
1524      * this class.
1525      *
1526      * <p>The base class method does nothing. Subclasses should override it if appropriate.
1527      *
1528      * @param valid the most specific locale containing any resource
1529      * data, or null
1530      * @param actual the locale containing data used to construct this
1531      * object, or null
1532      * @see com.ibm.icu.util.ULocale
1533      * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1534      * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1535      */
setLocale(ULocale valid, ULocale actual)1536     void setLocale(ULocale valid, ULocale actual) {}
1537 
1538     // -------- END ULocale boilerplate --------
1539 }
1540