• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 1996-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10 package android.icu.text;
11 
12 import java.util.Comparator;
13 import java.util.LinkedList;
14 import java.util.Locale;
15 import java.util.MissingResourceException;
16 import java.util.Set;
17 
18 import android.icu.impl.ICUData;
19 import android.icu.impl.ICUDebug;
20 import android.icu.impl.ICUResourceBundle;
21 import android.icu.impl.UResource;
22 import android.icu.impl.coll.CollationData;
23 import android.icu.impl.coll.CollationRoot;
24 import android.icu.lang.UCharacter;
25 import android.icu.lang.UProperty;
26 import android.icu.lang.UScript;
27 import android.icu.util.Freezable;
28 import android.icu.util.ICUException;
29 import android.icu.util.ULocale;
30 import android.icu.util.ULocale.Category;
31 import android.icu.util.UResourceBundle;
32 import android.icu.util.VersionInfo;
33 
34 /**
35 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.text.Collator}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
36 *
37 * <p>Collator performs locale-sensitive string comparison. A concrete
38 * subclass, RuleBasedCollator, allows customization of the collation
39 * ordering by the use of rule sets.
40 *
41 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link Freezable}.
42 *
43 * <p>Following the <a href="http://www.unicode.org">Unicode
44 * Consortium</a>'s specifications for the
45 * <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
46 * Algorithm (UCA)</a>, there are 5 different levels of strength used
47 * in comparisons:
48 *
49 * <ul>
50 * <li>PRIMARY strength: Typically, this is used to denote differences between
51 *     base characters (for example, "a" &lt; "b").
52 *     It is the strongest difference. For example, dictionaries are divided
53 *     into different sections by base character.
54 * <li>SECONDARY strength: Accents in the characters are considered secondary
55 *     differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other
56 *     differences
57 *     between letters can also be considered secondary differences, depending
58 *     on the language. A secondary difference is ignored when there is a
59 *     primary difference anywhere in the strings.
60 * <li>TERTIARY strength: Upper and lower case differences in characters are
61 *     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
62 *     "a&ograve;"). In addition, a variant of a letter differs from the base
63 *     form on the tertiary strength (such as "A" and "Ⓐ"). Another
64 *     example is the
65 *     difference between large and small Kana. A tertiary difference is ignored
66 *     when there is a primary or secondary difference anywhere in the strings.
67 * <li>QUATERNARY strength: When punctuation is ignored
68 *     (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
69 *     Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
70 *     strength, an additional strength level can
71 *     be used to distinguish words with and without punctuation (for example,
72 *     "ab" &lt; "a-b" &lt; "aB").
73 *     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
74 *     difference. The QUATERNARY strength should only be used if ignoring
75 *     punctuation is required.
76 * <li>IDENTICAL strength:
77 *     When all other strengths are equal, the IDENTICAL strength is used as a
78 *     tiebreaker. The Unicode code point values of the NFD form of each string
79 *     are compared, just in case there is no difference.
80 *     For example, Hebrew cantillation marks are only distinguished at this
81 *     strength. This strength should be used sparingly, as a difference only in
82 *     code point values between two strings is an extremely rare occurrence.
83 *     Using this strength substantially decreases the performance for both
84 *     comparison and collation key generation APIs. This strength also
85 *     increases the size of the collation key.
86 * </ul>
87 *
88 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
89 * the canonical decomposition mode and one that does not use any decomposition.
90 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
91 * is not supported here. If the canonical
92 * decomposition mode is set, the Collator handles un-normalized text properly,
93 * producing the same results as if the text were normalized in NFD. If
94 * canonical decomposition is turned off, it is the user's responsibility to
95 * ensure that all text is already in the appropriate form before performing
96 * a comparison or before getting a CollationKey.
97 *
98 * <p>For more information about the collation service see the
99 * <a href="http://userguide.icu-project.org/collation">User Guide</a>.
100 *
101 * <p>Examples of use
102 * <pre>
103 * // Get the Collator for US English and set its strength to PRIMARY
104 * Collator usCollator = Collator.getInstance(Locale.US);
105 * usCollator.setStrength(Collator.PRIMARY);
106 * if (usCollator.compare("abc", "ABC") == 0) {
107 *     System.out.println("Strings are equivalent");
108 * }
109 *
110 * The following example shows how to compare two strings using the
111 * Collator for the default locale.
112 *
113 * // Compare two strings in the default locale
114 * Collator myCollator = Collator.getInstance();
115 * myCollator.setDecomposition(NO_DECOMPOSITION);
116 * if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
117 *     System.out.println("&agrave;&#92;u0325 is not equals to a&#92;u0325&#768; without decomposition");
118 *     myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
119 *     if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
120 *         System.out.println("Error: &agrave;&#92;u0325 should be equals to a&#92;u0325&#768; with decomposition");
121 *     }
122 *     else {
123 *         System.out.println("&agrave;&#92;u0325 is equals to a&#92;u0325&#768; with decomposition");
124 *     }
125 * }
126 * else {
127 *     System.out.println("Error: &agrave;&#92;u0325 should be not equals to a&#92;u0325&#768; without decomposition");
128 * }
129 * </pre>
130 *
131 * @see RuleBasedCollator
132 * @see CollationKey
133 * @author Syn Wee Quek
134 */
135 public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable
136 {
137     // public data members ---------------------------------------------------
138 
139     /**
140      * Strongest collator strength value. Typically used to denote differences
141      * between base characters. See class documentation for more explanation.
142      * @see #setStrength
143      * @see #getStrength
144      */
145     public final static int PRIMARY = 0;
146 
147     /**
148      * Second level collator strength value.
149      * Accents in the characters are considered secondary differences.
150      * Other differences between letters can also be considered secondary
151      * differences, depending on the language.
152      * See class documentation for more explanation.
153      * @see #setStrength
154      * @see #getStrength
155      */
156     public final static int SECONDARY = 1;
157 
158     /**
159      * Third level collator strength value.
160      * Upper and lower case differences in characters are distinguished at this
161      * strength level. In addition, a variant of a letter differs from the base
162      * form on the tertiary level.
163      * See class documentation for more explanation.
164      * @see #setStrength
165      * @see #getStrength
166      */
167     public final static int TERTIARY = 2;
168 
169     /**
170      * <strong>[icu]</strong> Fourth level collator strength value.
171      * When punctuation is ignored
172      * (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
173      * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
174      * strength, an additional strength level can
175      * be used to distinguish words with and without punctuation.
176      * See class documentation for more explanation.
177      * @see #setStrength
178      * @see #getStrength
179      */
180     public final static int QUATERNARY = 3;
181 
182     /**
183      * Smallest Collator strength value. When all other strengths are equal,
184      * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
185      * values of the NFD form of each string are compared, just in case there
186      * is no difference.
187      * See class documentation for more explanation.
188      * <p>
189      * Note this value is different from JDK's
190      */
191     public final static int IDENTICAL = 15;
192 
193     /**
194      * <strong>[icu] Note:</strong> This is for backwards compatibility with Java APIs only.  It
195      * should not be used, IDENTICAL should be used instead.  ICU's
196      * collation does not support Java's FULL_DECOMPOSITION mode.
197      */
198     public final static int FULL_DECOMPOSITION = IDENTICAL;
199 
200     /**
201      * Decomposition mode value. With NO_DECOMPOSITION set, Strings
202      * will not be decomposed for collation. This is the default
203      * decomposition setting unless otherwise specified by the locale
204      * used to create the Collator.
205      *
206      * <p><strong>Note</strong> this value is different from the JDK's.
207      * @see #CANONICAL_DECOMPOSITION
208      * @see #getDecomposition
209      * @see #setDecomposition
210      */
211     public final static int NO_DECOMPOSITION = 16;
212 
213     /**
214      * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
215      * characters that are canonical variants according to the Unicode standard
216      * will be decomposed for collation.
217      *
218      * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
219      * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
220      * Unicode Technical Report #15</a>.
221      *
222      * @see #NO_DECOMPOSITION
223      * @see #getDecomposition
224      * @see #setDecomposition
225      */
226     public final static int CANONICAL_DECOMPOSITION = 17;
227 
228     /**
229      * Reordering codes for non-script groups that can be reordered under collation.
230      *
231      * @see #getReorderCodes
232      * @see #setReorderCodes
233      * @see #getEquivalentReorderCodes
234      */
235     public static interface ReorderCodes {
236         /**
237          * A special reordering code that is used to specify the default reordering codes for a locale.
238          */
239         public final static int DEFAULT          = -1;  // == UScript.INVALID_CODE
240         /**
241          * A special reordering code that is used to specify no reordering codes.
242          */
243         public final static int NONE          = UScript.UNKNOWN;
244         /**
245          * A special reordering code that is used to specify all other codes used for reordering except
246          * for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
247          */
248         public final static int OTHERS          = UScript.UNKNOWN;
249         /**
250          * Characters with the space property.
251          * This is equivalent to the rule value "space".
252          */
253         public final static int SPACE          = 0x1000;
254         /**
255          * The first entry in the enumeration of reordering groups. This is intended for use in
256          * range checking and enumeration of the reorder codes.
257          */
258         public final static int FIRST          = SPACE;
259         /**
260          * Characters with the punctuation property.
261          * This is equivalent to the rule value "punct".
262          */
263         public final static int PUNCTUATION    = 0x1001;
264         /**
265          * Characters with the symbol property.
266          * This is equivalent to the rule value "symbol".
267          */
268         public final static int SYMBOL         = 0x1002;
269         /**
270          * Characters with the currency property.
271          * This is equivalent to the rule value "currency".
272          */
273         public final static int CURRENCY       = 0x1003;
274         /**
275          * Characters with the digit property.
276          * This is equivalent to the rule value "digit".
277          */
278         public final static int DIGIT          = 0x1004;
279         /**
280          * One more than the highest normal ReorderCodes value.
281          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
282          * @hide unsupported on Android
283          */
284         @Deprecated
285         public final static int LIMIT          = 0x1005;
286     }
287 
288     // public methods --------------------------------------------------------
289 
290     /**
291      * Compares the equality of two Collator objects. Collator objects are equal if they have the same
292      * collation (sorting &amp; searching) behavior.
293      *
294      * <p>The base class checks for null and for equal types.
295      * Subclasses should override.
296      *
297      * @param obj the Collator to compare to.
298      * @return true if this Collator has exactly the same collation behavior as obj, false otherwise.
299      */
300     @Override
equals(Object obj)301     public boolean equals(Object obj) {
302         // Subclasses: Call this method and then add more specific checks.
303         return this == obj || (obj != null && getClass() == obj.getClass());
304     }
305 
306     /**
307      * Generates a hash code for this Collator object.
308      *
309      * <p>The implementation exists just for consistency with {@link #equals(Object)}
310      * implementation in this class and does not generate a useful hash code.
311      * Subclasses should override this implementation.
312      *
313      * @return a hash code value.
314      */
315     @Override
hashCode()316     public int hashCode() {
317         // Dummy return to prevent compile warnings.
318         return 0;
319     }
320 
321     // public setters --------------------------------------------------------
322 
checkNotFrozen()323     private void checkNotFrozen() {
324         if (isFrozen()) {
325             throw new UnsupportedOperationException("Attempt to modify frozen Collator");
326         }
327     }
328 
329     /**
330      * Sets this Collator's strength attribute. The strength attribute
331      * determines the minimum level of difference considered significant
332      * during comparison.
333      *
334      * <p>The base class method does nothing. Subclasses should override it if appropriate.
335      *
336      * <p>See the Collator class description for an example of use.
337      * @param newStrength the new strength value.
338      * @see #getStrength
339      * @see #PRIMARY
340      * @see #SECONDARY
341      * @see #TERTIARY
342      * @see #QUATERNARY
343      * @see #IDENTICAL
344      * @throws IllegalArgumentException if the new strength value is not valid.
345      */
setStrength(int newStrength)346     public void setStrength(int newStrength)
347     {
348         checkNotFrozen();
349     }
350 
351     /**
352      * @return this, for chaining
353      * @deprecated This API is ICU internal only.
354      * @hide original deprecated declaration
355      * @hide draft / provisional / internal are hidden on Android
356      */
357     @Deprecated
setStrength2(int newStrength)358     public Collator setStrength2(int newStrength)
359     {
360         setStrength(newStrength);
361         return this;
362     }
363 
364     /**
365      * Sets the decomposition mode of this Collator.  Setting this
366      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
367      * Collator to handle un-normalized text properly, producing the
368      * same results as if the text were normalized. If
369      * NO_DECOMPOSITION is set, it is the user's responsibility to
370      * insure that all text is already in the appropriate form before
371      * a comparison or before getting a CollationKey. Adjusting
372      * decomposition mode allows the user to select between faster and
373      * more complete collation behavior.
374      *
375      * <p>Since a great many of the world's languages do not require
376      * text normalization, most locales set NO_DECOMPOSITION as the
377      * default decomposition mode.
378      *
379      * <p>The base class method does nothing. Subclasses should override it if appropriate.
380      *
381      * <p>See getDecomposition for a description of decomposition
382      * mode.
383      *
384      * @param decomposition the new decomposition mode
385      * @see #getDecomposition
386      * @see #NO_DECOMPOSITION
387      * @see #CANONICAL_DECOMPOSITION
388      * @throws IllegalArgumentException If the given value is not a valid
389      *            decomposition mode.
390      */
setDecomposition(int decomposition)391     public void setDecomposition(int decomposition)
392     {
393         checkNotFrozen();
394     }
395 
396     /**
397      * Sets the reordering codes for this collator.
398      * Collation reordering allows scripts and some other groups of characters
399      * to be moved relative to each other. This reordering is done on top of
400      * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
401      * at the start and/or the end of the collation order. These groups are specified using
402      * UScript codes and {@link Collator.ReorderCodes} entries.
403      *
404      * <p>By default, reordering codes specified for the start of the order are placed in the
405      * order given after several special non-script blocks. These special groups of characters
406      * are space, punctuation, symbol, currency, and digit. These special groups are represented with
407      * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
408      * these special non-script groups if those special groups are explicitly specified in the reordering.
409      *
410      * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS}
411      * stands for any script that is not explicitly
412      * mentioned in the list of reordering codes given. Anything that is after OTHERS
413      * will go at the very end of the reordering in the order given.
414      *
415      * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT}
416      * will reset the reordering for this collator
417      * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
418      * was specified when this collator was created from resource data or from rules. The
419      * DEFAULT code <b>must</b> be the sole code supplied when it is used.
420      * If not, then an {@link IllegalArgumentException} will be thrown.
421      *
422      * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE}
423      * will remove any reordering for this collator.
424      * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
425      * NONE code <b>must</b> be the sole code supplied when it is used.
426      *
427      * @param order the reordering codes to apply to this collator; if this is null or an empty array
428      * then this clears any existing reordering
429      * @see #getReorderCodes
430      * @see #getEquivalentReorderCodes
431      * @see Collator.ReorderCodes
432      * @see UScript
433      */
setReorderCodes(int... order)434     public void setReorderCodes(int... order)
435     {
436         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
437     }
438 
439     // public getters --------------------------------------------------------
440 
441     /**
442      * Returns the Collator for the current default locale.
443      * The default locale is determined by java.util.Locale.getDefault().
444      * @return the Collator for the default locale (for example, en_US) if it
445      *         is created successfully. Otherwise if there is no Collator
446      *         associated with the current locale, the root collator
447      *         will be returned.
448      * @see java.util.Locale#getDefault()
449      * @see #getInstance(Locale)
450      */
getInstance()451     public static final Collator getInstance()
452     {
453         return getInstance(ULocale.getDefault());
454     }
455 
456     /**
457      * Clones the collator.
458      * @return a clone of this collator.
459      */
460     @Override
clone()461     public Object clone() throws CloneNotSupportedException {
462         return super.clone();
463     }
464 
465     // begin registry stuff
466 
467     /**
468      * A factory used with registerFactory to register multiple collators and provide
469      * display names for them.  If standard locale display names are sufficient,
470      * Collator instances may be registered instead.
471      * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
472      * ULocale instead of Locale.  Instead of overriding createCollator(Locale),
473      * new implementations should override createCollator(ULocale).  Note that
474      * one of these two methods <b>MUST</b> be overridden or else an infinite
475      * loop will occur.
476      * @hide unsupported on Android
477      */
478     public static abstract class CollatorFactory {
479         /**
480          * Return true if this factory will be visible.  Default is true.
481          * If not visible, the locales supported by this factory will not
482          * be listed by getAvailableLocales.
483          *
484          * @return true if this factory is visible
485          */
visible()486         public boolean visible() {
487             return true;
488         }
489 
490         /**
491          * Return an instance of the appropriate collator.  If the locale
492          * is not supported, return null.
493          * <b>Note:</b> as of ICU4J 3.2, implementations should override
494          * this method instead of createCollator(Locale).
495          * @param loc the locale for which this collator is to be created.
496          * @return the newly created collator.
497          */
createCollator(ULocale loc)498         public Collator createCollator(ULocale loc) {
499             return createCollator(loc.toLocale());
500         }
501 
502         /**
503          * Return an instance of the appropriate collator.  If the locale
504          * is not supported, return null.
505          * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
506          * createCollator(ULocale) instead of this method, and inherit this
507          * method's implementation.  This method is no longer abstract
508          * and instead delegates to createCollator(ULocale).
509          * @param loc the locale for which this collator is to be created.
510          * @return the newly created collator.
511          */
createCollator(Locale loc)512          public Collator createCollator(Locale loc) {
513             return createCollator(ULocale.forLocale(loc));
514         }
515 
516         /**
517          * Return the name of the collator for the objectLocale, localized for the displayLocale.
518          * If objectLocale is not visible or not defined by the factory, return null.
519          * @param objectLocale the locale identifying the collator
520          * @param displayLocale the locale for which the display name of the collator should be localized
521          * @return the display name
522          */
getDisplayName(Locale objectLocale, Locale displayLocale)523         public String getDisplayName(Locale objectLocale, Locale displayLocale) {
524             return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
525         }
526 
527         /**
528          * Return the name of the collator for the objectLocale, localized for the displayLocale.
529          * If objectLocale is not visible or not defined by the factory, return null.
530          * @param objectLocale the locale identifying the collator
531          * @param displayLocale the locale for which the display name of the collator should be localized
532          * @return the display name
533          */
getDisplayName(ULocale objectLocale, ULocale displayLocale)534         public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
535             if (visible()) {
536                 Set<String> supported = getSupportedLocaleIDs();
537                 String name = objectLocale.getBaseName();
538                 if (supported.contains(name)) {
539                     return objectLocale.getDisplayName(displayLocale);
540                 }
541             }
542             return null;
543         }
544 
545         /**
546          * Return an unmodifiable collection of the locale names directly
547          * supported by this factory.
548          *
549          * @return the set of supported locale IDs.
550          */
getSupportedLocaleIDs()551         public abstract Set<String> getSupportedLocaleIDs();
552 
553         /**
554          * Empty default constructor.
555          */
CollatorFactory()556         protected CollatorFactory() {
557         }
558     }
559 
560     static abstract class ServiceShim {
getInstance(ULocale l)561         abstract Collator getInstance(ULocale l);
registerInstance(Collator c, ULocale l)562         abstract Object registerInstance(Collator c, ULocale l);
registerFactory(CollatorFactory f)563         abstract Object registerFactory(CollatorFactory f);
unregister(Object k)564         abstract boolean unregister(Object k);
getAvailableLocales()565         abstract Locale[] getAvailableLocales(); // TODO remove
getAvailableULocales()566         abstract ULocale[] getAvailableULocales();
getDisplayName(ULocale ol, ULocale dl)567         abstract String getDisplayName(ULocale ol, ULocale dl);
568     }
569 
570     private static ServiceShim shim;
getShim()571     private static ServiceShim getShim() {
572         // Note: this instantiation is safe on loose-memory-model configurations
573         // despite lack of synchronization, since the shim instance has no state--
574         // it's all in the class init.  The worst problem is we might instantiate
575         // two shim instances, but they'll share the same state so that's ok.
576         if (shim == null) {
577             try {
578                 Class<?> cls = Class.forName("android.icu.text.CollatorServiceShim");
579                 shim = (ServiceShim)cls.newInstance();
580             }
581             catch (MissingResourceException e)
582             {
583                 ///CLOVER:OFF
584                 throw e;
585                 ///CLOVER:ON
586             }
587             catch (Exception e) {
588                 ///CLOVER:OFF
589                 if(DEBUG){
590                     e.printStackTrace();
591                 }
592                 throw new ICUException(e);
593                 ///CLOVER:ON
594             }
595         }
596         return shim;
597     }
598 
599     /**
600      * Simpler/faster methods for ASCII than ones based on Unicode data.
601      * TODO: There should be code like this somewhere already??
602      */
603     private static final class ASCII {
equalIgnoreCase(CharSequence left, CharSequence right)604         static boolean equalIgnoreCase(CharSequence left, CharSequence right) {
605             int length = left.length();
606             if (length != right.length()) { return false; }
607             for (int i = 0; i < length; ++i) {
608                 char lc = left.charAt(i);
609                 char rc = right.charAt(i);
610                 if (lc == rc) { continue; }
611                 if ('A' <= lc && lc <= 'Z') {
612                     if ((lc + 0x20) == rc) { continue; }
613                 } else if ('A' <= rc && rc <= 'Z') {
614                     if ((rc + 0x20) == lc) { continue; }
615                 }
616                 return false;
617             }
618             return true;
619         }
620     }
621 
getYesOrNo(String keyword, String s)622     private static final boolean getYesOrNo(String keyword, String s) {
623         if (ASCII.equalIgnoreCase(s, "yes")) {
624             return true;
625         }
626         if (ASCII.equalIgnoreCase(s, "no")) {
627             return false;
628         }
629         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
630     }
631 
getIntValue(String keyword, String s, String... values)632     private static final int getIntValue(String keyword, String s, String... values) {
633         for (int i = 0; i < values.length; ++i) {
634             if (ASCII.equalIgnoreCase(s, values[i])) {
635                 return i;
636             }
637         }
638         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
639     }
640 
getReorderCode(String keyword, String s)641     private static final int getReorderCode(String keyword, String s) {
642         return Collator.ReorderCodes.FIRST +
643                 getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit");
644         // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
645         // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
646         // Avoid introducing synonyms/aliases.
647     }
648 
649     /**
650      * Sets collation attributes according to locale keywords. See
651      * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
652      *
653      * Using "alias" keywords and values where defined:
654      * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
655      * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
656      */
setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc)657     private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) {
658         // Check for collation keywords that were already deprecated
659         // before any were supported in createInstance() (except for "collation").
660         String value = loc.getKeywordValue("colHiraganaQuaternary");
661         if (value != null) {
662             throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary");
663         }
664         value = loc.getKeywordValue("variableTop");
665         if (value != null) {
666             throw new UnsupportedOperationException("locale keyword vt/variableTop");
667         }
668         // Parse known collation keywords, ignore others.
669         value = loc.getKeywordValue("colStrength");
670         if (value != null) {
671             // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
672             int strength = getIntValue("colStrength", value,
673                     "primary", "secondary", "tertiary", "quaternary", "identical");
674             coll.setStrength(strength <= Collator.QUATERNARY ? strength : Collator.IDENTICAL);
675         }
676         value = loc.getKeywordValue("colBackwards");
677         if (value != null) {
678             if (rbc != null) {
679                 rbc.setFrenchCollation(getYesOrNo("colBackwards", value));
680             } else {
681                 throw new UnsupportedOperationException(
682                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
683             }
684         }
685         value = loc.getKeywordValue("colCaseLevel");
686         if (value != null) {
687             if (rbc != null) {
688                 rbc.setCaseLevel(getYesOrNo("colCaseLevel", value));
689             } else {
690                 throw new UnsupportedOperationException(
691                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
692             }
693         }
694         value = loc.getKeywordValue("colCaseFirst");
695         if (value != null) {
696             if (rbc != null) {
697                 int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper");
698                 if (cf == 0) {
699                     rbc.setLowerCaseFirst(false);
700                     rbc.setUpperCaseFirst(false);
701                 } else if (cf == 1) {
702                     rbc.setLowerCaseFirst(true);
703                 } else /* cf == 2 */ {
704                     rbc.setUpperCaseFirst(true);
705                 }
706             } else {
707                 throw new UnsupportedOperationException(
708                         "locale keyword kf/colCaseFirst only settable for RuleBasedCollator");
709             }
710         }
711         value = loc.getKeywordValue("colAlternate");
712         if (value != null) {
713             if (rbc != null) {
714                 rbc.setAlternateHandlingShifted(
715                         getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0);
716             } else {
717                 throw new UnsupportedOperationException(
718                         "locale keyword ka/colAlternate only settable for RuleBasedCollator");
719             }
720         }
721         value = loc.getKeywordValue("colNormalization");
722         if (value != null) {
723             coll.setDecomposition(getYesOrNo("colNormalization", value) ?
724                     Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION);
725         }
726         value = loc.getKeywordValue("colNumeric");
727         if (value != null) {
728             if (rbc != null) {
729                 rbc.setNumericCollation(getYesOrNo("colNumeric", value));
730             } else {
731                 throw new UnsupportedOperationException(
732                         "locale keyword kn/colNumeric only settable for RuleBasedCollator");
733             }
734         }
735         value = loc.getKeywordValue("colReorder");
736         if (value != null) {
737             int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST];
738             int codesLength = 0;
739             int scriptNameStart = 0;
740             for (;;) {
741                 if (codesLength == codes.length) {
742                     throw new IllegalArgumentException(
743                             "too many script codes for colReorder locale keyword: " + value);
744                 }
745                 int limit = scriptNameStart;
746                 while (limit < value.length() && value.charAt(limit) != '-') { ++limit; }
747                 String scriptName = value.substring(scriptNameStart, limit);
748                 int code;
749                 if (scriptName.length() == 4) {
750                     // Strict parsing, accept only 4-letter script codes, not long names.
751                     code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName);
752                 } else {
753                     code = getReorderCode("colReorder", scriptName);
754                 }
755                 codes[codesLength++] = code;
756                 if (limit == value.length()) { break; }
757                 scriptNameStart = limit + 1;
758             }
759             if (codesLength == 0) {
760                 throw new IllegalArgumentException("no script codes for colReorder locale keyword");
761             }
762             int[] args = new int[codesLength];
763             System.arraycopy(codes, 0, args, 0, codesLength);
764             coll.setReorderCodes(args);
765         }
766         value = loc.getKeywordValue("kv");
767         if (value != null) {
768             coll.setMaxVariable(getReorderCode("kv", value));
769         }
770     }
771 
772     /**
773      * <strong>[icu]</strong> Returns the Collator for the desired locale.
774      *
775      * <p>For some languages, multiple collation types are available;
776      * for example, "de@collation=phonebook".
777      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
778      * in the old locale extension syntax ("el@colCaseFirst=upper")
779      * or in language tag syntax ("el-u-kf-upper").
780      * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
781      *
782      * @param locale the desired locale.
783      * @return Collator for the desired locale if it is created successfully.
784      *         Otherwise if there is no Collator
785      *         associated with the current locale, the root collator will
786      *         be returned.
787      * @see java.util.Locale
788      * @see java.util.ResourceBundle
789      * @see #getInstance(Locale)
790      * @see #getInstance()
791      */
getInstance(ULocale locale)792     public static final Collator getInstance(ULocale locale) {
793         // fetching from service cache is faster than instantiation
794         if (locale == null) {
795             locale = ULocale.getDefault();
796         }
797         Collator coll = getShim().getInstance(locale);
798         if (!locale.getName().equals(locale.getBaseName())) {  // any keywords?
799             setAttributesFromKeywords(locale, coll,
800                     (coll instanceof RuleBasedCollator) ? (RuleBasedCollator)coll : null);
801         }
802         return coll;
803     }
804 
805     /**
806      * Returns the Collator for the desired locale.
807      *
808      * <p>For some languages, multiple collation types are available;
809      * for example, "de-u-co-phonebk".
810      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
811      * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale})
812      * or in language tag syntax ("el-u-kf-upper").
813      * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
814      *
815      * @param locale the desired locale.
816      * @return Collator for the desired locale if it is created successfully.
817      *         Otherwise if there is no Collator
818      *         associated with the current locale, the root collator will
819      *         be returned.
820      * @see java.util.Locale
821      * @see java.util.ResourceBundle
822      * @see #getInstance(ULocale)
823      * @see #getInstance()
824      */
getInstance(Locale locale)825     public static final Collator getInstance(Locale locale) {
826         return getInstance(ULocale.forLocale(locale));
827     }
828 
829     /**
830      * <strong>[icu]</strong> Registers a collator as the default collator for the provided locale.  The
831      * collator should not be modified after it is registered.
832      *
833      * <p>Because ICU may choose to cache Collator objects internally, this must
834      * be called at application startup, prior to any calls to
835      * Collator.getInstance to avoid undefined behavior.
836      *
837      * @param collator the collator to register
838      * @param locale the locale for which this is the default collator
839      * @return an object that can be used to unregister the registered collator.
840      *
841      * @hide unsupported on Android
842      */
registerInstance(Collator collator, ULocale locale)843     public static final Object registerInstance(Collator collator, ULocale locale) {
844         return getShim().registerInstance(collator, locale);
845     }
846 
847     /**
848      * <strong>[icu]</strong> Registers a collator factory.
849      *
850      * <p>Because ICU may choose to cache Collator objects internally, this must
851      * be called at application startup, prior to any calls to
852      * Collator.getInstance to avoid undefined behavior.
853      *
854      * @param factory the factory to register
855      * @return an object that can be used to unregister the registered factory.
856      *
857      * @hide unsupported on Android
858      */
registerFactory(CollatorFactory factory)859     public static final Object registerFactory(CollatorFactory factory) {
860         return getShim().registerFactory(factory);
861     }
862 
863     /**
864      * <strong>[icu]</strong> Unregisters a collator previously registered using registerInstance.
865      * @param registryKey the object previously returned by registerInstance.
866      * @return true if the collator was successfully unregistered.
867      * @hide unsupported on Android
868      */
unregister(Object registryKey)869     public static final boolean unregister(Object registryKey) {
870         if (shim == null) {
871             return false;
872         }
873         return shim.unregister(registryKey);
874     }
875 
876     /**
877      * Returns the set of locales, as Locale objects, for which collators
878      * are installed.  Note that Locale objects do not support RFC 3066.
879      * @return the list of locales in which collators are installed.
880      * This list includes any that have been registered, in addition to
881      * those that are installed with ICU4J.
882      */
getAvailableLocales()883     public static Locale[] getAvailableLocales() {
884         // TODO make this wrap getAvailableULocales later
885         if (shim == null) {
886             return ICUResourceBundle.getAvailableLocales(
887                 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
888         }
889         return shim.getAvailableLocales();
890     }
891 
892     /**
893      * <strong>[icu]</strong> Returns the set of locales, as ULocale objects, for which collators
894      * are installed.  ULocale objects support RFC 3066.
895      * @return the list of locales in which collators are installed.
896      * This list includes any that have been registered, in addition to
897      * those that are installed with ICU4J.
898      */
getAvailableULocales()899     public static final ULocale[] getAvailableULocales() {
900         if (shim == null) {
901             return ICUResourceBundle.getAvailableULocales(
902                 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
903         }
904         return shim.getAvailableULocales();
905     }
906 
907     /**
908      * The list of keywords for this service.  This must be kept in sync with
909      * the resource data.
910      */
911     private static final String[] KEYWORDS = { "collation" };
912 
913     /**
914      * The resource name for this service.  Note that this is not the same as
915      * the keyword for this service.
916      */
917     private static final String RESOURCE = "collations";
918 
919     /**
920      * The resource bundle base name for this service.
921      * *since ICU 3.0
922      */
923 
924     private static final String BASE = ICUData.ICU_COLLATION_BASE_NAME;
925 
926     /**
927      * <strong>[icu]</strong> Returns an array of all possible keywords that are relevant to
928      * collation. At this point, the only recognized keyword for this
929      * service is "collation".
930      * @return an array of valid collation keywords.
931      * @see #getKeywordValues
932      */
getKeywords()933     public static final String[] getKeywords() {
934         return KEYWORDS;
935     }
936 
937     /**
938      * <strong>[icu]</strong> Given a keyword, returns an array of all values for
939      * that keyword that are currently in use.
940      * @param keyword one of the keywords returned by getKeywords.
941      * @see #getKeywords
942      */
getKeywordValues(String keyword)943     public static final String[] getKeywordValues(String keyword) {
944         if (!keyword.equals(KEYWORDS[0])) {
945             throw new IllegalArgumentException("Invalid keyword: " + keyword);
946         }
947         return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
948     }
949 
950     /**
951      * <strong>[icu]</strong> Given a key and a locale, returns an array of string values in a preferred
952      * order that would make a difference. These are all and only those values where
953      * the open (creation) of the service with the locale formed from the input locale
954      * plus input keyword and that value has different behavior than creation with the
955      * input locale alone.
956      * @param key           one of the keys supported by this service.  For now, only
957      *                      "collation" is supported.
958      * @param locale        the locale
959      * @param commonlyUsed  if set to true it will return only commonly used values
960      *                      with the given locale in preferred order.  Otherwise,
961      *                      it will return all the available values for the locale.
962      * @return an array of string values for the given key and the locale.
963      */
getKeywordValuesForLocale(String key, ULocale locale, boolean commonlyUsed)964     public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
965                                                            boolean commonlyUsed) {
966         // Note: The parameter commonlyUsed is not used.
967         // The switch is in the method signature for consistency
968         // with other locale services.
969 
970         // Read available collation values from collation bundles.
971         ICUResourceBundle bundle = (ICUResourceBundle)
972                 UResourceBundle.getBundleInstance(
973                         ICUData.ICU_COLLATION_BASE_NAME, locale);
974         KeywordsSink sink = new KeywordsSink();
975         bundle.getAllItemsWithFallback("collations", sink);
976         return sink.values.toArray(new String[sink.values.size()]);
977     }
978 
979     private static final class KeywordsSink extends UResource.Sink {
980         LinkedList<String> values = new LinkedList<String>();
981         boolean hasDefault = false;
982 
983         @Override
put(UResource.Key key, UResource.Value value, boolean noFallback)984         public void put(UResource.Key key, UResource.Value value, boolean noFallback) {
985             UResource.Table collations = value.getTable();
986             for (int i = 0; collations.getKeyAndValue(i, key, value); ++i) {
987                 int type = value.getType();
988                 if (type == UResourceBundle.STRING) {
989                     if (!hasDefault && key.contentEquals("default")) {
990                         String defcoll = value.getString();
991                         if (!defcoll.isEmpty()) {
992                             values.remove(defcoll);
993                             values.addFirst(defcoll);
994                             hasDefault = true;
995                         }
996                     }
997                 } else if (type == UResourceBundle.TABLE && !key.startsWith("private-")) {
998                     String collkey = key.toString();
999                     if (!values.contains(collkey)) {
1000                         values.add(collkey);
1001                     }
1002                 }
1003             }
1004         }
1005     }
1006 
1007     /**
1008      * <strong>[icu]</strong> Returns the functionally equivalent locale for the given
1009      * requested locale, with respect to given keyword, for the
1010      * collation service.  If two locales return the same result, then
1011      * collators instantiated for these locales will behave
1012      * equivalently.  The converse is not always true; two collators
1013      * may in fact be equivalent, but return different results, due to
1014      * internal details.  The return result has no other meaning than
1015      * that stated above, and implies nothing as to the relationship
1016      * between the two locales.  This is intended for use by
1017      * applications who wish to cache collators, or otherwise reuse
1018      * collators when possible.  The functional equivalent may change
1019      * over time.  For more information, please see the <a
1020      * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
1021      * Locales and Services</a> section of the ICU User Guide.
1022      * @param keyword a particular keyword as enumerated by
1023      * getKeywords.
1024      * @param locID The requested locale
1025      * @param isAvailable If non-null, isAvailable[0] will receive and
1026      * output boolean that indicates whether the requested locale was
1027      * 'available' to the collation service. If non-null, isAvailable
1028      * must have length &gt;= 1.
1029      * @return the locale
1030      */
getFunctionalEquivalent(String keyword, ULocale locID, boolean isAvailable[])1031     public static final ULocale getFunctionalEquivalent(String keyword,
1032                                                         ULocale locID,
1033                                                         boolean isAvailable[]) {
1034         return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE,
1035                                                          keyword, locID, isAvailable, true);
1036     }
1037 
1038     /**
1039      * <strong>[icu]</strong> Returns the functionally equivalent locale for the given
1040      * requested locale, with respect to given keyword, for the
1041      * collation service.
1042      * @param keyword a particular keyword as enumerated by
1043      * getKeywords.
1044      * @param locID The requested locale
1045      * @return the locale
1046      * @see #getFunctionalEquivalent(String,ULocale,boolean[])
1047      */
getFunctionalEquivalent(String keyword, ULocale locID)1048     public static final ULocale getFunctionalEquivalent(String keyword,
1049                                                         ULocale locID) {
1050         return getFunctionalEquivalent(keyword, locID, null);
1051     }
1052 
1053     /**
1054      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
1055      * displayLocale.
1056      * @param objectLocale the locale of the collator
1057      * @param displayLocale the locale for the collator's display name
1058      * @return the display name
1059      */
getDisplayName(Locale objectLocale, Locale displayLocale)1060     static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
1061         return getShim().getDisplayName(ULocale.forLocale(objectLocale),
1062                                         ULocale.forLocale(displayLocale));
1063     }
1064 
1065     /**
1066      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
1067      * displayLocale.
1068      * @param objectLocale the locale of the collator
1069      * @param displayLocale the locale for the collator's display name
1070      * @return the display name
1071      */
getDisplayName(ULocale objectLocale, ULocale displayLocale)1072     static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
1073         return getShim().getDisplayName(objectLocale, displayLocale);
1074     }
1075 
1076     /**
1077      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
1078      * default <code>DISPLAY</code> locale.
1079      * @param objectLocale the locale of the collator
1080      * @return the display name
1081      * @see android.icu.util.ULocale.Category#DISPLAY
1082      */
getDisplayName(Locale objectLocale)1083     static public String getDisplayName(Locale objectLocale) {
1084         return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY));
1085     }
1086 
1087     /**
1088      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
1089      * default <code>DISPLAY</code> locale.
1090      * @param objectLocale the locale of the collator
1091      * @return the display name
1092      * @see android.icu.util.ULocale.Category#DISPLAY
1093      */
getDisplayName(ULocale objectLocale)1094     static public String getDisplayName(ULocale objectLocale) {
1095         return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY));
1096     }
1097 
1098     /**
1099      * Returns this Collator's strength attribute. The strength attribute
1100      * determines the minimum level of difference considered significant.
1101      * <strong>[icu] Note:</strong> This can return QUATERNARY strength, which is not supported by the
1102      * JDK version.
1103      * <p>
1104      * See the Collator class description for more details.
1105      * <p>The base class method always returns {@link #TERTIARY}.
1106      * Subclasses should override it if appropriate.
1107      *
1108      * @return this Collator's current strength attribute.
1109      * @see #setStrength
1110      * @see #PRIMARY
1111      * @see #SECONDARY
1112      * @see #TERTIARY
1113      * @see #QUATERNARY
1114      * @see #IDENTICAL
1115      */
getStrength()1116     public int getStrength()
1117     {
1118         return TERTIARY;
1119     }
1120 
1121     /**
1122      * Returns the decomposition mode of this Collator. The decomposition mode
1123      * determines how Unicode composed characters are handled.
1124      * <p>
1125      * See the Collator class description for more details.
1126      * <p>The base class method always returns {@link #NO_DECOMPOSITION}.
1127      * Subclasses should override it if appropriate.
1128      *
1129      * @return the decomposition mode
1130      * @see #setDecomposition
1131      * @see #NO_DECOMPOSITION
1132      * @see #CANONICAL_DECOMPOSITION
1133      */
getDecomposition()1134     public int getDecomposition()
1135     {
1136         return NO_DECOMPOSITION;
1137     }
1138 
1139     // public other methods -------------------------------------------------
1140 
1141     /**
1142      * Compares the equality of two text Strings using
1143      * this Collator's rules, strength and decomposition mode.  Convenience method.
1144      * @param source the source string to be compared.
1145      * @param target the target string to be compared.
1146      * @return true if the strings are equal according to the collation
1147      *         rules, otherwise false.
1148      * @see #compare
1149      * @throws NullPointerException thrown if either arguments is null.
1150      */
equals(String source, String target)1151     public boolean equals(String source, String target)
1152     {
1153         return (compare(source, target) == 0);
1154     }
1155 
1156     /**
1157      * <strong>[icu]</strong> Returns a UnicodeSet that contains all the characters and sequences tailored
1158      * in this collator.
1159      * @return a pointer to a UnicodeSet object containing all the
1160      *         code points and sequences that may sort differently than
1161      *         in the root collator.
1162      */
getTailoredSet()1163     public UnicodeSet getTailoredSet()
1164     {
1165         return new UnicodeSet(0, 0x10FFFF);
1166     }
1167 
1168     /**
1169      * Compares the source text String to the target text String according to
1170      * this Collator's rules, strength and decomposition mode.
1171      * Returns an integer less than,
1172      * equal to or greater than zero depending on whether the source String is
1173      * less than, equal to or greater than the target String. See the Collator
1174      * class description for an example of use.
1175      *
1176      * @param source the source String.
1177      * @param target the target String.
1178      * @return Returns an integer value. Value is less than zero if source is
1179      *         less than target, value is zero if source and target are equal,
1180      *         value is greater than zero if source is greater than target.
1181      * @see CollationKey
1182      * @see #getCollationKey
1183      * @throws NullPointerException thrown if either argument is null.
1184      */
compare(String source, String target)1185     public abstract int compare(String source, String target);
1186 
1187     /**
1188      * Compares the source Object to the target Object.
1189      *
1190      * @param source the source Object.
1191      * @param target the target Object.
1192      * @return Returns an integer value. Value is less than zero if source is
1193      *         less than target, value is zero if source and target are equal,
1194      *         value is greater than zero if source is greater than target.
1195      * @throws ClassCastException thrown if either arguments cannot be cast to CharSequence.
1196      */
1197     @Override
compare(Object source, Object target)1198     public int compare(Object source, Object target) {
1199         return doCompare((CharSequence)source, (CharSequence)target);
1200     }
1201 
1202     /**
1203      * Compares two CharSequences.
1204      * The base class just calls compare(left.toString(), right.toString()).
1205      * Subclasses should instead implement this method and have the String API call this method.
1206      * @deprecated This API is ICU internal only.
1207      * @hide original deprecated declaration
1208      * @hide draft / provisional / internal are hidden on Android
1209      */
1210     @Deprecated
doCompare(CharSequence left, CharSequence right)1211     protected int doCompare(CharSequence left, CharSequence right) {
1212         return compare(left.toString(), right.toString());
1213     }
1214 
1215     /**
1216      * <p>
1217      * Transforms the String into a CollationKey suitable for efficient
1218      * repeated comparison.  The resulting key depends on the collator's
1219      * rules, strength and decomposition mode.
1220      *
1221      * <p>Note that collation keys are often less efficient than simply doing comparison.
1222      * For more details, see the ICU User Guide.
1223      *
1224      * <p>See the CollationKey class documentation for more information.
1225      * @param source the string to be transformed into a CollationKey.
1226      * @return the CollationKey for the given String based on this Collator's
1227      *         collation rules. If the source String is null, a null
1228      *         CollationKey is returned.
1229      * @see CollationKey
1230      * @see #compare(String, String)
1231      */
getCollationKey(String source)1232     public abstract CollationKey getCollationKey(String source);
1233 
1234     /**
1235      * <strong>[icu]</strong> Returns the simpler form of a CollationKey for the String source following
1236      * the rules of this Collator and stores the result into the user provided argument
1237      * key.  If key has a internal byte array of length that's too small for the result,
1238      * the internal byte array will be grown to the exact required size.
1239      *
1240      * <p>Note that collation keys are often less efficient than simply doing comparison.
1241      * For more details, see the ICU User Guide.
1242      *
1243      * @param source the text String to be transformed into a RawCollationKey
1244      * @return If key is null, a new instance of RawCollationKey will be
1245      *         created and returned, otherwise the user provided key will be
1246      *         returned.
1247      * @see #compare(String, String)
1248      * @see #getCollationKey
1249      * @see RawCollationKey
1250      * @hide unsupported on Android
1251      */
getRawCollationKey(String source, RawCollationKey key)1252     public abstract RawCollationKey getRawCollationKey(String source,
1253                                                        RawCollationKey key);
1254 
1255     /**
1256      * <strong>[icu]</strong> Sets the variable top to the top of the specified reordering group.
1257      * The variable top determines the highest-sorting character
1258      * which is affected by the alternate handling behavior.
1259      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1260      *
1261      * <p>The base class implementation throws an UnsupportedOperationException.
1262      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
1263      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
1264      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
1265      * @return this
1266      * @see #getMaxVariable
1267      */
setMaxVariable(int group)1268     public Collator setMaxVariable(int group) {
1269         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1270     }
1271 
1272     /**
1273      * <strong>[icu]</strong> Returns the maximum reordering group whose characters are affected by
1274      * the alternate handling behavior.
1275      *
1276      * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION.
1277      * @return the maximum variable reordering group.
1278      * @see #setMaxVariable
1279      */
getMaxVariable()1280     public int getMaxVariable() {
1281         return Collator.ReorderCodes.PUNCTUATION;
1282     }
1283 
1284     /**
1285      * <strong>[icu]</strong> Sets the variable top to the primary weight of the specified string.
1286      *
1287      * <p>Beginning with ICU 53, the variable top is pinned to
1288      * the top of one of the supported reordering groups,
1289      * and it must not be beyond the last of those groups.
1290      * See {@link #setMaxVariable(int)}.
1291      *
1292      * @param varTop one or more (if contraction) characters to which the
1293      *               variable top should be set
1294      * @return variable top primary weight
1295      * @exception IllegalArgumentException
1296      *                is thrown if varTop argument is not a valid variable top element. A variable top element is
1297      *                invalid when
1298      *                <ul>
1299      *                <li>it is a contraction that does not exist in the Collation order
1300      *                <li>the variable top is beyond
1301      *                    the last reordering group supported by setMaxVariable()
1302      *                <li>when the varTop argument is null or zero in length.
1303      *                </ul>
1304      * @see #getVariableTop
1305      * @see RuleBasedCollator#setAlternateHandlingShifted
1306      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
1307      * @hide original deprecated declaration
1308      */
1309     @Deprecated
setVariableTop(String varTop)1310     public abstract int setVariableTop(String varTop);
1311 
1312     /**
1313      * <strong>[icu]</strong> Gets the variable top value of a Collator.
1314      *
1315      * @return the variable top primary weight
1316      * @see #getMaxVariable
1317      */
getVariableTop()1318     public abstract int getVariableTop();
1319 
1320     /**
1321      * <strong>[icu]</strong> Sets the variable top to the specified primary weight.
1322      *
1323      * <p>Beginning with ICU 53, the variable top is pinned to
1324      * the top of one of the supported reordering groups,
1325      * and it must not be beyond the last of those groups.
1326      * See {@link #setMaxVariable(int)}.
1327      *
1328      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
1329      * @see #getVariableTop
1330      * @see #setVariableTop(String)
1331      * @deprecated ICU 53 Call setMaxVariable() instead.
1332      * @hide original deprecated declaration
1333      */
1334     @Deprecated
setVariableTop(int varTop)1335     public abstract void setVariableTop(int varTop);
1336 
1337     /**
1338      * <strong>[icu]</strong> Returns the version of this collator object.
1339      * @return the version object associated with this collator
1340      */
getVersion()1341     public abstract VersionInfo getVersion();
1342 
1343     /**
1344      * <strong>[icu]</strong> Returns the UCA version of this collator object.
1345      * @return the version object associated with this collator
1346      */
getUCAVersion()1347     public abstract VersionInfo getUCAVersion();
1348 
1349     /**
1350      * Retrieves the reordering codes for this collator.
1351      * These reordering codes are a combination of UScript codes and ReorderCodes.
1352      * @return a copy of the reordering codes for this collator;
1353      * if none are set then returns an empty array
1354      * @see #setReorderCodes
1355      * @see #getEquivalentReorderCodes
1356      * @see Collator.ReorderCodes
1357      * @see UScript
1358      */
getReorderCodes()1359     public int[] getReorderCodes()
1360     {
1361         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1362     }
1363 
1364     /**
1365      * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
1366      * codes are grouped and must reorder together.
1367      * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
1368      * for example Hiragana and Katakana.
1369      *
1370      * @param reorderCode The reorder code to determine equivalence for.
1371      * @return the set of all reorder codes in the same group as the given reorder code.
1372      * @see #setReorderCodes
1373      * @see #getReorderCodes
1374      * @see Collator.ReorderCodes
1375      * @see UScript
1376      */
getEquivalentReorderCodes(int reorderCode)1377     public static int[] getEquivalentReorderCodes(int reorderCode) {
1378         CollationData baseData = CollationRoot.getData();
1379         return baseData.getEquivalentScripts(reorderCode);
1380     }
1381 
1382 
1383     // Freezable interface implementation -------------------------------------------------
1384 
1385     /**
1386      * Determines whether the object has been frozen or not.
1387      *
1388      * <p>An unfrozen Collator is mutable and not thread-safe.
1389      * A frozen Collator is immutable and thread-safe.
1390      */
1391     @Override
isFrozen()1392     public boolean isFrozen() {
1393         return false;
1394     }
1395 
1396     /**
1397      * Freezes the collator.
1398      * @return the collator itself.
1399      */
1400     @Override
freeze()1401     public Collator freeze() {
1402         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1403     }
1404 
1405     /**
1406      * Provides for the clone operation. Any clone is initially unfrozen.
1407      */
1408     @Override
cloneAsThawed()1409     public Collator cloneAsThawed() {
1410         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1411     }
1412 
1413     /**
1414      * Empty default constructor to make javadocs happy
1415      */
Collator()1416     protected Collator()
1417     {
1418     }
1419 
1420     private static final boolean DEBUG = ICUDebug.enabled("collator");
1421 
1422     // -------- BEGIN ULocale boilerplate --------
1423 
1424     /**
1425      * <strong>[icu]</strong> Returns the locale that was used to create this object, or null.
1426      * This may may differ from the locale requested at the time of
1427      * this object's creation.  For example, if an object is created
1428      * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
1429      * drawn from <tt>en</tt> (the <i>actual</i> locale), and
1430      * <tt>en_US</tt> may be the most specific locale that exists (the
1431      * <i>valid</i> locale).
1432      *
1433      * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
1434      * contains a partial preview implementation.  The * <i>actual</i>
1435      * locale is returned correctly, but the <i>valid</i> locale is
1436      * not, in most cases.
1437      *
1438      * <p>The base class method always returns {@link ULocale#ROOT}.
1439      * Subclasses should override it if appropriate.
1440      *
1441      * @param type type of information requested, either {@link
1442      * android.icu.util.ULocale#VALID_LOCALE} or {@link
1443      * android.icu.util.ULocale#ACTUAL_LOCALE}.
1444      * @return the information specified by <i>type</i>, or null if
1445      * this object was not constructed from locale data.
1446      * @see android.icu.util.ULocale
1447      * @see android.icu.util.ULocale#VALID_LOCALE
1448      * @see android.icu.util.ULocale#ACTUAL_LOCALE
1449      * @hide draft / provisional / internal are hidden on Android
1450      */
getLocale(ULocale.Type type)1451     public ULocale getLocale(ULocale.Type type) {
1452         return ULocale.ROOT;
1453     }
1454 
1455     /**
1456      * Set information about the locales that were used to create this
1457      * object.  If the object was not constructed from locale data,
1458      * both arguments should be set to null.  Otherwise, neither
1459      * should be null.  The actual locale must be at the same level or
1460      * less specific than the valid locale.  This method is intended
1461      * for use by factories or other entities that create objects of
1462      * this class.
1463      *
1464      * <p>The base class method does nothing. Subclasses should override it if appropriate.
1465      *
1466      * @param valid the most specific locale containing any resource
1467      * data, or null
1468      * @param actual the locale containing data used to construct this
1469      * object, or null
1470      * @see android.icu.util.ULocale
1471      * @see android.icu.util.ULocale#VALID_LOCALE
1472      * @see android.icu.util.ULocale#ACTUAL_LOCALE
1473      */
setLocale(ULocale valid, ULocale actual)1474     void setLocale(ULocale valid, ULocale actual) {}
1475 
1476     // -------- END ULocale boilerplate --------
1477 }
1478