• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 package android.icu.text;
11 
12 import java.lang.reflect.InvocationTargetException;
13 import java.lang.reflect.Method;
14 import java.text.CharacterIterator;
15 import java.text.ParseException;
16 import java.util.Arrays;
17 import java.util.concurrent.locks.Lock;
18 import java.util.concurrent.locks.ReentrantLock;
19 
20 import android.icu.impl.ClassLoaderUtil;
21 import android.icu.impl.Normalizer2Impl;
22 import android.icu.impl.Normalizer2Impl.ReorderingBuffer;
23 import android.icu.impl.Utility;
24 import android.icu.impl.coll.BOCSU;
25 import android.icu.impl.coll.Collation;
26 import android.icu.impl.coll.CollationCompare;
27 import android.icu.impl.coll.CollationData;
28 import android.icu.impl.coll.CollationFastLatin;
29 import android.icu.impl.coll.CollationIterator;
30 import android.icu.impl.coll.CollationKeys;
31 import android.icu.impl.coll.CollationKeys.SortKeyByteSink;
32 import android.icu.impl.coll.CollationLoader;
33 import android.icu.impl.coll.CollationRoot;
34 import android.icu.impl.coll.CollationSettings;
35 import android.icu.impl.coll.CollationTailoring;
36 import android.icu.impl.coll.ContractionsAndExpansions;
37 import android.icu.impl.coll.FCDUTF16CollationIterator;
38 import android.icu.impl.coll.SharedObject;
39 import android.icu.impl.coll.TailoredSet;
40 import android.icu.impl.coll.UTF16CollationIterator;
41 import android.icu.lang.UScript;
42 import android.icu.util.ULocale;
43 import android.icu.util.VersionInfo;
44 
45 /**
46  * <p>
47  * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule
48  * sets. RuleBasedCollator is designed to be fully compliant to the <a
49  * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651.
50  *
51  * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link android.icu.util.Freezable}.
52  *
53  * <p>
54  * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User
55  * Guide</a> for more information about the collation service before using this class.
56  *
57  * <p>
58  * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
59  * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
60  * argument locale. If a customized collation ordering or attributes is required, use the RuleBasedCollator(String)
61  * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while
62  * re-adjusting the attributes and orders of the characters in the specified rule accordingly.
63  *
64  * <p>
65  * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
66  * is not available, the order eventually falls back to the
67  * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
68  *
69  * <p>
70  * For information about the collation rule syntax and details about customization, please refer to the <a
71  * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the
72  * User Guide.
73  *
74  * <p>
75  * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J:
76  *
77  * <ul>
78  * <li>According to the JDK documentation: <br>
79  * <i>Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range
80  * &#92;U0E40-&#92;U0E44 precedes a Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the range
81  * &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range &#92;U0E81-&#92;U0EAE then the vowel is placed after the
82  * consonant for collation purposes.
83  * <br>
84  * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on.
85  * </i>
86  * <br>
87  * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly
88  * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.</li>
89  * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.</li>
90  * </ul>
91  * <p>
92  * <strong>Examples</strong>
93  * <p>
94  * Creating Customized RuleBasedCollators: <blockquote>
95  *
96  * <pre>
97  * String simple = "&amp; a &lt; b &lt; c &lt; d";
98  * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
99  *
100  * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "
101  *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "
102  *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "
103  *                    + "o , O &lt; p , P &lt; q , Q &lt;r , R &lt;s , S &lt; "
104  *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "
105  *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "
106  *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "
107  *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";
108  * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
109  * </pre>
110  *
111  * </blockquote>
112  *
113  * Concatenating rules to combine <code>Collator</code>s: <blockquote>
114  *
115  * <pre>
116  * // Create an en_US Collator object
117  * RuleBasedCollator en_USCollator = (RuleBasedCollator)
118  *     Collator.getInstance(new Locale("en", "US", ""));
119  * // Create a da_DK Collator object
120  * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
121  *     Collator.getInstance(new Locale("da", "DK", ""));
122  * // Combine the two
123  * // First, get the collation rules from en_USCollator
124  * String en_USRules = en_USCollator.getRules();
125  * // Second, get the collation rules from da_DKCollator
126  * String da_DKRules = da_DKCollator.getRules();
127  * RuleBasedCollator newCollator =
128  *                             new RuleBasedCollator(en_USRules + da_DKRules);
129  * // newCollator has the combined rules
130  * </pre>
131  *
132  * </blockquote>
133  *
134  * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to
135  * the existing rule: <blockquote>
136  *
137  * <pre>
138  * // Create a new Collator object with additional rules
139  * String addRules = "&amp; C &lt; ch, cH, Ch, CH";
140  * RuleBasedCollator myCollator =
141  *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
142  * // myCollator contains the new rules
143  * </pre>
144  *
145  * </blockquote>
146  *
147  * How to change the order of non-spacing accents: <blockquote>
148  *
149  * <pre>
150  * // old rule with main accents
151  * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "
152  *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "
153  *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "
154  *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "
155  *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "
156  *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "
157  *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";
158  * // change the order of accent characters
159  * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";
160  * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
161  * </pre>
162  *
163  * </blockquote>
164  *
165  * Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese
166  * characters in the Japanese <code>Collator</code>: <blockquote>
167  *
168  * <pre>
169  * // get en_US Collator rules
170  * RuleBasedCollator en_USCollator
171  *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
172  * // add a few Japanese characters to sort before English characters
173  * // suppose the last character before the first base letter 'a' in
174  * // the English collation rule is &#92;u2212
175  * String jaString = "&amp; &#92;u2212 &lt;&#92;u3041, &#92;u3042 &lt;&#92;u3043, "
176  *                   + "&#92;u3044";
177  * RuleBasedCollator myJapaneseCollator
178  *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
179  * </pre>
180  *
181  * </blockquote>
182  * <p>
183  * This class is not subclassable
184  *
185  * @author Syn Wee Quek
186  */
187 public final class RuleBasedCollator extends Collator {
188     // public constructors ---------------------------------------------------
189 
190     /**
191      * <p>
192      * Constructor that takes the argument rules for customization.
193      * The collator will be based on the CLDR root collation, with the
194      * attributes and re-ordering of the characters specified in the argument rules.
195      * <p>
196      * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization">
197      * Collation Customization</a> for details on the rule syntax.
198      *
199      * @param rules
200      *            the collation rules to build the collation table from.
201      * @exception ParseException
202      *                and IOException thrown. ParseException thrown when argument rules have an invalid syntax.
203      *                IOException thrown when an error occurred while reading internal data.
204      */
RuleBasedCollator(String rules)205     public RuleBasedCollator(String rules) throws Exception {
206         if (rules == null) {
207             throw new IllegalArgumentException("Collation rules can not be null");
208         }
209         validLocale = ULocale.ROOT;
210         internalBuildTailoring(rules);
211     }
212 
213     /**
214      * Implements from-rule constructors.
215      * @param rules rule string
216      * @throws Exception
217      */
internalBuildTailoring(String rules)218     private final void internalBuildTailoring(String rules) throws Exception {
219         CollationTailoring base = CollationRoot.getRoot();
220         // Most code using Collator does not need to build a Collator from rules.
221         // By using reflection, most code will not have a static dependency on the builder code.
222         // CollationBuilder builder = new CollationBuilder(base);
223         ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass());
224         CollationTailoring t;
225         try {
226             Class<?> builderClass = classLoader.loadClass("android.icu.impl.coll.CollationBuilder");
227             Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base);
228             // builder.parseAndBuild(rules);
229             Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class);
230             t = (CollationTailoring)parseAndBuild.invoke(builder, rules);
231         } catch(InvocationTargetException e) {
232             throw (Exception)e.getTargetException();
233         }
234         t.actualLocale = null;
235         adoptTailoring(t);
236     }
237 
238     // public methods --------------------------------------------------------
239 
240     /**
241      * Clones the RuleBasedCollator
242      *
243      * @return a new instance of this RuleBasedCollator object
244      */
245     @Override
clone()246     public Object clone() throws CloneNotSupportedException {
247         if (isFrozen()) {
248             return this;
249         }
250         return cloneAsThawed();
251     }
252 
initMaxExpansions()253     private final void initMaxExpansions() {
254         synchronized(tailoring) {
255             if (tailoring.maxExpansions == null) {
256                 tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data);
257             }
258         }
259     }
260 
261     /**
262      * Return a CollationElementIterator for the given String.
263      *
264      * @see CollationElementIterator
265      */
getCollationElementIterator(String source)266     public CollationElementIterator getCollationElementIterator(String source) {
267         initMaxExpansions();
268         return new CollationElementIterator(source, this);
269     }
270 
271     /**
272      * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be
273      * preserved since a new copy will be created for use.
274      *
275      * @see CollationElementIterator
276      */
getCollationElementIterator(CharacterIterator source)277     public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
278         initMaxExpansions();
279         CharacterIterator newsource = (CharacterIterator) source.clone();
280         return new CollationElementIterator(newsource, this);
281     }
282 
283     /**
284      * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be
285      * preserved since a new copy will be created for use.
286      *
287      * @see CollationElementIterator
288      */
getCollationElementIterator(UCharacterIterator source)289     public CollationElementIterator getCollationElementIterator(UCharacterIterator source) {
290         initMaxExpansions();
291         return new CollationElementIterator(source, this);
292     }
293 
294     // Freezable interface implementation -------------------------------------------------
295 
296     /**
297      * Determines whether the object has been frozen or not.
298      *
299      * <p>An unfrozen Collator is mutable and not thread-safe.
300      * A frozen Collator is immutable and thread-safe.
301      */
302     @Override
isFrozen()303     public boolean isFrozen() {
304         return frozenLock != null;
305     }
306 
307     /**
308      * Freezes the collator.
309      * @return the collator itself.
310      */
311     @Override
freeze()312     public Collator freeze() {
313         if (!isFrozen()) {
314             frozenLock = new ReentrantLock();
315             if (collationBuffer == null) {
316                 collationBuffer = new CollationBuffer(data);
317             }
318         }
319         return this;
320     }
321 
322     /**
323      * Provides for the clone operation. Any clone is initially unfrozen.
324      */
325     @Override
cloneAsThawed()326     public RuleBasedCollator cloneAsThawed() {
327         try {
328             RuleBasedCollator result = (RuleBasedCollator) super.clone();
329             // since all collation data in the RuleBasedCollator do not change
330             // we can safely assign the result.fields to this collator
331             // except in cases where we can't
332             result.settings = settings.clone();
333             result.collationBuffer = null;
334             result.frozenLock = null;
335             return result;
336         } catch (CloneNotSupportedException e) {
337             // Clone is implemented
338             return null;
339         }
340     }
341 
342     // public setters --------------------------------------------------------
343 
checkNotFrozen()344     private void checkNotFrozen() {
345         if (isFrozen()) {
346             throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator");
347         }
348     }
349 
getOwnedSettings()350     private final CollationSettings getOwnedSettings() {
351         return settings.copyOnWrite();
352     }
353 
getDefaultSettings()354     private final CollationSettings getDefaultSettings() {
355         return tailoring.settings.readOnly();
356     }
357 
358     /**
359      * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
360      * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
361      * correct JIS collation order, distinguishing between Katakana and Hiragana characters.
362      *
363      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
364      * Since ICU 50, this attribute is not settable any more via API functions.
365      * Since CLDR 25/ICU 53, explicit quaternary relations are used
366      * to achieve the same Japanese sort order.
367      *
368      * @param flag
369      *            true if Hiragana Quaternary mode is to be on, false otherwise
370      * @see #setHiraganaQuaternaryDefault
371      * @see #isHiraganaQuaternary
372      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
373      * @hide original deprecated declaration
374      */
375     @Deprecated
setHiraganaQuaternary(boolean flag)376     public void setHiraganaQuaternary(boolean flag) {
377         checkNotFrozen();
378     }
379 
380     /**
381      * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See
382      * setHiraganaQuaternary(boolean) for more details.
383      *
384      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
385      * Since ICU 50, this attribute is not settable any more via API functions.
386      * Since CLDR 25/ICU 53, explicit quaternary relations are used
387      * to achieve the same Japanese sort order.
388      *
389      * @see #setHiraganaQuaternary(boolean)
390      * @see #isHiraganaQuaternary
391      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
392      * @hide original deprecated declaration
393      */
394     @Deprecated
setHiraganaQuaternaryDefault()395     public void setHiraganaQuaternaryDefault() {
396         checkNotFrozen();
397     }
398 
399     /**
400      * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The
401      * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case
402      * characters first.
403      *
404      * @param upperfirst
405      *            true to sort uppercase characters before lowercase characters, false to sort lowercase characters
406      *            before uppercase characters
407      * @see #isLowerCaseFirst
408      * @see #isUpperCaseFirst
409      * @see #setLowerCaseFirst
410      * @see #setCaseFirstDefault
411      */
setUpperCaseFirst(boolean upperfirst)412     public void setUpperCaseFirst(boolean upperfirst) {
413         checkNotFrozen();
414         if (upperfirst == isUpperCaseFirst()) { return; }
415         CollationSettings ownedSettings = getOwnedSettings();
416         ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0);
417         setFastLatinOptions(ownedSettings);
418     }
419 
420     /**
421      * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The
422      * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper
423      * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences.
424      *
425      * @param lowerfirst
426      *            true for sorting lower cased characters before upper cased characters, false to ignore case
427      *            preferences.
428      * @see #isLowerCaseFirst
429      * @see #isUpperCaseFirst
430      * @see #setUpperCaseFirst
431      * @see #setCaseFirstDefault
432      */
setLowerCaseFirst(boolean lowerfirst)433     public void setLowerCaseFirst(boolean lowerfirst) {
434         checkNotFrozen();
435         if (lowerfirst == isLowerCaseFirst()) { return; }
436         CollationSettings ownedSettings = getOwnedSettings();
437         ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0);
438         setFastLatinOptions(ownedSettings);
439     }
440 
441     /**
442      * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See
443      * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details.
444      *
445      * @see #isLowerCaseFirst
446      * @see #isUpperCaseFirst
447      * @see #setLowerCaseFirst(boolean)
448      * @see #setUpperCaseFirst(boolean)
449      */
setCaseFirstDefault()450     public final void setCaseFirstDefault() {
451         checkNotFrozen();
452         CollationSettings defaultSettings = getDefaultSettings();
453         if(settings.readOnly() == defaultSettings) { return; }
454         CollationSettings ownedSettings = getOwnedSettings();
455         ownedSettings.setCaseFirstDefault(defaultSettings.options);
456         setFastLatinOptions(ownedSettings);
457     }
458 
459     /**
460      * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See
461      * setAlternateHandling(boolean) for more details.
462      *
463      * @see #setAlternateHandlingShifted(boolean)
464      * @see #isAlternateHandlingShifted()
465      */
setAlternateHandlingDefault()466     public void setAlternateHandlingDefault() {
467         checkNotFrozen();
468         CollationSettings defaultSettings = getDefaultSettings();
469         if(settings.readOnly() == defaultSettings) { return; }
470         CollationSettings ownedSettings = getOwnedSettings();
471         ownedSettings.setAlternateHandlingDefault(defaultSettings.options);
472         setFastLatinOptions(ownedSettings);
473     }
474 
475     /**
476      * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See
477      * setCaseLevel(boolean) for more details.
478      *
479      * @see #setCaseLevel(boolean)
480      * @see #isCaseLevel
481      */
setCaseLevelDefault()482     public void setCaseLevelDefault() {
483         checkNotFrozen();
484         CollationSettings defaultSettings = getDefaultSettings();
485         if(settings.readOnly() == defaultSettings) { return; }
486         CollationSettings ownedSettings = getOwnedSettings();
487         ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options);
488         setFastLatinOptions(ownedSettings);
489     }
490 
491     /**
492      * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See
493      * setDecomposition(int) for more details.
494      *
495      * @see #getDecomposition
496      * @see #setDecomposition(int)
497      */
setDecompositionDefault()498     public void setDecompositionDefault() {
499         checkNotFrozen();
500         CollationSettings defaultSettings = getDefaultSettings();
501         if(settings.readOnly() == defaultSettings) { return; }
502         CollationSettings ownedSettings = getOwnedSettings();
503         ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options);
504         setFastLatinOptions(ownedSettings);
505     }
506 
507     /**
508      * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See
509      * setFrenchCollation(boolean) for more details.
510      *
511      * @see #isFrenchCollation
512      * @see #setFrenchCollation(boolean)
513      */
setFrenchCollationDefault()514     public void setFrenchCollationDefault() {
515         checkNotFrozen();
516         CollationSettings defaultSettings = getDefaultSettings();
517         if(settings.readOnly() == defaultSettings) { return; }
518         CollationSettings ownedSettings = getOwnedSettings();
519         ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options);
520         setFastLatinOptions(ownedSettings);
521     }
522 
523     /**
524      * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See
525      * setStrength(int) for more details.
526      *
527      * @see #setStrength(int)
528      * @see #getStrength
529      */
setStrengthDefault()530     public void setStrengthDefault() {
531         checkNotFrozen();
532         CollationSettings defaultSettings = getDefaultSettings();
533         if(settings.readOnly() == defaultSettings) { return; }
534         CollationSettings ownedSettings = getOwnedSettings();
535         ownedSettings.setStrengthDefault(defaultSettings.options);
536         setFastLatinOptions(ownedSettings);
537     }
538 
539     /**
540      * Method to set numeric collation to its default value.
541      *
542      * @see #getNumericCollation
543      * @see #setNumericCollation
544      */
setNumericCollationDefault()545     public void setNumericCollationDefault() {
546         checkNotFrozen();
547         CollationSettings defaultSettings = getDefaultSettings();
548         if(settings.readOnly() == defaultSettings) { return; }
549         CollationSettings ownedSettings = getOwnedSettings();
550         ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options);
551         setFastLatinOptions(ownedSettings);
552     }
553 
554     /**
555      * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false,
556      * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted
557      * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture">
558      * French collation</a> for more information.
559      *
560      * @param flag
561      *            true to set the French collation on, false to set it off
562      * @see #isFrenchCollation
563      * @see #setFrenchCollationDefault
564      */
setFrenchCollation(boolean flag)565     public void setFrenchCollation(boolean flag) {
566         checkNotFrozen();
567         if(flag == isFrenchCollation()) { return; }
568         CollationSettings ownedSettings = getOwnedSettings();
569         ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag);
570         setFastLatinOptions(ownedSettings);
571     }
572 
573     /**
574      * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition
575      * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This
576      * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false,
577      * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all
578      * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior
579      * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the
580      * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order.
581      *
582      * @param shifted
583      *            true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior.
584      * @see #isAlternateHandlingShifted
585      * @see #setAlternateHandlingDefault
586      */
setAlternateHandlingShifted(boolean shifted)587     public void setAlternateHandlingShifted(boolean shifted) {
588         checkNotFrozen();
589         if(shifted == isAlternateHandlingShifted()) { return; }
590         CollationSettings ownedSettings = getOwnedSettings();
591         ownedSettings.setAlternateHandlingShifted(shifted);
592         setFastLatinOptions(ownedSettings);
593     }
594 
595     /**
596      * <p>
597      * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known
598      * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level
599      * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value
600      * is false, which means the case level is not generated. The contents of the case level are affected by the case
601      * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable
602      * case level.
603      * <p>
604      * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case
605      * level</a> for more information.
606      *
607      * @param flag
608      *            true if case level sorting is required, false otherwise
609      * @see #setCaseLevelDefault
610      * @see #isCaseLevel
611      */
setCaseLevel(boolean flag)612     public void setCaseLevel(boolean flag) {
613         checkNotFrozen();
614         if(flag == isCaseLevel()) { return; }
615         CollationSettings ownedSettings = getOwnedSettings();
616         ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag);
617         setFastLatinOptions(ownedSettings);
618     }
619 
620     /**
621      * Sets the decomposition mode of this Collator.  Setting this
622      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
623      * Collator to handle un-normalized text properly, producing the
624      * same results as if the text were normalized. If
625      * NO_DECOMPOSITION is set, it is the user's responsibility to
626      * insure that all text is already in the appropriate form before
627      * a comparison or before getting a CollationKey. Adjusting
628      * decomposition mode allows the user to select between faster and
629      * more complete collation behavior.
630      *
631      * <p>Since a great many of the world's languages do not require
632      * text normalization, most locales set NO_DECOMPOSITION as the
633      * default decomposition mode.
634      *
635      * The default decompositon mode for the Collator is
636      * NO_DECOMPOSITON, unless specified otherwise by the locale used
637      * to create the Collator.
638      *
639      * <p>See getDecomposition for a description of decomposition
640      * mode.
641      *
642      * @param decomposition the new decomposition mode
643      * @see #getDecomposition
644      * @see #NO_DECOMPOSITION
645      * @see #CANONICAL_DECOMPOSITION
646      * @throws IllegalArgumentException If the given value is not a valid
647      *            decomposition mode.
648      */
649     @Override
setDecomposition(int decomposition)650     public void setDecomposition(int decomposition)
651     {
652         checkNotFrozen();
653         boolean flag;
654         switch(decomposition) {
655         case NO_DECOMPOSITION:
656             flag = false;
657             break;
658         case CANONICAL_DECOMPOSITION:
659             flag = true;
660             break;
661         default:
662             throw new IllegalArgumentException("Wrong decomposition mode.");
663         }
664         if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; }
665         CollationSettings ownedSettings = getOwnedSettings();
666         ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag);
667         setFastLatinOptions(ownedSettings);
668     }
669 
670     /**
671      * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference
672      * considered significant during comparison.
673      *
674      * <p>See the Collator class description for an example of use.
675      *
676      * @param newStrength
677      *            the new strength value.
678      * @see #getStrength
679      * @see #setStrengthDefault
680      * @see #PRIMARY
681      * @see #SECONDARY
682      * @see #TERTIARY
683      * @see #QUATERNARY
684      * @see #IDENTICAL
685      * @exception IllegalArgumentException
686      *                If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
687      */
688     @Override
setStrength(int newStrength)689     public void setStrength(int newStrength) {
690         checkNotFrozen();
691         if(newStrength == getStrength()) { return; }
692         CollationSettings ownedSettings = getOwnedSettings();
693         ownedSettings.setStrength(newStrength);
694         setFastLatinOptions(ownedSettings);
695     }
696 
697     /**
698      * <strong>[icu]</strong> Sets the variable top to the top of the specified reordering group.
699      * The variable top determines the highest-sorting character
700      * which is affected by the alternate handling behavior.
701      * If that attribute is set to NON_IGNORABLE, then the variable top has no effect.
702      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
703      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
704      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
705      * @return this
706      * @see #getMaxVariable
707      */
708     @Override
setMaxVariable(int group)709     public RuleBasedCollator setMaxVariable(int group) {
710         // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
711         int value;
712         if(group == Collator.ReorderCodes.DEFAULT) {
713             value = -1;  // UCOL_DEFAULT
714         } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) {
715             value = group - Collator.ReorderCodes.FIRST;
716         } else {
717             throw new IllegalArgumentException("illegal max variable group " + group);
718         }
719         int oldValue = settings.readOnly().getMaxVariable();
720         if(value == oldValue) {
721             return this;
722         }
723         CollationSettings defaultSettings = getDefaultSettings();
724         if(settings.readOnly() == defaultSettings) {
725             if(value < 0) {  // UCOL_DEFAULT
726                 return this;
727             }
728         }
729         CollationSettings ownedSettings = getOwnedSettings();
730 
731         if(group == Collator.ReorderCodes.DEFAULT) {
732             group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable();
733         }
734         long varTop = data.getLastPrimaryForGroup(group);
735         assert(varTop != 0);
736         ownedSettings.setMaxVariable(value, defaultSettings.options);
737         ownedSettings.variableTop = varTop;
738         setFastLatinOptions(ownedSettings);
739         return this;
740     }
741 
742     /**
743      * <strong>[icu]</strong> Returns the maximum reordering group whose characters are affected by
744      * the alternate handling behavior.
745      * @return the maximum variable reordering group.
746      * @see #setMaxVariable
747      */
748     @Override
getMaxVariable()749     public int getMaxVariable() {
750         return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable();
751     }
752 
753     /**
754      * <strong>[icu]</strong> Sets the variable top to the primary weight of the specified string.
755      *
756      * <p>Beginning with ICU 53, the variable top is pinned to
757      * the top of one of the supported reordering groups,
758      * and it must not be beyond the last of those groups.
759      * See {@link #setMaxVariable(int)}.
760      *
761      * @param varTop
762      *            one or more (if contraction) characters to which the variable top should be set
763      * @return variable top primary weight
764      * @exception IllegalArgumentException
765      *                is thrown if varTop argument is not a valid variable top element. A variable top element is
766      *                invalid when
767      *                <ul>
768      *                <li>it is a contraction that does not exist in the Collation order
769      *                <li>the variable top is beyond
770      *                    the last reordering group supported by setMaxVariable()
771      *                <li>when the varTop argument is null or zero in length.
772      *                </ul>
773      * @see #getVariableTop
774      * @see RuleBasedCollator#setAlternateHandlingShifted
775      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
776      * @hide original deprecated declaration
777      */
778     @Override
779     @Deprecated
setVariableTop(String varTop)780     public int setVariableTop(String varTop) {
781         checkNotFrozen();
782         if (varTop == null || varTop.length() == 0) {
783             throw new IllegalArgumentException("Variable top argument string can not be null or zero in length.");
784         }
785         boolean numeric = settings.readOnly().isNumeric();
786         long ce1, ce2;
787         if(settings.readOnly().dontCheckFCD()) {
788             UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0);
789             ce1 = ci.nextCE();
790             ce2 = ci.nextCE();
791         } else {
792             FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0);
793             ce1 = ci.nextCE();
794             ce2 = ci.nextCE();
795         }
796         if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) {
797             throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element");
798         }
799         internalSetVariableTop(ce1 >>> 32);
800         return (int)settings.readOnly().variableTop;
801     }
802 
803     /**
804      * <strong>[icu]</strong> Sets the variable top to the specified primary weight.
805      *
806      * <p>Beginning with ICU 53, the variable top is pinned to
807      * the top of one of the supported reordering groups,
808      * and it must not be beyond the last of those groups.
809      * See {@link #setMaxVariable(int)}.
810      *
811      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
812      * @see #getVariableTop
813      * @see #setVariableTop(String)
814      * @deprecated ICU 53 Call setMaxVariable() instead.
815      * @hide original deprecated declaration
816      */
817     @Override
818     @Deprecated
setVariableTop(int varTop)819     public void setVariableTop(int varTop) {
820         checkNotFrozen();
821         internalSetVariableTop(varTop & 0xffffffffL);
822     }
823 
internalSetVariableTop(long varTop)824     private void internalSetVariableTop(long varTop) {
825         if(varTop != settings.readOnly().variableTop) {
826             // Pin the variable top to the end of the reordering group which contains it.
827             // Only a few special groups are supported.
828             int group = data.getGroupForPrimary(varTop);
829             if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) {
830                 throw new IllegalArgumentException("The variable top must be a primary weight in " +
831                         "the space/punctuation/symbols/currency symbols range");
832             }
833             long v = data.getLastPrimaryForGroup(group);
834             assert(v != 0 && v >= varTop);
835             varTop = v;
836             if(varTop != settings.readOnly().variableTop) {
837                 CollationSettings ownedSettings = getOwnedSettings();
838                 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST,
839                         getDefaultSettings().options);
840                 ownedSettings.variableTop = varTop;
841                 setFastLatinOptions(ownedSettings);
842             }
843         }
844     }
845 
846     /**
847      * <strong>[icu]</strong> When numeric collation is turned on, this Collator makes
848      * substrings of digits sort according to their numeric values.
849      *
850      * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest
851      * digit substring that can be treated as a single unit is
852      * 254 digits (not counting leading zeros). If a digit substring is
853      * longer than that, the digits beyond the limit will be treated as a
854      * separate digit substring.
855      *
856      * <p>A "digit" in this sense is a code point with General_Category=Nd,
857      * which does not include circled numbers, roman numerals, etc.
858      * Only a contiguous digit substring is considered, that is,
859      * non-negative integers without separators.
860      * There is no support for plus/minus signs, decimals, exponents, etc.
861      *
862      * @param flag
863      *            true to turn numeric collation on and false to turn it off
864      * @see #getNumericCollation
865      * @see #setNumericCollationDefault
866      */
setNumericCollation(boolean flag)867     public void setNumericCollation(boolean flag) {
868         checkNotFrozen();
869         // sort substrings of digits as numbers
870         if(flag == getNumericCollation()) { return; }
871         CollationSettings ownedSettings = getOwnedSettings();
872         ownedSettings.setFlag(CollationSettings.NUMERIC, flag);
873         setFastLatinOptions(ownedSettings);
874     }
875 
876     /**
877      * {@inheritDoc}
878      *
879      * @param order the reordering codes to apply to this collator; if this is null or an empty array
880      * then this clears any existing reordering
881      * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
882      * @see #getReorderCodes
883      * @see Collator#getEquivalentReorderCodes
884      * @see Collator.ReorderCodes
885      * @see UScript
886      */
887     @Override
setReorderCodes(int... order)888     public void setReorderCodes(int... order) {
889         checkNotFrozen();
890         int length = (order != null) ? order.length : 0;
891         if(length == 1 && order[0] == ReorderCodes.NONE) {
892             length = 0;
893         }
894         if(length == 0 ?
895                 settings.readOnly().reorderCodes.length == 0 :
896                 Arrays.equals(order, settings.readOnly().reorderCodes)) {
897             return;
898         }
899         CollationSettings defaultSettings = getDefaultSettings();
900         if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) {
901             if(settings.readOnly() != defaultSettings) {
902                 CollationSettings ownedSettings = getOwnedSettings();
903                 ownedSettings.copyReorderingFrom(defaultSettings);
904                 setFastLatinOptions(ownedSettings);
905             }
906             return;
907         }
908         CollationSettings ownedSettings = getOwnedSettings();
909         if(length == 0) {
910             ownedSettings.resetReordering();
911         } else {
912             ownedSettings.setReordering(data, order.clone());
913         }
914         setFastLatinOptions(ownedSettings);
915     }
916 
setFastLatinOptions(CollationSettings ownedSettings)917     private void setFastLatinOptions(CollationSettings ownedSettings) {
918         ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
919                 data, ownedSettings, ownedSettings.fastLatinPrimaries);
920     }
921 
922     // public getters --------------------------------------------------------
923 
924     /**
925      * Gets the collation tailoring rules for this RuleBasedCollator.
926      * Equivalent to String getRules(false).
927      *
928      * <p>On Android, the returned string will be empty unless this instance was
929      * constructed using {@link #RuleBasedCollator(String)}.
930      *
931      * @return the collation tailoring rules
932      * @see #getRules(boolean)
933      */
getRules()934     public String getRules() {
935         return tailoring.getRules();
936     }
937 
938     /**
939      * Returns current rules.
940      * The argument defines whether full rules (root collation + tailored) rules are returned
941      * or just the tailoring.
942      *
943      * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order.
944      * They are almost never used or useful at runtime and can be removed from the data.
945      * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide:
946      * Collation Customization, Building on Existing Locales</a>
947      *
948      * <p>{@link #getRules()} should normally be used instead.
949      * @param fullrules
950      *            true if the rules that defines the full set of collation order is required, otherwise false for
951      *            returning only the tailored rules
952      * @return the current rules that defines this Collator.
953      * @see #getRules()
954      */
getRules(boolean fullrules)955     public String getRules(boolean fullrules) {
956         if (!fullrules) {
957             return tailoring.getRules();
958         }
959         return CollationLoader.getRootRules() + tailoring.getRules();
960     }
961 
962     /**
963      * Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
964      *
965      * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently
966      *         than in the root collator.
967      */
968     @Override
getTailoredSet()969     public UnicodeSet getTailoredSet() {
970         UnicodeSet tailored = new UnicodeSet();
971         if(data.base != null) {
972             new TailoredSet(tailored).forData(data);
973         }
974         return tailored;
975     }
976 
977     /**
978      * Gets unicode sets containing contractions and/or expansions of a collator
979      *
980      * @param contractions
981      *            if not null, set to contain contractions
982      * @param expansions
983      *            if not null, set to contain expansions
984      * @param addPrefixes
985      *            add the prefix contextual elements to contractions
986      * @throws Exception
987      *             Throws an exception if any errors occurs.
988      */
getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)989     public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)
990             throws Exception {
991         if (contractions != null) {
992             contractions.clear();
993         }
994         if (expansions != null) {
995             expansions.clear();
996         }
997         new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data);
998     }
999 
1000     /**
1001      * Adds the contractions that start with character c to the set.
1002      * Ignores prefixes. Used by AlphabeticIndex.
1003      * @deprecated This API is ICU internal only.
1004      * @hide draft / provisional / internal are hidden on Android
1005      */
internalAddContractions(int c, UnicodeSet set)1006     void internalAddContractions(int c, UnicodeSet set) {
1007         new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c);
1008     }
1009 
1010     /**
1011      * <p>
1012      * Get a Collation key for the argument String source from this RuleBasedCollator.
1013      * <p>
1014      * General recommendation: <br>
1015      * If comparisons are to be done on the same String multiple times, it would be more efficient to generate
1016      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If each
1017      * String is compared only once, using the method RuleBasedCollator.compare(String, String) will have better
1018      * performance.
1019      * <p>
1020      * See the class documentation for an explanation about CollationKeys.
1021      *
1022      * @param source
1023      *            the text String to be transformed into a collation key.
1024      * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source
1025      *         String is null, a null CollationKey is returned.
1026      * @see CollationKey
1027      * @see #compare(String, String)
1028      * @see #getRawCollationKey
1029      */
1030     @Override
getCollationKey(String source)1031     public CollationKey getCollationKey(String source) {
1032         if (source == null) {
1033             return null;
1034         }
1035         CollationBuffer buffer = null;
1036         try {
1037             buffer = getCollationBuffer();
1038             return getCollationKey(source, buffer);
1039         } finally {
1040             releaseCollationBuffer(buffer);
1041         }
1042     }
1043 
getCollationKey(String source, CollationBuffer buffer)1044     private CollationKey getCollationKey(String source, CollationBuffer buffer) {
1045         buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer);
1046         return new CollationKey(source, buffer.rawCollationKey);
1047     }
1048 
1049     /**
1050      * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the
1051      * result into the user provided argument key. If key has an internal byte array of length that's too small for the
1052      * result, the internal byte array will be grown to the exact required size.
1053      *
1054      * @param source the text String to be transformed into a RawCollationKey
1055      * @param key output RawCollationKey to store results
1056      * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user
1057      *         provided key will be returned.
1058      * @see #getCollationKey
1059      * @see #compare(String, String)
1060      * @see RawCollationKey
1061      * @hide unsupported on Android
1062      */
1063     @Override
getRawCollationKey(String source, RawCollationKey key)1064     public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
1065         if (source == null) {
1066             return null;
1067         }
1068         CollationBuffer buffer = null;
1069         try {
1070             buffer = getCollationBuffer();
1071             return getRawCollationKey(source, key, buffer);
1072         } finally {
1073             releaseCollationBuffer(buffer);
1074         }
1075     }
1076 
1077     private static final class CollationKeyByteSink extends SortKeyByteSink {
CollationKeyByteSink(RawCollationKey key)1078         CollationKeyByteSink(RawCollationKey key) {
1079             super(key.bytes);
1080             key_ = key;
1081         }
1082 
1083         @Override
AppendBeyondCapacity(byte[] bytes, int start, int n, int length)1084         protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) {
1085             // n > 0 && appended_ > capacity_
1086             if (Resize(n, length)) {
1087                 System.arraycopy(bytes, start, buffer_, length, n);
1088             }
1089         }
1090 
1091         @Override
Resize(int appendCapacity, int length)1092         protected boolean Resize(int appendCapacity, int length) {
1093             int newCapacity = 2 * buffer_.length;
1094             int altCapacity = length + 2 * appendCapacity;
1095             if (newCapacity < altCapacity) {
1096                 newCapacity = altCapacity;
1097             }
1098             if (newCapacity < 200) {
1099                 newCapacity = 200;
1100             }
1101             // Do not call key_.ensureCapacity(newCapacity) because we do not
1102             // keep key_.size in sync with appended_.
1103             // We only set it when we are done.
1104             byte[] newBytes = new byte[newCapacity];
1105             System.arraycopy(buffer_, 0, newBytes, 0, length);
1106             buffer_ = key_.bytes = newBytes;
1107             return true;
1108         }
1109 
1110         private RawCollationKey key_;
1111     }
1112 
getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer)1113     private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) {
1114         if (key == null) {
1115             key = new RawCollationKey(simpleKeyLengthEstimate(source));
1116         } else if (key.bytes == null) {
1117             key.bytes = new byte[simpleKeyLengthEstimate(source)];
1118         }
1119         CollationKeyByteSink sink = new CollationKeyByteSink(key);
1120         writeSortKey(source, sink, buffer);
1121         key.size = sink.NumberOfBytesAppended();
1122         return key;
1123     }
1124 
simpleKeyLengthEstimate(CharSequence source)1125     private int simpleKeyLengthEstimate(CharSequence source) {
1126         return 2 * source.length() + 10;
1127     }
1128 
writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer)1129     private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) {
1130         boolean numeric = settings.readOnly().isNumeric();
1131         if(settings.readOnly().dontCheckFCD()) {
1132             buffer.leftUTF16CollIter.setText(numeric, s, 0);
1133             CollationKeys.writeSortKeyUpToQuaternary(
1134                     buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(),
1135                     sink, Collation.PRIMARY_LEVEL,
1136                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
1137         } else {
1138             buffer.leftFCDUTF16Iter.setText(numeric, s, 0);
1139             CollationKeys.writeSortKeyUpToQuaternary(
1140                     buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(),
1141                     sink, Collation.PRIMARY_LEVEL,
1142                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
1143         }
1144         if(settings.readOnly().getStrength() == IDENTICAL) {
1145             writeIdenticalLevel(s, sink);
1146         }
1147         sink.Append(Collation.TERMINATOR_BYTE);
1148     }
1149 
    /**
     * Appends the identical level for s to the sort key; called by writeSortKey when the
     * strength is IDENTICAL. The level bytes are written by BOCSU directly into the sink's
     * RawCollationKey rather than through the sink, so the key size and the sink state are
     * synchronized before and after. The statement order matters.
     *
     * @param s    the text whose identical level is written
     * @param sink the sink (and its underlying key) receiving the bytes
     */
    private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) {
        // NFD quick check
        // NOTE(review): judging by its name and use below, nfdQCYesLimit is the end index of the
        // leading part of s that needs no decomposition — confirm against Normalizer2Impl.decompose.
        int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null);
        sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
        // Sync the ByteArrayWrapper size with the key length.
        sink.key_.size = sink.NumberOfBytesAppended();
        // prev carries state from the first BOCSU run into the second one.
        int prev = 0;
        if(nfdQCYesLimit != 0) {
            // Write the leading part [0, nfdQCYesLimit) of the original text as-is.
            prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_);
        }
        // Is there non-NFD text?
        if(nfdQCYesLimit < s.length()) {
            int destLengthEstimate = s.length() - nfdQCYesLimit;
            // Decompose the remainder into a temporary builder and write that instead.
            StringBuilder nfd = new StringBuilder();
            data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate);
            BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_);
        }
        // Sync the key with the buffer again which got bytes appended and may have been reallocated.
        sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size);
    }
1170 
1171     /**
1172      * Returns the CEs for the string.
1173      * @param str the string
1174      * @deprecated This API is ICU internal only.
1175      * @hide original deprecated declaration
1176      * @hide draft / provisional / internal are hidden on Android
1177      */
1178     @Deprecated
internalGetCEs(CharSequence str)1179     public long[] internalGetCEs(CharSequence str) {
1180         CollationBuffer buffer = null;
1181         try {
1182             buffer = getCollationBuffer();
1183             boolean numeric = settings.readOnly().isNumeric();
1184             CollationIterator iter;
1185             if(settings.readOnly().dontCheckFCD()) {
1186                 buffer.leftUTF16CollIter.setText(numeric, str, 0);
1187                 iter = buffer.leftUTF16CollIter;
1188             } else {
1189                 buffer.leftFCDUTF16Iter.setText(numeric, str, 0);
1190                 iter = buffer.leftFCDUTF16Iter;
1191             }
1192             int length = iter.fetchCEs() - 1;
1193             assert length >= 0 && iter.getCE(length) == Collation.NO_CE;
1194             long[] ces = new long[length];
1195             System.arraycopy(iter.getCEs(), 0, ces, 0, length);
1196             return ces;
1197         } finally {
1198             releaseCollationBuffer(buffer);
1199         }
1200     }
1201 
1202     /**
1203      * Returns this Collator's strength attribute. The strength attribute
1204      * determines the minimum level of difference considered significant.
1205      *
1206      * <p><strong>[icu] Note:</strong> This can return QUATERNARY strength, which is not supported by the
1207      * JDK version.
1208      *
1209      * <p>See the Collator class description for more details.
1210      *
1211      * @return this Collator's current strength attribute.
1212      * @see #setStrength
1213      * @see #PRIMARY
1214      * @see #SECONDARY
1215      * @see #TERTIARY
1216      * @see #QUATERNARY
1217      * @see #IDENTICAL
1218      */
1219     @Override
getStrength()1220     public int getStrength() {
1221         return settings.readOnly().getStrength();
1222     }
1223 
1224     /**
1225      * Returns the decomposition mode of this Collator. The decomposition mode
1226      * determines how Unicode composed characters are handled.
1227      *
1228      * <p>See the Collator class description for more details.
1229      *
1230      * @return the decomposition mode
1231      * @see #setDecomposition
1232      * @see #NO_DECOMPOSITION
1233      * @see #CANONICAL_DECOMPOSITION
1234      */
1235     @Override
getDecomposition()1236     public int getDecomposition() {
1237         return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ?
1238                 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION;
1239     }
1240 
1241     /**
1242      * Return true if an uppercase character is sorted before the corresponding lowercase character. See
1243      * setCaseFirst(boolean) for details.
1244      *
1245      * @see #setUpperCaseFirst
1246      * @see #setLowerCaseFirst
1247      * @see #isLowerCaseFirst
1248      * @see #setCaseFirstDefault
1249      * @return true if upper cased characters are sorted before lower cased characters, false otherwise
1250      */
isUpperCaseFirst()1251     public boolean isUpperCaseFirst() {
1252         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK);
1253     }
1254 
1255     /**
1256      * Return true if a lowercase character is sorted before the corresponding uppercase character. See
1257      * setCaseFirst(boolean) for details.
1258      *
1259      * @see #setUpperCaseFirst
1260      * @see #setLowerCaseFirst
1261      * @see #isUpperCaseFirst
1262      * @see #setCaseFirstDefault
1263      * @return true if lower cased characters are sorted before upper cased characters, false otherwise
1264      */
isLowerCaseFirst()1265     public boolean isLowerCaseFirst() {
1266         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST);
1267     }
1268 
1269     /**
1270      * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
1271      * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the
1272      * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more
1273      * details.
1274      *
1275      * @return true or false
1276      * @see #setAlternateHandlingShifted(boolean)
1277      * @see #setAlternateHandlingDefault
1278      */
isAlternateHandlingShifted()1279     public boolean isAlternateHandlingShifted() {
1280         return settings.readOnly().getAlternateHandling();
1281     }
1282 
1283     /**
1284      * Checks if case level is set to true. See setCaseLevel(boolean) for details.
1285      *
1286      * @return the case level mode
1287      * @see #setCaseLevelDefault
1288      * @see #isCaseLevel
1289      * @see #setCaseLevel(boolean)
1290      */
isCaseLevel()1291     public boolean isCaseLevel() {
1292         return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0;
1293     }
1294 
1295     /**
1296      * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details.
1297      *
1298      * @return true if French Collation is set to true, false otherwise
1299      * @see #setFrenchCollation(boolean)
1300      * @see #setFrenchCollationDefault
1301      */
isFrenchCollation()1302     public boolean isFrenchCollation() {
1303         return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0;
1304     }
1305 
1306     /**
1307      * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details.
1308      *
1309      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
1310      * Since ICU 50, this attribute is not settable any more via API functions.
1311      * Since CLDR 25/ICU 53, explicit quaternary relations are used
1312      * to achieve the same Japanese sort order.
1313      *
1314      * @return false
1315      * @see #setHiraganaQuaternaryDefault
1316      * @see #setHiraganaQuaternary(boolean)
1317      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
1318      * @hide original deprecated declaration
1319      */
    @Deprecated
    public boolean isHiraganaQuaternary() {
        // Constant result: this method does not consult the settings at all.
        return false;
    }
1324 
1325     /**
1326      * <strong>[icu]</strong> Gets the variable top value of a Collator.
1327      *
1328      * @return the variable top primary weight
1329      * @see #getMaxVariable
1330      */
1331     @Override
getVariableTop()1332     public int getVariableTop() {
1333         return (int)settings.readOnly().variableTop;
1334     }
1335 
1336     /**
1337      * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a
1338      * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2'
1339      *
1340      * @see #setNumericCollation
1341      * @see #setNumericCollationDefault
1342      * @return true if numeric collation is turned on, false otherwise
1343      */
getNumericCollation()1344     public boolean getNumericCollation() {
1345         return (settings.readOnly().options & CollationSettings.NUMERIC) != 0;
1346     }
1347 
1348     /**
1349      * Retrieves the reordering codes for this collator.
1350      * These reordering codes are a combination of UScript codes and ReorderCodes.
1351      * @return a copy of the reordering codes for this collator;
1352      * if none are set then returns an empty array
1353      * @see #setReorderCodes
1354      * @see Collator#getEquivalentReorderCodes
1355      */
1356     @Override
getReorderCodes()1357     public int[] getReorderCodes() {
1358         return settings.readOnly().reorderCodes.clone();
1359     }
1360 
1361     // public other methods -------------------------------------------------
1362 
1363     /**
1364      * {@inheritDoc}
1365      */
1366     @Override
equals(Object obj)1367     public boolean equals(Object obj) {
1368         if (this == obj) {
1369             return true;
1370         }
1371         if (!super.equals(obj)) {
1372             return false;
1373         }
1374         RuleBasedCollator o = (RuleBasedCollator) obj;
1375         if(!settings.readOnly().equals(o.settings.readOnly())) { return false; }
1376         if(data == o.data) { return true; }
1377         boolean thisIsRoot = data.base == null;
1378         boolean otherIsRoot = o.data.base == null;
1379         assert(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
1380         if(thisIsRoot != otherIsRoot) { return false; }
1381         String theseRules = tailoring.getRules();
1382         String otherRules = o.tailoring.getRules();
1383         if((thisIsRoot || theseRules.length() != 0) &&
1384                 (otherIsRoot || otherRules.length() != 0)) {
1385             // Shortcut: If both collators have valid rule strings, then compare those.
1386             if(theseRules.equals(otherRules)) { return true; }
1387         }
1388         // Different rule strings can result in the same or equivalent tailoring.
1389         // The rule strings are optional in ICU resource bundles, although included by default.
1390         // cloneBinary() drops the rule string.
1391         UnicodeSet thisTailored = getTailoredSet();
1392         UnicodeSet otherTailored = o.getTailoredSet();
1393         if(!thisTailored.equals(otherTailored)) { return false; }
1394         // For completeness, we should compare all of the mappings;
1395         // or we should create a list of strings, sort it with one collator,
1396         // and check if both collators compare adjacent strings the same
1397         // (order & strength, down to quaternary); or similar.
1398         // Testing equality of collators seems unusual.
1399         return true;
1400     }
1401 
1402     /**
1403      * Generates a unique hash code for this RuleBasedCollator.
1404      *
1405      * @return the unique hash code for this Collator
1406      */
1407     @Override
hashCode()1408     public int hashCode() {
1409         int h = settings.readOnly().hashCode();
1410         if(data.base == null) { return h; }  // root collator
1411         // Do not rely on the rule string, see comments in operator==().
1412         UnicodeSet set = getTailoredSet();
1413         UnicodeSetIterator iter = new UnicodeSetIterator(set);
1414         while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
1415             h ^= data.getCE32(iter.codepoint);
1416         }
1417         return h;
1418     }
1419 
1420     /**
1421      * Compares the source text String to the target text String according to the collation rules, strength and
1422      * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
1423      * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
1424      * class description for an example of use.
1425      * <p>
1426      * General recommendation: <br>
1427      * If comparison are to be done to the same String multiple times, it would be more efficient to generate
1428      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed
1429      * performance is critical and object instantiation is to be reduced, further optimization may be achieved by
1430      * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method
1431      * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey
1432      * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key
1433      * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String,
1434      * String) will have a better performance.
1435      *
1436      * @param source
1437      *            the source text String.
1438      * @param target
1439      *            the target text String.
1440      * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source
1441      *         and target are equal, value is greater than zero if source is greater than target.
1442      * @see CollationKey
1443      * @see #getCollationKey
1444      */
1445     @Override
compare(String source, String target)1446     public int compare(String source, String target) {
1447         return doCompare(source, target);
1448     }
1449 
1450     /**
1451     * Abstract iterator for identical-level string comparisons.
1452     * Returns FCD code points and handles temporary switching to NFD.
1453     *
1454     * <p>As with CollationIterator,
1455     * Java NFDIterator instances are partially constructed and cached,
1456     * and completed when reset for use.
1457     * C++ NFDIterator instances are stack-allocated.
1458     */
1459     private static abstract class NFDIterator {
1460         /**
1461          * Partial constructor, must call reset().
1462          */
NFDIterator()1463         NFDIterator() {}
reset()1464         final void reset() {
1465             index = -1;
1466         }
1467 
1468         /**
1469          * Returns the next code point from the internal normalization buffer,
1470          * or else the next text code point.
1471          * Returns -1 at the end of the text.
1472          */
nextCodePoint()1473         final int nextCodePoint() {
1474             if(index >= 0) {
1475                 if(index == decomp.length()) {
1476                     index = -1;
1477                 } else {
1478                     int c = Character.codePointAt(decomp, index);
1479                     index += Character.charCount(c);
1480                     return c;
1481                 }
1482             }
1483             return nextRawCodePoint();
1484         }
1485         /**
1486          * @param nfcImpl
1487          * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
1488          * @return the first code point in c's decomposition,
1489          *         or c itself if it was decomposed already or if it does not decompose
1490          */
nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c)1491         final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) {
1492             if(index >= 0) { return c; }
1493             decomp = nfcImpl.getDecomposition(c);
1494             if(decomp == null) { return c; }
1495             c = Character.codePointAt(decomp, 0);
1496             index = Character.charCount(c);
1497             return c;
1498         }
1499 
1500         /**
1501          * Returns the next text code point in FCD order.
1502          * Returns -1 at the end of the text.
1503          */
nextRawCodePoint()1504         protected abstract int nextRawCodePoint();
1505 
1506         private String decomp;
1507         private int index;
1508     }
1509 
1510     private static class UTF16NFDIterator extends NFDIterator {
UTF16NFDIterator()1511         UTF16NFDIterator() {}
setText(CharSequence seq, int start)1512         void setText(CharSequence seq, int start) {
1513             reset();
1514             s = seq;
1515             pos = start;
1516         }
1517 
1518         @Override
nextRawCodePoint()1519         protected int nextRawCodePoint() {
1520             if(pos == s.length()) { return Collation.SENTINEL_CP; }
1521             int c = Character.codePointAt(s, pos);
1522             pos += Character.charCount(c);
1523             return c;
1524         }
1525 
1526         protected CharSequence s;
1527         protected int pos;
1528     }
1529 
1530     private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
FCDUTF16NFDIterator()1531         FCDUTF16NFDIterator() {}
setText(Normalizer2Impl nfcImpl, CharSequence seq, int start)1532         void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
1533             reset();
1534             int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
1535             if(spanLimit == seq.length()) {
1536                 s = seq;
1537                 pos = start;
1538             } else {
1539                 if(str == null) {
1540                     str = new StringBuilder();
1541                 } else {
1542                     str.setLength(0);
1543                 }
1544                 str.append(seq, start, spanLimit);
1545                 ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
1546                 nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
1547                 s = str;
1548                 pos = 0;
1549             }
1550         }
1551 
1552         private StringBuilder str;
1553     }
1554 
compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right)1555     private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
1556         for(;;) {
1557             // Fetch the next FCD code point from each string.
1558             int leftCp = left.nextCodePoint();
1559             int rightCp = right.nextCodePoint();
1560             if(leftCp == rightCp) {
1561                 if(leftCp < 0) { break; }
1562                 continue;
1563             }
1564             // If they are different, then decompose each and compare again.
1565             if(leftCp < 0) {
1566                 leftCp = -2;  // end of string
1567             } else if(leftCp == 0xfffe) {
1568                 leftCp = -1;  // U+FFFE: merge separator
1569             } else {
1570                 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
1571             }
1572             if(rightCp < 0) {
1573                 rightCp = -2;  // end of string
1574             } else if(rightCp == 0xfffe) {
1575                 rightCp = -1;  // U+FFFE: merge separator
1576             } else {
1577                 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
1578             }
1579             if(leftCp < rightCp) { return Collation.LESS; }
1580             if(leftCp > rightCp) { return Collation.GREATER; }
1581         }
1582         return Collation.EQUAL;
1583     }
1584 
1585     /**
1586      * Compares two CharSequences.
1587      * @deprecated This API is ICU internal only.
1588      * @hide original deprecated declaration
1589      * @hide draft / provisional / internal are hidden on Android
1590      */
1591     @Override
1592     @Deprecated
doCompare(CharSequence left, CharSequence right)1593     protected int doCompare(CharSequence left, CharSequence right) {
1594         if(left == right) {
1595             return Collation.EQUAL;
1596         }
1597 
1598         // Identical-prefix test.
1599         int equalPrefixLength = 0;
1600         for(;;) {
1601             if(equalPrefixLength == left.length()) {
1602                 if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
1603                 break;
1604             } else if(equalPrefixLength == right.length() ||
1605                       left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
1606                 break;
1607             }
1608             ++equalPrefixLength;
1609         }
1610 
1611         CollationSettings roSettings = settings.readOnly();
1612         boolean numeric = roSettings.isNumeric();
1613         if(equalPrefixLength > 0) {
1614             if((equalPrefixLength != left.length() &&
1615                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
1616                     (equalPrefixLength != right.length() &&
1617                         data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
1618                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1619                 while(--equalPrefixLength > 0 &&
1620                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
1621             }
1622             // Notes:
1623             // - A longer string can compare equal to a prefix of it if only ignorables follow.
1624             // - With a backward level, a longer string can compare less-than a prefix of it.
1625 
1626             // Pass the actual start of each string into the CollationIterators,
1627             // plus the equalPrefixLength position,
1628             // so that prefix matches back into the equal prefix work.
1629         }
1630 
1631         int result;
1632         int fastLatinOptions = roSettings.fastLatinOptions;
1633         if(fastLatinOptions >= 0 &&
1634                 (equalPrefixLength == left.length() ||
1635                     left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
1636                 (equalPrefixLength == right.length() ||
1637                     right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
1638             result = CollationFastLatin.compareUTF16(data.fastLatinTable,
1639                                                       roSettings.fastLatinPrimaries,
1640                                                       fastLatinOptions,
1641                                                       left, right, equalPrefixLength);
1642         } else {
1643             result = CollationFastLatin.BAIL_OUT_RESULT;
1644         }
1645 
1646         if(result == CollationFastLatin.BAIL_OUT_RESULT) {
1647             CollationBuffer buffer = null;
1648             try {
1649                 buffer = getCollationBuffer();
1650                 if(roSettings.dontCheckFCD()) {
1651                     buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
1652                     buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
1653                     result = CollationCompare.compareUpToQuaternary(
1654                             buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
1655                 } else {
1656                     buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
1657                     buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
1658                     result = CollationCompare.compareUpToQuaternary(
1659                             buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
1660                 }
1661             } finally {
1662                 releaseCollationBuffer(buffer);
1663             }
1664         }
1665         if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
1666             return result;
1667         }
1668 
1669         CollationBuffer buffer = null;
1670         try {
1671             buffer = getCollationBuffer();
1672             // Compare identical level.
1673             Normalizer2Impl nfcImpl = data.nfcImpl;
1674             if(roSettings.dontCheckFCD()) {
1675                 buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
1676                 buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
1677                 return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
1678             } else {
1679                 buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
1680                 buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
1681                 return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
1682             }
1683         } finally {
1684             releaseCollationBuffer(buffer);
1685         }
1686     }
1687 
1688     // package private constructors ------------------------------------------
1689 
RuleBasedCollator(CollationTailoring t, ULocale vl)1690     RuleBasedCollator(CollationTailoring t, ULocale vl) {
1691         data = t.data;
1692         settings = t.settings.clone();
1693         tailoring = t;
1694         validLocale = vl;
1695         actualLocaleIsSameAsValid = false;
1696     }
1697 
adoptTailoring(CollationTailoring t)1698     private void adoptTailoring(CollationTailoring t) {
1699         assert(settings == null && data == null && tailoring == null);
1700         data = t.data;
1701         settings = t.settings.clone();
1702         tailoring = t;
1703         validLocale = t.actualLocale;
1704         actualLocaleIsSameAsValid = false;
1705     }
1706 
1707     // package private methods -----------------------------------------------
1708 
1709     /**
1710      * Tests whether a character is "unsafe" for use as a collation starting point.
1711      *
1712      * @param c code point or code unit
1713      * @return true if c is unsafe
1714      * @see CollationElementIterator#setOffset(int)
1715      */
isUnsafe(int c)1716     final boolean isUnsafe(int c) {
1717         return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
1718     }
1719 
    /**
     * Frozen state of the collator.
     * While isFrozen() reports true, getCollationBuffer() locks this lock and
     * releaseCollationBuffer() unlocks it again.
     */
    private Lock frozenLock;
1724 
1725     private static final class CollationBuffer {
CollationBuffer(CollationData data)1726         private CollationBuffer(CollationData data) {
1727             leftUTF16CollIter = new UTF16CollationIterator(data);
1728             rightUTF16CollIter = new UTF16CollationIterator(data);
1729             leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
1730             rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
1731             leftUTF16NFDIter = new UTF16NFDIterator();
1732             rightUTF16NFDIter = new UTF16NFDIterator();
1733             leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
1734             rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
1735         }
1736 
1737         UTF16CollationIterator leftUTF16CollIter;
1738         UTF16CollationIterator rightUTF16CollIter;
1739         FCDUTF16CollationIterator leftFCDUTF16Iter;
1740         FCDUTF16CollationIterator rightFCDUTF16Iter;
1741 
1742         UTF16NFDIterator leftUTF16NFDIter;
1743         UTF16NFDIterator rightUTF16NFDIter;
1744         FCDUTF16NFDIterator leftFCDUTF16NFDIter;
1745         FCDUTF16NFDIterator rightFCDUTF16NFDIter;
1746 
1747         RawCollationKey rawCollationKey;
1748     }
1749 
1750     /**
1751      * Get the version of this collator object.
1752      *
1753      * @return the version object associated with this collator
1754      */
1755     @Override
getVersion()1756     public VersionInfo getVersion() {
1757         int version = tailoring.version;
1758         int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
1759         return VersionInfo.getInstance(
1760                 (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),
1761                 ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));
1762     }
1763 
1764     /**
1765      * Get the UCA version of this collator object.
1766      *
1767      * @return the version object associated with this collator
1768      */
1769     @Override
getUCAVersion()1770     public VersionInfo getUCAVersion() {
1771         VersionInfo v = getVersion();
1772         // Note: This is tied to how the current implementation encodes the UCA version
1773         // in the overall getVersion().
1774         // Alternatively, we could load the root collator and get at lower-level data from there.
1775         // Either way, it will reflect the input collator's UCA version only
1776         // if it is a known implementation.
1777         // (C++ comment) It would be cleaner to make this a virtual Collator method.
1778         // (In Java, it is virtual.)
1779         return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
1780     }
1781 
    // Reusable iterator holder returned by getCollationBuffer();
    // that method creates it on demand when the collator is not frozen.
    private CollationBuffer collationBuffer;
1783 
getCollationBuffer()1784     private final CollationBuffer getCollationBuffer() {
1785         if (isFrozen()) {
1786             frozenLock.lock();
1787         } else if (collationBuffer == null) {
1788             collationBuffer = new CollationBuffer(data);
1789         }
1790         return collationBuffer;
1791     }
1792 
releaseCollationBuffer(CollationBuffer buffer)1793     private final void releaseCollationBuffer(CollationBuffer buffer) {
1794         if (isFrozen()) {
1795             frozenLock.unlock();
1796         }
1797     }
1798 
1799     /**
1800      * {@inheritDoc}
1801      * @hide draft / provisional / internal are hidden on Android
1802      */
1803     @Override
getLocale(ULocale.Type type)1804     public ULocale getLocale(ULocale.Type type) {
1805         if (type == ULocale.ACTUAL_LOCALE) {
1806             return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
1807         } else if(type == ULocale.VALID_LOCALE) {
1808             return validLocale;
1809         } else {
1810             throw new IllegalArgumentException("unknown ULocale.Type " + type);
1811         }
1812     }
1813 
1814     /**
1815      * {@inheritDoc}
1816      */
1817     @Override
setLocale(ULocale valid, ULocale actual)1818     void setLocale(ULocale valid, ULocale actual) {
1819         // This method is called
1820         // by other protected functions that checks and makes sure that
1821         // valid and actual are not null before passing
1822         assert (valid == null) == (actual == null);
1823         // Another check we could do is that the actual locale is at
1824         // the same level or less specific than the valid locale.
1825         // TODO: Starting with Java 7, use Objects.equals(a, b).
1826         if(Utility.objectEquals(actual, tailoring.actualLocale)) {
1827             actualLocaleIsSameAsValid = false;
1828         } else {
1829             assert(Utility.objectEquals(actual, valid));
1830             actualLocaleIsSameAsValid = true;
1831         }
1832         // Do not modify tailoring.actualLocale:
1833         // We cannot be sure that that would be thread-safe.
1834         validLocale = valid;
1835     }
1836 
    // Collation data; set from the tailoring's data in the constructor and adoptTailoring().
    CollationData data;
    // This collator's settings; cloned from the tailoring's settings reference.
    SharedObject.Reference<CollationSettings> settings;  // reference-counted
    // The tailoring this collator was created from.
    CollationTailoring tailoring;  // C++: reference-counted
    // Locale returned by getLocale(ULocale.VALID_LOCALE).
    private ULocale validLocale;
    // Note: No need in Java to track which attributes have been set explicitly.
    // int or EnumSet  explicitlySetAttributes;

    // When true, getLocale(ULocale.ACTUAL_LOCALE) returns validLocale
    // instead of tailoring.actualLocale.
    private boolean actualLocaleIsSameAsValid;
1845 }
1846