• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 package ohos.global.icu.text;
11 
12 import java.lang.reflect.InvocationTargetException;
13 import java.lang.reflect.Method;
14 import java.text.CharacterIterator;
15 import java.text.ParseException;
16 import java.util.Arrays;
17 import java.util.Objects;
18 import java.util.concurrent.locks.Lock;
19 import java.util.concurrent.locks.ReentrantLock;
20 
21 import ohos.global.icu.impl.ClassLoaderUtil;
22 import ohos.global.icu.impl.Normalizer2Impl;
23 import ohos.global.icu.impl.Normalizer2Impl.ReorderingBuffer;
24 import ohos.global.icu.impl.coll.BOCSU;
25 import ohos.global.icu.impl.coll.Collation;
26 import ohos.global.icu.impl.coll.CollationCompare;
27 import ohos.global.icu.impl.coll.CollationData;
28 import ohos.global.icu.impl.coll.CollationFastLatin;
29 import ohos.global.icu.impl.coll.CollationIterator;
30 import ohos.global.icu.impl.coll.CollationKeys;
31 import ohos.global.icu.impl.coll.CollationKeys.SortKeyByteSink;
32 import ohos.global.icu.impl.coll.CollationLoader;
33 import ohos.global.icu.impl.coll.CollationRoot;
34 import ohos.global.icu.impl.coll.CollationSettings;
35 import ohos.global.icu.impl.coll.CollationTailoring;
36 import ohos.global.icu.impl.coll.ContractionsAndExpansions;
37 import ohos.global.icu.impl.coll.FCDUTF16CollationIterator;
38 import ohos.global.icu.impl.coll.SharedObject;
39 import ohos.global.icu.impl.coll.TailoredSet;
40 import ohos.global.icu.impl.coll.UTF16CollationIterator;
41 import ohos.global.icu.lang.UScript;
42 import ohos.global.icu.util.ULocale;
43 import ohos.global.icu.util.VersionInfo;
44 
45 /**
46  * <p>
47  * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule
48  * sets. RuleBasedCollator is designed to be fully compliant to the <a
49  * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651.
50  *
51  * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link ohos.global.icu.util.Freezable}.
52  *
53  * <p>
54  * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User
55  * Guide</a> for more information about the collation service before using this class.
56  *
57  * <p>
58  * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
59  * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
60  * argument locale. If a customized collation ordering or customized attributes are required, use the RuleBasedCollator(String)
61  * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while
62  * re-adjusting the attributes and orders of the characters in the specified rule accordingly.
63  *
64  * <p>
65  * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
66  * is not available, the order eventually falls back to the
67  * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
68  *
69  * <p>
70  * For information about the collation rule syntax and details about customization, please refer to the <a
71  * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the
72  * User Guide.
73  *
74  * <p>
75  * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J:
76  *
77  * <ul>
78  * <li>According to the JDK documentation: <br>
79  * <i>Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range
80  * &#92;U0E40-&#92;U0E44 precedes a Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the range
81  * &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range &#92;U0E81-&#92;U0EAE then the vowel is placed after the
82  * consonant for collation purposes.
83  * <br>
84  * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on.
85  * </i>
86  * <br>
87  * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly
88  * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.</li>
89  * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.</li>
90  * </ul>
91  * <p>
92  * <strong>Examples</strong>
93  * <p>
94  * Creating Customized RuleBasedCollators: <blockquote>
95  *
96  * <pre>
97  * String simple = "&amp; a &lt; b &lt; c &lt; d";
98  * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
99  *
100  * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "
101  *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "
102  *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "
103  *                    + "o , O &lt; p , P &lt; q , Q &lt;r , R &lt;s , S &lt; "
104  *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "
105  *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "
106  *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "
107  *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";
108  * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
109  * </pre>
110  *
111  * </blockquote>
112  *
113  * Concatenating rules to combine <code>Collator</code>s: <blockquote>
114  *
115  * <pre>
116  * // Create an en_US Collator object
117  * RuleBasedCollator en_USCollator = (RuleBasedCollator)
118  *     Collator.getInstance(new Locale("en", "US", ""));
119  * // Create a da_DK Collator object
120  * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
121  *     Collator.getInstance(new Locale("da", "DK", ""));
122  * // Combine the two
123  * // First, get the collation rules from en_USCollator
124  * String en_USRules = en_USCollator.getRules();
125  * // Second, get the collation rules from da_DKCollator
126  * String da_DKRules = da_DKCollator.getRules();
127  * RuleBasedCollator newCollator =
128  *                             new RuleBasedCollator(en_USRules + da_DKRules);
129  * // newCollator has the combined rules
130  * </pre>
131  *
132  * </blockquote>
133  *
134  * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to
135  * the existing rule: <blockquote>
136  *
137  * <pre>
138  * // Create a new Collator object with additional rules
139  * String addRules = "&amp; C &lt; ch, cH, Ch, CH";
140  * RuleBasedCollator myCollator =
141  *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
142  * // myCollator contains the new rules
143  * </pre>
144  *
145  * </blockquote>
146  *
147  * How to change the order of non-spacing accents: <blockquote>
148  *
149  * <pre>
150  * // old rule with main accents
151  * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "
152  *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "
153  *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "
154  *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "
155  *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "
156  *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "
157  *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";
158  * // change the order of accent characters
159  * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";
160  * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
161  * </pre>
162  *
163  * </blockquote>
164  *
165  * Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese
166  * characters in the Japanese <code>Collator</code>: <blockquote>
167  *
168  * <pre>
169  * // get en_US Collator rules
170  * RuleBasedCollator en_USCollator
171  *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
172  * // add a few Japanese characters to sort before English characters
173  * // suppose the last character before the first base letter 'a' in
174  * // the English collation rule is &#92;u2212
175  * String jaString = "&amp; &#92;u2212 &lt;&#92;u3041, &#92;u3042 &lt;&#92;u3043, "
176  *                   + "&#92;u3044";
177  * RuleBasedCollator myJapaneseCollator
178  *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
179  * </pre>
180  *
181  * </blockquote>
182  * <p>
183  * This class is not subclassable
184  *
185  * @author Syn Wee Quek
186  */
187 public final class RuleBasedCollator extends Collator {
188     // public constructors ---------------------------------------------------
189 
190     /**
191      * <p>
192      * Constructor that takes the argument rules for customization.
193      * The collator will be based on the CLDR root collation, with the
194      * attributes and re-ordering of the characters specified in the argument rules.
195      * <p>
196      * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization">
197      * Collation Customization</a> for details on the rule syntax.
198      *
199      * @param rules
200      *            the collation rules to build the collation table from.
201      * @exception ParseException
202      *                and IOException thrown. ParseException thrown when argument rules have an invalid syntax.
203      *                IOException thrown when an error occurred while reading internal data.
204      */
RuleBasedCollator(String rules)205     public RuleBasedCollator(String rules) throws Exception {
206         if (rules == null) {
207             throw new IllegalArgumentException("Collation rules can not be null");
208         }
209         validLocale = ULocale.ROOT;
210         internalBuildTailoring(rules);
211     }
212 
213     /**
214      * Implements from-rule constructors.
215      * @param rules rule string
216      * @throws Exception
217      */
internalBuildTailoring(String rules)218     private final void internalBuildTailoring(String rules) throws Exception {
219         CollationTailoring base = CollationRoot.getRoot();
220         // Most code using Collator does not need to build a Collator from rules.
221         // By using reflection, most code will not have a static dependency on the builder code.
222         // CollationBuilder builder = new CollationBuilder(base);
223         ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass());
224         CollationTailoring t;
225         try {
226             Class<?> builderClass = classLoader.loadClass("ohos.global.icu.impl.coll.CollationBuilder");
227             Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base);
228             // builder.parseAndBuild(rules);
229             Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class);
230             t = (CollationTailoring)parseAndBuild.invoke(builder, rules);
231         } catch(InvocationTargetException e) {
232             throw (Exception)e.getTargetException();
233         }
234         t.actualLocale = null;
235         adoptTailoring(t);
236     }
237 
238     // public methods --------------------------------------------------------
239 
240     /**
241      * Clones the RuleBasedCollator
242      *
243      * @return a new instance of this RuleBasedCollator object
244      */
245     @Override
clone()246     public Object clone() throws CloneNotSupportedException {
247         if (isFrozen()) {
248             return this;
249         }
250         return cloneAsThawed();
251     }
252 
initMaxExpansions()253     private final void initMaxExpansions() {
254         synchronized(tailoring) {
255             if (tailoring.maxExpansions == null) {
256                 tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data);
257             }
258         }
259     }
260 
261     /**
262      * Return a CollationElementIterator for the given String.
263      *
264      * @see CollationElementIterator
265      */
getCollationElementIterator(String source)266     public CollationElementIterator getCollationElementIterator(String source) {
267         initMaxExpansions();
268         return new CollationElementIterator(source, this);
269     }
270 
271     /**
272      * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be
273      * preserved since a new copy will be created for use.
274      *
275      * @see CollationElementIterator
276      */
getCollationElementIterator(CharacterIterator source)277     public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
278         initMaxExpansions();
279         CharacterIterator newsource = (CharacterIterator) source.clone();
280         return new CollationElementIterator(newsource, this);
281     }
282 
283     /**
284      * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be
285      * preserved since a new copy will be created for use.
286      *
287      * @see CollationElementIterator
288      */
getCollationElementIterator(UCharacterIterator source)289     public CollationElementIterator getCollationElementIterator(UCharacterIterator source) {
290         initMaxExpansions();
291         return new CollationElementIterator(source, this);
292     }
293 
294     // Freezable interface implementation -------------------------------------------------
295 
296     /**
297      * Determines whether the object has been frozen or not.
298      *
299      * <p>An unfrozen Collator is mutable and not thread-safe.
300      * A frozen Collator is immutable and thread-safe.
301      */
302     @Override
isFrozen()303     public boolean isFrozen() {
304         return frozenLock != null;
305     }
306 
307     /**
308      * Freezes the collator.
309      * @return the collator itself.
310      */
311     @Override
freeze()312     public Collator freeze() {
313         if (!isFrozen()) {
314             frozenLock = new ReentrantLock();
315             if (collationBuffer == null) {
316                 collationBuffer = new CollationBuffer(data);
317             }
318         }
319         return this;
320     }
321 
322     /**
323      * Provides for the clone operation. Any clone is initially unfrozen.
324      */
325     @Override
cloneAsThawed()326     public RuleBasedCollator cloneAsThawed() {
327         try {
328             RuleBasedCollator result = (RuleBasedCollator) super.clone();
329             // since all collation data in the RuleBasedCollator do not change
330             // we can safely assign the result.fields to this collator
331             // except in cases where we can't
332             result.settings = settings.clone();
333             result.collationBuffer = null;
334             result.frozenLock = null;
335             return result;
336         } catch (CloneNotSupportedException e) {
337             // Clone is implemented
338             return null;
339         }
340     }
341 
342     // public setters --------------------------------------------------------
343 
checkNotFrozen()344     private void checkNotFrozen() {
345         if (isFrozen()) {
346             throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator");
347         }
348     }
349 
getOwnedSettings()350     private final CollationSettings getOwnedSettings() {
351         return settings.copyOnWrite();
352     }
353 
getDefaultSettings()354     private final CollationSettings getDefaultSettings() {
355         return tailoring.settings.readOnly();
356     }
357 
358     /**
359      * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
360      * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
361      * correct JIS collation order, distinguishing between Katakana and Hiragana characters.
362      *
363      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
364      * Since ICU 50, this attribute is not settable any more via API functions.
365      * Since CLDR 25/ICU 53, explicit quaternary relations are used
366      * to achieve the same Japanese sort order.
367      *
368      * @param flag
369      *            true if Hiragana Quaternary mode is to be on, false otherwise
370      * @see #setHiraganaQuaternaryDefault
371      * @see #isHiraganaQuaternary
372      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
373      * @hide deprecated on icu4j-org
374      */
375     @Deprecated
setHiraganaQuaternary(boolean flag)376     public void setHiraganaQuaternary(boolean flag) {
377         checkNotFrozen();
378     }
379 
380     /**
381      * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See
382      * setHiraganaQuaternary(boolean) for more details.
383      *
384      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
385      * Since ICU 50, this attribute is not settable any more via API functions.
386      * Since CLDR 25/ICU 53, explicit quaternary relations are used
387      * to achieve the same Japanese sort order.
388      *
389      * @see #setHiraganaQuaternary(boolean)
390      * @see #isHiraganaQuaternary
391      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
392      * @hide deprecated on icu4j-org
393      */
394     @Deprecated
setHiraganaQuaternaryDefault()395     public void setHiraganaQuaternaryDefault() {
396         checkNotFrozen();
397     }
398 
399     /**
400      * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The
401      * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case
402      * characters first.
403      *
404      * @param upperfirst
405      *            true to sort uppercase characters before lowercase characters, false to sort lowercase characters
406      *            before uppercase characters
407      * @see #isLowerCaseFirst
408      * @see #isUpperCaseFirst
409      * @see #setLowerCaseFirst
410      * @see #setCaseFirstDefault
411      */
setUpperCaseFirst(boolean upperfirst)412     public void setUpperCaseFirst(boolean upperfirst) {
413         checkNotFrozen();
414         if (upperfirst == isUpperCaseFirst()) { return; }
415         CollationSettings ownedSettings = getOwnedSettings();
416         ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0);
417         setFastLatinOptions(ownedSettings);
418     }
419 
420     /**
421      * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The
422      * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper
423      * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences.
424      *
425      * @param lowerfirst
426      *            true for sorting lower cased characters before upper cased characters, false to ignore case
427      *            preferences.
428      * @see #isLowerCaseFirst
429      * @see #isUpperCaseFirst
430      * @see #setUpperCaseFirst
431      * @see #setCaseFirstDefault
432      */
setLowerCaseFirst(boolean lowerfirst)433     public void setLowerCaseFirst(boolean lowerfirst) {
434         checkNotFrozen();
435         if (lowerfirst == isLowerCaseFirst()) { return; }
436         CollationSettings ownedSettings = getOwnedSettings();
437         ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0);
438         setFastLatinOptions(ownedSettings);
439     }
440 
441     /**
442      * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See
443      * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details.
444      *
445      * @see #isLowerCaseFirst
446      * @see #isUpperCaseFirst
447      * @see #setLowerCaseFirst(boolean)
448      * @see #setUpperCaseFirst(boolean)
449      */
setCaseFirstDefault()450     public final void setCaseFirstDefault() {
451         checkNotFrozen();
452         CollationSettings defaultSettings = getDefaultSettings();
453         if(settings.readOnly() == defaultSettings) { return; }
454         CollationSettings ownedSettings = getOwnedSettings();
455         ownedSettings.setCaseFirstDefault(defaultSettings.options);
456         setFastLatinOptions(ownedSettings);
457     }
458 
459     /**
460      * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See
461      * setAlternateHandling(boolean) for more details.
462      *
463      * @see #setAlternateHandlingShifted(boolean)
464      * @see #isAlternateHandlingShifted()
465      */
setAlternateHandlingDefault()466     public void setAlternateHandlingDefault() {
467         checkNotFrozen();
468         CollationSettings defaultSettings = getDefaultSettings();
469         if(settings.readOnly() == defaultSettings) { return; }
470         CollationSettings ownedSettings = getOwnedSettings();
471         ownedSettings.setAlternateHandlingDefault(defaultSettings.options);
472         setFastLatinOptions(ownedSettings);
473     }
474 
475     /**
476      * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See
477      * setCaseLevel(boolean) for more details.
478      *
479      * @see #setCaseLevel(boolean)
480      * @see #isCaseLevel
481      */
setCaseLevelDefault()482     public void setCaseLevelDefault() {
483         checkNotFrozen();
484         CollationSettings defaultSettings = getDefaultSettings();
485         if(settings.readOnly() == defaultSettings) { return; }
486         CollationSettings ownedSettings = getOwnedSettings();
487         ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options);
488         setFastLatinOptions(ownedSettings);
489     }
490 
491     /**
492      * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See
493      * setDecomposition(int) for more details.
494      *
495      * @see #getDecomposition
496      * @see #setDecomposition(int)
497      */
setDecompositionDefault()498     public void setDecompositionDefault() {
499         checkNotFrozen();
500         CollationSettings defaultSettings = getDefaultSettings();
501         if(settings.readOnly() == defaultSettings) { return; }
502         CollationSettings ownedSettings = getOwnedSettings();
503         ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options);
504         setFastLatinOptions(ownedSettings);
505     }
506 
507     /**
508      * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See
509      * setFrenchCollation(boolean) for more details.
510      *
511      * @see #isFrenchCollation
512      * @see #setFrenchCollation(boolean)
513      */
setFrenchCollationDefault()514     public void setFrenchCollationDefault() {
515         checkNotFrozen();
516         CollationSettings defaultSettings = getDefaultSettings();
517         if(settings.readOnly() == defaultSettings) { return; }
518         CollationSettings ownedSettings = getOwnedSettings();
519         ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options);
520         setFastLatinOptions(ownedSettings);
521     }
522 
523     /**
524      * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See
525      * setStrength(int) for more details.
526      *
527      * @see #setStrength(int)
528      * @see #getStrength
529      */
setStrengthDefault()530     public void setStrengthDefault() {
531         checkNotFrozen();
532         CollationSettings defaultSettings = getDefaultSettings();
533         if(settings.readOnly() == defaultSettings) { return; }
534         CollationSettings ownedSettings = getOwnedSettings();
535         ownedSettings.setStrengthDefault(defaultSettings.options);
536         setFastLatinOptions(ownedSettings);
537     }
538 
539     /**
540      * Method to set numeric collation to its default value.
541      *
542      * @see #getNumericCollation
543      * @see #setNumericCollation
544      */
setNumericCollationDefault()545     public void setNumericCollationDefault() {
546         checkNotFrozen();
547         CollationSettings defaultSettings = getDefaultSettings();
548         if(settings.readOnly() == defaultSettings) { return; }
549         CollationSettings ownedSettings = getOwnedSettings();
550         ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options);
551         setFastLatinOptions(ownedSettings);
552     }
553 
554     /**
555      * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false,
556      * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted
557      * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture">
558      * French collation</a> for more information.
559      *
560      * @param flag
561      *            true to set the French collation on, false to set it off
562      * @see #isFrenchCollation
563      * @see #setFrenchCollationDefault
564      */
setFrenchCollation(boolean flag)565     public void setFrenchCollation(boolean flag) {
566         checkNotFrozen();
567         if(flag == isFrenchCollation()) { return; }
568         CollationSettings ownedSettings = getOwnedSettings();
569         ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag);
570         setFastLatinOptions(ownedSettings);
571     }
572 
573     /**
574      * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition
575      * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This
576      * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false,
577      * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all
578      * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior
579      * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the
580      * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order.
581      *
582      * @param shifted
583      *            true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior.
584      * @see #isAlternateHandlingShifted
585      * @see #setAlternateHandlingDefault
586      */
setAlternateHandlingShifted(boolean shifted)587     public void setAlternateHandlingShifted(boolean shifted) {
588         checkNotFrozen();
589         if(shifted == isAlternateHandlingShifted()) { return; }
590         CollationSettings ownedSettings = getOwnedSettings();
591         ownedSettings.setAlternateHandlingShifted(shifted);
592         setFastLatinOptions(ownedSettings);
593     }
594 
595     /**
596      * <p>
597      * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known
598      * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level
599      * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value
600      * is false, which means the case level is not generated. The contents of the case level are affected by the case
601      * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable
602      * case level.
603      * <p>
604      * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case
605      * level</a> for more information.
606      *
607      * @param flag
608      *            true if case level sorting is required, false otherwise
609      * @see #setCaseLevelDefault
610      * @see #isCaseLevel
611      */
setCaseLevel(boolean flag)612     public void setCaseLevel(boolean flag) {
613         checkNotFrozen();
614         if(flag == isCaseLevel()) { return; }
615         CollationSettings ownedSettings = getOwnedSettings();
616         ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag);
617         setFastLatinOptions(ownedSettings);
618     }
619 
620     /**
621      * Sets the decomposition mode of this Collator.  Setting this
622      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
623      * Collator to handle un-normalized text properly, producing the
624      * same results as if the text were normalized. If
625      * NO_DECOMPOSITION is set, it is the user's responsibility to
626      * insure that all text is already in the appropriate form before
627      * a comparison or before getting a CollationKey. Adjusting
628      * decomposition mode allows the user to select between faster and
629      * more complete collation behavior.
630      *
631      * <p>Since a great many of the world's languages do not require
632      * text normalization, most locales set NO_DECOMPOSITION as the
633      * default decomposition mode.
634      *
635      * The default decompositon mode for the Collator is
636      * NO_DECOMPOSITON, unless specified otherwise by the locale used
637      * to create the Collator.
638      *
639      * <p>See getDecomposition for a description of decomposition
640      * mode.
641      *
642      * @param decomposition the new decomposition mode
643      * @see #getDecomposition
644      * @see #NO_DECOMPOSITION
645      * @see #CANONICAL_DECOMPOSITION
646      * @throws IllegalArgumentException If the given value is not a valid
647      *            decomposition mode.
648      */
649     @Override
setDecomposition(int decomposition)650     public void setDecomposition(int decomposition)
651     {
652         checkNotFrozen();
653         boolean flag;
654         switch(decomposition) {
655         case NO_DECOMPOSITION:
656             flag = false;
657             break;
658         case CANONICAL_DECOMPOSITION:
659             flag = true;
660             break;
661         default:
662             throw new IllegalArgumentException("Wrong decomposition mode.");
663         }
664         if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; }
665         CollationSettings ownedSettings = getOwnedSettings();
666         ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag);
667         setFastLatinOptions(ownedSettings);
668     }
669 
670     /**
671      * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference
672      * considered significant during comparison.
673      *
674      * <p>See the Collator class description for an example of use.
675      *
676      * @param newStrength
677      *            the new strength value.
678      * @see #getStrength
679      * @see #setStrengthDefault
680      * @see #PRIMARY
681      * @see #SECONDARY
682      * @see #TERTIARY
683      * @see #QUATERNARY
684      * @see #IDENTICAL
685      * @exception IllegalArgumentException
686      *                If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
687      */
688     @Override
setStrength(int newStrength)689     public void setStrength(int newStrength) {
690         checkNotFrozen();
691         if(newStrength == getStrength()) { return; }
692         CollationSettings ownedSettings = getOwnedSettings();
693         ownedSettings.setStrength(newStrength);
694         setFastLatinOptions(ownedSettings);
695     }
696 
697     /**
698      * <strong>[icu]</strong> Sets the variable top to the top of the specified reordering group.
699      * The variable top determines the highest-sorting character
700      * which is affected by the alternate handling behavior.
701      * If that attribute is set to NON_IGNORABLE, then the variable top has no effect.
702      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
703      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
704      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
705      * @return this
706      * @see #getMaxVariable
707      */
708     @Override
setMaxVariable(int group)709     public RuleBasedCollator setMaxVariable(int group) {
710         // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
711         int value;
712         if(group == Collator.ReorderCodes.DEFAULT) {
713             value = -1;  // UCOL_DEFAULT
714         } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) {
715             value = group - Collator.ReorderCodes.FIRST;
716         } else {
717             throw new IllegalArgumentException("illegal max variable group " + group);
718         }
719         int oldValue = settings.readOnly().getMaxVariable();
720         if(value == oldValue) {
721             return this;
722         }
723         CollationSettings defaultSettings = getDefaultSettings();
724         if(settings.readOnly() == defaultSettings) {
725             if(value < 0) {  // UCOL_DEFAULT
726                 return this;
727             }
728         }
729         CollationSettings ownedSettings = getOwnedSettings();
730 
731         if(group == Collator.ReorderCodes.DEFAULT) {
732             group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable();
733         }
734         long varTop = data.getLastPrimaryForGroup(group);
735         assert(varTop != 0);
736         ownedSettings.setMaxVariable(value, defaultSettings.options);
737         ownedSettings.variableTop = varTop;
738         setFastLatinOptions(ownedSettings);
739         return this;
740     }
741 
742     /**
743      * <strong>[icu]</strong> Returns the maximum reordering group whose characters are affected by
744      * the alternate handling behavior.
745      * @return the maximum variable reordering group.
746      * @see #setMaxVariable
747      */
748     @Override
getMaxVariable()749     public int getMaxVariable() {
750         return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable();
751     }
752 
753     /**
754      * <strong>[icu]</strong> Sets the variable top to the primary weight of the specified string.
755      *
756      * <p>Beginning with ICU 53, the variable top is pinned to
757      * the top of one of the supported reordering groups,
758      * and it must not be beyond the last of those groups.
759      * See {@link #setMaxVariable(int)}.
760      *
761      * @param varTop
762      *            one or more (if contraction) characters to which the variable top should be set
763      * @return variable top primary weight
764      * @exception IllegalArgumentException
765      *                is thrown if varTop argument is not a valid variable top element. A variable top element is
766      *                invalid when
767      *                <ul>
768      *                <li>it is a contraction that does not exist in the Collation order
769      *                <li>the variable top is beyond
770      *                    the last reordering group supported by setMaxVariable()
771      *                <li>when the varTop argument is null or zero in length.
772      *                </ul>
773      * @see #getVariableTop
774      * @see RuleBasedCollator#setAlternateHandlingShifted
775      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
776      * @hide deprecated on icu4j-org
777      */
778     @Override
779     @Deprecated
setVariableTop(String varTop)780     public int setVariableTop(String varTop) {
781         checkNotFrozen();
782         if (varTop == null || varTop.length() == 0) {
783             throw new IllegalArgumentException("Variable top argument string can not be null or zero in length.");
784         }
785         boolean numeric = settings.readOnly().isNumeric();
786         long ce1, ce2;
787         if(settings.readOnly().dontCheckFCD()) {
788             UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0);
789             ce1 = ci.nextCE();
790             ce2 = ci.nextCE();
791         } else {
792             FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0);
793             ce1 = ci.nextCE();
794             ce2 = ci.nextCE();
795         }
796         if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) {
797             throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element");
798         }
799         internalSetVariableTop(ce1 >>> 32);
800         return (int)settings.readOnly().variableTop;
801     }
802 
803     /**
804      * <strong>[icu]</strong> Sets the variable top to the specified primary weight.
805      *
806      * <p>Beginning with ICU 53, the variable top is pinned to
807      * the top of one of the supported reordering groups,
808      * and it must not be beyond the last of those groups.
809      * See {@link #setMaxVariable(int)}.
810      *
811      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
812      * @see #getVariableTop
813      * @see #setVariableTop(String)
814      * @deprecated ICU 53 Call setMaxVariable() instead.
815      * @hide deprecated on icu4j-org
816      */
817     @Override
818     @Deprecated
setVariableTop(int varTop)819     public void setVariableTop(int varTop) {
820         checkNotFrozen();
821         internalSetVariableTop(varTop & 0xffffffffL);
822     }
823 
internalSetVariableTop(long varTop)824     private void internalSetVariableTop(long varTop) {
825         if(varTop != settings.readOnly().variableTop) {
826             // Pin the variable top to the end of the reordering group which contains it.
827             // Only a few special groups are supported.
828             int group = data.getGroupForPrimary(varTop);
829             if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) {
830                 throw new IllegalArgumentException("The variable top must be a primary weight in " +
831                         "the space/punctuation/symbols/currency symbols range");
832             }
833             long v = data.getLastPrimaryForGroup(group);
834             assert(v != 0 && v >= varTop);
835             varTop = v;
836             if(varTop != settings.readOnly().variableTop) {
837                 CollationSettings ownedSettings = getOwnedSettings();
838                 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST,
839                         getDefaultSettings().options);
840                 ownedSettings.variableTop = varTop;
841                 setFastLatinOptions(ownedSettings);
842             }
843         }
844     }
845 
846     /**
847      * <strong>[icu]</strong> When numeric collation is turned on, this Collator makes
848      * substrings of digits sort according to their numeric values.
849      *
850      * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest
851      * digit substring that can be treated as a single unit is
852      * 254 digits (not counting leading zeros). If a digit substring is
853      * longer than that, the digits beyond the limit will be treated as a
854      * separate digit substring.
855      *
856      * <p>A "digit" in this sense is a code point with General_Category=Nd,
857      * which does not include circled numbers, roman numerals, etc.
858      * Only a contiguous digit substring is considered, that is,
859      * non-negative integers without separators.
860      * There is no support for plus/minus signs, decimals, exponents, etc.
861      *
862      * @param flag
863      *            true to turn numeric collation on and false to turn it off
864      * @see #getNumericCollation
865      * @see #setNumericCollationDefault
866      */
setNumericCollation(boolean flag)867     public void setNumericCollation(boolean flag) {
868         checkNotFrozen();
869         // sort substrings of digits as numbers
870         if(flag == getNumericCollation()) { return; }
871         CollationSettings ownedSettings = getOwnedSettings();
872         ownedSettings.setFlag(CollationSettings.NUMERIC, flag);
873         setFastLatinOptions(ownedSettings);
874     }
875 
876     /**
877      * {@inheritDoc}
878      *
879      * @param order the reordering codes to apply to this collator; if this is null or an empty array
880      * then this clears any existing reordering
881      * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
882      * @see #getReorderCodes
883      * @see Collator#getEquivalentReorderCodes
884      * @see Collator.ReorderCodes
885      * @see UScript
886      */
887     @Override
setReorderCodes(int... order)888     public void setReorderCodes(int... order) {
889         checkNotFrozen();
890         int length = (order != null) ? order.length : 0;
891         if(length == 1 && order[0] == ReorderCodes.NONE) {
892             length = 0;
893         }
894         if(length == 0 ?
895                 settings.readOnly().reorderCodes.length == 0 :
896                 Arrays.equals(order, settings.readOnly().reorderCodes)) {
897             return;
898         }
899         CollationSettings defaultSettings = getDefaultSettings();
900         if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) {
901             if(settings.readOnly() != defaultSettings) {
902                 CollationSettings ownedSettings = getOwnedSettings();
903                 ownedSettings.copyReorderingFrom(defaultSettings);
904                 setFastLatinOptions(ownedSettings);
905             }
906             return;
907         }
908         CollationSettings ownedSettings = getOwnedSettings();
909         if(length == 0) {
910             ownedSettings.resetReordering();
911         } else {
912             ownedSettings.setReordering(data, order.clone());
913         }
914         setFastLatinOptions(ownedSettings);
915     }
916 
setFastLatinOptions(CollationSettings ownedSettings)917     private void setFastLatinOptions(CollationSettings ownedSettings) {
918         ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
919                 data, ownedSettings, ownedSettings.fastLatinPrimaries);
920     }
921 
922     // public getters --------------------------------------------------------
923 
924     /**
925      * Gets the collation tailoring rules for this RuleBasedCollator.
926      * Equivalent to String getRules(false).
927      *
928      * @return the collation tailoring rules
929      * @see #getRules(boolean)
930      */
getRules()931     public String getRules() {
932         return tailoring.getRules();
933     }
934 
935     /**
936      * Returns current rules.
937      * The argument defines whether full rules (root collation + tailored) rules are returned
938      * or just the tailoring.
939      *
940      * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order.
941      * They are almost never used or useful at runtime and can be removed from the data.
942      * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide:
943      * Collation Customization, Building on Existing Locales</a>
944      *
945      * <p>{@link #getRules()} should normally be used instead.
946      * @param fullrules
947      *            true if the rules that defines the full set of collation order is required, otherwise false for
948      *            returning only the tailored rules
949      * @return the current rules that defines this Collator.
950      * @see #getRules()
951      */
getRules(boolean fullrules)952     public String getRules(boolean fullrules) {
953         if (!fullrules) {
954             return tailoring.getRules();
955         }
956         return CollationLoader.getRootRules() + tailoring.getRules();
957     }
958 
959     /**
960      * Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
961      *
962      * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently
963      *         than in the root collator.
964      */
965     @Override
getTailoredSet()966     public UnicodeSet getTailoredSet() {
967         UnicodeSet tailored = new UnicodeSet();
968         if(data.base != null) {
969             new TailoredSet(tailored).forData(data);
970         }
971         return tailored;
972     }
973 
974     /**
975      * Gets unicode sets containing contractions and/or expansions of a collator
976      *
977      * @param contractions
978      *            if not null, set to contain contractions
979      * @param expansions
980      *            if not null, set to contain expansions
981      * @param addPrefixes
982      *            add the prefix contextual elements to contractions
983      * @throws Exception
984      *             Throws an exception if any errors occurs.
985      */
getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)986     public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)
987             throws Exception {
988         if (contractions != null) {
989             contractions.clear();
990         }
991         if (expansions != null) {
992             expansions.clear();
993         }
994         new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data);
995     }
996 
997     /**
998      * Adds the contractions that start with character c to the set.
999      * Ignores prefixes. Used by AlphabeticIndex.
1000      * @deprecated This API is ICU internal only.
1001      * @hide draft / provisional / internal are hidden on OHOS
1002      */
1003     @Deprecated
internalAddContractions(int c, UnicodeSet set)1004     void internalAddContractions(int c, UnicodeSet set) {
1005         new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c);
1006     }
1007 
1008     /**
1009      * <p>
1010      * Get a Collation key for the argument String source from this RuleBasedCollator.
1011      * <p>
1012      * General recommendation: <br>
1013      * If comparison are to be done to the same String multiple times, it would be more efficient to generate
1014      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If the each
1015      * Strings are compared to only once, using the method RuleBasedCollator.compare(String, String) will have a better
1016      * performance.
1017      * <p>
1018      * See the class documentation for an explanation about CollationKeys.
1019      *
1020      * @param source
1021      *            the text String to be transformed into a collation key.
1022      * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source
1023      *         String is null, a null CollationKey is returned.
1024      * @see CollationKey
1025      * @see #compare(String, String)
1026      * @see #getRawCollationKey
1027      */
1028     @Override
getCollationKey(String source)1029     public CollationKey getCollationKey(String source) {
1030         if (source == null) {
1031             return null;
1032         }
1033         CollationBuffer buffer = null;
1034         try {
1035             buffer = getCollationBuffer();
1036             return getCollationKey(source, buffer);
1037         } finally {
1038             releaseCollationBuffer(buffer);
1039         }
1040     }
1041 
getCollationKey(String source, CollationBuffer buffer)1042     private CollationKey getCollationKey(String source, CollationBuffer buffer) {
1043         buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer);
1044         return new CollationKey(source, buffer.rawCollationKey);
1045     }
1046 
1047     /**
1048      * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the
1049      * result into the user provided argument key. If key has a internal byte array of length that's too small for the
1050      * result, the internal byte array will be grown to the exact required size.
1051      *
1052      * @param source the text String to be transformed into a RawCollationKey
1053      * @param key output RawCollationKey to store results
1054      * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user
1055      *         provided key will be returned.
1056      * @see #getCollationKey
1057      * @see #compare(String, String)
1058      * @see RawCollationKey
1059      * @hide unsupported on OHOS
1060      */
1061     @Override
getRawCollationKey(String source, RawCollationKey key)1062     public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
1063         if (source == null) {
1064             return null;
1065         }
1066         CollationBuffer buffer = null;
1067         try {
1068             buffer = getCollationBuffer();
1069             return getRawCollationKey(source, key, buffer);
1070         } finally {
1071             releaseCollationBuffer(buffer);
1072         }
1073     }
1074 
1075     private static final class CollationKeyByteSink extends SortKeyByteSink {
CollationKeyByteSink(RawCollationKey key)1076         CollationKeyByteSink(RawCollationKey key) {
1077             super(key.bytes);
1078             key_ = key;
1079         }
1080 
1081         @Override
AppendBeyondCapacity(byte[] bytes, int start, int n, int length)1082         protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) {
1083             // n > 0 && appended_ > capacity_
1084             if (Resize(n, length)) {
1085                 System.arraycopy(bytes, start, buffer_, length, n);
1086             }
1087         }
1088 
1089         @Override
Resize(int appendCapacity, int length)1090         protected boolean Resize(int appendCapacity, int length) {
1091             int newCapacity = 2 * buffer_.length;
1092             int altCapacity = length + 2 * appendCapacity;
1093             if (newCapacity < altCapacity) {
1094                 newCapacity = altCapacity;
1095             }
1096             if (newCapacity < 200) {
1097                 newCapacity = 200;
1098             }
1099             // Do not call key_.ensureCapacity(newCapacity) because we do not
1100             // keep key_.size in sync with appended_.
1101             // We only set it when we are done.
1102             byte[] newBytes = new byte[newCapacity];
1103             System.arraycopy(buffer_, 0, newBytes, 0, length);
1104             buffer_ = key_.bytes = newBytes;
1105             return true;
1106         }
1107 
1108         private RawCollationKey key_;
1109     }
1110 
getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer)1111     private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) {
1112         if (key == null) {
1113             key = new RawCollationKey(simpleKeyLengthEstimate(source));
1114         } else if (key.bytes == null) {
1115             key.bytes = new byte[simpleKeyLengthEstimate(source)];
1116         }
1117         CollationKeyByteSink sink = new CollationKeyByteSink(key);
1118         writeSortKey(source, sink, buffer);
1119         key.size = sink.NumberOfBytesAppended();
1120         return key;
1121     }
1122 
simpleKeyLengthEstimate(CharSequence source)1123     private int simpleKeyLengthEstimate(CharSequence source) {
1124         return 2 * source.length() + 10;
1125     }
1126 
writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer)1127     private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) {
1128         boolean numeric = settings.readOnly().isNumeric();
1129         if(settings.readOnly().dontCheckFCD()) {
1130             buffer.leftUTF16CollIter.setText(numeric, s, 0);
1131             CollationKeys.writeSortKeyUpToQuaternary(
1132                     buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(),
1133                     sink, Collation.PRIMARY_LEVEL,
1134                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
1135         } else {
1136             buffer.leftFCDUTF16Iter.setText(numeric, s, 0);
1137             CollationKeys.writeSortKeyUpToQuaternary(
1138                     buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(),
1139                     sink, Collation.PRIMARY_LEVEL,
1140                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
1141         }
1142         if(settings.readOnly().getStrength() == IDENTICAL) {
1143             writeIdenticalLevel(s, sink);
1144         }
1145         sink.Append(Collation.TERMINATOR_BYTE);
1146     }
1147 
writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink)1148     private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) {
1149         // NFD quick check
1150         int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null);
1151         sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
1152         // Sync the ByteArrayWrapper size with the key length.
1153         sink.key_.size = sink.NumberOfBytesAppended();
1154         int prev = 0;
1155         if(nfdQCYesLimit != 0) {
1156             prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_);
1157         }
1158         // Is there non-NFD text?
1159         if(nfdQCYesLimit < s.length()) {
1160             int destLengthEstimate = s.length() - nfdQCYesLimit;
1161             StringBuilder nfd = new StringBuilder();
1162             data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate);
1163             BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_);
1164         }
1165         // Sync the key with the buffer again which got bytes appended and may have been reallocated.
1166         sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size);
1167     }
1168 
1169     /**
1170      * Returns the CEs for the string.
1171      * @param str the string
1172      * @deprecated This API is ICU internal only.
1173      * @hide deprecated on icu4j-org
1174      * @hide draft / provisional / internal are hidden on OHOS
1175      */
1176     @Deprecated
internalGetCEs(CharSequence str)1177     public long[] internalGetCEs(CharSequence str) {
1178         CollationBuffer buffer = null;
1179         try {
1180             buffer = getCollationBuffer();
1181             boolean numeric = settings.readOnly().isNumeric();
1182             CollationIterator iter;
1183             if(settings.readOnly().dontCheckFCD()) {
1184                 buffer.leftUTF16CollIter.setText(numeric, str, 0);
1185                 iter = buffer.leftUTF16CollIter;
1186             } else {
1187                 buffer.leftFCDUTF16Iter.setText(numeric, str, 0);
1188                 iter = buffer.leftFCDUTF16Iter;
1189             }
1190             int length = iter.fetchCEs() - 1;
1191             assert length >= 0 && iter.getCE(length) == Collation.NO_CE;
1192             long[] ces = new long[length];
1193             System.arraycopy(iter.getCEs(), 0, ces, 0, length);
1194             return ces;
1195         } finally {
1196             releaseCollationBuffer(buffer);
1197         }
1198     }
1199 
1200     /**
1201      * Returns this Collator's strength attribute. The strength attribute
1202      * determines the minimum level of difference considered significant.
1203      *
1204      * <p><strong>[icu] Note:</strong> This can return QUATERNARY strength, which is not supported by the
1205      * JDK version.
1206      *
1207      * <p>See the Collator class description for more details.
1208      *
1209      * @return this Collator's current strength attribute.
1210      * @see #setStrength
1211      * @see #PRIMARY
1212      * @see #SECONDARY
1213      * @see #TERTIARY
1214      * @see #QUATERNARY
1215      * @see #IDENTICAL
1216      */
1217     @Override
getStrength()1218     public int getStrength() {
1219         return settings.readOnly().getStrength();
1220     }
1221 
1222     /**
1223      * Returns the decomposition mode of this Collator. The decomposition mode
1224      * determines how Unicode composed characters are handled.
1225      *
1226      * <p>See the Collator class description for more details.
1227      *
1228      * @return the decomposition mode
1229      * @see #setDecomposition
1230      * @see #NO_DECOMPOSITION
1231      * @see #CANONICAL_DECOMPOSITION
1232      */
1233     @Override
getDecomposition()1234     public int getDecomposition() {
1235         return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ?
1236                 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION;
1237     }
1238 
1239     /**
1240      * Return true if an uppercase character is sorted before the corresponding lowercase character. See
1241      * setCaseFirst(boolean) for details.
1242      *
1243      * @see #setUpperCaseFirst
1244      * @see #setLowerCaseFirst
1245      * @see #isLowerCaseFirst
1246      * @see #setCaseFirstDefault
1247      * @return true if upper cased characters are sorted before lower cased characters, false otherwise
1248      */
isUpperCaseFirst()1249     public boolean isUpperCaseFirst() {
1250         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK);
1251     }
1252 
1253     /**
1254      * Return true if a lowercase character is sorted before the corresponding uppercase character. See
1255      * setCaseFirst(boolean) for details.
1256      *
1257      * @see #setUpperCaseFirst
1258      * @see #setLowerCaseFirst
1259      * @see #isUpperCaseFirst
1260      * @see #setCaseFirstDefault
1261      * @return true lower cased characters are sorted before upper cased characters, false otherwise
1262      */
isLowerCaseFirst()1263     public boolean isLowerCaseFirst() {
1264         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST);
1265     }
1266 
1267     /**
1268      * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
1269      * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the
1270      * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more
1271      * details.
1272      *
1273      * @return true or false
1274      * @see #setAlternateHandlingShifted(boolean)
1275      * @see #setAlternateHandlingDefault
1276      */
isAlternateHandlingShifted()1277     public boolean isAlternateHandlingShifted() {
1278         return settings.readOnly().getAlternateHandling();
1279     }
1280 
1281     /**
1282      * Checks if case level is set to true. See setCaseLevel(boolean) for details.
1283      *
1284      * @return the case level mode
1285      * @see #setCaseLevelDefault
1286      * @see #isCaseLevel
1287      * @see #setCaseLevel(boolean)
1288      */
isCaseLevel()1289     public boolean isCaseLevel() {
1290         return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0;
1291     }
1292 
1293     /**
1294      * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details.
1295      *
1296      * @return true if French Collation is set to true, false otherwise
1297      * @see #setFrenchCollation(boolean)
1298      * @see #setFrenchCollationDefault
1299      */
isFrenchCollation()1300     public boolean isFrenchCollation() {
1301         return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0;
1302     }
1303 
1304     /**
1305      * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details.
1306      *
1307      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
1308      * Since ICU 50, this attribute is not settable any more via API functions.
1309      * Since CLDR 25/ICU 53, explicit quaternary relations are used
1310      * to achieve the same Japanese sort order.
1311      *
1312      * @return false
1313      * @see #setHiraganaQuaternaryDefault
1314      * @see #setHiraganaQuaternary(boolean)
1315      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
1316      * @hide deprecated on icu4j-org
1317      */
1318     @Deprecated
isHiraganaQuaternary()1319     public boolean isHiraganaQuaternary() {
1320         return false;
1321     }
1322 
1323     /**
1324      * <strong>[icu]</strong> Gets the variable top value of a Collator.
1325      *
1326      * @return the variable top primary weight
1327      * @see #getMaxVariable
1328      */
1329     @Override
getVariableTop()1330     public int getVariableTop() {
1331         return (int)settings.readOnly().variableTop;
1332     }
1333 
1334     /**
1335      * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a
1336      * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2'
1337      *
1338      * @see #setNumericCollation
1339      * @see #setNumericCollationDefault
1340      * @return true if numeric collation is turned on, false otherwise
1341      */
getNumericCollation()1342     public boolean getNumericCollation() {
1343         return (settings.readOnly().options & CollationSettings.NUMERIC) != 0;
1344     }
1345 
1346     /**
1347      * Retrieves the reordering codes for this collator.
1348      * These reordering codes are a combination of UScript codes and ReorderCodes.
1349      * @return a copy of the reordering codes for this collator;
1350      * if none are set then returns an empty array
1351      * @see #setReorderCodes
1352      * @see Collator#getEquivalentReorderCodes
1353      */
1354     @Override
getReorderCodes()1355     public int[] getReorderCodes() {
1356         return settings.readOnly().reorderCodes.clone();
1357     }
1358 
1359     // public other methods -------------------------------------------------
1360 
1361     /**
1362      * {@inheritDoc}
1363      */
1364     @Override
equals(Object obj)1365     public boolean equals(Object obj) {
1366         if (this == obj) {
1367             return true;
1368         }
1369         if (!super.equals(obj)) {
1370             return false;
1371         }
1372         RuleBasedCollator o = (RuleBasedCollator) obj;
1373         if(!settings.readOnly().equals(o.settings.readOnly())) { return false; }
1374         if(data == o.data) { return true; }
1375         boolean thisIsRoot = data.base == null;
1376         boolean otherIsRoot = o.data.base == null;
1377         assert(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
1378         if(thisIsRoot != otherIsRoot) { return false; }
1379         String theseRules = tailoring.getRules();
1380         String otherRules = o.tailoring.getRules();
1381         if((thisIsRoot || theseRules.length() != 0) &&
1382                 (otherIsRoot || otherRules.length() != 0)) {
1383             // Shortcut: If both collators have valid rule strings, then compare those.
1384             if(theseRules.equals(otherRules)) { return true; }
1385         }
1386         // Different rule strings can result in the same or equivalent tailoring.
1387         // The rule strings are optional in ICU resource bundles, although included by default.
1388         // cloneBinary() drops the rule string.
1389         UnicodeSet thisTailored = getTailoredSet();
1390         UnicodeSet otherTailored = o.getTailoredSet();
1391         if(!thisTailored.equals(otherTailored)) { return false; }
1392         // For completeness, we should compare all of the mappings;
1393         // or we should create a list of strings, sort it with one collator,
1394         // and check if both collators compare adjacent strings the same
1395         // (order & strength, down to quaternary); or similar.
1396         // Testing equality of collators seems unusual.
1397         return true;
1398     }
1399 
1400     /**
1401      * Generates a unique hash code for this RuleBasedCollator.
1402      *
1403      * @return the unique hash code for this Collator
1404      */
1405     @Override
hashCode()1406     public int hashCode() {
1407         int h = settings.readOnly().hashCode();
1408         if(data.base == null) { return h; }  // root collator
1409         // Do not rely on the rule string, see comments in operator==().
1410         UnicodeSet set = getTailoredSet();
1411         UnicodeSetIterator iter = new UnicodeSetIterator(set);
1412         while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
1413             h ^= data.getCE32(iter.codepoint);
1414         }
1415         return h;
1416     }
1417 
1418     /**
1419      * Compares the source text String to the target text String according to the collation rules, strength and
1420      * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
1421      * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
1422      * class description for an example of use.
1423      * <p>
1424      * General recommendation: <br>
1425      * If comparison are to be done to the same String multiple times, it would be more efficient to generate
1426      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed
1427      * performance is critical and object instantiation is to be reduced, further optimization may be achieved by
1428      * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method
1429      * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey
1430      * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key
1431      * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String,
1432      * String) will have a better performance.
1433      *
1434      * @param source
1435      *            the source text String.
1436      * @param target
1437      *            the target text String.
1438      * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source
1439      *         and target are equal, value is greater than zero if source is greater than target.
1440      * @see CollationKey
1441      * @see #getCollationKey
1442      */
1443     @Override
compare(String source, String target)1444     public int compare(String source, String target) {
1445         return doCompare(source, target);
1446     }
1447 
1448     /**
1449     * Abstract iterator for identical-level string comparisons.
1450     * Returns FCD code points and handles temporary switching to NFD.
1451     *
1452     * <p>As with CollationIterator,
1453     * Java NFDIterator instances are partially constructed and cached,
1454     * and completed when reset for use.
1455     * C++ NFDIterator instances are stack-allocated.
1456     */
1457     private static abstract class NFDIterator {
1458         /**
1459          * Partial constructor, must call reset().
1460          */
NFDIterator()1461         NFDIterator() {}
reset()1462         final void reset() {
1463             index = -1;
1464         }
1465 
1466         /**
1467          * Returns the next code point from the internal normalization buffer,
1468          * or else the next text code point.
1469          * Returns -1 at the end of the text.
1470          */
nextCodePoint()1471         final int nextCodePoint() {
1472             if(index >= 0) {
1473                 if(index == decomp.length()) {
1474                     index = -1;
1475                 } else {
1476                     int c = Character.codePointAt(decomp, index);
1477                     index += Character.charCount(c);
1478                     return c;
1479                 }
1480             }
1481             return nextRawCodePoint();
1482         }
1483         /**
1484          * @param nfcImpl
1485          * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
1486          * @return the first code point in c's decomposition,
1487          *         or c itself if it was decomposed already or if it does not decompose
1488          */
nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c)1489         final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) {
1490             if(index >= 0) { return c; }
1491             decomp = nfcImpl.getDecomposition(c);
1492             if(decomp == null) { return c; }
1493             c = Character.codePointAt(decomp, 0);
1494             index = Character.charCount(c);
1495             return c;
1496         }
1497 
1498         /**
1499          * Returns the next text code point in FCD order.
1500          * Returns -1 at the end of the text.
1501          */
nextRawCodePoint()1502         protected abstract int nextRawCodePoint();
1503 
1504         private String decomp;
1505         private int index;
1506     }
1507 
1508     private static class UTF16NFDIterator extends NFDIterator {
UTF16NFDIterator()1509         UTF16NFDIterator() {}
setText(CharSequence seq, int start)1510         void setText(CharSequence seq, int start) {
1511             reset();
1512             s = seq;
1513             pos = start;
1514         }
1515 
1516         @Override
nextRawCodePoint()1517         protected int nextRawCodePoint() {
1518             if(pos == s.length()) { return Collation.SENTINEL_CP; }
1519             int c = Character.codePointAt(s, pos);
1520             pos += Character.charCount(c);
1521             return c;
1522         }
1523 
1524         protected CharSequence s;
1525         protected int pos;
1526     }
1527 
1528     private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
FCDUTF16NFDIterator()1529         FCDUTF16NFDIterator() {}
setText(Normalizer2Impl nfcImpl, CharSequence seq, int start)1530         void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
1531             reset();
1532             int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
1533             if(spanLimit == seq.length()) {
1534                 s = seq;
1535                 pos = start;
1536             } else {
1537                 if(str == null) {
1538                     str = new StringBuilder();
1539                 } else {
1540                     str.setLength(0);
1541                 }
1542                 str.append(seq, start, spanLimit);
1543                 ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
1544                 nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
1545                 s = str;
1546                 pos = 0;
1547             }
1548         }
1549 
1550         private StringBuilder str;
1551     }
1552 
compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right)1553     private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
1554         for(;;) {
1555             // Fetch the next FCD code point from each string.
1556             int leftCp = left.nextCodePoint();
1557             int rightCp = right.nextCodePoint();
1558             if(leftCp == rightCp) {
1559                 if(leftCp < 0) { break; }
1560                 continue;
1561             }
1562             // If they are different, then decompose each and compare again.
1563             if(leftCp < 0) {
1564                 leftCp = -2;  // end of string
1565             } else if(leftCp == 0xfffe) {
1566                 leftCp = -1;  // U+FFFE: merge separator
1567             } else {
1568                 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
1569             }
1570             if(rightCp < 0) {
1571                 rightCp = -2;  // end of string
1572             } else if(rightCp == 0xfffe) {
1573                 rightCp = -1;  // U+FFFE: merge separator
1574             } else {
1575                 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
1576             }
1577             if(leftCp < rightCp) { return Collation.LESS; }
1578             if(leftCp > rightCp) { return Collation.GREATER; }
1579         }
1580         return Collation.EQUAL;
1581     }
1582 
1583     /**
1584      * Compares two CharSequences.
1585      * @deprecated This API is ICU internal only.
1586      * @hide deprecated on icu4j-org
1587      * @hide draft / provisional / internal are hidden on OHOS
1588      */
1589     @Override
1590     @Deprecated
doCompare(CharSequence left, CharSequence right)1591     protected int doCompare(CharSequence left, CharSequence right) {
1592         if(left == right) {
1593             return Collation.EQUAL;
1594         }
1595 
1596         // Identical-prefix test.
1597         int equalPrefixLength = 0;
1598         for(;;) {
1599             if(equalPrefixLength == left.length()) {
1600                 if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
1601                 break;
1602             } else if(equalPrefixLength == right.length() ||
1603                       left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
1604                 break;
1605             }
1606             ++equalPrefixLength;
1607         }
1608 
1609         CollationSettings roSettings = settings.readOnly();
1610         boolean numeric = roSettings.isNumeric();
1611         if(equalPrefixLength > 0) {
1612             if((equalPrefixLength != left.length() &&
1613                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
1614                     (equalPrefixLength != right.length() &&
1615                         data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
1616                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1617                 while(--equalPrefixLength > 0 &&
1618                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
1619             }
1620             // Notes:
1621             // - A longer string can compare equal to a prefix of it if only ignorables follow.
1622             // - With a backward level, a longer string can compare less-than a prefix of it.
1623 
1624             // Pass the actual start of each string into the CollationIterators,
1625             // plus the equalPrefixLength position,
1626             // so that prefix matches back into the equal prefix work.
1627         }
1628 
1629         int result;
1630         int fastLatinOptions = roSettings.fastLatinOptions;
1631         if(fastLatinOptions >= 0 &&
1632                 (equalPrefixLength == left.length() ||
1633                     left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
1634                 (equalPrefixLength == right.length() ||
1635                     right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
1636             result = CollationFastLatin.compareUTF16(data.fastLatinTable,
1637                                                       roSettings.fastLatinPrimaries,
1638                                                       fastLatinOptions,
1639                                                       left, right, equalPrefixLength);
1640         } else {
1641             result = CollationFastLatin.BAIL_OUT_RESULT;
1642         }
1643 
1644         if(result == CollationFastLatin.BAIL_OUT_RESULT) {
1645             CollationBuffer buffer = null;
1646             try {
1647                 buffer = getCollationBuffer();
1648                 if(roSettings.dontCheckFCD()) {
1649                     buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
1650                     buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
1651                     result = CollationCompare.compareUpToQuaternary(
1652                             buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
1653                 } else {
1654                     buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
1655                     buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
1656                     result = CollationCompare.compareUpToQuaternary(
1657                             buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
1658                 }
1659             } finally {
1660                 releaseCollationBuffer(buffer);
1661             }
1662         }
1663         if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
1664             return result;
1665         }
1666 
1667         CollationBuffer buffer = null;
1668         try {
1669             buffer = getCollationBuffer();
1670             // Compare identical level.
1671             Normalizer2Impl nfcImpl = data.nfcImpl;
1672             if(roSettings.dontCheckFCD()) {
1673                 buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
1674                 buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
1675                 return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
1676             } else {
1677                 buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
1678                 buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
1679                 return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
1680             }
1681         } finally {
1682             releaseCollationBuffer(buffer);
1683         }
1684     }
1685 
1686     // package private constructors ------------------------------------------
1687 
RuleBasedCollator(CollationTailoring t, ULocale vl)1688     RuleBasedCollator(CollationTailoring t, ULocale vl) {
1689         data = t.data;
1690         settings = t.settings.clone();
1691         tailoring = t;
1692         validLocale = vl;
1693         actualLocaleIsSameAsValid = false;
1694     }
1695 
adoptTailoring(CollationTailoring t)1696     private void adoptTailoring(CollationTailoring t) {
1697         assert(settings == null && data == null && tailoring == null);
1698         data = t.data;
1699         settings = t.settings.clone();
1700         tailoring = t;
1701         validLocale = t.actualLocale;
1702         actualLocaleIsSameAsValid = false;
1703     }
1704 
1705     // package private methods -----------------------------------------------
1706 
1707     /**
1708      * Tests whether a character is "unsafe" for use as a collation starting point.
1709      *
1710      * @param c code point or code unit
1711      * @return true if c is unsafe
1712      * @see CollationElementIterator#setOffset(int)
1713      */
isUnsafe(int c)1714     final boolean isUnsafe(int c) {
1715         return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
1716     }
1717 
    /**
     * Frozen state of the collator.
     *
     * <p>While the collator is frozen, getCollationBuffer() locks this lock before handing out
     * the shared buffer, and releaseCollationBuffer() unlocks it again.
     */
    private Lock frozenLock;
1722 
1723     private static final class CollationBuffer {
CollationBuffer(CollationData data)1724         private CollationBuffer(CollationData data) {
1725             leftUTF16CollIter = new UTF16CollationIterator(data);
1726             rightUTF16CollIter = new UTF16CollationIterator(data);
1727             leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
1728             rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
1729             leftUTF16NFDIter = new UTF16NFDIterator();
1730             rightUTF16NFDIter = new UTF16NFDIterator();
1731             leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
1732             rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
1733         }
1734 
1735         UTF16CollationIterator leftUTF16CollIter;
1736         UTF16CollationIterator rightUTF16CollIter;
1737         FCDUTF16CollationIterator leftFCDUTF16Iter;
1738         FCDUTF16CollationIterator rightFCDUTF16Iter;
1739 
1740         UTF16NFDIterator leftUTF16NFDIter;
1741         UTF16NFDIterator rightUTF16NFDIter;
1742         FCDUTF16NFDIterator leftFCDUTF16NFDIter;
1743         FCDUTF16NFDIterator rightFCDUTF16NFDIter;
1744 
1745         RawCollationKey rawCollationKey;
1746     }
1747 
1748     /**
1749      * Get the version of this collator object.
1750      *
1751      * @return the version object associated with this collator
1752      */
1753     @Override
getVersion()1754     public VersionInfo getVersion() {
1755         int version = tailoring.version;
1756         int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
1757         return VersionInfo.getInstance(
1758                 (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),
1759                 ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));
1760     }
1761 
1762     /**
1763      * Get the UCA version of this collator object.
1764      *
1765      * @return the version object associated with this collator
1766      */
1767     @Override
getUCAVersion()1768     public VersionInfo getUCAVersion() {
1769         VersionInfo v = getVersion();
1770         // Note: This is tied to how the current implementation encodes the UCA version
1771         // in the overall getVersion().
1772         // Alternatively, we could load the root collator and get at lower-level data from there.
1773         // Either way, it will reflect the input collator's UCA version only
1774         // if it is a known implementation.
1775         // (C++ comment) It would be cleaner to make this a virtual Collator method.
1776         // (In Java, it is virtual.)
1777         return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
1778     }
1779 
    // Cached iterator bundle handed out by getCollationBuffer(), which creates it on demand
    // when the collator is not frozen.
    private CollationBuffer collationBuffer;
1781 
getCollationBuffer()1782     private final CollationBuffer getCollationBuffer() {
1783         if (isFrozen()) {
1784             frozenLock.lock();
1785         } else if (collationBuffer == null) {
1786             collationBuffer = new CollationBuffer(data);
1787         }
1788         return collationBuffer;
1789     }
1790 
releaseCollationBuffer(CollationBuffer buffer)1791     private final void releaseCollationBuffer(CollationBuffer buffer) {
1792         if (isFrozen()) {
1793             frozenLock.unlock();
1794         }
1795     }
1796 
1797     /**
1798      * {@inheritDoc}
1799      * @hide draft / provisional / internal are hidden on OHOS
1800      */
1801     @Override
getLocale(ULocale.Type type)1802     public ULocale getLocale(ULocale.Type type) {
1803         if (type == ULocale.ACTUAL_LOCALE) {
1804             return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
1805         } else if(type == ULocale.VALID_LOCALE) {
1806             return validLocale;
1807         } else {
1808             throw new IllegalArgumentException("unknown ULocale.Type " + type);
1809         }
1810     }
1811 
1812     /**
1813      * {@inheritDoc}
1814      */
1815     @Override
setLocale(ULocale valid, ULocale actual)1816     void setLocale(ULocale valid, ULocale actual) {
1817         // This method is called
1818         // by other protected functions that checks and makes sure that
1819         // valid and actual are not null before passing
1820         assert (valid == null) == (actual == null);
1821         // Another check we could do is that the actual locale is at
1822         // the same level or less specific than the valid locale.
1823         if(Objects.equals(actual, tailoring.actualLocale)) {
1824             actualLocaleIsSameAsValid = false;
1825         } else {
1826             assert(Objects.equals(actual, valid));
1827             actualLocaleIsSameAsValid = true;
1828         }
1829         // Do not modify tailoring.actualLocale:
1830         // We cannot be sure that that would be thread-safe.
1831         validLocale = valid;
1832     }
1833 
    // Collation data; set from the tailoring's data by the constructor and adoptTailoring().
    CollationData data;
    // Set to a clone of the tailoring's settings reference; read through settings.readOnly().
    SharedObject.Reference<CollationSettings> settings;  // reference-counted
    CollationTailoring tailoring;  // C++: reference-counted
    // Returned by getLocale() for VALID_LOCALE, and for ACTUAL_LOCALE when the flag below is set.
    private ULocale validLocale;
    // Note: No need in Java to track which attributes have been set explicitly.
    // int or EnumSet  explicitlySetAttributes;

    // When true, getLocale(ACTUAL_LOCALE) returns validLocale instead of tailoring.actualLocale.
    private boolean actualLocaleIsSameAsValid;
1842 }
1843