1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.text; 11 12 import java.lang.reflect.InvocationTargetException; 13 import java.lang.reflect.Method; 14 import java.text.CharacterIterator; 15 import java.text.ParseException; 16 import java.util.Arrays; 17 import java.util.Objects; 18 import java.util.concurrent.locks.Lock; 19 import java.util.concurrent.locks.ReentrantLock; 20 21 import ohos.global.icu.impl.ClassLoaderUtil; 22 import ohos.global.icu.impl.Normalizer2Impl; 23 import ohos.global.icu.impl.Normalizer2Impl.ReorderingBuffer; 24 import ohos.global.icu.impl.coll.BOCSU; 25 import ohos.global.icu.impl.coll.Collation; 26 import ohos.global.icu.impl.coll.CollationCompare; 27 import ohos.global.icu.impl.coll.CollationData; 28 import ohos.global.icu.impl.coll.CollationFastLatin; 29 import ohos.global.icu.impl.coll.CollationIterator; 30 import ohos.global.icu.impl.coll.CollationKeys; 31 import ohos.global.icu.impl.coll.CollationKeys.SortKeyByteSink; 32 import ohos.global.icu.impl.coll.CollationLoader; 33 import ohos.global.icu.impl.coll.CollationRoot; 34 import ohos.global.icu.impl.coll.CollationSettings; 35 import ohos.global.icu.impl.coll.CollationTailoring; 36 import ohos.global.icu.impl.coll.ContractionsAndExpansions; 37 import ohos.global.icu.impl.coll.FCDUTF16CollationIterator; 38 import ohos.global.icu.impl.coll.SharedObject; 39 import ohos.global.icu.impl.coll.TailoredSet; 40 import ohos.global.icu.impl.coll.UTF16CollationIterator; 41 import ohos.global.icu.lang.UScript; 42 import ohos.global.icu.util.ULocale; 43 import 
ohos.global.icu.util.VersionInfo;

/**
 * <p>
 * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule
 * sets. RuleBasedCollator is designed to be fully compliant with the <a
 * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651.
 *
 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link ohos.global.icu.util.Freezable}.
 *
 * <p>
 * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User
 * Guide</a> for more information about the collation service before using this class.
 *
 * <p>
 * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
 * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
 * argument locale. If a customized collation ordering or customized attributes are required, use the
 * RuleBasedCollator(String) constructor with the appropriate rules. The customized RuleBasedCollator will base its
 * ordering on the CLDR root collation, while re-adjusting the attributes and orders of the characters in the
 * specified rule accordingly.
 *
 * <p>
 * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
 * is not available, the order eventually falls back to the
 * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
 *
 * <p>
 * For information about the collation rule syntax and details about customization, please refer to the <a
 * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the
 * User Guide.
73 * 74 * <p> 75 * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J: 76 * 77 * <ul> 78 * <li>According to the JDK documentation: <br> 79 * <i>Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range 80 * \U0E40-\U0E44 precedes a Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the range 81 * \U0EC0-\U0EC4 precedes a Lao consonant of the range \U0E81-\U0EAE then the vowel is placed after the 82 * consonant for collation purposes. 83 * <br> 84 * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on. 85 * </i> 86 * <br> 87 * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly 88 * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.</li> 89 * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.</li> 90 * </ul> 91 * <p> 92 * <strong>Examples</strong> 93 * <p> 94 * Creating Customized RuleBasedCollators: <blockquote> 95 * 96 * <pre> 97 * String simple = "& a < b < c < d"; 98 * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple); 99 * 100 * String norwegian = "& a , A < b , B < c , C < d , D < e , E " 101 * + "< f , F < g , G < h , H < i , I < j , " 102 * + "J < k , K < l , L < m , M < n , N < " 103 * + "o , O < p , P < q , Q <r , R <s , S < " 104 * + "t , T < u , U < v , V < w , W < x , X " 105 * + "< y , Y < z , Z < \u00E5 = a\u030A " 106 * + ", \u00C5 = A\u030A ; aa , AA < \u00E6 " 107 * + ", \u00C6 < \u00F8 , \u00D8"; 108 * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian); 109 * </pre> 110 * 111 * </blockquote> 112 * 113 * Concatenating rules to combine <code>Collator</code>s: <blockquote> 114 * 115 * <pre> 116 * // Create an en_US Collator object 117 * RuleBasedCollator 
en_USCollator = (RuleBasedCollator) 118 * Collator.getInstance(new Locale("en", "US", "")); 119 * // Create a da_DK Collator object 120 * RuleBasedCollator da_DKCollator = (RuleBasedCollator) 121 * Collator.getInstance(new Locale("da", "DK", "")); 122 * // Combine the two 123 * // First, get the collation rules from en_USCollator 124 * String en_USRules = en_USCollator.getRules(); 125 * // Second, get the collation rules from da_DKCollator 126 * String da_DKRules = da_DKCollator.getRules(); 127 * RuleBasedCollator newCollator = 128 * new RuleBasedCollator(en_USRules + da_DKRules); 129 * // newCollator has the combined rules 130 * </pre> 131 * 132 * </blockquote> 133 * 134 * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to 135 * the existing rule: <blockquote> 136 * 137 * <pre> 138 * // Create a new Collator object with additional rules 139 * String addRules = "& C < ch, cH, Ch, CH"; 140 * RuleBasedCollator myCollator = 141 * new RuleBasedCollator(en_USCollator.getRules() + addRules); 142 * // myCollator contains the new rules 143 * </pre> 144 * 145 * </blockquote> 146 * 147 * How to change the order of non-spacing accents: <blockquote> 148 * 149 * <pre> 150 * // old rule with main accents 151 * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 " 152 * + "; \u0327 ; \u0303 ; \u0304 ; \u0305 " 153 * + "; \u0306 ; \u0307 ; \u0309 ; \u030A " 154 * + "; \u030B ; \u030C ; \u030D ; \u030E " 155 * + "; \u030F ; \u0310 ; \u0311 ; \u0312 " 156 * + "< a , A ; ae, AE ; \u00e6 , \u00c6 " 157 * + "< b , B < c, C < e, E & C < d , D"; 158 * // change the order of accent characters 159 * String addOn = "& \u0300 ; \u0308 ; \u0302"; 160 * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); 161 * </pre> 162 * 163 * </blockquote> 164 * 165 * Putting in a new primary ordering before the default setting, e.g. 
sort English characters before or after Japanese 166 * characters in the Japanese <code>Collator</code>: <blockquote> 167 * 168 * <pre> 169 * // get en_US Collator rules 170 * RuleBasedCollator en_USCollator 171 * = (RuleBasedCollator)Collator.getInstance(Locale.US); 172 * // add a few Japanese characters to sort before English characters 173 * // suppose the last character before the first base letter 'a' in 174 * // the English collation rule is \u2212 175 * String jaString = "& \u2212 <\u3041, \u3042 <\u3043, " 176 * + "\u3044"; 177 * RuleBasedCollator myJapaneseCollator 178 * = new RuleBasedCollator(en_USCollator.getRules() + jaString); 179 * </pre> 180 * 181 * </blockquote> 182 * <p> 183 * This class is not subclassable 184 * 185 * @author Syn Wee Quek 186 */ 187 public final class RuleBasedCollator extends Collator { 188 // public constructors --------------------------------------------------- 189 190 /** 191 * <p> 192 * Constructor that takes the argument rules for customization. 193 * The collator will be based on the CLDR root collation, with the 194 * attributes and re-ordering of the characters specified in the argument rules. 195 * <p> 196 * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization"> 197 * Collation Customization</a> for details on the rule syntax. 198 * 199 * @param rules 200 * the collation rules to build the collation table from. 201 * @exception ParseException 202 * and IOException thrown. ParseException thrown when argument rules have an invalid syntax. 203 * IOException thrown when an error occurred while reading internal data. 204 */ RuleBasedCollator(String rules)205 public RuleBasedCollator(String rules) throws Exception { 206 if (rules == null) { 207 throw new IllegalArgumentException("Collation rules can not be null"); 208 } 209 validLocale = ULocale.ROOT; 210 internalBuildTailoring(rules); 211 } 212 213 /** 214 * Implements from-rule constructors. 
215 * @param rules rule string 216 * @throws Exception 217 */ internalBuildTailoring(String rules)218 private final void internalBuildTailoring(String rules) throws Exception { 219 CollationTailoring base = CollationRoot.getRoot(); 220 // Most code using Collator does not need to build a Collator from rules. 221 // By using reflection, most code will not have a static dependency on the builder code. 222 // CollationBuilder builder = new CollationBuilder(base); 223 ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass()); 224 CollationTailoring t; 225 try { 226 Class<?> builderClass = classLoader.loadClass("ohos.global.icu.impl.coll.CollationBuilder"); 227 Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base); 228 // builder.parseAndBuild(rules); 229 Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class); 230 t = (CollationTailoring)parseAndBuild.invoke(builder, rules); 231 } catch(InvocationTargetException e) { 232 throw (Exception)e.getTargetException(); 233 } 234 t.actualLocale = null; 235 adoptTailoring(t); 236 } 237 238 // public methods -------------------------------------------------------- 239 240 /** 241 * Clones the RuleBasedCollator 242 * 243 * @return a new instance of this RuleBasedCollator object 244 */ 245 @Override clone()246 public Object clone() throws CloneNotSupportedException { 247 if (isFrozen()) { 248 return this; 249 } 250 return cloneAsThawed(); 251 } 252 initMaxExpansions()253 private final void initMaxExpansions() { 254 synchronized(tailoring) { 255 if (tailoring.maxExpansions == null) { 256 tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data); 257 } 258 } 259 } 260 261 /** 262 * Return a CollationElementIterator for the given String. 
263 * 264 * @see CollationElementIterator 265 */ getCollationElementIterator(String source)266 public CollationElementIterator getCollationElementIterator(String source) { 267 initMaxExpansions(); 268 return new CollationElementIterator(source, this); 269 } 270 271 /** 272 * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be 273 * preserved since a new copy will be created for use. 274 * 275 * @see CollationElementIterator 276 */ getCollationElementIterator(CharacterIterator source)277 public CollationElementIterator getCollationElementIterator(CharacterIterator source) { 278 initMaxExpansions(); 279 CharacterIterator newsource = (CharacterIterator) source.clone(); 280 return new CollationElementIterator(newsource, this); 281 } 282 283 /** 284 * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be 285 * preserved since a new copy will be created for use. 286 * 287 * @see CollationElementIterator 288 */ getCollationElementIterator(UCharacterIterator source)289 public CollationElementIterator getCollationElementIterator(UCharacterIterator source) { 290 initMaxExpansions(); 291 return new CollationElementIterator(source, this); 292 } 293 294 // Freezable interface implementation ------------------------------------------------- 295 296 /** 297 * Determines whether the object has been frozen or not. 298 * 299 * <p>An unfrozen Collator is mutable and not thread-safe. 300 * A frozen Collator is immutable and thread-safe. 301 */ 302 @Override isFrozen()303 public boolean isFrozen() { 304 return frozenLock != null; 305 } 306 307 /** 308 * Freezes the collator. 309 * @return the collator itself. 
310 */ 311 @Override freeze()312 public Collator freeze() { 313 if (!isFrozen()) { 314 frozenLock = new ReentrantLock(); 315 if (collationBuffer == null) { 316 collationBuffer = new CollationBuffer(data); 317 } 318 } 319 return this; 320 } 321 322 /** 323 * Provides for the clone operation. Any clone is initially unfrozen. 324 */ 325 @Override cloneAsThawed()326 public RuleBasedCollator cloneAsThawed() { 327 try { 328 RuleBasedCollator result = (RuleBasedCollator) super.clone(); 329 // since all collation data in the RuleBasedCollator do not change 330 // we can safely assign the result.fields to this collator 331 // except in cases where we can't 332 result.settings = settings.clone(); 333 result.collationBuffer = null; 334 result.frozenLock = null; 335 return result; 336 } catch (CloneNotSupportedException e) { 337 // Clone is implemented 338 return null; 339 } 340 } 341 342 // public setters -------------------------------------------------------- 343 checkNotFrozen()344 private void checkNotFrozen() { 345 if (isFrozen()) { 346 throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator"); 347 } 348 } 349 getOwnedSettings()350 private final CollationSettings getOwnedSettings() { 351 return settings.copyOnWrite(); 352 } 353 getDefaultSettings()354 private final CollationSettings getDefaultSettings() { 355 return tailoring.settings.readOnly(); 356 } 357 358 /** 359 * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator 360 * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a 361 * correct JIS collation order, distinguishing between Katakana and Hiragana characters. 362 * 363 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 364 * Since ICU 50, this attribute is not settable any more via API functions. 
365 * Since CLDR 25/ICU 53, explicit quaternary relations are used 366 * to achieve the same Japanese sort order. 367 * 368 * @param flag 369 * true if Hiragana Quaternary mode is to be on, false otherwise 370 * @see #setHiraganaQuaternaryDefault 371 * @see #isHiraganaQuaternary 372 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 373 * @hide deprecated on icu4j-org 374 */ 375 @Deprecated setHiraganaQuaternary(boolean flag)376 public void setHiraganaQuaternary(boolean flag) { 377 checkNotFrozen(); 378 } 379 380 /** 381 * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See 382 * setHiraganaQuaternary(boolean) for more details. 383 * 384 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 385 * Since ICU 50, this attribute is not settable any more via API functions. 386 * Since CLDR 25/ICU 53, explicit quaternary relations are used 387 * to achieve the same Japanese sort order. 388 * 389 * @see #setHiraganaQuaternary(boolean) 390 * @see #isHiraganaQuaternary 391 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 392 * @hide deprecated on icu4j-org 393 */ 394 @Deprecated setHiraganaQuaternaryDefault()395 public void setHiraganaQuaternaryDefault() { 396 checkNotFrozen(); 397 } 398 399 /** 400 * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The 401 * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case 402 * characters first. 
403 * 404 * @param upperfirst 405 * true to sort uppercase characters before lowercase characters, false to sort lowercase characters 406 * before uppercase characters 407 * @see #isLowerCaseFirst 408 * @see #isUpperCaseFirst 409 * @see #setLowerCaseFirst 410 * @see #setCaseFirstDefault 411 */ setUpperCaseFirst(boolean upperfirst)412 public void setUpperCaseFirst(boolean upperfirst) { 413 checkNotFrozen(); 414 if (upperfirst == isUpperCaseFirst()) { return; } 415 CollationSettings ownedSettings = getOwnedSettings(); 416 ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0); 417 setFastLatinOptions(ownedSettings); 418 } 419 420 /** 421 * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The 422 * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper 423 * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences. 424 * 425 * @param lowerfirst 426 * true for sorting lower cased characters before upper cased characters, false to ignore case 427 * preferences. 428 * @see #isLowerCaseFirst 429 * @see #isUpperCaseFirst 430 * @see #setUpperCaseFirst 431 * @see #setCaseFirstDefault 432 */ setLowerCaseFirst(boolean lowerfirst)433 public void setLowerCaseFirst(boolean lowerfirst) { 434 checkNotFrozen(); 435 if (lowerfirst == isLowerCaseFirst()) { return; } 436 CollationSettings ownedSettings = getOwnedSettings(); 437 ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0); 438 setFastLatinOptions(ownedSettings); 439 } 440 441 /** 442 * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See 443 * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details. 
444 * 445 * @see #isLowerCaseFirst 446 * @see #isUpperCaseFirst 447 * @see #setLowerCaseFirst(boolean) 448 * @see #setUpperCaseFirst(boolean) 449 */ setCaseFirstDefault()450 public final void setCaseFirstDefault() { 451 checkNotFrozen(); 452 CollationSettings defaultSettings = getDefaultSettings(); 453 if(settings.readOnly() == defaultSettings) { return; } 454 CollationSettings ownedSettings = getOwnedSettings(); 455 ownedSettings.setCaseFirstDefault(defaultSettings.options); 456 setFastLatinOptions(ownedSettings); 457 } 458 459 /** 460 * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See 461 * setAlternateHandling(boolean) for more details. 462 * 463 * @see #setAlternateHandlingShifted(boolean) 464 * @see #isAlternateHandlingShifted() 465 */ setAlternateHandlingDefault()466 public void setAlternateHandlingDefault() { 467 checkNotFrozen(); 468 CollationSettings defaultSettings = getDefaultSettings(); 469 if(settings.readOnly() == defaultSettings) { return; } 470 CollationSettings ownedSettings = getOwnedSettings(); 471 ownedSettings.setAlternateHandlingDefault(defaultSettings.options); 472 setFastLatinOptions(ownedSettings); 473 } 474 475 /** 476 * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See 477 * setCaseLevel(boolean) for more details. 478 * 479 * @see #setCaseLevel(boolean) 480 * @see #isCaseLevel 481 */ setCaseLevelDefault()482 public void setCaseLevelDefault() { 483 checkNotFrozen(); 484 CollationSettings defaultSettings = getDefaultSettings(); 485 if(settings.readOnly() == defaultSettings) { return; } 486 CollationSettings ownedSettings = getOwnedSettings(); 487 ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options); 488 setFastLatinOptions(ownedSettings); 489 } 490 491 /** 492 * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. 
See 493 * setDecomposition(int) for more details. 494 * 495 * @see #getDecomposition 496 * @see #setDecomposition(int) 497 */ setDecompositionDefault()498 public void setDecompositionDefault() { 499 checkNotFrozen(); 500 CollationSettings defaultSettings = getDefaultSettings(); 501 if(settings.readOnly() == defaultSettings) { return; } 502 CollationSettings ownedSettings = getOwnedSettings(); 503 ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options); 504 setFastLatinOptions(ownedSettings); 505 } 506 507 /** 508 * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See 509 * setFrenchCollation(boolean) for more details. 510 * 511 * @see #isFrenchCollation 512 * @see #setFrenchCollation(boolean) 513 */ setFrenchCollationDefault()514 public void setFrenchCollationDefault() { 515 checkNotFrozen(); 516 CollationSettings defaultSettings = getDefaultSettings(); 517 if(settings.readOnly() == defaultSettings) { return; } 518 CollationSettings ownedSettings = getOwnedSettings(); 519 ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options); 520 setFastLatinOptions(ownedSettings); 521 } 522 523 /** 524 * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See 525 * setStrength(int) for more details. 526 * 527 * @see #setStrength(int) 528 * @see #getStrength 529 */ setStrengthDefault()530 public void setStrengthDefault() { 531 checkNotFrozen(); 532 CollationSettings defaultSettings = getDefaultSettings(); 533 if(settings.readOnly() == defaultSettings) { return; } 534 CollationSettings ownedSettings = getOwnedSettings(); 535 ownedSettings.setStrengthDefault(defaultSettings.options); 536 setFastLatinOptions(ownedSettings); 537 } 538 539 /** 540 * Method to set numeric collation to its default value. 
541 * 542 * @see #getNumericCollation 543 * @see #setNumericCollation 544 */ setNumericCollationDefault()545 public void setNumericCollationDefault() { 546 checkNotFrozen(); 547 CollationSettings defaultSettings = getDefaultSettings(); 548 if(settings.readOnly() == defaultSettings) { return; } 549 CollationSettings ownedSettings = getOwnedSettings(); 550 ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options); 551 setFastLatinOptions(ownedSettings); 552 } 553 554 /** 555 * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false, 556 * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted 557 * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture"> 558 * French collation</a> for more information. 559 * 560 * @param flag 561 * true to set the French collation on, false to set it off 562 * @see #isFrenchCollation 563 * @see #setFrenchCollationDefault 564 */ setFrenchCollation(boolean flag)565 public void setFrenchCollation(boolean flag) { 566 checkNotFrozen(); 567 if(flag == isFrenchCollation()) { return; } 568 CollationSettings ownedSettings = getOwnedSettings(); 569 ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag); 570 setFastLatinOptions(ownedSettings); 571 } 572 573 /** 574 * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition 575 * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This 576 * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false, 577 * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all 578 * the code points with non-ignorable primary weights in the same way. 
If the mode is set to true, the behavior 579 * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the 580 * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order. 581 * 582 * @param shifted 583 * true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior. 584 * @see #isAlternateHandlingShifted 585 * @see #setAlternateHandlingDefault 586 */ setAlternateHandlingShifted(boolean shifted)587 public void setAlternateHandlingShifted(boolean shifted) { 588 checkNotFrozen(); 589 if(shifted == isAlternateHandlingShifted()) { return; } 590 CollationSettings ownedSettings = getOwnedSettings(); 591 ownedSettings.setAlternateHandlingShifted(shifted); 592 setFastLatinOptions(ownedSettings); 593 } 594 595 /** 596 * <p> 597 * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known 598 * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level 599 * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value 600 * is false, which means the case level is not generated. The contents of the case level are affected by the case 601 * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable 602 * case level. 603 * <p> 604 * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case 605 * level</a> for more information. 
606 * 607 * @param flag 608 * true if case level sorting is required, false otherwise 609 * @see #setCaseLevelDefault 610 * @see #isCaseLevel 611 */ setCaseLevel(boolean flag)612 public void setCaseLevel(boolean flag) { 613 checkNotFrozen(); 614 if(flag == isCaseLevel()) { return; } 615 CollationSettings ownedSettings = getOwnedSettings(); 616 ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag); 617 setFastLatinOptions(ownedSettings); 618 } 619 620 /** 621 * Sets the decomposition mode of this Collator. Setting this 622 * decomposition attribute with CANONICAL_DECOMPOSITION allows the 623 * Collator to handle un-normalized text properly, producing the 624 * same results as if the text were normalized. If 625 * NO_DECOMPOSITION is set, it is the user's responsibility to 626 * insure that all text is already in the appropriate form before 627 * a comparison or before getting a CollationKey. Adjusting 628 * decomposition mode allows the user to select between faster and 629 * more complete collation behavior. 630 * 631 * <p>Since a great many of the world's languages do not require 632 * text normalization, most locales set NO_DECOMPOSITION as the 633 * default decomposition mode. 634 * 635 * The default decompositon mode for the Collator is 636 * NO_DECOMPOSITON, unless specified otherwise by the locale used 637 * to create the Collator. 638 * 639 * <p>See getDecomposition for a description of decomposition 640 * mode. 641 * 642 * @param decomposition the new decomposition mode 643 * @see #getDecomposition 644 * @see #NO_DECOMPOSITION 645 * @see #CANONICAL_DECOMPOSITION 646 * @throws IllegalArgumentException If the given value is not a valid 647 * decomposition mode. 
648 */ 649 @Override setDecomposition(int decomposition)650 public void setDecomposition(int decomposition) 651 { 652 checkNotFrozen(); 653 boolean flag; 654 switch(decomposition) { 655 case NO_DECOMPOSITION: 656 flag = false; 657 break; 658 case CANONICAL_DECOMPOSITION: 659 flag = true; 660 break; 661 default: 662 throw new IllegalArgumentException("Wrong decomposition mode."); 663 } 664 if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; } 665 CollationSettings ownedSettings = getOwnedSettings(); 666 ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag); 667 setFastLatinOptions(ownedSettings); 668 } 669 670 /** 671 * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference 672 * considered significant during comparison. 673 * 674 * <p>See the Collator class description for an example of use. 675 * 676 * @param newStrength 677 * the new strength value. 678 * @see #getStrength 679 * @see #setStrengthDefault 680 * @see #PRIMARY 681 * @see #SECONDARY 682 * @see #TERTIARY 683 * @see #QUATERNARY 684 * @see #IDENTICAL 685 * @exception IllegalArgumentException 686 * If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. 687 */ 688 @Override setStrength(int newStrength)689 public void setStrength(int newStrength) { 690 checkNotFrozen(); 691 if(newStrength == getStrength()) { return; } 692 CollationSettings ownedSettings = getOwnedSettings(); 693 ownedSettings.setStrength(newStrength); 694 setFastLatinOptions(ownedSettings); 695 } 696 697 /** 698 * <strong>[icu]</strong> Sets the variable top to the top of the specified reordering group. 699 * The variable top determines the highest-sorting character 700 * which is affected by the alternate handling behavior. 701 * If that attribute is set to NON_IGNORABLE, then the variable top has no effect. 
702 * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION, 703 * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY; 704 * or Collator.ReorderCodes.DEFAULT to restore the default max variable group 705 * @return this 706 * @see #getMaxVariable 707 */ 708 @Override setMaxVariable(int group)709 public RuleBasedCollator setMaxVariable(int group) { 710 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1. 711 int value; 712 if(group == Collator.ReorderCodes.DEFAULT) { 713 value = -1; // UCOL_DEFAULT 714 } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) { 715 value = group - Collator.ReorderCodes.FIRST; 716 } else { 717 throw new IllegalArgumentException("illegal max variable group " + group); 718 } 719 int oldValue = settings.readOnly().getMaxVariable(); 720 if(value == oldValue) { 721 return this; 722 } 723 CollationSettings defaultSettings = getDefaultSettings(); 724 if(settings.readOnly() == defaultSettings) { 725 if(value < 0) { // UCOL_DEFAULT 726 return this; 727 } 728 } 729 CollationSettings ownedSettings = getOwnedSettings(); 730 731 if(group == Collator.ReorderCodes.DEFAULT) { 732 group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable(); 733 } 734 long varTop = data.getLastPrimaryForGroup(group); 735 assert(varTop != 0); 736 ownedSettings.setMaxVariable(value, defaultSettings.options); 737 ownedSettings.variableTop = varTop; 738 setFastLatinOptions(ownedSettings); 739 return this; 740 } 741 742 /** 743 * <strong>[icu]</strong> Returns the maximum reordering group whose characters are affected by 744 * the alternate handling behavior. 745 * @return the maximum variable reordering group. 
746 * @see #setMaxVariable 747 */ 748 @Override getMaxVariable()749 public int getMaxVariable() { 750 return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable(); 751 } 752 753 /** 754 * <strong>[icu]</strong> Sets the variable top to the primary weight of the specified string. 755 * 756 * <p>Beginning with ICU 53, the variable top is pinned to 757 * the top of one of the supported reordering groups, 758 * and it must not be beyond the last of those groups. 759 * See {@link #setMaxVariable(int)}. 760 * 761 * @param varTop 762 * one or more (if contraction) characters to which the variable top should be set 763 * @return variable top primary weight 764 * @exception IllegalArgumentException 765 * is thrown if varTop argument is not a valid variable top element. A variable top element is 766 * invalid when 767 * <ul> 768 * <li>it is a contraction that does not exist in the Collation order 769 * <li>the variable top is beyond 770 * the last reordering group supported by setMaxVariable() 771 * <li>when the varTop argument is null or zero in length. 772 * </ul> 773 * @see #getVariableTop 774 * @see RuleBasedCollator#setAlternateHandlingShifted 775 * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead. 
776 * @hide deprecated on icu4j-org 777 */ 778 @Override 779 @Deprecated setVariableTop(String varTop)780 public int setVariableTop(String varTop) { 781 checkNotFrozen(); 782 if (varTop == null || varTop.length() == 0) { 783 throw new IllegalArgumentException("Variable top argument string can not be null or zero in length."); 784 } 785 boolean numeric = settings.readOnly().isNumeric(); 786 long ce1, ce2; 787 if(settings.readOnly().dontCheckFCD()) { 788 UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0); 789 ce1 = ci.nextCE(); 790 ce2 = ci.nextCE(); 791 } else { 792 FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0); 793 ce1 = ci.nextCE(); 794 ce2 = ci.nextCE(); 795 } 796 if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) { 797 throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element"); 798 } 799 internalSetVariableTop(ce1 >>> 32); 800 return (int)settings.readOnly().variableTop; 801 } 802 803 /** 804 * <strong>[icu]</strong> Sets the variable top to the specified primary weight. 805 * 806 * <p>Beginning with ICU 53, the variable top is pinned to 807 * the top of one of the supported reordering groups, 808 * and it must not be beyond the last of those groups. 809 * See {@link #setMaxVariable(int)}. 810 * 811 * @param varTop primary weight, as returned by setVariableTop or getVariableTop 812 * @see #getVariableTop 813 * @see #setVariableTop(String) 814 * @deprecated ICU 53 Call setMaxVariable() instead. 815 * @hide deprecated on icu4j-org 816 */ 817 @Override 818 @Deprecated setVariableTop(int varTop)819 public void setVariableTop(int varTop) { 820 checkNotFrozen(); 821 internalSetVariableTop(varTop & 0xffffffffL); 822 } 823 internalSetVariableTop(long varTop)824 private void internalSetVariableTop(long varTop) { 825 if(varTop != settings.readOnly().variableTop) { 826 // Pin the variable top to the end of the reordering group which contains it. 
827 // Only a few special groups are supported. 828 int group = data.getGroupForPrimary(varTop); 829 if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) { 830 throw new IllegalArgumentException("The variable top must be a primary weight in " + 831 "the space/punctuation/symbols/currency symbols range"); 832 } 833 long v = data.getLastPrimaryForGroup(group); 834 assert(v != 0 && v >= varTop); 835 varTop = v; 836 if(varTop != settings.readOnly().variableTop) { 837 CollationSettings ownedSettings = getOwnedSettings(); 838 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST, 839 getDefaultSettings().options); 840 ownedSettings.variableTop = varTop; 841 setFastLatinOptions(ownedSettings); 842 } 843 } 844 } 845 846 /** 847 * <strong>[icu]</strong> When numeric collation is turned on, this Collator makes 848 * substrings of digits sort according to their numeric values. 849 * 850 * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest 851 * digit substring that can be treated as a single unit is 852 * 254 digits (not counting leading zeros). If a digit substring is 853 * longer than that, the digits beyond the limit will be treated as a 854 * separate digit substring. 855 * 856 * <p>A "digit" in this sense is a code point with General_Category=Nd, 857 * which does not include circled numbers, roman numerals, etc. 858 * Only a contiguous digit substring is considered, that is, 859 * non-negative integers without separators. 860 * There is no support for plus/minus signs, decimals, exponents, etc. 
861 * 862 * @param flag 863 * true to turn numeric collation on and false to turn it off 864 * @see #getNumericCollation 865 * @see #setNumericCollationDefault 866 */ setNumericCollation(boolean flag)867 public void setNumericCollation(boolean flag) { 868 checkNotFrozen(); 869 // sort substrings of digits as numbers 870 if(flag == getNumericCollation()) { return; } 871 CollationSettings ownedSettings = getOwnedSettings(); 872 ownedSettings.setFlag(CollationSettings.NUMERIC, flag); 873 setFastLatinOptions(ownedSettings); 874 } 875 876 /** 877 * {@inheritDoc} 878 * 879 * @param order the reordering codes to apply to this collator; if this is null or an empty array 880 * then this clears any existing reordering 881 * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts) 882 * @see #getReorderCodes 883 * @see Collator#getEquivalentReorderCodes 884 * @see Collator.ReorderCodes 885 * @see UScript 886 */ 887 @Override setReorderCodes(int... order)888 public void setReorderCodes(int... order) { 889 checkNotFrozen(); 890 int length = (order != null) ? order.length : 0; 891 if(length == 1 && order[0] == ReorderCodes.NONE) { 892 length = 0; 893 } 894 if(length == 0 ? 
895 settings.readOnly().reorderCodes.length == 0 : 896 Arrays.equals(order, settings.readOnly().reorderCodes)) { 897 return; 898 } 899 CollationSettings defaultSettings = getDefaultSettings(); 900 if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) { 901 if(settings.readOnly() != defaultSettings) { 902 CollationSettings ownedSettings = getOwnedSettings(); 903 ownedSettings.copyReorderingFrom(defaultSettings); 904 setFastLatinOptions(ownedSettings); 905 } 906 return; 907 } 908 CollationSettings ownedSettings = getOwnedSettings(); 909 if(length == 0) { 910 ownedSettings.resetReordering(); 911 } else { 912 ownedSettings.setReordering(data, order.clone()); 913 } 914 setFastLatinOptions(ownedSettings); 915 } 916 setFastLatinOptions(CollationSettings ownedSettings)917 private void setFastLatinOptions(CollationSettings ownedSettings) { 918 ownedSettings.fastLatinOptions = CollationFastLatin.getOptions( 919 data, ownedSettings, ownedSettings.fastLatinPrimaries); 920 } 921 922 // public getters -------------------------------------------------------- 923 924 /** 925 * Gets the collation tailoring rules for this RuleBasedCollator. 926 * Equivalent to String getRules(false). 927 * 928 * @return the collation tailoring rules 929 * @see #getRules(boolean) 930 */ getRules()931 public String getRules() { 932 return tailoring.getRules(); 933 } 934 935 /** 936 * Returns current rules. 937 * The argument defines whether full rules (root collation + tailored) rules are returned 938 * or just the tailoring. 939 * 940 * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order. 941 * They are almost never used or useful at runtime and can be removed from the data. 942 * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide: 943 * Collation Customization, Building on Existing Locales</a> 944 * 945 * <p>{@link #getRules()} should normally be used instead. 
946 * @param fullrules 947 * true if the rules that defines the full set of collation order is required, otherwise false for 948 * returning only the tailored rules 949 * @return the current rules that defines this Collator. 950 * @see #getRules() 951 */ getRules(boolean fullrules)952 public String getRules(boolean fullrules) { 953 if (!fullrules) { 954 return tailoring.getRules(); 955 } 956 return CollationLoader.getRootRules() + tailoring.getRules(); 957 } 958 959 /** 960 * Get a UnicodeSet that contains all the characters and sequences tailored in this collator. 961 * 962 * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently 963 * than in the root collator. 964 */ 965 @Override getTailoredSet()966 public UnicodeSet getTailoredSet() { 967 UnicodeSet tailored = new UnicodeSet(); 968 if(data.base != null) { 969 new TailoredSet(tailored).forData(data); 970 } 971 return tailored; 972 } 973 974 /** 975 * Gets unicode sets containing contractions and/or expansions of a collator 976 * 977 * @param contractions 978 * if not null, set to contain contractions 979 * @param expansions 980 * if not null, set to contain expansions 981 * @param addPrefixes 982 * add the prefix contextual elements to contractions 983 * @throws Exception 984 * Throws an exception if any errors occurs. 985 */ getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)986 public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes) 987 throws Exception { 988 if (contractions != null) { 989 contractions.clear(); 990 } 991 if (expansions != null) { 992 expansions.clear(); 993 } 994 new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data); 995 } 996 997 /** 998 * Adds the contractions that start with character c to the set. 999 * Ignores prefixes. Used by AlphabeticIndex. 1000 * @deprecated This API is ICU internal only. 
1001 * @hide draft / provisional / internal are hidden on OHOS 1002 */ 1003 @Deprecated internalAddContractions(int c, UnicodeSet set)1004 void internalAddContractions(int c, UnicodeSet set) { 1005 new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c); 1006 } 1007 1008 /** 1009 * <p> 1010 * Get a Collation key for the argument String source from this RuleBasedCollator. 1011 * <p> 1012 * General recommendation: <br> 1013 * If comparison are to be done to the same String multiple times, it would be more efficient to generate 1014 * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If the each 1015 * Strings are compared to only once, using the method RuleBasedCollator.compare(String, String) will have a better 1016 * performance. 1017 * <p> 1018 * See the class documentation for an explanation about CollationKeys. 1019 * 1020 * @param source 1021 * the text String to be transformed into a collation key. 1022 * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source 1023 * String is null, a null CollationKey is returned. 
1024 * @see CollationKey 1025 * @see #compare(String, String) 1026 * @see #getRawCollationKey 1027 */ 1028 @Override getCollationKey(String source)1029 public CollationKey getCollationKey(String source) { 1030 if (source == null) { 1031 return null; 1032 } 1033 CollationBuffer buffer = null; 1034 try { 1035 buffer = getCollationBuffer(); 1036 return getCollationKey(source, buffer); 1037 } finally { 1038 releaseCollationBuffer(buffer); 1039 } 1040 } 1041 getCollationKey(String source, CollationBuffer buffer)1042 private CollationKey getCollationKey(String source, CollationBuffer buffer) { 1043 buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer); 1044 return new CollationKey(source, buffer.rawCollationKey); 1045 } 1046 1047 /** 1048 * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the 1049 * result into the user provided argument key. If key has a internal byte array of length that's too small for the 1050 * result, the internal byte array will be grown to the exact required size. 1051 * 1052 * @param source the text String to be transformed into a RawCollationKey 1053 * @param key output RawCollationKey to store results 1054 * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user 1055 * provided key will be returned. 
1056 * @see #getCollationKey 1057 * @see #compare(String, String) 1058 * @see RawCollationKey 1059 * @hide unsupported on OHOS 1060 */ 1061 @Override getRawCollationKey(String source, RawCollationKey key)1062 public RawCollationKey getRawCollationKey(String source, RawCollationKey key) { 1063 if (source == null) { 1064 return null; 1065 } 1066 CollationBuffer buffer = null; 1067 try { 1068 buffer = getCollationBuffer(); 1069 return getRawCollationKey(source, key, buffer); 1070 } finally { 1071 releaseCollationBuffer(buffer); 1072 } 1073 } 1074 1075 private static final class CollationKeyByteSink extends SortKeyByteSink { CollationKeyByteSink(RawCollationKey key)1076 CollationKeyByteSink(RawCollationKey key) { 1077 super(key.bytes); 1078 key_ = key; 1079 } 1080 1081 @Override AppendBeyondCapacity(byte[] bytes, int start, int n, int length)1082 protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) { 1083 // n > 0 && appended_ > capacity_ 1084 if (Resize(n, length)) { 1085 System.arraycopy(bytes, start, buffer_, length, n); 1086 } 1087 } 1088 1089 @Override Resize(int appendCapacity, int length)1090 protected boolean Resize(int appendCapacity, int length) { 1091 int newCapacity = 2 * buffer_.length; 1092 int altCapacity = length + 2 * appendCapacity; 1093 if (newCapacity < altCapacity) { 1094 newCapacity = altCapacity; 1095 } 1096 if (newCapacity < 200) { 1097 newCapacity = 200; 1098 } 1099 // Do not call key_.ensureCapacity(newCapacity) because we do not 1100 // keep key_.size in sync with appended_. 1101 // We only set it when we are done. 
1102 byte[] newBytes = new byte[newCapacity]; 1103 System.arraycopy(buffer_, 0, newBytes, 0, length); 1104 buffer_ = key_.bytes = newBytes; 1105 return true; 1106 } 1107 1108 private RawCollationKey key_; 1109 } 1110 getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer)1111 private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) { 1112 if (key == null) { 1113 key = new RawCollationKey(simpleKeyLengthEstimate(source)); 1114 } else if (key.bytes == null) { 1115 key.bytes = new byte[simpleKeyLengthEstimate(source)]; 1116 } 1117 CollationKeyByteSink sink = new CollationKeyByteSink(key); 1118 writeSortKey(source, sink, buffer); 1119 key.size = sink.NumberOfBytesAppended(); 1120 return key; 1121 } 1122 simpleKeyLengthEstimate(CharSequence source)1123 private int simpleKeyLengthEstimate(CharSequence source) { 1124 return 2 * source.length() + 10; 1125 } 1126 writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer)1127 private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) { 1128 boolean numeric = settings.readOnly().isNumeric(); 1129 if(settings.readOnly().dontCheckFCD()) { 1130 buffer.leftUTF16CollIter.setText(numeric, s, 0); 1131 CollationKeys.writeSortKeyUpToQuaternary( 1132 buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(), 1133 sink, Collation.PRIMARY_LEVEL, 1134 CollationKeys.SIMPLE_LEVEL_FALLBACK, true); 1135 } else { 1136 buffer.leftFCDUTF16Iter.setText(numeric, s, 0); 1137 CollationKeys.writeSortKeyUpToQuaternary( 1138 buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(), 1139 sink, Collation.PRIMARY_LEVEL, 1140 CollationKeys.SIMPLE_LEVEL_FALLBACK, true); 1141 } 1142 if(settings.readOnly().getStrength() == IDENTICAL) { 1143 writeIdenticalLevel(s, sink); 1144 } 1145 sink.Append(Collation.TERMINATOR_BYTE); 1146 } 1147 writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink)1148 
private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) { 1149 // NFD quick check 1150 int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null); 1151 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 1152 // Sync the ByteArrayWrapper size with the key length. 1153 sink.key_.size = sink.NumberOfBytesAppended(); 1154 int prev = 0; 1155 if(nfdQCYesLimit != 0) { 1156 prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_); 1157 } 1158 // Is there non-NFD text? 1159 if(nfdQCYesLimit < s.length()) { 1160 int destLengthEstimate = s.length() - nfdQCYesLimit; 1161 StringBuilder nfd = new StringBuilder(); 1162 data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate); 1163 BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_); 1164 } 1165 // Sync the key with the buffer again which got bytes appended and may have been reallocated. 1166 sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size); 1167 } 1168 1169 /** 1170 * Returns the CEs for the string. 1171 * @param str the string 1172 * @deprecated This API is ICU internal only. 
1173 * @hide deprecated on icu4j-org 1174 * @hide draft / provisional / internal are hidden on OHOS 1175 */ 1176 @Deprecated internalGetCEs(CharSequence str)1177 public long[] internalGetCEs(CharSequence str) { 1178 CollationBuffer buffer = null; 1179 try { 1180 buffer = getCollationBuffer(); 1181 boolean numeric = settings.readOnly().isNumeric(); 1182 CollationIterator iter; 1183 if(settings.readOnly().dontCheckFCD()) { 1184 buffer.leftUTF16CollIter.setText(numeric, str, 0); 1185 iter = buffer.leftUTF16CollIter; 1186 } else { 1187 buffer.leftFCDUTF16Iter.setText(numeric, str, 0); 1188 iter = buffer.leftFCDUTF16Iter; 1189 } 1190 int length = iter.fetchCEs() - 1; 1191 assert length >= 0 && iter.getCE(length) == Collation.NO_CE; 1192 long[] ces = new long[length]; 1193 System.arraycopy(iter.getCEs(), 0, ces, 0, length); 1194 return ces; 1195 } finally { 1196 releaseCollationBuffer(buffer); 1197 } 1198 } 1199 1200 /** 1201 * Returns this Collator's strength attribute. The strength attribute 1202 * determines the minimum level of difference considered significant. 1203 * 1204 * <p><strong>[icu] Note:</strong> This can return QUATERNARY strength, which is not supported by the 1205 * JDK version. 1206 * 1207 * <p>See the Collator class description for more details. 1208 * 1209 * @return this Collator's current strength attribute. 1210 * @see #setStrength 1211 * @see #PRIMARY 1212 * @see #SECONDARY 1213 * @see #TERTIARY 1214 * @see #QUATERNARY 1215 * @see #IDENTICAL 1216 */ 1217 @Override getStrength()1218 public int getStrength() { 1219 return settings.readOnly().getStrength(); 1220 } 1221 1222 /** 1223 * Returns the decomposition mode of this Collator. The decomposition mode 1224 * determines how Unicode composed characters are handled. 1225 * 1226 * <p>See the Collator class description for more details. 
1227 * 1228 * @return the decomposition mode 1229 * @see #setDecomposition 1230 * @see #NO_DECOMPOSITION 1231 * @see #CANONICAL_DECOMPOSITION 1232 */ 1233 @Override getDecomposition()1234 public int getDecomposition() { 1235 return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ? 1236 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION; 1237 } 1238 1239 /** 1240 * Return true if an uppercase character is sorted before the corresponding lowercase character. See 1241 * setCaseFirst(boolean) for details. 1242 * 1243 * @see #setUpperCaseFirst 1244 * @see #setLowerCaseFirst 1245 * @see #isLowerCaseFirst 1246 * @see #setCaseFirstDefault 1247 * @return true if upper cased characters are sorted before lower cased characters, false otherwise 1248 */ isUpperCaseFirst()1249 public boolean isUpperCaseFirst() { 1250 return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK); 1251 } 1252 1253 /** 1254 * Return true if a lowercase character is sorted before the corresponding uppercase character. See 1255 * setCaseFirst(boolean) for details. 1256 * 1257 * @see #setUpperCaseFirst 1258 * @see #setLowerCaseFirst 1259 * @see #isUpperCaseFirst 1260 * @see #setCaseFirstDefault 1261 * @return true lower cased characters are sorted before upper cased characters, false otherwise 1262 */ isLowerCaseFirst()1263 public boolean isLowerCaseFirst() { 1264 return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST); 1265 } 1266 1267 /** 1268 * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true, 1269 * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the 1270 * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more 1271 * details. 
1272 * 1273 * @return true or false 1274 * @see #setAlternateHandlingShifted(boolean) 1275 * @see #setAlternateHandlingDefault 1276 */ isAlternateHandlingShifted()1277 public boolean isAlternateHandlingShifted() { 1278 return settings.readOnly().getAlternateHandling(); 1279 } 1280 1281 /** 1282 * Checks if case level is set to true. See setCaseLevel(boolean) for details. 1283 * 1284 * @return the case level mode 1285 * @see #setCaseLevelDefault 1286 * @see #isCaseLevel 1287 * @see #setCaseLevel(boolean) 1288 */ isCaseLevel()1289 public boolean isCaseLevel() { 1290 return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0; 1291 } 1292 1293 /** 1294 * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details. 1295 * 1296 * @return true if French Collation is set to true, false otherwise 1297 * @see #setFrenchCollation(boolean) 1298 * @see #setFrenchCollationDefault 1299 */ isFrenchCollation()1300 public boolean isFrenchCollation() { 1301 return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0; 1302 } 1303 1304 /** 1305 * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details. 1306 * 1307 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 1308 * Since ICU 50, this attribute is not settable any more via API functions. 1309 * Since CLDR 25/ICU 53, explicit quaternary relations are used 1310 * to achieve the same Japanese sort order. 1311 * 1312 * @return false 1313 * @see #setHiraganaQuaternaryDefault 1314 * @see #setHiraganaQuaternary(boolean) 1315 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 1316 * @hide deprecated on icu4j-org 1317 */ 1318 @Deprecated isHiraganaQuaternary()1319 public boolean isHiraganaQuaternary() { 1320 return false; 1321 } 1322 1323 /** 1324 * <strong>[icu]</strong> Gets the variable top value of a Collator. 
1325 * 1326 * @return the variable top primary weight 1327 * @see #getMaxVariable 1328 */ 1329 @Override getVariableTop()1330 public int getVariableTop() { 1331 return (int)settings.readOnly().variableTop; 1332 } 1333 1334 /** 1335 * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a 1336 * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2' 1337 * 1338 * @see #setNumericCollation 1339 * @see #setNumericCollationDefault 1340 * @return true if numeric collation is turned on, false otherwise 1341 */ getNumericCollation()1342 public boolean getNumericCollation() { 1343 return (settings.readOnly().options & CollationSettings.NUMERIC) != 0; 1344 } 1345 1346 /** 1347 * Retrieves the reordering codes for this collator. 1348 * These reordering codes are a combination of UScript codes and ReorderCodes. 1349 * @return a copy of the reordering codes for this collator; 1350 * if none are set then returns an empty array 1351 * @see #setReorderCodes 1352 * @see Collator#getEquivalentReorderCodes 1353 */ 1354 @Override getReorderCodes()1355 public int[] getReorderCodes() { 1356 return settings.readOnly().reorderCodes.clone(); 1357 } 1358 1359 // public other methods ------------------------------------------------- 1360 1361 /** 1362 * {@inheritDoc} 1363 */ 1364 @Override equals(Object obj)1365 public boolean equals(Object obj) { 1366 if (this == obj) { 1367 return true; 1368 } 1369 if (!super.equals(obj)) { 1370 return false; 1371 } 1372 RuleBasedCollator o = (RuleBasedCollator) obj; 1373 if(!settings.readOnly().equals(o.settings.readOnly())) { return false; } 1374 if(data == o.data) { return true; } 1375 boolean thisIsRoot = data.base == null; 1376 boolean otherIsRoot = o.data.base == null; 1377 assert(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be == 1378 if(thisIsRoot != otherIsRoot) { return false; } 1379 String theseRules = 
tailoring.getRules(); 1380 String otherRules = o.tailoring.getRules(); 1381 if((thisIsRoot || theseRules.length() != 0) && 1382 (otherIsRoot || otherRules.length() != 0)) { 1383 // Shortcut: If both collators have valid rule strings, then compare those. 1384 if(theseRules.equals(otherRules)) { return true; } 1385 } 1386 // Different rule strings can result in the same or equivalent tailoring. 1387 // The rule strings are optional in ICU resource bundles, although included by default. 1388 // cloneBinary() drops the rule string. 1389 UnicodeSet thisTailored = getTailoredSet(); 1390 UnicodeSet otherTailored = o.getTailoredSet(); 1391 if(!thisTailored.equals(otherTailored)) { return false; } 1392 // For completeness, we should compare all of the mappings; 1393 // or we should create a list of strings, sort it with one collator, 1394 // and check if both collators compare adjacent strings the same 1395 // (order & strength, down to quaternary); or similar. 1396 // Testing equality of collators seems unusual. 1397 return true; 1398 } 1399 1400 /** 1401 * Generates a unique hash code for this RuleBasedCollator. 1402 * 1403 * @return the unique hash code for this Collator 1404 */ 1405 @Override hashCode()1406 public int hashCode() { 1407 int h = settings.readOnly().hashCode(); 1408 if(data.base == null) { return h; } // root collator 1409 // Do not rely on the rule string, see comments in operator==(). 1410 UnicodeSet set = getTailoredSet(); 1411 UnicodeSetIterator iter = new UnicodeSetIterator(set); 1412 while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) { 1413 h ^= data.getCE32(iter.codepoint); 1414 } 1415 return h; 1416 } 1417 1418 /** 1419 * Compares the source text String to the target text String according to the collation rules, strength and 1420 * decomposition mode for this RuleBasedCollator. 
Returns an integer less than, equal to or greater than zero 1421 * depending on whether the source String is less than, equal to or greater than the target String. See the Collator 1422 * class description for an example of use. 1423 * <p> 1424 * General recommendation: <br> 1425 * If comparison are to be done to the same String multiple times, it would be more efficient to generate 1426 * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed 1427 * performance is critical and object instantiation is to be reduced, further optimization may be achieved by 1428 * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method 1429 * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey 1430 * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key 1431 * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String, 1432 * String) will have a better performance. 1433 * 1434 * @param source 1435 * the source text String. 1436 * @param target 1437 * the target text String. 1438 * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source 1439 * and target are equal, value is greater than zero if source is greater than target. 1440 * @see CollationKey 1441 * @see #getCollationKey 1442 */ 1443 @Override compare(String source, String target)1444 public int compare(String source, String target) { 1445 return doCompare(source, target); 1446 } 1447 1448 /** 1449 * Abstract iterator for identical-level string comparisons. 1450 * Returns FCD code points and handles temporary switching to NFD. 1451 * 1452 * <p>As with CollationIterator, 1453 * Java NFDIterator instances are partially constructed and cached, 1454 * and completed when reset for use. 
1455 * C++ NFDIterator instances are stack-allocated. 1456 */ 1457 private static abstract class NFDIterator { 1458 /** 1459 * Partial constructor, must call reset(). 1460 */ NFDIterator()1461 NFDIterator() {} reset()1462 final void reset() { 1463 index = -1; 1464 } 1465 1466 /** 1467 * Returns the next code point from the internal normalization buffer, 1468 * or else the next text code point. 1469 * Returns -1 at the end of the text. 1470 */ nextCodePoint()1471 final int nextCodePoint() { 1472 if(index >= 0) { 1473 if(index == decomp.length()) { 1474 index = -1; 1475 } else { 1476 int c = Character.codePointAt(decomp, index); 1477 index += Character.charCount(c); 1478 return c; 1479 } 1480 } 1481 return nextRawCodePoint(); 1482 } 1483 /** 1484 * @param nfcImpl 1485 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint() 1486 * @return the first code point in c's decomposition, 1487 * or c itself if it was decomposed already or if it does not decompose 1488 */ nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c)1489 final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) { 1490 if(index >= 0) { return c; } 1491 decomp = nfcImpl.getDecomposition(c); 1492 if(decomp == null) { return c; } 1493 c = Character.codePointAt(decomp, 0); 1494 index = Character.charCount(c); 1495 return c; 1496 } 1497 1498 /** 1499 * Returns the next text code point in FCD order. 1500 * Returns -1 at the end of the text. 
*/
        protected abstract int nextRawCodePoint();

        private String decomp;
        private int index;
    }

    /**
     * NFDIterator that reads raw code points from a CharSequence of UTF-16 code units.
     */
    private static class UTF16NFDIterator extends NFDIterator {
        UTF16NFDIterator() {}

        /**
         * Resets the iterator and points it at {@code seq}, starting at code unit index {@code start}.
         *
         * @param seq the text to iterate over
         * @param start the index of the first code unit to read
         */
        void setText(CharSequence seq, int start) {
            reset();
            s = seq;
            pos = start;
        }

        /**
         * Returns the code point at the current position and advances past it.
         *
         * @return the next code point, or {@link Collation#SENTINEL_CP} at the end of the text
         */
        @Override
        protected int nextRawCodePoint() {
            if(pos == s.length()) { return Collation.SENTINEL_CP; }
            int c = Character.codePointAt(s, pos);
            pos += Character.charCount(c);
            return c;
        }

        /** The text being iterated. */
        protected CharSequence s;
        /** Index into {@link #s} of the next code unit to read. */
        protected int pos;
    }

    /**
     * UTF16NFDIterator that first runs the text through {@code Normalizer2Impl.makeFCD()}.
     * If makeFCD() stops before the end of the input, the remainder is rewritten into
     * an internal StringBuilder and the iterator reads from that instead of the original text.
     */
    private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
        FCDUTF16NFDIterator() {}

        /**
         * Resets the iterator and sets the text starting at {@code start}.
         *
         * @param nfcImpl the normalizer implementation used for the FCD check and rewrite
         * @param seq the text to iterate over
         * @param start the index of the first code unit to read
         */
        void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
            reset();
            // With a null buffer, makeFCD() only reports how far it got.
            int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
            if(spanLimit == seq.length()) {
                // The whole tail passed the check: iterate over the original text.
                s = seq;
                pos = start;
            } else {
                // Reuse the StringBuilder across calls instead of allocating a new one.
                if(str == null) {
                    str = new StringBuilder();
                } else {
                    str.setLength(0);
                }
                // Copy the part that passed, then let makeFCD() append the rest via the buffer.
                str.append(seq, start, spanLimit);
                ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
                nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
                s = str;
                pos = 0;
            }
        }

        /** Lazily created scratch text holding the rewritten input. */
        private StringBuilder str;
    }

    /**
     * Compares the code point sequences produced by two NFDIterators.
     * End of string sorts lowest (-2), then U+FFFE (-1), then decomposed code points.
     *
     * @param nfcImpl the normalizer implementation used to decompose differing code points
     * @param left iterator over the left text
     * @param right iterator over the right text
     * @return {@link Collation#LESS}, {@link Collation#EQUAL} or {@link Collation#GREATER}
     */
    private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
        for(;;) {
            // Fetch the next FCD code point from each string.
            int leftCp = left.nextCodePoint();
            int rightCp = right.nextCodePoint();
            if(leftCp == rightCp) {
                if(leftCp < 0) { break; }
                continue;
            }
            // If they are different, then decompose each and compare again.
            if(leftCp < 0) {
                leftCp = -2;  // end of string
            } else if(leftCp == 0xfffe) {
                leftCp = -1;  // U+FFFE: merge separator
            } else {
                leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
            }
            if(rightCp < 0) {
                rightCp = -2;  // end of string
            } else if(rightCp == 0xfffe) {
                rightCp = -1;  // U+FFFE: merge separator
            } else {
                rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
            }
            if(leftCp < rightCp) { return Collation.LESS; }
            if(leftCp > rightCp) { return Collation.GREATER; }
        }
        return Collation.EQUAL;
    }

    /**
     * Compares two CharSequences.
     *
     * <p>The comparison skips the identical prefix of the two texts, tries the Latin fast path,
     * falls back to the CollationIterator-based comparison when the fast path bails out,
     * and finally compares the identical level when the strength requires it.
     *
     * @param left the first text
     * @param right the second text
     * @return the comparison result; {@link Collation#EQUAL} if both arguments are the same object
     *         or have the same code units
     * @deprecated This API is ICU internal only.
     * @hide deprecated on icu4j-org
     * @hide draft / provisional / internal are hidden on OHOS
     */
    @Override
    @Deprecated
    protected int doCompare(CharSequence left, CharSequence right) {
        if(left == right) {
            return Collation.EQUAL;
        }

        // Identical-prefix test.
        int equalPrefixLength = 0;
        for(;;) {
            if(equalPrefixLength == left.length()) {
                if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
                break;
            } else if(equalPrefixLength == right.length() ||
                      left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
                break;
            }
            ++equalPrefixLength;
        }

        CollationSettings roSettings = settings.readOnly();
        boolean numeric = roSettings.isNumeric();
        if(equalPrefixLength > 0) {
            if((equalPrefixLength != left.length() &&
                        data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
                    (equalPrefixLength != right.length() &&
                        data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
                // Identical prefix: Back up to the start of a contraction or reordering sequence.
                while(--equalPrefixLength > 0 &&
                        data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
            }
            // Notes:
            // - A longer string can compare equal to a prefix of it if only ignorables follow.
            // - With a backward level, a longer string can compare less-than a prefix of it.

            // Pass the actual start of each string into the CollationIterators,
            // plus the equalPrefixLength position,
            // so that prefix matches back into the equal prefix work.
        }

        int result;
        int fastLatinOptions = roSettings.fastLatinOptions;
        // The fast path is attempted only if it is enabled and the first differing
        // code unit of each text (if any) is within the fast-Latin range.
        if(fastLatinOptions >= 0 &&
                (equalPrefixLength == left.length() ||
                    left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
                (equalPrefixLength == right.length() ||
                    right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
            result = CollationFastLatin.compareUTF16(data.fastLatinTable,
                                                      roSettings.fastLatinPrimaries,
                                                      fastLatinOptions,
                                                      left, right, equalPrefixLength);
        } else {
            result = CollationFastLatin.BAIL_OUT_RESULT;
        }

        if(result == CollationFastLatin.BAIL_OUT_RESULT) {
            // Acquire before entering the try block so that the finally clause
            // only releases a buffer (and lock) that was actually acquired.
            CollationBuffer buffer = getCollationBuffer();
            try {
                if(roSettings.dontCheckFCD()) {
                    buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
                    buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
                    result = CollationCompare.compareUpToQuaternary(
                            buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
                } else {
                    buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
                    buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
                    result = CollationCompare.compareUpToQuaternary(
                            buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
                }
            } finally {
                releaseCollationBuffer(buffer);
            }
        }
        if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
            return result;
        }

        // Same acquire-then-try pattern as above.
        CollationBuffer buffer = getCollationBuffer();
        try {
            // Compare identical level.
            Normalizer2Impl nfcImpl = data.nfcImpl;
            if(roSettings.dontCheckFCD()) {
                buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
                buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
                return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
            } else {
                buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
                buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
                return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
            }
        } finally {
            releaseCollationBuffer(buffer);
        }
    }

    // package private constructors ------------------------------------------

    /**
     * Creates a collator from a tailoring, with its own clone of the tailoring's settings.
     *
     * @param t the tailoring that supplies the collation data and settings
     * @param vl the valid locale to report from {@link #getLocale(ULocale.Type)}
     */
    RuleBasedCollator(CollationTailoring t, ULocale vl) {
        data = t.data;
        settings = t.settings.clone();
        tailoring = t;
        validLocale = vl;
        actualLocaleIsSameAsValid = false;
    }

    /**
     * Initializes this collator from a tailoring; must only be called while
     * settings, data and tailoring are still unset (checked by assertion).
     * The valid locale is taken from the tailoring's actual locale.
     *
     * @param t the tailoring that supplies the collation data and settings
     */
    private void adoptTailoring(CollationTailoring t) {
        assert(settings == null && data == null && tailoring == null);
        data = t.data;
        settings = t.settings.clone();
        tailoring = t;
        validLocale = t.actualLocale;
        actualLocaleIsSameAsValid = false;
    }

    // package private methods -----------------------------------------------

    /**
     * Tests whether a character is "unsafe" for use as a collation starting point.
     *
     * @param c code point or code unit
     * @return true if c is unsafe
     * @see CollationElementIterator#setOffset(int)
     */
    final boolean isUnsafe(int c) {
        return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
    }

    /**
     * Frozen state of the collator.
     * While the collator is frozen, this lock is held between
     * getCollationBuffer() and releaseCollationBuffer().
     */
    private Lock frozenLock;

    /**
     * Bundle of reusable iterators (and a raw collation key) so that comparisons
     * do not have to allocate new iterator objects each time.
     */
    private static final class CollationBuffer {
        private CollationBuffer(CollationData data) {
            leftUTF16CollIter = new UTF16CollationIterator(data);
            rightUTF16CollIter = new UTF16CollationIterator(data);
            leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
            rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
            leftUTF16NFDIter = new UTF16NFDIterator();
            rightUTF16NFDIter = new UTF16NFDIterator();
            leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
            rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
        }

        // Iterators for the comparison up to the quaternary level.
        UTF16CollationIterator leftUTF16CollIter;
        UTF16CollationIterator rightUTF16CollIter;
        FCDUTF16CollationIterator leftFCDUTF16Iter;
        FCDUTF16CollationIterator rightFCDUTF16Iter;

        // Iterators for the identical-level comparison.
        UTF16NFDIterator leftUTF16NFDIter;
        UTF16NFDIterator rightUTF16NFDIter;
        FCDUTF16NFDIterator leftFCDUTF16NFDIter;
        FCDUTF16NFDIterator rightFCDUTF16NFDIter;

        RawCollationKey rawCollationKey;
    }

    /**
     * Get the version of this collator object.
     * The major field combines the top byte of the tailoring version with the
     * major number of the collation runtime version.
     *
     * @return the version object associated with this collator
     */
    @Override
    public VersionInfo getVersion() {
        int version = tailoring.version;
        int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
        return VersionInfo.getInstance(
                (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),
                ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));
    }

    /**
     * Get the UCA version of this collator object.
     *
     * @return the version object associated with this collator
     */
    @Override
    public VersionInfo getUCAVersion() {
        VersionInfo v = getVersion();
        // Note: This is tied to how the current implementation encodes the UCA version
        // in the overall getVersion().
        // Alternatively, we could load the root collator and get at lower-level data from there.
        // Either way, it will reflect the input collator's UCA version only
        // if it is a known implementation.
        // (C++ comment) It would be cleaner to make this a virtual Collator method.
        // (In Java, it is virtual.)
        return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
    }

    /** Reusable buffer; created on demand by getCollationBuffer() when not frozen. */
    private CollationBuffer collationBuffer;

    /**
     * Returns the shared CollationBuffer.
     * When frozen, this takes frozenLock first; the caller must pair every call
     * with releaseCollationBuffer(). When not frozen, the buffer is created on first use.
     *
     * @return the shared buffer
     */
    private final CollationBuffer getCollationBuffer() {
        if (isFrozen()) {
            frozenLock.lock();
        } else if (collationBuffer == null) {
            collationBuffer = new CollationBuffer(data);
        }
        return collationBuffer;
    }

    /**
     * Counterpart of getCollationBuffer(): unlocks frozenLock if the collator is frozen.
     *
     * @param buffer the buffer being released (not used by this method)
     */
    private final void releaseCollationBuffer(CollationBuffer buffer) {
        if (isFrozen()) {
            frozenLock.unlock();
        }
    }

    /**
     * {@inheritDoc}
     * @hide draft / provisional / internal are hidden on OHOS
     */
    @Override
    public ULocale getLocale(ULocale.Type type) {
        if (type == ULocale.ACTUAL_LOCALE) {
            return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
        } else if(type == ULocale.VALID_LOCALE) {
            return validLocale;
        } else {
            throw new IllegalArgumentException("unknown ULocale.Type " + type);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    void setLocale(ULocale valid, ULocale actual) {
        // This method is called
        // by other protected functions that check and make sure that
        // valid and actual are not null before passing them in.
        assert (valid == null) == (actual == null);
        // Another check we could do is that the actual locale is at
        // the same level or less specific than the valid locale.
        if(Objects.equals(actual, tailoring.actualLocale)) {
            actualLocaleIsSameAsValid = false;
        } else {
            assert(Objects.equals(actual, valid));
            actualLocaleIsSameAsValid = true;
        }
        // Do not modify tailoring.actualLocale:
        // We cannot be sure that that would be thread-safe.
        validLocale = valid;
    }

    CollationData data;
    SharedObject.Reference<CollationSettings> settings;  // reference-counted
    CollationTailoring tailoring;  // C++: reference-counted
    private ULocale validLocale;
    // Note: No need in Java to track which attributes have been set explicitly.
    // int or EnumSet explicitlySetAttributes;

    private boolean actualLocaleIsSameAsValid;
}