// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
*******************************************************************************
* Copyright (C) 1996-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;

import java.util.Comparator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.Set;

import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.coll.CollationData;
import com.ibm.icu.impl.coll.CollationRoot;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.ICUException;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.ULocale.Category;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.VersionInfo;

/**
 * {@icuenhanced java.text.Collator}.{@icu _usage_}
 *
 * <p>Collator performs locale-sensitive string comparison. A concrete
 * subclass, RuleBasedCollator, allows customization of the collation
 * ordering by the use of rule sets.
 *
 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link Freezable}.
 *
 * <p>Following the <a href="http://www.unicode.org">Unicode
 * Consortium</a>'s specifications for the
 * <a href="https://www.unicode.org/reports/tr10/">Unicode Collation
 * Algorithm (UCA)</a>, there are 5 different levels of strength used
 * in comparisons:
 *
 * <ul>
 * <li>PRIMARY strength: Typically, this is used to denote differences between
 *     base characters (for example, "a" &lt; "b").
 *     It is the strongest difference. For example, dictionaries are divided
 *     into different sections by base character.
 * <li>SECONDARY strength: Accents in the characters are considered secondary
 *     differences (for example, "as" &lt; "às" &lt; "at"). Other
 *     differences
 *     between letters can also be considered secondary differences, depending
 *     on the language. A secondary difference is ignored when there is a
 *     primary difference anywhere in the strings.
 * <li>TERTIARY strength: Upper and lower case differences in characters are
 *     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
 *     "aò"). In addition, a variant of a letter differs from the base
 *     form on the tertiary strength (such as "A" and "Ⓐ"). Another
 *     example is the
 *     difference between large and small Kana. A tertiary difference is ignored
 *     when there is a primary or secondary difference anywhere in the strings.
 * <li>QUATERNARY strength: When punctuation is ignored
 *     (see <a href="https://unicode-org.github.io/icu/userguide/collation/concepts#ignoring-punctuation">
 *     Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
 *     strength, an additional strength level can
 *     be used to distinguish words with and without punctuation (for example,
 *     "ab" &lt; "a-b" &lt; "aB").
 *     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
 *     difference. The QUATERNARY strength should only be used if ignoring
 *     punctuation is required.
 * <li>IDENTICAL strength:
 *     When all other strengths are equal, the IDENTICAL strength is used as a
 *     tiebreaker. The Unicode code point values of the NFD form of each string
 *     are compared, just in case there is no difference.
 *     For example, Hebrew cantillation marks are only distinguished at this
 *     strength. This strength should be used sparingly, as only code point
 *     value differences between two strings are an extremely rare occurrence.
 *     Using this strength substantially decreases the performance for both
 *     comparison and collation key generation APIs. This strength also
 *     increases the size of the collation key.
 * </ul>
 *
 * <p>Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
 * the canonical decomposition mode and one that does not use any decomposition.
 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
 * is not supported here. If the canonical
 * decomposition mode is set, the Collator handles un-normalized text properly,
 * producing the same results as if the text were normalized in NFD. If
 * canonical decomposition is turned off, it is the user's responsibility to
 * ensure that all text is already in the appropriate form before performing
 * a comparison or before getting a CollationKey.
 *
 * <p>For more information about the collation service see the
 * <a href="https://unicode-org.github.io/icu/userguide/collation">User Guide</a>.
 *
 * <p>Examples of use
 * <pre>
 * // Get the Collator for US English and set its strength to PRIMARY
 * Collator usCollator = Collator.getInstance(Locale.US);
 * usCollator.setStrength(Collator.PRIMARY);
 * if (usCollator.compare("abc", "ABC") == 0) {
 *     System.out.println("Strings are equivalent");
 * }
 *
 * The following example shows how to compare two strings using the
 * Collator for the default locale.
111 * 112 * // Compare two strings in the default locale 113 * Collator myCollator = Collator.getInstance(); 114 * myCollator.setDecomposition(NO_DECOMPOSITION); 115 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) { 116 * System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition"); 117 * myCollator.setDecomposition(CANONICAL_DECOMPOSITION); 118 * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) { 119 * System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition"); 120 * } 121 * else { 122 * System.out.println("à\u0325 is equals to a\u0325̀ with decomposition"); 123 * } 124 * } 125 * else { 126 * System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition"); 127 * } 128 * </pre> 129 * 130 * @see RuleBasedCollator 131 * @see CollationKey 132 * @author Syn Wee Quek 133 * @stable ICU 2.8 134 */ 135 public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable 136 { 137 // public data members --------------------------------------------------- 138 139 /** 140 * Strongest collator strength value. Typically used to denote differences 141 * between base characters. See class documentation for more explanation. 142 * @see #setStrength 143 * @see #getStrength 144 * @stable ICU 2.8 145 */ 146 public final static int PRIMARY = 0; 147 148 /** 149 * Second level collator strength value. 150 * Accents in the characters are considered secondary differences. 151 * Other differences between letters can also be considered secondary 152 * differences, depending on the language. 153 * See class documentation for more explanation. 154 * @see #setStrength 155 * @see #getStrength 156 * @stable ICU 2.8 157 */ 158 public final static int SECONDARY = 1; 159 160 /** 161 * Third level collator strength value. 162 * Upper and lower case differences in characters are distinguished at this 163 * strength level. 
In addition, a variant of a letter differs from the base 164 * form on the tertiary level. 165 * See class documentation for more explanation. 166 * @see #setStrength 167 * @see #getStrength 168 * @stable ICU 2.8 169 */ 170 public final static int TERTIARY = 2; 171 172 /** 173 * {@icu} Fourth level collator strength value. 174 * When punctuation is ignored 175 * (see <a href="https://unicode-org.github.io/icu/userguide/collation/concepts#ignoring-punctuation"> 176 * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY 177 * strength, an additional strength level can 178 * be used to distinguish words with and without punctuation. 179 * See class documentation for more explanation. 180 * @see #setStrength 181 * @see #getStrength 182 * @stable ICU 2.8 183 */ 184 public final static int QUATERNARY = 3; 185 186 /** 187 * Smallest Collator strength value. When all other strengths are equal, 188 * the IDENTICAL strength is used as a tiebreaker. The Unicode code point 189 * values of the NFD form of each string are compared, just in case there 190 * is no difference. 191 * See class documentation for more explanation. 192 * <p> 193 * Note this value is different from JDK's 194 * @stable ICU 2.8 195 */ 196 public final static int IDENTICAL = 15; 197 198 /** 199 * {@icunote} This is for backwards compatibility with Java APIs only. It 200 * should not be used, IDENTICAL should be used instead. ICU's 201 * collation does not support Java's FULL_DECOMPOSITION mode. 202 * @stable ICU 3.4 203 */ 204 public final static int FULL_DECOMPOSITION = IDENTICAL; 205 206 /** 207 * Decomposition mode value. With NO_DECOMPOSITION set, Strings 208 * will not be decomposed for collation. This is the default 209 * decomposition setting unless otherwise specified by the locale 210 * used to create the Collator. 211 * 212 * <p><strong>Note</strong> this value is different from the JDK's. 
213 * @see #CANONICAL_DECOMPOSITION 214 * @see #getDecomposition 215 * @see #setDecomposition 216 * @stable ICU 2.8 217 */ 218 public final static int NO_DECOMPOSITION = 16; 219 220 /** 221 * Decomposition mode value. With CANONICAL_DECOMPOSITION set, 222 * characters that are canonical variants according to the Unicode standard 223 * will be decomposed for collation. 224 * 225 * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as 226 * described in <a href="https://www.unicode.org/reports/tr15/"> 227 * Unicode Technical Report #15</a>. 228 * 229 * @see #NO_DECOMPOSITION 230 * @see #getDecomposition 231 * @see #setDecomposition 232 * @stable ICU 2.8 233 */ 234 public final static int CANONICAL_DECOMPOSITION = 17; 235 236 /** 237 * Reordering codes for non-script groups that can be reordered under collation. 238 * 239 * @see #getReorderCodes 240 * @see #setReorderCodes 241 * @see #getEquivalentReorderCodes 242 * @stable ICU 4.8 243 */ 244 public static interface ReorderCodes { 245 /** 246 * A special reordering code that is used to specify the default reordering codes for a locale. 247 * @stable ICU 4.8 248 */ 249 public final static int DEFAULT = -1; // == UScript.INVALID_CODE 250 /** 251 * A special reordering code that is used to specify no reordering codes. 252 * @stable ICU 4.8 253 */ 254 public final static int NONE = UScript.UNKNOWN; 255 /** 256 * A special reordering code that is used to specify all other codes used for reordering except 257 * for the codes listed as ReorderingCodes and those listed explicitly in a reordering. 258 * @stable ICU 4.8 259 */ 260 public final static int OTHERS = UScript.UNKNOWN; 261 /** 262 * Characters with the space property. 263 * This is equivalent to the rule value "space". 264 * @stable ICU 4.8 265 */ 266 public final static int SPACE = 0x1000; 267 /** 268 * The first entry in the enumeration of reordering groups. This is intended for use in 269 * range checking and enumeration of the reorder codes. 
270 * @stable ICU 4.8 271 */ 272 public final static int FIRST = SPACE; 273 /** 274 * Characters with the punctuation property. 275 * This is equivalent to the rule value "punct". 276 * @stable ICU 4.8 277 */ 278 public final static int PUNCTUATION = 0x1001; 279 /** 280 * Characters with the symbol property. 281 * This is equivalent to the rule value "symbol". 282 * @stable ICU 4.8 283 */ 284 public final static int SYMBOL = 0x1002; 285 /** 286 * Characters with the currency property. 287 * This is equivalent to the rule value "currency". 288 * @stable ICU 4.8 289 */ 290 public final static int CURRENCY = 0x1003; 291 /** 292 * Characters with the digit property. 293 * This is equivalent to the rule value "digit". 294 * @stable ICU 4.8 295 */ 296 public final static int DIGIT = 0x1004; 297 /** 298 * One more than the highest normal ReorderCodes value. 299 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 300 */ 301 @Deprecated 302 public final static int LIMIT = 0x1005; 303 } 304 305 // public methods -------------------------------------------------------- 306 307 /** 308 * Compares the equality of two Collator objects. Collator objects are equal if they have the same 309 * collation (sorting & searching) behavior. 310 * 311 * <p>The base class checks for null and for equal types. 312 * Subclasses should override. 313 * 314 * @param obj the Collator to compare to. 315 * @return true if this Collator has exactly the same collation behavior as obj, false otherwise. 316 * @stable ICU 2.8 317 */ 318 @Override equals(Object obj)319 public boolean equals(Object obj) { 320 // Subclasses: Call this method and then add more specific checks. 321 return this == obj || (obj != null && getClass() == obj.getClass()); 322 } 323 324 /** 325 * Generates a hash code for this Collator object. 
326 * 327 * <p>The implementation exists just for consistency with {@link #equals(Object)} 328 * implementation in this class and does not generate a useful hash code. 329 * Subclasses should override this implementation. 330 * 331 * @return a hash code value. 332 * @stable ICU 2.8 333 */ 334 @Override hashCode()335 public int hashCode() { 336 // Dummy return to prevent compile warnings. 337 return 0; 338 } 339 340 // public setters -------------------------------------------------------- 341 checkNotFrozen()342 private void checkNotFrozen() { 343 if (isFrozen()) { 344 throw new UnsupportedOperationException("Attempt to modify frozen Collator"); 345 } 346 } 347 348 /** 349 * Sets this Collator's strength attribute. The strength attribute 350 * determines the minimum level of difference considered significant 351 * during comparison. 352 * 353 * <p>The base class method does nothing. Subclasses should override it if appropriate. 354 * 355 * <p>See the Collator class description for an example of use. 356 * @param newStrength the new strength value. 357 * @see #getStrength 358 * @see #PRIMARY 359 * @see #SECONDARY 360 * @see #TERTIARY 361 * @see #QUATERNARY 362 * @see #IDENTICAL 363 * @throws IllegalArgumentException if the new strength value is not valid. 364 * @stable ICU 2.8 365 */ setStrength(int newStrength)366 public void setStrength(int newStrength) 367 { 368 checkNotFrozen(); 369 } 370 371 /** 372 * @return this, for chaining 373 * @internal Used in UnicodeTools 374 * @deprecated This API is ICU internal only. 375 */ 376 @Deprecated setStrength2(int newStrength)377 public Collator setStrength2(int newStrength) 378 { 379 setStrength(newStrength); 380 return this; 381 } 382 383 /** 384 * Sets the decomposition mode of this Collator. Setting this 385 * decomposition attribute with CANONICAL_DECOMPOSITION allows the 386 * Collator to handle un-normalized text properly, producing the 387 * same results as if the text were normalized. 
If 388 * NO_DECOMPOSITION is set, it is the user's responsibility to 389 * insure that all text is already in the appropriate form before 390 * a comparison or before getting a CollationKey. Adjusting 391 * decomposition mode allows the user to select between faster and 392 * more complete collation behavior. 393 * 394 * <p>Since a great many of the world's languages do not require 395 * text normalization, most locales set NO_DECOMPOSITION as the 396 * default decomposition mode. 397 * 398 * <p>The base class method does nothing. Subclasses should override it if appropriate. 399 * 400 * <p>See getDecomposition for a description of decomposition 401 * mode. 402 * 403 * @param decomposition the new decomposition mode 404 * @see #getDecomposition 405 * @see #NO_DECOMPOSITION 406 * @see #CANONICAL_DECOMPOSITION 407 * @throws IllegalArgumentException If the given value is not a valid 408 * decomposition mode. 409 * @stable ICU 2.8 410 */ setDecomposition(int decomposition)411 public void setDecomposition(int decomposition) 412 { 413 checkNotFrozen(); 414 } 415 416 /** 417 * Sets the reordering codes for this collator. 418 * Collation reordering allows scripts and some other groups of characters 419 * to be moved relative to each other. This reordering is done on top of 420 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 421 * at the start and/or the end of the collation order. These groups are specified using 422 * UScript codes and {@link Collator.ReorderCodes} entries. 423 * 424 * <p>By default, reordering codes specified for the start of the order are placed in the 425 * order given after several special non-script blocks. These special groups of characters 426 * are space, punctuation, symbol, currency, and digit. These special groups are represented with 427 * {@link Collator.ReorderCodes} entries. 
Script groups can be intermingled with 428 * these special non-script groups if those special groups are explicitly specified in the reordering. 429 * 430 * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS} 431 * stands for any script that is not explicitly 432 * mentioned in the list of reordering codes given. Anything that is after OTHERS 433 * will go at the very end of the reordering in the order given. 434 * 435 * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT} 436 * will reset the reordering for this collator 437 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that 438 * was specified when this collator was created from resource data or from rules. The 439 * DEFAULT code <b>must</b> be the sole code supplied when it is used. 440 * If not, then an {@link IllegalArgumentException} will be thrown. 441 * 442 * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE} 443 * will remove any reordering for this collator. 444 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 445 * NONE code <b>must</b> be the sole code supplied when it is used. 446 * 447 * @param order the reordering codes to apply to this collator; if this is null or an empty array 448 * then this clears any existing reordering 449 * @see #getReorderCodes 450 * @see #getEquivalentReorderCodes 451 * @see Collator.ReorderCodes 452 * @see UScript 453 * @stable ICU 4.8 454 */ setReorderCodes(int... order)455 public void setReorderCodes(int... order) 456 { 457 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 458 } 459 460 // public getters -------------------------------------------------------- 461 462 /** 463 * Returns the Collator for the current default locale. 464 * The default locale is determined by java.util.Locale.getDefault(). 
465 * @return the Collator for the default locale (for example, en_US) if it 466 * is created successfully. Otherwise if there is no Collator 467 * associated with the current locale, the root collator 468 * will be returned. 469 * @see java.util.Locale#getDefault() 470 * @see #getInstance(Locale) 471 * @stable ICU 2.8 472 */ getInstance()473 public static final Collator getInstance() 474 { 475 return getInstance(ULocale.getDefault()); 476 } 477 478 /** 479 * Clones the collator. 480 * @stable ICU 2.8 481 * @return a clone of this collator. 482 */ 483 @Override clone()484 public Object clone() throws CloneNotSupportedException { 485 return super.clone(); 486 } 487 488 // begin registry stuff 489 490 /** 491 * A factory used with registerFactory to register multiple collators and provide 492 * display names for them. If standard locale display names are sufficient, 493 * Collator instances may be registered instead. 494 * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses 495 * ULocale instead of Locale. Instead of overriding createCollator(Locale), 496 * new implementations should override createCollator(ULocale). Note that 497 * one of these two methods <b>MUST</b> be overridden or else an infinite 498 * loop will occur. 499 * @stable ICU 2.6 500 */ 501 public static abstract class CollatorFactory { 502 /** 503 * Return true if this factory will be visible. Default is true. 504 * If not visible, the locales supported by this factory will not 505 * be listed by getAvailableLocales. 506 * 507 * @return true if this factory is visible 508 * @stable ICU 2.6 509 */ visible()510 public boolean visible() { 511 return true; 512 } 513 514 /** 515 * Return an instance of the appropriate collator. If the locale 516 * is not supported, return null. 517 * <b>Note:</b> as of ICU4J 3.2, implementations should override 518 * this method instead of createCollator(Locale). 519 * @param loc the locale for which this collator is to be created. 
520 * @return the newly created collator. 521 * @stable ICU 3.2 522 */ createCollator(ULocale loc)523 public Collator createCollator(ULocale loc) { 524 return createCollator(loc.toLocale()); 525 } 526 527 /** 528 * Return an instance of the appropriate collator. If the locale 529 * is not supported, return null. 530 * <p><b>Note:</b> as of ICU4J 3.2, implementations should override 531 * createCollator(ULocale) instead of this method, and inherit this 532 * method's implementation. This method is no longer abstract 533 * and instead delegates to createCollator(ULocale). 534 * @param loc the locale for which this collator is to be created. 535 * @return the newly created collator. 536 * @stable ICU 2.6 537 */ createCollator(Locale loc)538 public Collator createCollator(Locale loc) { 539 return createCollator(ULocale.forLocale(loc)); 540 } 541 542 /** 543 * Return the name of the collator for the objectLocale, localized for the displayLocale. 544 * If objectLocale is not visible or not defined by the factory, return null. 545 * @param objectLocale the locale identifying the collator 546 * @param displayLocale the locale for which the display name of the collator should be localized 547 * @return the display name 548 * @stable ICU 2.6 549 */ getDisplayName(Locale objectLocale, Locale displayLocale)550 public String getDisplayName(Locale objectLocale, Locale displayLocale) { 551 return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale)); 552 } 553 554 /** 555 * Return the name of the collator for the objectLocale, localized for the displayLocale. 556 * If objectLocale is not visible or not defined by the factory, return null. 
557 * @param objectLocale the locale identifying the collator 558 * @param displayLocale the locale for which the display name of the collator should be localized 559 * @return the display name 560 * @stable ICU 3.2 561 */ getDisplayName(ULocale objectLocale, ULocale displayLocale)562 public String getDisplayName(ULocale objectLocale, ULocale displayLocale) { 563 if (visible()) { 564 Set<String> supported = getSupportedLocaleIDs(); 565 String name = objectLocale.getBaseName(); 566 if (supported.contains(name)) { 567 return objectLocale.getDisplayName(displayLocale); 568 } 569 } 570 return null; 571 } 572 573 /** 574 * Return an unmodifiable collection of the locale names directly 575 * supported by this factory. 576 * 577 * @return the set of supported locale IDs. 578 * @stable ICU 2.6 579 */ getSupportedLocaleIDs()580 public abstract Set<String> getSupportedLocaleIDs(); 581 582 /** 583 * Empty default constructor. 584 * @stable ICU 2.6 585 */ CollatorFactory()586 protected CollatorFactory() { 587 } 588 } 589 590 static abstract class ServiceShim { getInstance(ULocale l)591 abstract Collator getInstance(ULocale l); registerInstance(Collator c, ULocale l)592 abstract Object registerInstance(Collator c, ULocale l); registerFactory(CollatorFactory f)593 abstract Object registerFactory(CollatorFactory f); unregister(Object k)594 abstract boolean unregister(Object k); getAvailableLocales()595 abstract Locale[] getAvailableLocales(); // TODO remove getAvailableULocales()596 abstract ULocale[] getAvailableULocales(); getDisplayName(ULocale ol, ULocale dl)597 abstract String getDisplayName(ULocale ol, ULocale dl); 598 } 599 600 private static ServiceShim shim; getShim()601 private static ServiceShim getShim() { 602 // Note: this instantiation is safe on loose-memory-model configurations 603 // despite lack of synchronization, since the shim instance has no state-- 604 // it's all in the class init. 
The worst problem is we might instantiate 605 // two shim instances, but they'll share the same state so that's ok. 606 if (shim == null) { 607 try { 608 Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim"); 609 shim = (ServiceShim)cls.newInstance(); 610 } 611 catch (MissingResourceException e) 612 { 613 ///CLOVER:OFF 614 throw e; 615 ///CLOVER:ON 616 } 617 catch (Exception e) { 618 ///CLOVER:OFF 619 if(DEBUG){ 620 e.printStackTrace(); 621 } 622 throw new ICUException(e); 623 ///CLOVER:ON 624 } 625 } 626 return shim; 627 } 628 629 /** 630 * Simpler/faster methods for ASCII than ones based on Unicode data. 631 * TODO: There should be code like this somewhere already?? 632 */ 633 private static final class ASCII { equalIgnoreCase(CharSequence left, CharSequence right)634 static boolean equalIgnoreCase(CharSequence left, CharSequence right) { 635 int length = left.length(); 636 if (length != right.length()) { return false; } 637 for (int i = 0; i < length; ++i) { 638 char lc = left.charAt(i); 639 char rc = right.charAt(i); 640 if (lc == rc) { continue; } 641 if ('A' <= lc && lc <= 'Z') { 642 if ((lc + 0x20) == rc) { continue; } 643 } else if ('A' <= rc && rc <= 'Z') { 644 if ((rc + 0x20) == lc) { continue; } 645 } 646 return false; 647 } 648 return true; 649 } 650 } 651 getYesOrNo(String keyword, String s)652 private static final boolean getYesOrNo(String keyword, String s) { 653 if (ASCII.equalIgnoreCase(s, "yes")) { 654 return true; 655 } 656 if (ASCII.equalIgnoreCase(s, "no")) { 657 return false; 658 } 659 throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s); 660 } 661 getIntValue(String keyword, String s, String... values)662 private static final int getIntValue(String keyword, String s, String... 
values) { 663 for (int i = 0; i < values.length; ++i) { 664 if (ASCII.equalIgnoreCase(s, values[i])) { 665 return i; 666 } 667 } 668 throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s); 669 } 670 getReorderCode(String keyword, String s)671 private static final int getReorderCode(String keyword, String s) { 672 return Collator.ReorderCodes.FIRST + 673 getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit"); 674 // Not supporting "others" = UCOL_REORDER_CODE_OTHERS 675 // as a synonym for Zzzz = USCRIPT_UNKNOWN for now: 676 // Avoid introducing synonyms/aliases. 677 } 678 679 /** 680 * Sets collation attributes according to locale keywords. See 681 * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings 682 * 683 * Using "alias" keywords and values where defined: 684 * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax 685 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml 686 */ setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc)687 private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) { 688 // Check for collation keywords that were already deprecated 689 // before any were supported in createInstance() (except for "collation"). 690 String value = loc.getKeywordValue("colHiraganaQuaternary"); 691 if (value != null) { 692 throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary"); 693 } 694 value = loc.getKeywordValue("variableTop"); 695 if (value != null) { 696 throw new UnsupportedOperationException("locale keyword vt/variableTop"); 697 } 698 // Parse known collation keywords, ignore others. 699 value = loc.getKeywordValue("colStrength"); 700 if (value != null) { 701 // Note: Not supporting typo "quarternary" because it was never supported in locale IDs. 
702 int strength = getIntValue("colStrength", value, 703 "primary", "secondary", "tertiary", "quaternary", "identical"); 704 coll.setStrength(strength <= Collator.QUATERNARY ? strength : Collator.IDENTICAL); 705 } 706 value = loc.getKeywordValue("colBackwards"); 707 if (value != null) { 708 if (rbc != null) { 709 rbc.setFrenchCollation(getYesOrNo("colBackwards", value)); 710 } else { 711 throw new UnsupportedOperationException( 712 "locale keyword kb/colBackwards only settable for RuleBasedCollator"); 713 } 714 } 715 value = loc.getKeywordValue("colCaseLevel"); 716 if (value != null) { 717 if (rbc != null) { 718 rbc.setCaseLevel(getYesOrNo("colCaseLevel", value)); 719 } else { 720 throw new UnsupportedOperationException( 721 "locale keyword kb/colBackwards only settable for RuleBasedCollator"); 722 } 723 } 724 value = loc.getKeywordValue("colCaseFirst"); 725 if (value != null) { 726 if (rbc != null) { 727 int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper"); 728 if (cf == 0) { 729 rbc.setLowerCaseFirst(false); 730 rbc.setUpperCaseFirst(false); 731 } else if (cf == 1) { 732 rbc.setLowerCaseFirst(true); 733 } else /* cf == 2 */ { 734 rbc.setUpperCaseFirst(true); 735 } 736 } else { 737 throw new UnsupportedOperationException( 738 "locale keyword kf/colCaseFirst only settable for RuleBasedCollator"); 739 } 740 } 741 value = loc.getKeywordValue("colAlternate"); 742 if (value != null) { 743 if (rbc != null) { 744 rbc.setAlternateHandlingShifted( 745 getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0); 746 } else { 747 throw new UnsupportedOperationException( 748 "locale keyword ka/colAlternate only settable for RuleBasedCollator"); 749 } 750 } 751 value = loc.getKeywordValue("colNormalization"); 752 if (value != null) { 753 coll.setDecomposition(getYesOrNo("colNormalization", value) ? 
754 Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION); 755 } 756 value = loc.getKeywordValue("colNumeric"); 757 if (value != null) { 758 if (rbc != null) { 759 rbc.setNumericCollation(getYesOrNo("colNumeric", value)); 760 } else { 761 throw new UnsupportedOperationException( 762 "locale keyword kn/colNumeric only settable for RuleBasedCollator"); 763 } 764 } 765 value = loc.getKeywordValue("colReorder"); 766 if (value != null) { 767 int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST]; 768 int codesLength = 0; 769 int scriptNameStart = 0; 770 for (;;) { 771 if (codesLength == codes.length) { 772 throw new IllegalArgumentException( 773 "too many script codes for colReorder locale keyword: " + value); 774 } 775 int limit = scriptNameStart; 776 while (limit < value.length() && value.charAt(limit) != '-') { ++limit; } 777 String scriptName = value.substring(scriptNameStart, limit); 778 int code; 779 if (scriptName.length() == 4) { 780 // Strict parsing, accept only 4-letter script codes, not long names. 781 code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName); 782 } else { 783 code = getReorderCode("colReorder", scriptName); 784 } 785 codes[codesLength++] = code; 786 if (limit == value.length()) { break; } 787 scriptNameStart = limit + 1; 788 } 789 if (codesLength == 0) { 790 throw new IllegalArgumentException("no script codes for colReorder locale keyword"); 791 } 792 int[] args = new int[codesLength]; 793 System.arraycopy(codes, 0, args, 0, codesLength); 794 coll.setReorderCodes(args); 795 } 796 value = loc.getKeywordValue("kv"); 797 if (value != null) { 798 coll.setMaxVariable(getReorderCode("kv", value)); 799 } 800 } 801 802 /** 803 * {@icu} Returns the Collator for the desired locale. 804 * 805 * <p>For some languages, multiple collation types are available; 806 * for example, "de@collation=phonebook". 
807 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 808 * in the old locale extension syntax ("el@colCaseFirst=upper") 809 * or in language tag syntax ("el-u-kf-upper"). 810 * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>. 811 * 812 * @param locale the desired locale. 813 * @return Collator for the desired locale if it is created successfully. 814 * Otherwise if there is no Collator 815 * associated with the current locale, the root collator will 816 * be returned. 817 * @see java.util.Locale 818 * @see java.util.ResourceBundle 819 * @see #getInstance(Locale) 820 * @see #getInstance() 821 * @stable ICU 3.0 822 */ getInstance(ULocale locale)823 public static final Collator getInstance(ULocale locale) { 824 // fetching from service cache is faster than instantiation 825 if (locale == null) { 826 locale = ULocale.getDefault(); 827 } 828 Collator coll = getShim().getInstance(locale); 829 if (!locale.getName().equals(locale.getBaseName())) { // any keywords? 830 setAttributesFromKeywords(locale, coll, 831 (coll instanceof RuleBasedCollator) ? (RuleBasedCollator)coll : null); 832 } 833 return coll; 834 } 835 836 /** 837 * Returns the Collator for the desired locale. 838 * 839 * <p>For some languages, multiple collation types are available; 840 * for example, "de-u-co-phonebk". 841 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 842 * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale}) 843 * or in language tag syntax ("el-u-kf-upper"). 844 * See <a href="https://unicode-org.github.io/icu/userguide/collation/api">User Guide: Collation API</a>. 845 * 846 * @param locale the desired locale. 847 * @return Collator for the desired locale if it is created successfully. 848 * Otherwise if there is no Collator 849 * associated with the current locale, the root collator will 850 * be returned. 
851 * @see java.util.Locale 852 * @see java.util.ResourceBundle 853 * @see #getInstance(ULocale) 854 * @see #getInstance() 855 * @stable ICU 2.8 856 */ getInstance(Locale locale)857 public static final Collator getInstance(Locale locale) { 858 return getInstance(ULocale.forLocale(locale)); 859 } 860 861 /** 862 * {@icu} Registers a collator as the default collator for the provided locale. The 863 * collator should not be modified after it is registered. 864 * 865 * <p>Because ICU may choose to cache Collator objects internally, this must 866 * be called at application startup, prior to any calls to 867 * Collator.getInstance to avoid undefined behavior. 868 * 869 * @param collator the collator to register 870 * @param locale the locale for which this is the default collator 871 * @return an object that can be used to unregister the registered collator. 872 * 873 * @stable ICU 3.2 874 */ registerInstance(Collator collator, ULocale locale)875 public static final Object registerInstance(Collator collator, ULocale locale) { 876 return getShim().registerInstance(collator, locale); 877 } 878 879 /** 880 * {@icu} Registers a collator factory. 881 * 882 * <p>Because ICU may choose to cache Collator objects internally, this must 883 * be called at application startup, prior to any calls to 884 * Collator.getInstance to avoid undefined behavior. 885 * 886 * @param factory the factory to register 887 * @return an object that can be used to unregister the registered factory. 888 * 889 * @stable ICU 2.6 890 */ registerFactory(CollatorFactory factory)891 public static final Object registerFactory(CollatorFactory factory) { 892 return getShim().registerFactory(factory); 893 } 894 895 /** 896 * {@icu} Unregisters a collator previously registered using registerInstance. 897 * @param registryKey the object previously returned by registerInstance. 898 * @return true if the collator was successfully unregistered. 
899 * @stable ICU 2.6 900 */ unregister(Object registryKey)901 public static final boolean unregister(Object registryKey) { 902 if (shim == null) { 903 return false; 904 } 905 return shim.unregister(registryKey); 906 } 907 908 /** 909 * Returns the set of locales, as Locale objects, for which collators 910 * are installed. Note that Locale objects do not support RFC 3066. 911 * @return the list of locales in which collators are installed. 912 * This list includes any that have been registered, in addition to 913 * those that are installed with ICU4J. 914 * @stable ICU 2.4 915 */ getAvailableLocales()916 public static Locale[] getAvailableLocales() { 917 // TODO make this wrap getAvailableULocales later 918 if (shim == null) { 919 return ICUResourceBundle.getAvailableLocales( 920 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); 921 } 922 return shim.getAvailableLocales(); 923 } 924 925 /** 926 * {@icu} Returns the set of locales, as ULocale objects, for which collators 927 * are installed. ULocale objects support RFC 3066. 928 * @return the list of locales in which collators are installed. 929 * This list includes any that have been registered, in addition to 930 * those that are installed with ICU4J. 931 * @stable ICU 3.0 932 */ getAvailableULocales()933 public static final ULocale[] getAvailableULocales() { 934 if (shim == null) { 935 return ICUResourceBundle.getAvailableULocales( 936 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); 937 } 938 return shim.getAvailableULocales(); 939 } 940 941 /** 942 * The list of keywords for this service. This must be kept in sync with 943 * the resource data. 944 * @since ICU 3.0 945 */ 946 private static final String[] KEYWORDS = { "collation" }; 947 948 /** 949 * The resource name for this service. Note that this is not the same as 950 * the keyword for this service. 
951 * @since ICU 3.0 952 */ 953 private static final String RESOURCE = "collations"; 954 955 /** 956 * The resource bundle base name for this service. 957 * *since ICU 3.0 958 */ 959 960 private static final String BASE = ICUData.ICU_COLLATION_BASE_NAME; 961 962 /** 963 * {@icu} Returns an array of all possible keywords that are relevant to 964 * collation. At this point, the only recognized keyword for this 965 * service is "collation". 966 * @return an array of valid collation keywords. 967 * @see #getKeywordValues 968 * @stable ICU 3.0 969 */ getKeywords()970 public static final String[] getKeywords() { 971 return KEYWORDS; 972 } 973 974 /** 975 * {@icu} Given a keyword, returns an array of all values for 976 * that keyword that are currently in use. 977 * @param keyword one of the keywords returned by getKeywords. 978 * @see #getKeywords 979 * @stable ICU 3.0 980 */ getKeywordValues(String keyword)981 public static final String[] getKeywordValues(String keyword) { 982 if (!keyword.equals(KEYWORDS[0])) { 983 throw new IllegalArgumentException("Invalid keyword: " + keyword); 984 } 985 return ICUResourceBundle.getKeywordValues(BASE, RESOURCE); 986 } 987 988 /** 989 * {@icu} Given a key and a locale, returns an array of string values in a preferred 990 * order that would make a difference. These are all and only those values where 991 * the open (creation) of the service with the locale formed from the input locale 992 * plus input keyword and that value has different behavior than creation with the 993 * input locale alone. 994 * @param key one of the keys supported by this service. For now, only 995 * "collation" is supported. 996 * @param locale the locale 997 * @param commonlyUsed if set to true it will return only commonly used values 998 * with the given locale in preferred order. Otherwise, 999 * it will return all the available values for the locale. 1000 * @return an array of string values for the given key and the locale. 
1001 * @stable ICU 4.2 1002 */ getKeywordValuesForLocale(String key, ULocale locale, boolean commonlyUsed)1003 public static final String[] getKeywordValuesForLocale(String key, ULocale locale, 1004 boolean commonlyUsed) { 1005 // Note: The parameter commonlyUsed is not used. 1006 // The switch is in the method signature for consistency 1007 // with other locale services. 1008 1009 // Read available collation values from collation bundles. 1010 ICUResourceBundle bundle = (ICUResourceBundle) 1011 UResourceBundle.getBundleInstance( 1012 ICUData.ICU_COLLATION_BASE_NAME, locale); 1013 KeywordsSink sink = new KeywordsSink(); 1014 bundle.getAllItemsWithFallback("collations", sink); 1015 return sink.values.toArray(new String[sink.values.size()]); 1016 } 1017 1018 private static final class KeywordsSink extends UResource.Sink { 1019 LinkedList<String> values = new LinkedList<>(); 1020 boolean hasDefault = false; 1021 1022 @Override put(UResource.Key key, UResource.Value value, boolean noFallback)1023 public void put(UResource.Key key, UResource.Value value, boolean noFallback) { 1024 UResource.Table collations = value.getTable(); 1025 for (int i = 0; collations.getKeyAndValue(i, key, value); ++i) { 1026 int type = value.getType(); 1027 if (type == UResourceBundle.STRING) { 1028 if (!hasDefault && key.contentEquals("default")) { 1029 String defcoll = value.getString(); 1030 if (!defcoll.isEmpty()) { 1031 values.remove(defcoll); 1032 values.addFirst(defcoll); 1033 hasDefault = true; 1034 } 1035 } 1036 } else if (type == UResourceBundle.TABLE && !key.startsWith("private-")) { 1037 String collkey = key.toString(); 1038 if (!values.contains(collkey)) { 1039 values.add(collkey); 1040 } 1041 } 1042 } 1043 } 1044 } 1045 1046 /** 1047 * {@icu} Returns the functionally equivalent locale for the given 1048 * requested locale, with respect to given keyword, for the 1049 * collation service. 
If two locales return the same result, then 1050 * collators instantiated for these locales will behave 1051 * equivalently. The converse is not always true; two collators 1052 * may in fact be equivalent, but return different results, due to 1053 * internal details. The return result has no other meaning than 1054 * that stated above, and implies nothing as to the relationship 1055 * between the two locales. This is intended for use by 1056 * applications who wish to cache collators, or otherwise reuse 1057 * collators when possible. The functional equivalent may change 1058 * over time. For more information, please see the <a 1059 * href="https://unicode-org.github.io/icu/userguide/locale#locales-and-services"> 1060 * Locales and Services</a> section of the ICU User Guide. 1061 * @param keyword a particular keyword as enumerated by 1062 * getKeywords. 1063 * @param locID The requested locale 1064 * @param isAvailable If non-null, isAvailable[0] will receive and 1065 * output boolean that indicates whether the requested locale was 1066 * 'available' to the collation service. If non-null, isAvailable 1067 * must have length >= 1. 1068 * @return the locale 1069 * @stable ICU 3.0 1070 */ getFunctionalEquivalent(String keyword, ULocale locID, boolean isAvailable[])1071 public static final ULocale getFunctionalEquivalent(String keyword, 1072 ULocale locID, 1073 boolean isAvailable[]) { 1074 return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE, 1075 keyword, locID, isAvailable, true); 1076 } 1077 1078 /** 1079 * {@icu} Returns the functionally equivalent locale for the given 1080 * requested locale, with respect to given keyword, for the 1081 * collation service. 1082 * @param keyword a particular keyword as enumerated by 1083 * getKeywords. 
1084 * @param locID The requested locale 1085 * @return the locale 1086 * @see #getFunctionalEquivalent(String,ULocale,boolean[]) 1087 * @stable ICU 3.0 1088 */ getFunctionalEquivalent(String keyword, ULocale locID)1089 public static final ULocale getFunctionalEquivalent(String keyword, 1090 ULocale locID) { 1091 return getFunctionalEquivalent(keyword, locID, null); 1092 } 1093 1094 /** 1095 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1096 * displayLocale. 1097 * @param objectLocale the locale of the collator 1098 * @param displayLocale the locale for the collator's display name 1099 * @return the display name 1100 * @stable ICU 2.6 1101 */ getDisplayName(Locale objectLocale, Locale displayLocale)1102 static public String getDisplayName(Locale objectLocale, Locale displayLocale) { 1103 return getShim().getDisplayName(ULocale.forLocale(objectLocale), 1104 ULocale.forLocale(displayLocale)); 1105 } 1106 1107 /** 1108 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1109 * displayLocale. 1110 * @param objectLocale the locale of the collator 1111 * @param displayLocale the locale for the collator's display name 1112 * @return the display name 1113 * @stable ICU 3.2 1114 */ getDisplayName(ULocale objectLocale, ULocale displayLocale)1115 static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) { 1116 return getShim().getDisplayName(objectLocale, displayLocale); 1117 } 1118 1119 /** 1120 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1121 * default <code>DISPLAY</code> locale. 
1122 * @param objectLocale the locale of the collator 1123 * @return the display name 1124 * @see com.ibm.icu.util.ULocale.Category#DISPLAY 1125 * @stable ICU 2.6 1126 */ getDisplayName(Locale objectLocale)1127 static public String getDisplayName(Locale objectLocale) { 1128 return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY)); 1129 } 1130 1131 /** 1132 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1133 * default <code>DISPLAY</code> locale. 1134 * @param objectLocale the locale of the collator 1135 * @return the display name 1136 * @see com.ibm.icu.util.ULocale.Category#DISPLAY 1137 * @stable ICU 3.2 1138 */ getDisplayName(ULocale objectLocale)1139 static public String getDisplayName(ULocale objectLocale) { 1140 return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY)); 1141 } 1142 1143 /** 1144 * Returns this Collator's strength attribute. The strength attribute 1145 * determines the minimum level of difference considered significant. 1146 * {@icunote} This can return QUATERNARY strength, which is not supported by the 1147 * JDK version. 1148 * <p> 1149 * See the Collator class description for more details. 1150 * <p>The base class method always returns {@link #TERTIARY}. 1151 * Subclasses should override it if appropriate. 1152 * 1153 * @return this Collator's current strength attribute. 1154 * @see #setStrength 1155 * @see #PRIMARY 1156 * @see #SECONDARY 1157 * @see #TERTIARY 1158 * @see #QUATERNARY 1159 * @see #IDENTICAL 1160 * @stable ICU 2.8 1161 */ getStrength()1162 public int getStrength() 1163 { 1164 return TERTIARY; 1165 } 1166 1167 /** 1168 * Returns the decomposition mode of this Collator. The decomposition mode 1169 * determines how Unicode composed characters are handled. 1170 * <p> 1171 * See the Collator class description for more details. 1172 * <p>The base class method always returns {@link #NO_DECOMPOSITION}. 
1173 * Subclasses should override it if appropriate. 1174 * 1175 * @return the decomposition mode 1176 * @see #setDecomposition 1177 * @see #NO_DECOMPOSITION 1178 * @see #CANONICAL_DECOMPOSITION 1179 * @stable ICU 2.8 1180 */ getDecomposition()1181 public int getDecomposition() 1182 { 1183 return NO_DECOMPOSITION; 1184 } 1185 1186 // public other methods ------------------------------------------------- 1187 1188 /** 1189 * Compares the equality of two text Strings using 1190 * this Collator's rules, strength and decomposition mode. Convenience method. 1191 * @param source the source string to be compared. 1192 * @param target the target string to be compared. 1193 * @return true if the strings are equal according to the collation 1194 * rules, otherwise false. 1195 * @see #compare 1196 * @throws NullPointerException thrown if either arguments is null. 1197 * @stable ICU 2.8 1198 */ equals(String source, String target)1199 public boolean equals(String source, String target) 1200 { 1201 return (compare(source, target) == 0); 1202 } 1203 1204 /** 1205 * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored 1206 * in this collator. 1207 * @return a pointer to a UnicodeSet object containing all the 1208 * code points and sequences that may sort differently than 1209 * in the root collator. 1210 * @stable ICU 2.4 1211 */ getTailoredSet()1212 public UnicodeSet getTailoredSet() 1213 { 1214 return new UnicodeSet(0, 0x10FFFF); 1215 } 1216 1217 /** 1218 * Compares the source text String to the target text String according to 1219 * this Collator's rules, strength and decomposition mode. 1220 * Returns an integer less than, 1221 * equal to or greater than zero depending on whether the source String is 1222 * less than, equal to or greater than the target String. See the Collator 1223 * class description for an example of use. 1224 * 1225 * @param source the source String. 1226 * @param target the target String. 
1227 * @return Returns an integer value. Value is less than zero if source is 1228 * less than target, value is zero if source and target are equal, 1229 * value is greater than zero if source is greater than target. 1230 * @see CollationKey 1231 * @see #getCollationKey 1232 * @throws NullPointerException thrown if either argument is null. 1233 * @stable ICU 2.8 1234 */ compare(String source, String target)1235 public abstract int compare(String source, String target); 1236 1237 /** 1238 * Compares the source Object to the target Object. 1239 * 1240 * @param source the source Object. 1241 * @param target the target Object. 1242 * @return Returns an integer value. Value is less than zero if source is 1243 * less than target, value is zero if source and target are equal, 1244 * value is greater than zero if source is greater than target. 1245 * @throws ClassCastException thrown if either arguments cannot be cast to CharSequence. 1246 * @stable ICU 4.2 1247 */ 1248 @Override compare(Object source, Object target)1249 public int compare(Object source, Object target) { 1250 return doCompare((CharSequence)source, (CharSequence)target); 1251 } 1252 1253 /** 1254 * Compares two CharSequences. 1255 * The base class just calls compare(left.toString(), right.toString()). 1256 * Subclasses should instead implement this method and have the String API call this method. 1257 * @internal 1258 * @deprecated This API is ICU internal only. 1259 */ 1260 @Deprecated doCompare(CharSequence left, CharSequence right)1261 protected int doCompare(CharSequence left, CharSequence right) { 1262 return compare(left.toString(), right.toString()); 1263 } 1264 1265 /** 1266 * <p> 1267 * Transforms the String into a CollationKey suitable for efficient 1268 * repeated comparison. The resulting key depends on the collator's 1269 * rules, strength and decomposition mode. 1270 * 1271 * <p>Note that collation keys are often less efficient than simply doing comparison. 
* For more details, see the ICU User Guide.
     *
     * <p>See the CollationKey class documentation for more information.
     * @param source the string to be transformed into a CollationKey.
     * @return the CollationKey for the given String based on this Collator's
     *         collation rules. If the source String is null, a null
     *         CollationKey is returned.
     * @see CollationKey
     * @see #compare(String, String)
     * @see #getRawCollationKey
     * @stable ICU 2.8
     */
    public abstract CollationKey getCollationKey(String source);

    /**
     * {@icu} Returns the simpler form of a CollationKey for the String source following
     * the rules of this Collator and stores the result into the user provided argument
     * key.  If key has an internal byte array of length that's too small for the result,
     * the internal byte array will be grown to the exact required size.
     *
     * <p>Note that collation keys are often less efficient than simply doing comparison.
     * For more details, see the ICU User Guide.
     *
     * @param source the text String to be transformed into a RawCollationKey
     * @param key the RawCollationKey that receives the result, or null to have
     *         a new one created
     * @return If key is null, a new instance of RawCollationKey will be
     *         created and returned, otherwise the user provided key will be
     *         returned.
     * @see #compare(String, String)
     * @see #getCollationKey
     * @see RawCollationKey
     * @stable ICU 2.8
     */
    public abstract RawCollationKey getRawCollationKey(String source,
                                                       RawCollationKey key);

    /**
     * {@icu} Sets the variable top to the top of the specified reordering group.
     * The variable top determines the highest-sorting character
     * which is affected by the alternate handling behavior.
     * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1312 * 1313 * <p>The base class implementation throws an UnsupportedOperationException. 1314 * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION, 1315 * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY; 1316 * or Collator.ReorderCodes.DEFAULT to restore the default max variable group 1317 * @return this 1318 * @see #getMaxVariable 1319 * @stable ICU 53 1320 */ setMaxVariable(int group)1321 public Collator setMaxVariable(int group) { 1322 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1323 } 1324 1325 /** 1326 * {@icu} Returns the maximum reordering group whose characters are affected by 1327 * the alternate handling behavior. 1328 * 1329 * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION. 1330 * @return the maximum variable reordering group. 1331 * @see #setMaxVariable 1332 * @stable ICU 53 1333 */ getMaxVariable()1334 public int getMaxVariable() { 1335 return Collator.ReorderCodes.PUNCTUATION; 1336 } 1337 1338 /** 1339 * {@icu} Sets the variable top to the primary weight of the specified string. 1340 * 1341 * <p>Beginning with ICU 53, the variable top is pinned to 1342 * the top of one of the supported reordering groups, 1343 * and it must not be beyond the last of those groups. 1344 * See {@link #setMaxVariable(int)}. 1345 * 1346 * @param varTop one or more (if contraction) characters to which the 1347 * variable top should be set 1348 * @return variable top primary weight 1349 * @exception IllegalArgumentException 1350 * is thrown if varTop argument is not a valid variable top element. A variable top element is 1351 * invalid when 1352 * <ul> 1353 * <li>it is a contraction that does not exist in the Collation order 1354 * <li>the variable top is beyond 1355 * the last reordering group supported by setMaxVariable() 1356 * <li>when the varTop argument is null or zero in length. 
*                </ul>
     * @see #getVariableTop
     * @see RuleBasedCollator#setAlternateHandlingShifted
     * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
     */
    @Deprecated
    public abstract int setVariableTop(String varTop);

    /**
     * {@icu} Gets the variable top value of a Collator.
     *
     * @return the variable top primary weight
     * @see #getMaxVariable
     * @stable ICU 2.6
     */
    public abstract int getVariableTop();

    /**
     * {@icu} Sets the variable top to the specified primary weight.
     *
     * <p>Beginning with ICU 53, the variable top is pinned to
     * the top of one of the supported reordering groups,
     * and it must not be beyond the last of those groups.
     * See {@link #setMaxVariable(int)}.
     *
     * @param varTop primary weight, as returned by setVariableTop or getVariableTop
     * @see #getVariableTop
     * @see #setVariableTop(String)
     * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
     */
    @Deprecated
    public abstract void setVariableTop(int varTop);

    /**
     * {@icu} Returns the version of this collator object.
     * @return the version object associated with this collator
     * @stable ICU 2.8
     */
    public abstract VersionInfo getVersion();

    /**
     * {@icu} Returns the UCA version of this collator object.
     * @return the UCA version object associated with this collator
     * @stable ICU 2.8
     */
    public abstract VersionInfo getUCAVersion();

    /**
     * Retrieves the reordering codes for this collator.
     * These reordering codes are a combination of UScript codes and ReorderCodes.
1407 * @return a copy of the reordering codes for this collator; 1408 * if none are set then returns an empty array 1409 * @see #setReorderCodes 1410 * @see #getEquivalentReorderCodes 1411 * @see Collator.ReorderCodes 1412 * @see UScript 1413 * @stable ICU 4.8 1414 */ getReorderCodes()1415 public int[] getReorderCodes() 1416 { 1417 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1418 } 1419 1420 /** 1421 * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder 1422 * codes are grouped and must reorder together. 1423 * Beginning with ICU 55, scripts only reorder together if they are primary-equal, 1424 * for example Hiragana and Katakana. 1425 * 1426 * @param reorderCode The reorder code to determine equivalence for. 1427 * @return the set of all reorder codes in the same group as the given reorder code. 1428 * @see #setReorderCodes 1429 * @see #getReorderCodes 1430 * @see Collator.ReorderCodes 1431 * @see UScript 1432 * @stable ICU 4.8 1433 */ getEquivalentReorderCodes(int reorderCode)1434 public static int[] getEquivalentReorderCodes(int reorderCode) { 1435 CollationData baseData = CollationRoot.getData(); 1436 return baseData.getEquivalentScripts(reorderCode); 1437 } 1438 1439 1440 // Freezable interface implementation ------------------------------------------------- 1441 1442 /** 1443 * Determines whether the object has been frozen or not. 1444 * 1445 * <p>An unfrozen Collator is mutable and not thread-safe. 1446 * A frozen Collator is immutable and thread-safe. 1447 * 1448 * @stable ICU 4.8 1449 */ 1450 @Override isFrozen()1451 public boolean isFrozen() { 1452 return false; 1453 } 1454 1455 /** 1456 * Freezes the collator. 1457 * @return the collator itself. 
1458 * @stable ICU 4.8 1459 */ 1460 @Override freeze()1461 public Collator freeze() { 1462 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1463 } 1464 1465 /** 1466 * Provides for the clone operation. Any clone is initially unfrozen. 1467 * @stable ICU 4.8 1468 */ 1469 @Override cloneAsThawed()1470 public Collator cloneAsThawed() { 1471 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1472 } 1473 1474 /** 1475 * Empty default constructor to make javadocs happy 1476 * @stable ICU 2.4 1477 */ Collator()1478 protected Collator() 1479 { 1480 } 1481 1482 private static final boolean DEBUG = ICUDebug.enabled("collator"); 1483 1484 // -------- BEGIN ULocale boilerplate -------- 1485 1486 /** 1487 * {@icu} Returns the locale that was used to create this object, or null. 1488 * This may may differ from the locale requested at the time of 1489 * this object's creation. For example, if an object is created 1490 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be 1491 * drawn from <tt>en</tt> (the <i>actual</i> locale), and 1492 * <tt>en_US</tt> may be the most specific locale that exists (the 1493 * <i>valid</i> locale). 1494 * 1495 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8 1496 * contains a partial preview implementation. The * <i>actual</i> 1497 * locale is returned correctly, but the <i>valid</i> locale is 1498 * not, in most cases. 1499 * 1500 * <p>The base class method always returns {@link ULocale#ROOT}. 1501 * Subclasses should override it if appropriate. 1502 * 1503 * @param type type of information requested, either {@link 1504 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link 1505 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}. 1506 * @return the information specified by <i>type</i>, or null if 1507 * this object was not constructed from locale data. 
1508 * @see com.ibm.icu.util.ULocale 1509 * @see com.ibm.icu.util.ULocale#VALID_LOCALE 1510 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE 1511 * @draft ICU 2.8 (retain) 1512 */ getLocale(ULocale.Type type)1513 public ULocale getLocale(ULocale.Type type) { 1514 return ULocale.ROOT; 1515 } 1516 1517 /** 1518 * Set information about the locales that were used to create this 1519 * object. If the object was not constructed from locale data, 1520 * both arguments should be set to null. Otherwise, neither 1521 * should be null. The actual locale must be at the same level or 1522 * less specific than the valid locale. This method is intended 1523 * for use by factories or other entities that create objects of 1524 * this class. 1525 * 1526 * <p>The base class method does nothing. Subclasses should override it if appropriate. 1527 * 1528 * @param valid the most specific locale containing any resource 1529 * data, or null 1530 * @param actual the locale containing data used to construct this 1531 * object, or null 1532 * @see com.ibm.icu.util.ULocale 1533 * @see com.ibm.icu.util.ULocale#VALID_LOCALE 1534 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE 1535 */ setLocale(ULocale valid, ULocale actual)1536 void setLocale(ULocale valid, ULocale actual) {} 1537 1538 // -------- END ULocale boilerplate -------- 1539 } 1540