1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27 /* 28 * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved 29 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved 30 * 31 * The original version of this source code and documentation is copyrighted 32 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These 33 * materials are provided under terms of a License Agreement between Taligent 34 * and Sun. This technology is protected by multiple US and International 35 * patents. This notice and attribution to Taligent may not be removed. 36 * Taligent is a registered trademark of Taligent, Inc. 37 * 38 */ 39 40 package java.text; 41 42 import java.util.Locale; 43 44 import libcore.icu.ICU; 45 46 /** 47 * The <code>Collator</code> class performs locale-sensitive 48 * <code>String</code> comparison. You use this class to build 49 * searching and sorting routines for natural language text. 50 * 51 * <p> 52 * <code>Collator</code> is an abstract base class. Subclasses 53 * implement specific collation strategies. One subclass, 54 * <code>RuleBasedCollator</code>, is currently provided with 55 * the Java Platform and is applicable to a wide set of languages. Other 56 * subclasses may be created to handle more specialized needs. 57 * 58 * <p> 59 * Like other locale-sensitive classes, you can use the static 60 * factory method, <code>getInstance</code>, to obtain the appropriate 61 * <code>Collator</code> object for a given locale. You will only need 62 * to look at the subclasses of <code>Collator</code> if you need 63 * to understand the details of a particular collation strategy or 64 * if you need to modify that strategy. 65 * 66 * <p> 67 * The following example shows how to compare two strings using 68 * the <code>Collator</code> for the default locale. 69 * <blockquote> 70 * <pre>{@code 71 * // Compare two strings in the default locale 72 * Collator myCollator = Collator.getInstance(); 73 * if( myCollator.compare("abc", "ABC") < 0 ) 74 * System.out.println("abc is less than ABC"); 75 * else 76 * System.out.println("abc is greater than or equal to ABC"); 77 * }</pre> 78 * </blockquote> 79 * 80 * <p> 81 * You can set a <code>Collator</code>'s <em>strength</em> property 82 * to determine the level of difference considered significant in 83 * comparisons. Four strengths are provided: <code>PRIMARY</code>, 84 * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>. 85 * The exact assignment of strengths to language features is 86 * locale dependant. For example, in Czech, "e" and "f" are considered 87 * primary differences, while "e" and "ě" are secondary differences, 88 * "e" and "E" are tertiary differences and "e" and "e" are identical. 89 * The following shows how both case and accents could be ignored for 90 * US English. 91 * <blockquote> 92 * <pre> 93 * //Get the Collator for US English and set its strength to PRIMARY 94 * Collator usCollator = Collator.getInstance(Locale.US); 95 * usCollator.setStrength(Collator.PRIMARY); 96 * if( usCollator.compare("abc", "ABC") == 0 ) { 97 * System.out.println("Strings are equivalent"); 98 * } 99 * </pre> 100 * </blockquote> 101 * <p> 102 * For comparing <code>String</code>s exactly once, the <code>compare</code> 103 * method provides the best performance. When sorting a list of 104 * <code>String</code>s however, it is generally necessary to compare each 105 * <code>String</code> multiple times. In this case, <code>CollationKey</code>s 106 * provide better performance. The <code>CollationKey</code> class converts 107 * a <code>String</code> to a series of bits that can be compared bitwise 108 * against other <code>CollationKey</code>s. A <code>CollationKey</code> is 109 * created by a <code>Collator</code> object for a given <code>String</code>. 110 * <br> 111 * <strong>Note:</strong> <code>CollationKey</code>s from different 112 * <code>Collator</code>s can not be compared. See the class description 113 * for {@link CollationKey} 114 * for an example using <code>CollationKey</code>s. 115 * 116 * @see RuleBasedCollator 117 * @see CollationKey 118 * @see CollationElementIterator 119 * @see Locale 120 * @author Helena Shih, Laura Werner, Richard Gillam 121 */ 122 123 public abstract class Collator 124 implements java.util.Comparator<Object>, Cloneable 125 { 126 /** 127 * Collator strength value. When set, only PRIMARY differences are 128 * considered significant during comparison. The assignment of strengths 129 * to language features is locale dependant. A common example is for 130 * different base letters ("a" vs "b") to be considered a PRIMARY difference. 131 * @see java.text.Collator#setStrength 132 * @see java.text.Collator#getStrength 133 */ 134 public final static int PRIMARY = 0; 135 /** 136 * Collator strength value. When set, only SECONDARY and above differences are 137 * considered significant during comparison. The assignment of strengths 138 * to language features is locale dependant. A common example is for 139 * different accented forms of the same base letter ("a" vs "\u00E4") to be 140 * considered a SECONDARY difference. 141 * @see java.text.Collator#setStrength 142 * @see java.text.Collator#getStrength 143 */ 144 public final static int SECONDARY = 1; 145 /** 146 * Collator strength value. When set, only TERTIARY and above differences are 147 * considered significant during comparison. The assignment of strengths 148 * to language features is locale dependant. A common example is for 149 * case differences ("a" vs "A") to be considered a TERTIARY difference. 150 * @see java.text.Collator#setStrength 151 * @see java.text.Collator#getStrength 152 */ 153 public final static int TERTIARY = 2; 154 155 /** 156 * Collator strength value. When set, all differences are 157 * considered significant during comparison. The assignment of strengths 158 * to language features is locale dependant. A common example is for control 159 * characters ("\u0001" vs "\u0002") to be considered equal at the 160 * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL 161 * level. Additionally, differences between pre-composed accents such as 162 * "\u00C0" (A-grave) and combining accents such as "A\u0300" 163 * (A, combining-grave) will be considered significant at the IDENTICAL 164 * level if decomposition is set to NO_DECOMPOSITION. 165 */ 166 public final static int IDENTICAL = 3; 167 168 /** 169 * Decomposition mode value. With NO_DECOMPOSITION 170 * set, accented characters will not be decomposed for collation. This 171 * is the default setting and provides the fastest collation but 172 * will only produce correct results for languages that do not use accents. 173 * @see java.text.Collator#getDecomposition 174 * @see java.text.Collator#setDecomposition 175 */ 176 public final static int NO_DECOMPOSITION = 0; 177 178 /** 179 * Decomposition mode value. With CANONICAL_DECOMPOSITION 180 * set, characters that are canonical variants according to Unicode 181 * standard will be decomposed for collation. This should be used to get 182 * correct collation of accented characters. 183 * <p> 184 * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as 185 * described in 186 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode 187 * Technical Report #15</a>. 188 * @see java.text.Collator#getDecomposition 189 * @see java.text.Collator#setDecomposition 190 */ 191 public final static int CANONICAL_DECOMPOSITION = 1; 192 193 /** 194 * Decomposition mode value. With FULL_DECOMPOSITION 195 * set, both Unicode canonical variants and Unicode compatibility variants 196 * will be decomposed for collation. This causes not only accented 197 * characters to be collated, but also characters that have special formats 198 * to be collated with their norminal form. For example, the half-width and 199 * full-width ASCII and Katakana characters are then collated together. 200 * FULL_DECOMPOSITION is the most complete and therefore the slowest 201 * decomposition mode. 202 * <p> 203 * FULL_DECOMPOSITION corresponds to Normalization Form KD as 204 * described in 205 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode 206 * Technical Report #15</a>. 207 * @see java.text.Collator#getDecomposition 208 * @see java.text.Collator#setDecomposition 209 */ 210 public final static int FULL_DECOMPOSITION = 2; 211 212 /** 213 * Gets the Collator for the current default locale. 214 * The default locale is determined by java.util.Locale.getDefault. 215 * @return the Collator for the default locale.(for example, en_US) 216 * @see java.util.Locale#getDefault 217 */ getInstance()218 public static synchronized Collator getInstance() { 219 return getInstance(Locale.getDefault()); 220 } 221 222 /** 223 * Gets the Collator for the desired locale. 224 * @param desiredLocale the desired locale. 225 * @return the Collator for the desired locale. 226 * @see java.util.Locale 227 * @see java.util.ResourceBundle 228 */ 229 // Android-changed: Switched to ICU. getInstance(Locale desiredLocale)230 public static synchronized Collator getInstance(Locale desiredLocale) 231 { 232 if (desiredLocale == null) { 233 throw new NullPointerException("locale == null"); 234 } 235 return new RuleBasedCollator((android.icu.text.RuleBasedCollator) 236 android.icu.text.Collator.getInstance(desiredLocale)); 237 } 238 239 /** 240 * Compares the source string to the target string according to the 241 * collation rules for this Collator. Returns an integer less than, 242 * equal to or greater than zero depending on whether the source String is 243 * less than, equal to or greater than the target string. See the Collator 244 * class description for an example of use. 245 * <p> 246 * For a one time comparison, this method has the best performance. If a 247 * given String will be involved in multiple comparisons, CollationKey.compareTo 248 * has the best performance. See the Collator class description for an example 249 * using CollationKeys. 250 * @param source the source string. 251 * @param target the target string. 252 * @return Returns an integer value. Value is less than zero if source is less than 253 * target, value is zero if source and target are equal, value is greater than zero 254 * if source is greater than target. 255 * @see java.text.CollationKey 256 * @see java.text.Collator#getCollationKey 257 */ compare(String source, String target)258 public abstract int compare(String source, String target); 259 260 /** 261 * Compares its two arguments for order. Returns a negative integer, 262 * zero, or a positive integer as the first argument is less than, equal 263 * to, or greater than the second. 264 * <p> 265 * This implementation merely returns 266 * <code> compare((String)o1, (String)o2) </code>. 267 * 268 * @return a negative integer, zero, or a positive integer as the 269 * first argument is less than, equal to, or greater than the 270 * second. 271 * @exception ClassCastException the arguments cannot be cast to Strings. 272 * @see java.util.Comparator 273 * @since 1.2 274 */ 275 @Override compare(Object o1, Object o2)276 public int compare(Object o1, Object o2) { 277 return compare((String)o1, (String)o2); 278 } 279 280 /** 281 * Transforms the String into a series of bits that can be compared bitwise 282 * to other CollationKeys. CollationKeys provide better performance than 283 * Collator.compare when Strings are involved in multiple comparisons. 284 * See the Collator class description for an example using CollationKeys. 285 * @param source the string to be transformed into a collation key. 286 * @return the CollationKey for the given String based on this Collator's collation 287 * rules. If the source String is null, a null CollationKey is returned. 288 * @see java.text.CollationKey 289 * @see java.text.Collator#compare 290 */ getCollationKey(String source)291 public abstract CollationKey getCollationKey(String source); 292 293 /** 294 * Convenience method for comparing the equality of two strings based on 295 * this Collator's collation rules. 296 * @param source the source string to be compared with. 297 * @param target the target string to be compared with. 298 * @return true if the strings are equal according to the collation 299 * rules. false, otherwise. 300 * @see java.text.Collator#compare 301 */ equals(String source, String target)302 public boolean equals(String source, String target) 303 { 304 // Android-changed: remove use of unnecessary EQUAL constant. 305 return (compare(source, target) == 0); 306 } 307 308 /** 309 * Returns this Collator's strength property. The strength property determines 310 * the minimum level of difference considered significant during comparison. 311 * See the Collator class description for an example of use. 312 * @return this Collator's current strength property. 313 * @see java.text.Collator#setStrength 314 * @see java.text.Collator#PRIMARY 315 * @see java.text.Collator#SECONDARY 316 * @see java.text.Collator#TERTIARY 317 * @see java.text.Collator#IDENTICAL 318 */ getStrength()319 public synchronized int getStrength() 320 { 321 // Android-changed: Switched to ICU. 322 // The value for IDENTICAL in ICU differs from that used in this class. 323 int value = icuColl.getStrength(); 324 return (value == android.icu.text.Collator.IDENTICAL) ? IDENTICAL : value; 325 } 326 327 /** 328 * Sets this Collator's strength property. The strength property determines 329 * the minimum level of difference considered significant during comparison. 330 * See the Collator class description for an example of use. 331 * @param newStrength the new strength value. 332 * @see java.text.Collator#getStrength 333 * @see java.text.Collator#PRIMARY 334 * @see java.text.Collator#SECONDARY 335 * @see java.text.Collator#TERTIARY 336 * @see java.text.Collator#IDENTICAL 337 * @exception IllegalArgumentException If the new strength value is not one of 338 * PRIMARY, SECONDARY, TERTIARY or IDENTICAL. 339 */ setStrength(int newStrength)340 public synchronized void setStrength(int newStrength) { 341 // Android-changed: Switched to ICU. 342 // The ICU value for IDENTICAL differs from that defined in this class. 343 if (newStrength == IDENTICAL) { 344 newStrength = android.icu.text.Collator.IDENTICAL; 345 } 346 icuColl.setStrength(newStrength); 347 } 348 349 /** 350 * Get the decomposition mode of this Collator. Decomposition mode 351 * determines how Unicode composed characters are handled. Adjusting 352 * decomposition mode allows the user to select between faster and more 353 * complete collation behavior. 354 * <p>The three values for decomposition mode are: 355 * <UL> 356 * <LI>NO_DECOMPOSITION, 357 * <LI>CANONICAL_DECOMPOSITION 358 * <LI>FULL_DECOMPOSITION. 359 * </UL> 360 * See the documentation for these three constants for a description 361 * of their meaning. 362 * @return the decomposition mode 363 * @see java.text.Collator#setDecomposition 364 * @see java.text.Collator#NO_DECOMPOSITION 365 * @see java.text.Collator#CANONICAL_DECOMPOSITION 366 * @see java.text.Collator#FULL_DECOMPOSITION 367 */ getDecomposition()368 public synchronized int getDecomposition() 369 { 370 // Android-changed: Switched to ICU. 371 return decompositionMode_ICU_Java(icuColl.getDecomposition()); 372 } 373 /** 374 * Set the decomposition mode of this Collator. See getDecomposition 375 * for a description of decomposition mode. 376 * @param decompositionMode the new decomposition mode. 377 * @see java.text.Collator#getDecomposition 378 * @see java.text.Collator#NO_DECOMPOSITION 379 * @see java.text.Collator#CANONICAL_DECOMPOSITION 380 * @see java.text.Collator#FULL_DECOMPOSITION 381 * @exception IllegalArgumentException If the given value is not a valid decomposition 382 * mode. 383 */ setDecomposition(int decompositionMode)384 public synchronized void setDecomposition(int decompositionMode) { 385 // Android-changed: Switched to ICU. 386 icuColl.setDecomposition(decompositionMode_Java_ICU(decompositionMode)); 387 } 388 389 // Android-changed: Removed references to CollatorProvider. 390 /** 391 * Returns an array of all locales for which the 392 * <code>getInstance</code> methods of this class can return 393 * localized instances. 394 * 395 * @return An array of locales for which localized 396 * <code>Collator</code> instances are available. 397 */ getAvailableLocales()398 public static synchronized Locale[] getAvailableLocales() { 399 // Android-changed: Removed reference to CollatorProvider. Switched to ICU. 400 return ICU.getAvailableCollatorLocales(); 401 } 402 403 // BEGIN Android-added: conversion method for decompositionMode constants. decompositionMode_Java_ICU(int mode)404 private int decompositionMode_Java_ICU(int mode) { 405 switch (mode) { 406 case Collator.CANONICAL_DECOMPOSITION: 407 return android.icu.text.Collator.CANONICAL_DECOMPOSITION; 408 case Collator.NO_DECOMPOSITION: 409 return android.icu.text.Collator.NO_DECOMPOSITION; 410 } 411 throw new IllegalArgumentException("Bad mode: " + mode); 412 } 413 decompositionMode_ICU_Java(int mode)414 private int decompositionMode_ICU_Java(int mode) { 415 int javaMode = mode; 416 switch (mode) { 417 case android.icu.text.Collator.NO_DECOMPOSITION: 418 javaMode = Collator.NO_DECOMPOSITION; 419 break; 420 case android.icu.text.Collator.CANONICAL_DECOMPOSITION: 421 javaMode = Collator.CANONICAL_DECOMPOSITION; 422 break; 423 } 424 return javaMode; 425 } 426 // END Android-added: conversion method for decompositionMode constants. 427 428 // Android-changed: improve documentation. 429 /** 430 * Returns a new collator with the same decomposition mode and 431 * strength value as this collator. 432 * 433 * @return a shallow copy of this collator. 434 * @see java.lang.Cloneable 435 */ 436 @Override clone()437 public Object clone() 438 { 439 try { 440 // Android-changed: Switched to ICU. 441 Collator clone = (Collator) super.clone(); 442 clone.icuColl = (android.icu.text.Collator) icuColl.clone(); 443 return clone; 444 } catch (CloneNotSupportedException e) { 445 throw new AssertionError(e); 446 } 447 } 448 449 /** 450 * Compares the equality of two Collators. 451 * @param that the Collator to be compared with this. 452 * @return true if this Collator is the same as that Collator; 453 * false otherwise. 454 */ 455 @Override equals(Object that)456 public boolean equals(Object that) 457 { 458 if (this == that) { 459 return true; 460 } 461 if (that == null) { 462 return false; 463 } 464 if (getClass() != that.getClass()) { 465 return false; 466 } 467 Collator other = (Collator) that; 468 // Android-changed: Switched to ICU. 469 return icuColl == null ? other.icuColl == null : icuColl.equals(other.icuColl); 470 } 471 472 /** 473 * Generates the hash code for this Collator. 474 */ 475 @Override hashCode()476 abstract public int hashCode(); 477 478 /** 479 * Default constructor. This constructor is 480 * protected so subclasses can get access to it. Users typically create 481 * a Collator sub-class by calling the factory method getInstance. 482 * @see java.text.Collator#getInstance 483 */ Collator()484 protected Collator() 485 { 486 // Android-changed: Switched to ICU. 487 icuColl = android.icu.text.RuleBasedCollator.getInstance(Locale.getDefault()); 488 } 489 490 // Android-added: ICU Collator this delegates to. 491 android.icu.text.Collator icuColl; 492 493 // Android-added: protected constructor taking a Collator. Collator(android.icu.text.Collator icuColl)494 Collator(android.icu.text.Collator icuColl) { 495 this.icuColl = icuColl; 496 } 497 498 // Android-removed: Fields and constants. 499 } 500