1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.text; 11 12 import ohos.global.icu.impl.coll.Collation; 13 14 /** 15 * A <code>CollationKey</code> represents a <code>String</code> 16 * under the rules of a specific <code>Collator</code> 17 * object. Comparing two <code>CollationKey</code>s returns the 18 * relative order of the <code>String</code>s they represent. 19 * 20 * <p>Since the rule set of <code>Collator</code>s can differ, the 21 * sort orders of the same string under two different 22 * <code>Collator</code>s might differ. Hence comparing 23 * <code>CollationKey</code>s generated from different 24 * <code>Collator</code>s can give incorrect results. 25 26 * <p>Both the method 27 * <code>CollationKey.compareTo(CollationKey)</code> and the method 28 * <code>Collator.compare(String, String)</code> compare two strings 29 * and returns their relative order. The performance characteristics 30 * of these two approaches can differ. 31 * Note that collation keys are often less efficient than simply doing comparison. 32 * For more details, see the ICU User Guide. 33 * 34 * <p>During the construction of a <code>CollationKey</code>, the 35 * entire source string is examined and processed into a series of 36 * bits terminated by a null, that are stored in the <code>CollationKey</code>. 37 * When <code>CollationKey.compareTo(CollationKey)</code> executes, it 38 * performs bitwise comparison on the bit sequences. This can incurs 39 * startup cost when creating the <code>CollationKey</code>, but once 40 * the key is created, binary comparisons are fast. This approach is 41 * recommended when the same strings are to be compared over and over 42 * again. 43 * 44 * <p>On the other hand, implementations of 45 * <code>Collator.compare(String, String)</code> can examine and 46 * process the strings only until the first characters differing in 47 * order. This approach is recommended if the strings are to be 48 * compared only once.</p> 49 * 50 * <p>More information about the composition of the bit sequence can 51 * be found in the 52 * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html"> 53 * user guide</a>.</p> 54 * 55 * <p>The following example shows how <code>CollationKey</code>s can be used 56 * to sort a list of <code>String</code>s.</p> 57 * <blockquote> 58 * <pre> 59 * // Create an array of CollationKeys for the Strings to be sorted. 60 * Collator myCollator = Collator.getInstance(); 61 * CollationKey[] keys = new CollationKey[3]; 62 * keys[0] = myCollator.getCollationKey("Tom"); 63 * keys[1] = myCollator.getCollationKey("Dick"); 64 * keys[2] = myCollator.getCollationKey("Harry"); 65 * sort( keys ); 66 * <br> 67 * //... 68 * <br> 69 * // Inside body of sort routine, compare keys this way 70 * if( keys[i].compareTo( keys[j] ) > 0 ) 71 * // swap keys[i] and keys[j] 72 * <br> 73 * //... 74 * <br> 75 * // Finally, when we've returned from sort. 76 * System.out.println( keys[0].getSourceString() ); 77 * System.out.println( keys[1].getSourceString() ); 78 * System.out.println( keys[2].getSourceString() ); 79 * </pre> 80 * </blockquote> 81 * <p> 82 * This class is not subclassable 83 * @see Collator 84 * @see RuleBasedCollator 85 * @author Syn Wee Quek 86 */ 87 public final class CollationKey implements Comparable<CollationKey> 88 { 89 // public inner classes ------------------------------------------------- 90 91 /** 92 * Options that used in the API CollationKey.getBound() for getting a 93 * CollationKey based on the bound mode requested. 94 */ 95 public static final class BoundMode 96 { 97 /* 98 * do not change the values assigned to the members of this enum. 99 * Underlying code depends on them having these numbers 100 */ 101 102 /** 103 * Lower bound 104 */ 105 public static final int LOWER = 0; 106 107 /** 108 * Upper bound that will match strings of exact size 109 */ 110 public static final int UPPER = 1; 111 112 /** 113 * Upper bound that will match all the strings that have the same 114 * initial substring as the given string 115 */ 116 public static final int UPPER_LONG = 2; 117 118 /** 119 * One more than the highest normal BoundMode value. 120 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 121 * @hide unsupported on OHOS 122 */ 123 @Deprecated 124 public static final int COUNT = 3; 125 126 /** 127 * Private Constructor 128 */ 129 ///CLOVER:OFF BoundMode()130 private BoundMode(){} 131 ///CLOVER:ON 132 } 133 134 // public constructor --------------------------------------------------- 135 136 /** 137 * CollationKey constructor. 138 * This constructor is given public access, unlike the JDK version, to 139 * allow access to users extending the Collator class. See 140 * {@link Collator#getCollationKey(String)}. 141 * @param source string this CollationKey is to represent 142 * @param key array of bytes that represent the collation order of argument 143 * source terminated by a null 144 * @see Collator 145 */ CollationKey(String source, byte key[])146 public CollationKey(String source, byte key[]) 147 { 148 this(source, key, -1); 149 } 150 151 /** 152 * Private constructor, takes a length argument so it need not be lazy-evaluated. 153 * There must be a 00 byte at key[length] and none before. 154 */ CollationKey(String source, byte key[], int length)155 private CollationKey(String source, byte key[], int length) 156 { 157 m_source_ = source; 158 m_key_ = key; 159 m_hashCode_ = 0; 160 m_length_ = length; 161 } 162 163 /** 164 * CollationKey constructor that forces key to release its internal byte 165 * array for adoption. key will have a null byte array after this 166 * construction. 167 * @param source string this CollationKey is to represent 168 * @param key RawCollationKey object that represents the collation order of 169 * argument source. 170 * @see Collator 171 * @see RawCollationKey 172 * @hide unsupported on OHOS 173 */ CollationKey(String source, RawCollationKey key)174 public CollationKey(String source, RawCollationKey key) 175 { 176 m_source_ = source; 177 m_length_ = key.size - 1; 178 m_key_ = key.releaseBytes(); 179 assert m_key_[m_length_] == 0; 180 m_hashCode_ = 0; 181 } 182 183 // public getters ------------------------------------------------------- 184 185 /** 186 * Return the source string that this CollationKey represents. 187 * @return source string that this CollationKey represents 188 */ getSourceString()189 public String getSourceString() 190 { 191 return m_source_; 192 } 193 194 /** 195 * Duplicates and returns the value of this CollationKey as a sequence 196 * of big-endian bytes terminated by a null. 197 * 198 * <p>If two CollationKeys can be legitimately compared, then one can 199 * compare the byte arrays of each to obtain the same result, e.g. 200 * <pre> 201 * byte key1[] = collationkey1.toByteArray(); 202 * byte key2[] = collationkey2.toByteArray(); 203 * int key, targetkey; 204 * int i = 0; 205 * do { 206 * key = key1[i] & 0xFF; 207 * targetkey = key2[i] & 0xFF; 208 * if (key < targetkey) { 209 * System.out.println("String 1 is less than string 2"); 210 * return; 211 * } 212 * if (targetkey < key) { 213 * System.out.println("String 1 is more than string 2"); 214 * } 215 * i ++; 216 * } while (key != 0 && targetKey != 0); 217 * 218 * System.out.println("Strings are equal."); 219 * </pre> 220 * 221 * @return CollationKey value in a sequence of big-endian byte bytes 222 * terminated by a null. 223 */ toByteArray()224 public byte[] toByteArray() 225 { 226 int length = getLength() + 1; 227 byte result[] = new byte[length]; 228 System.arraycopy(m_key_, 0, result, 0, length); 229 return result; 230 } 231 232 // public other methods ------------------------------------------------- 233 234 /** 235 * Compare this CollationKey to another CollationKey. The 236 * collation rules of the Collator that created this key are 237 * applied. 238 * 239 * <p><strong>Note:</strong> Comparison between CollationKeys 240 * created by different Collators might return incorrect 241 * results. See class documentation. 242 * 243 * @param target target CollationKey 244 * @return an integer value. If the value is less than zero this CollationKey 245 * is less than than target, if the value is zero they are equal, and 246 * if the value is greater than zero this CollationKey is greater 247 * than target. 248 * @exception NullPointerException is thrown if argument is null. 249 * @see Collator#compare(String, String) 250 */ 251 @Override compareTo(CollationKey target)252 public int compareTo(CollationKey target) 253 { 254 for (int i = 0;; ++i) { 255 int l = m_key_[i]&0xff; 256 int r = target.m_key_[i]&0xff; 257 if (l < r) { 258 return -1; 259 } else if (l > r) { 260 return 1; 261 } else if (l == 0) { 262 return 0; 263 } 264 } 265 } 266 267 /** 268 * Compare this CollationKey and the specified Object for 269 * equality. The collation rules of the Collator that created 270 * this key are applied. 271 * 272 * <p>See note in compareTo(CollationKey) for warnings about 273 * possible incorrect results. 274 * 275 * @param target the object to compare to. 276 * @return true if the two keys compare as equal, false otherwise. 277 * @see #compareTo(CollationKey) 278 * @exception ClassCastException is thrown when the argument is not 279 * a CollationKey. NullPointerException is thrown when the argument 280 * is null. 281 */ 282 @Override equals(Object target)283 public boolean equals(Object target) 284 { 285 if (!(target instanceof CollationKey)) { 286 return false; 287 } 288 289 return equals((CollationKey)target); 290 } 291 292 /** 293 * Compare this CollationKey and the argument target CollationKey for 294 * equality. 295 * The collation 296 * rules of the Collator object which created these objects are applied. 297 * <p> 298 * See note in compareTo(CollationKey) for warnings of incorrect results 299 * 300 * @param target the CollationKey to compare to. 301 * @return true if two objects are equal, false otherwise. 302 * @exception NullPointerException is thrown when the argument is null. 303 */ equals(CollationKey target)304 public boolean equals(CollationKey target) 305 { 306 if (this == target) { 307 return true; 308 } 309 if (target == null) { 310 return false; 311 } 312 CollationKey other = target; 313 int i = 0; 314 while (true) { 315 if (m_key_[i] != other.m_key_[i]) { 316 return false; 317 } 318 if (m_key_[i] == 0) { 319 break; 320 } 321 i ++; 322 } 323 return true; 324 } 325 326 /** 327 * Returns a hash code for this CollationKey. The hash value is calculated 328 * on the key itself, not the String from which the key was created. Thus 329 * if x and y are CollationKeys, then x.hashCode(x) == y.hashCode() 330 * if x.equals(y) is true. This allows language-sensitive comparison in a 331 * hash table. 332 * 333 * @return the hash value. 334 */ 335 @Override hashCode()336 public int hashCode() 337 { 338 if (m_hashCode_ == 0) { 339 if (m_key_ == null) { 340 m_hashCode_ = 1; 341 } 342 else { 343 int size = m_key_.length >> 1; 344 StringBuilder key = new StringBuilder(size); 345 int i = 0; 346 while (m_key_[i] != 0 && m_key_[i + 1] != 0) { 347 key.append((char)((m_key_[i] << 8) | (0xff & m_key_[i + 1]))); 348 i += 2; 349 } 350 if (m_key_[i] != 0) { 351 key.append((char)(m_key_[i] << 8)); 352 } 353 m_hashCode_ = key.toString().hashCode(); 354 } 355 } 356 return m_hashCode_; 357 } 358 359 /** 360 * Produces a bound for the sort order of a given collation key and a 361 * strength level. This API does not attempt to find a bound for the 362 * CollationKey String representation, hence null will be returned in its 363 * place. 364 * <p> 365 * Resulting bounds can be used to produce a range of strings that are 366 * between upper and lower bounds. For example, if bounds are produced 367 * for a sortkey of string "smith", strings between upper and lower 368 * bounds with primary strength would include "Smith", "SMITH", "sMiTh". 369 * <p> 370 * There are two upper bounds that can be produced. If BoundMode.UPPER 371 * is produced, strings matched would be as above. However, if a bound 372 * is produced using BoundMode.UPPER_LONG is used, the above example will 373 * also match "Smithsonian" and similar. 374 * <p> 375 * For more on usage, see example in test procedure 376 * <a href="http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/collator/CollationAPITest.java"> 377 * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds. 378 * </a> 379 * <p> 380 * Collation keys produced may be compared using the <TT>compare</TT> API. 381 * @param boundType Mode of bound required. It can be BoundMode.LOWER, which 382 * produces a lower inclusive bound, BoundMode.UPPER, that 383 * produces upper bound that matches strings of the same 384 * length or BoundMode.UPPER_LONG that matches strings that 385 * have the same starting substring as the source string. 386 * @param noOfLevels Strength levels required in the resulting bound 387 * (for most uses, the recommended value is PRIMARY). This 388 * strength should be less than the maximum strength of 389 * this CollationKey. 390 * See users guide for explanation on the strength levels a 391 * collation key can have. 392 * @return the result bounded CollationKey with a valid sort order but 393 * a null String representation. 394 * @exception IllegalArgumentException thrown when the strength level 395 * requested is higher than or equal to the strength in this 396 * CollationKey. 397 * In the case of an Exception, information 398 * about the maximum strength to use will be returned in the 399 * Exception. The user can then call getBound() again with the 400 * appropriate strength. 401 * @see CollationKey 402 * @see CollationKey.BoundMode 403 * @see Collator#PRIMARY 404 * @see Collator#SECONDARY 405 * @see Collator#TERTIARY 406 * @see Collator#QUATERNARY 407 * @see Collator#IDENTICAL 408 */ getBound(int boundType, int noOfLevels)409 public CollationKey getBound(int boundType, int noOfLevels) 410 { 411 // Scan the string until we skip enough of the key OR reach the end of 412 // the key 413 int offset = 0; 414 int keystrength = Collator.PRIMARY; 415 416 if (noOfLevels > Collator.PRIMARY) { 417 while (offset < m_key_.length && m_key_[offset] != 0) { 418 if (m_key_[offset ++] 419 == Collation.LEVEL_SEPARATOR_BYTE) { 420 keystrength ++; 421 noOfLevels --; 422 if (noOfLevels == Collator.PRIMARY 423 || offset == m_key_.length || m_key_[offset] == 0) { 424 offset --; 425 break; 426 } 427 } 428 } 429 } 430 431 if (noOfLevels > 0) { 432 throw new IllegalArgumentException( 433 "Source collation key has only " 434 + keystrength 435 + " strength level. Call getBound() again " 436 + " with noOfLevels < " + keystrength); 437 } 438 439 // READ ME: this code assumes that the values for BoundMode variables 440 // will not change. They are set so that the enum value corresponds to 441 // the number of extra bytes each bound type needs. 442 byte resultkey[] = new byte[offset + boundType + 1]; 443 System.arraycopy(m_key_, 0, resultkey, 0, offset); 444 switch (boundType) { 445 case BoundMode.LOWER: // = 0 446 // Lower bound just gets terminated. No extra bytes 447 break; 448 case BoundMode.UPPER: // = 1 449 // Upper bound needs one extra byte 450 resultkey[offset ++] = 2; 451 break; 452 case BoundMode.UPPER_LONG: // = 2 453 // Upper long bound needs two extra bytes 454 resultkey[offset ++] = (byte)0xFF; 455 resultkey[offset ++] = (byte)0xFF; 456 break; 457 default: 458 throw new IllegalArgumentException( 459 "Illegal boundType argument"); 460 } 461 resultkey[offset] = 0; 462 return new CollationKey(null, resultkey, offset); 463 } 464 465 466 /** 467 * Merges this CollationKey with another. 468 * The levels are merged with their corresponding counterparts 469 * (primaries with primaries, secondaries with secondaries etc.). 470 * Between the values from the same level a separator is inserted. 471 * 472 * <p>This is useful, for example, for combining sort keys from first and last names 473 * to sort such pairs. 474 * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys 475 * 476 * <p>The recommended way to achieve "merged" sorting is by 477 * concatenating strings with U+FFFE between them. 478 * The concatenation has the same sort order as the merged sort keys, 479 * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). 480 * Using strings with U+FFFE may yield shorter sort keys. 481 * 482 * <p>For details about Sort Key Features see 483 * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features 484 * 485 * <p>It is possible to merge multiple sort keys by consecutively merging 486 * another one with the intermediate result. 487 * 488 * <p>Only the sort key bytes of the CollationKeys are merged. 489 * This API does not attempt to merge the 490 * String representations of the CollationKeys, hence null will be returned 491 * as the result's String representation. 492 * 493 * <p>Example (uncompressed): 494 * <pre>191B1D 01 050505 01 910505 00 495 * 1F2123 01 050505 01 910505 00</pre> 496 * will be merged as 497 * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> 498 * 499 * @param source CollationKey to merge with 500 * @return a CollationKey that contains the valid merged sort keys 501 * with a null String representation, 502 * i.e. <tt>new CollationKey(null, merged_sort_keys)</tt> 503 * @exception IllegalArgumentException thrown if source CollationKey 504 * argument is null or of 0 length. 505 */ merge(CollationKey source)506 public CollationKey merge(CollationKey source) 507 { 508 // check arguments 509 if (source == null || source.getLength() == 0) { 510 throw new IllegalArgumentException( 511 "CollationKey argument can not be null or of 0 length"); 512 } 513 514 // 1 byte extra for the 02 separator at the end of the copy of this sort key, 515 // and 1 more for the terminating 00. 516 byte result[] = new byte[getLength() + source.getLength() + 2]; 517 518 // merge the sort keys with the same number of levels 519 int rindex = 0; 520 int index = 0; 521 int sourceindex = 0; 522 while (true) { 523 // copy level from src1 not including 00 or 01 524 // unsigned issues 525 while (m_key_[index] < 0 || m_key_[index] >= MERGE_SEPERATOR_) { 526 result[rindex++] = m_key_[index++]; 527 } 528 529 // add a 02 merge separator 530 result[rindex++] = MERGE_SEPERATOR_; 531 532 // copy level from src2 not including 00 or 01 533 while (source.m_key_[sourceindex] < 0 534 || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) { 535 result[rindex++] = source.m_key_[sourceindex++]; 536 } 537 538 // if both sort keys have another level, then add a 01 level 539 // separator and continue 540 if (m_key_[index] == Collation.LEVEL_SEPARATOR_BYTE 541 && source.m_key_[sourceindex] 542 == Collation.LEVEL_SEPARATOR_BYTE) { 543 ++index; 544 ++sourceindex; 545 result[rindex++] = Collation.LEVEL_SEPARATOR_BYTE; 546 } 547 else { 548 break; 549 } 550 } 551 552 // here, at least one sort key is finished now, but the other one 553 // might have some contents left from containing more levels; 554 // that contents is just appended to the result 555 int remainingLength; 556 if ((remainingLength = m_length_ - index) > 0) { 557 System.arraycopy(m_key_, index, result, rindex, remainingLength); 558 rindex += remainingLength; 559 } 560 else if ((remainingLength = source.m_length_ - sourceindex) > 0) { 561 System.arraycopy(source.m_key_, sourceindex, result, rindex, remainingLength); 562 rindex += remainingLength; 563 } 564 result[rindex] = 0; 565 566 assert rindex == result.length - 1; 567 return new CollationKey(null, result, rindex); 568 } 569 570 // private data members ------------------------------------------------- 571 572 /** 573 * Sequence of bytes that represents the sort key 574 */ 575 private byte m_key_[]; 576 577 /** 578 * Source string this CollationKey represents 579 */ 580 private String m_source_; 581 582 /** 583 * Hash code for the key 584 */ 585 private int m_hashCode_; 586 /** 587 * Gets the length of this CollationKey 588 */ 589 private int m_length_; 590 /** 591 * Collation key merge seperator 592 */ 593 private static final int MERGE_SEPERATOR_ = 2; 594 595 // private methods ------------------------------------------------------ 596 597 /** 598 * Gets the length of the CollationKey 599 * @return length of the CollationKey 600 */ getLength()601 private int getLength() 602 { 603 if (m_length_ >= 0) { 604 return m_length_; 605 } 606 int length = m_key_.length; 607 for (int index = 0; index < length; index ++) { 608 if (m_key_[index] == 0) { 609 length = index; 610 break; 611 } 612 } 613 m_length_ = length; 614 return m_length_; 615 } 616 } 617