• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 1996-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10 package ohos.global.icu.text;
11 
12 import ohos.global.icu.impl.coll.Collation;
13 
14 /**
15  * A <code>CollationKey</code> represents a <code>String</code>
16  * under the rules of a specific <code>Collator</code>
17  * object. Comparing two <code>CollationKey</code>s returns the
18  * relative order of the <code>String</code>s they represent.
19  *
20  * <p>Since the rule set of <code>Collator</code>s can differ, the
21  * sort orders of the same string under two different
22  * <code>Collator</code>s might differ.  Hence comparing
23  * <code>CollationKey</code>s generated from different
24  * <code>Collator</code>s can give incorrect results.
25 
26  * <p>Both the method
27  * <code>CollationKey.compareTo(CollationKey)</code> and the method
28  * <code>Collator.compare(String, String)</code> compare two strings
29  * and returns their relative order.  The performance characteristics
30  * of these two approaches can differ.
31  * Note that collation keys are often less efficient than simply doing comparison.
32  * For more details, see the ICU User Guide.
33  *
34  * <p>During the construction of a <code>CollationKey</code>, the
35  * entire source string is examined and processed into a series of
36  * bits terminated by a null, that are stored in the <code>CollationKey</code>.
37  * When <code>CollationKey.compareTo(CollationKey)</code> executes, it
38  * performs bitwise comparison on the bit sequences.  This can incurs
39  * startup cost when creating the <code>CollationKey</code>, but once
40  * the key is created, binary comparisons are fast.  This approach is
41  * recommended when the same strings are to be compared over and over
42  * again.
43  *
44  * <p>On the other hand, implementations of
45  * <code>Collator.compare(String, String)</code> can examine and
46  * process the strings only until the first characters differing in
47  * order.  This approach is recommended if the strings are to be
48  * compared only once.</p>
49  *
50  * <p>More information about the composition of the bit sequence can
51  * be found in the
52  * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">
53  * user guide</a>.</p>
54  *
55  * <p>The following example shows how <code>CollationKey</code>s can be used
56  * to sort a list of <code>String</code>s.</p>
57  * <blockquote>
58  * <pre>
59  * // Create an array of CollationKeys for the Strings to be sorted.
60  * Collator myCollator = Collator.getInstance();
61  * CollationKey[] keys = new CollationKey[3];
62  * keys[0] = myCollator.getCollationKey("Tom");
63  * keys[1] = myCollator.getCollationKey("Dick");
64  * keys[2] = myCollator.getCollationKey("Harry");
65  * sort( keys );
66  * <br>
67  * //...
68  * <br>
69  * // Inside body of sort routine, compare keys this way
70  * if( keys[i].compareTo( keys[j] ) &gt; 0 )
71  *    // swap keys[i] and keys[j]
72  * <br>
73  * //...
74  * <br>
75  * // Finally, when we've returned from sort.
76  * System.out.println( keys[0].getSourceString() );
77  * System.out.println( keys[1].getSourceString() );
78  * System.out.println( keys[2].getSourceString() );
79  * </pre>
80  * </blockquote>
81  * <p>
82  * This class is not subclassable
83  * @see Collator
84  * @see RuleBasedCollator
85  * @author Syn Wee Quek
86  */
87 public final class CollationKey implements Comparable<CollationKey>
88 {
89     // public inner classes -------------------------------------------------
90 
91     /**
92      * Options that used in the API CollationKey.getBound() for getting a
93      * CollationKey based on the bound mode requested.
94      */
95     public static final class BoundMode
96     {
97         /*
98          * do not change the values assigned to the members of this enum.
99          * Underlying code depends on them having these numbers
100          */
101 
102         /**
103          * Lower bound
104          */
105         public static final int LOWER = 0;
106 
107         /**
108          * Upper bound that will match strings of exact size
109          */
110         public static final int UPPER = 1;
111 
112         /**
113          * Upper bound that will match all the strings that have the same
114          * initial substring as the given string
115          */
116         public static final int UPPER_LONG = 2;
117 
118         /**
119          * One more than the highest normal BoundMode value.
120          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
121          * @hide unsupported on OHOS
122          */
123         @Deprecated
124         public static final int COUNT = 3;
125 
126         /**
127          * Private Constructor
128          */
129         ///CLOVER:OFF
BoundMode()130         private BoundMode(){}
131         ///CLOVER:ON
132     }
133 
134     // public constructor ---------------------------------------------------
135 
136     /**
137      * CollationKey constructor.
138      * This constructor is given public access, unlike the JDK version, to
139      * allow access to users extending the Collator class. See
140      * {@link Collator#getCollationKey(String)}.
141      * @param source string this CollationKey is to represent
142      * @param key array of bytes that represent the collation order of argument
143      *            source terminated by a null
144      * @see Collator
145      */
CollationKey(String source, byte key[])146     public CollationKey(String source, byte key[])
147     {
148         this(source, key, -1);
149     }
150 
151     /**
152      * Private constructor, takes a length argument so it need not be lazy-evaluated.
153      * There must be a 00 byte at key[length] and none before.
154      */
CollationKey(String source, byte key[], int length)155     private CollationKey(String source, byte key[], int length)
156     {
157         m_source_ = source;
158         m_key_ = key;
159         m_hashCode_ = 0;
160         m_length_ = length;
161     }
162 
163     /**
164      * CollationKey constructor that forces key to release its internal byte
165      * array for adoption. key will have a null byte array after this
166      * construction.
167      * @param source string this CollationKey is to represent
168      * @param key RawCollationKey object that represents the collation order of
169      *            argument source.
170      * @see Collator
171      * @see RawCollationKey
172      * @hide unsupported on OHOS
173      */
CollationKey(String source, RawCollationKey key)174     public CollationKey(String source, RawCollationKey key)
175     {
176         m_source_ = source;
177         m_length_ = key.size - 1;
178         m_key_ = key.releaseBytes();
179         assert m_key_[m_length_] == 0;
180         m_hashCode_ = 0;
181     }
182 
183     // public getters -------------------------------------------------------
184 
185     /**
186      * Return the source string that this CollationKey represents.
187      * @return source string that this CollationKey represents
188      */
getSourceString()189     public String getSourceString()
190     {
191         return m_source_;
192     }
193 
194     /**
195      * Duplicates and returns the value of this CollationKey as a sequence
196      * of big-endian bytes terminated by a null.
197      *
198      * <p>If two CollationKeys can be legitimately compared, then one can
199      * compare the byte arrays of each to obtain the same result, e.g.
200      * <pre>
201      * byte key1[] = collationkey1.toByteArray();
202      * byte key2[] = collationkey2.toByteArray();
203      * int key, targetkey;
204      * int i = 0;
205      * do {
206      *       key = key1[i] &amp; 0xFF;
207      *     targetkey = key2[i] &amp; 0xFF;
208      *     if (key &lt; targetkey) {
209      *         System.out.println("String 1 is less than string 2");
210      *         return;
211      *     }
212      *     if (targetkey &lt; key) {
213      *         System.out.println("String 1 is more than string 2");
214      *     }
215      *     i ++;
216      * } while (key != 0 &amp;&amp; targetKey != 0);
217      *
218      * System.out.println("Strings are equal.");
219      * </pre>
220      *
221      * @return CollationKey value in a sequence of big-endian byte bytes
222      *         terminated by a null.
223      */
toByteArray()224     public byte[] toByteArray()
225     {
226         int length = getLength() + 1;
227         byte result[] = new byte[length];
228         System.arraycopy(m_key_, 0, result, 0, length);
229         return result;
230     }
231 
232     // public other methods -------------------------------------------------
233 
234     /**
235      * Compare this CollationKey to another CollationKey.  The
236      * collation rules of the Collator that created this key are
237      * applied.
238      *
239      * <p><strong>Note:</strong> Comparison between CollationKeys
240      * created by different Collators might return incorrect
241      * results.  See class documentation.
242      *
243      * @param target target CollationKey
244      * @return an integer value.  If the value is less than zero this CollationKey
245      *         is less than than target, if the value is zero they are equal, and
246      *         if the value is greater than zero this CollationKey is greater
247      *         than target.
248      * @exception NullPointerException is thrown if argument is null.
249      * @see Collator#compare(String, String)
250      */
251     @Override
compareTo(CollationKey target)252     public int compareTo(CollationKey target)
253     {
254         for (int i = 0;; ++i) {
255             int l = m_key_[i]&0xff;
256             int r = target.m_key_[i]&0xff;
257             if (l < r) {
258             return -1;
259             } else if (l > r) {
260             return 1;
261             } else if (l == 0) {
262             return 0;
263             }
264         }
265     }
266 
267     /**
268      * Compare this CollationKey and the specified Object for
269      * equality.  The collation rules of the Collator that created
270      * this key are applied.
271      *
272      * <p>See note in compareTo(CollationKey) for warnings about
273      * possible incorrect results.
274      *
275      * @param target the object to compare to.
276      * @return true if the two keys compare as equal, false otherwise.
277      * @see #compareTo(CollationKey)
278      * @exception ClassCastException is thrown when the argument is not
279      *            a CollationKey.  NullPointerException is thrown when the argument
280      *            is null.
281      */
282     @Override
equals(Object target)283     public boolean equals(Object target)
284     {
285         if (!(target instanceof CollationKey)) {
286             return false;
287         }
288 
289         return equals((CollationKey)target);
290     }
291 
292     /**
293      * Compare this CollationKey and the argument target CollationKey for
294      * equality.
295      * The collation
296      * rules of the Collator object which created these objects are applied.
297      * <p>
298      * See note in compareTo(CollationKey) for warnings of incorrect results
299      *
300      * @param target the CollationKey to compare to.
301      * @return true if two objects are equal, false otherwise.
302      * @exception NullPointerException is thrown when the argument is null.
303      */
equals(CollationKey target)304     public boolean equals(CollationKey target)
305     {
306         if (this == target) {
307             return true;
308         }
309         if (target == null) {
310             return false;
311         }
312         CollationKey other = target;
313         int i = 0;
314         while (true) {
315             if (m_key_[i] != other.m_key_[i]) {
316                 return false;
317             }
318             if (m_key_[i] == 0) {
319               break;
320             }
321             i ++;
322         }
323         return true;
324     }
325 
326     /**
327      * Returns a hash code for this CollationKey. The hash value is calculated
328      * on the key itself, not the String from which the key was created. Thus
329      * if x and y are CollationKeys, then x.hashCode(x) == y.hashCode()
330      * if x.equals(y) is true. This allows language-sensitive comparison in a
331      * hash table.
332      *
333      * @return the hash value.
334      */
335     @Override
hashCode()336     public int hashCode()
337     {
338         if (m_hashCode_ == 0) {
339             if (m_key_ == null) {
340                 m_hashCode_ = 1;
341             }
342             else {
343                 int size = m_key_.length >> 1;
344                 StringBuilder key = new StringBuilder(size);
345                 int i = 0;
346                 while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
347                     key.append((char)((m_key_[i] << 8) | (0xff & m_key_[i + 1])));
348                     i += 2;
349                 }
350                 if (m_key_[i] != 0) {
351                     key.append((char)(m_key_[i] << 8));
352                 }
353                 m_hashCode_ = key.toString().hashCode();
354             }
355         }
356         return m_hashCode_;
357     }
358 
359     /**
360      * Produces a bound for the sort order of a given collation key and a
361      * strength level. This API does not attempt to find a bound for the
362      * CollationKey String representation, hence null will be returned in its
363      * place.
364      * <p>
365      * Resulting bounds can be used to produce a range of strings that are
366      * between upper and lower bounds. For example, if bounds are produced
367      * for a sortkey of string "smith", strings between upper and lower
368      * bounds with primary strength would include "Smith", "SMITH", "sMiTh".
369      * <p>
370      * There are two upper bounds that can be produced. If BoundMode.UPPER
371      * is produced, strings matched would be as above. However, if a bound
372      * is produced using BoundMode.UPPER_LONG is used, the above example will
373      * also match "Smithsonian" and similar.
374      * <p>
375      * For more on usage, see example in test procedure
376      * <a href="http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/collator/CollationAPITest.java">
377      * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
378      * </a>
379      * <p>
380      * Collation keys produced may be compared using the <TT>compare</TT> API.
381      * @param boundType Mode of bound required. It can be BoundMode.LOWER, which
382      *              produces a lower inclusive bound, BoundMode.UPPER, that
383      *              produces upper bound that matches strings of the same
384      *              length or BoundMode.UPPER_LONG that matches strings that
385      *              have the same starting substring as the source string.
386      * @param noOfLevels Strength levels required in the resulting bound
387      *                 (for most uses, the recommended value is PRIMARY). This
388      *                 strength should be less than the maximum strength of
389      *                 this CollationKey.
390      *                 See users guide for explanation on the strength levels a
391      *                 collation key can have.
392      * @return the result bounded CollationKey with a valid sort order but
393      *         a null String representation.
394      * @exception IllegalArgumentException thrown when the strength level
395      *            requested is higher than or equal to the strength in this
396      *            CollationKey.
397      *            In the case of an Exception, information
398      *            about the maximum strength to use will be returned in the
399      *            Exception. The user can then call getBound() again with the
400      *            appropriate strength.
401      * @see CollationKey
402      * @see CollationKey.BoundMode
403      * @see Collator#PRIMARY
404      * @see Collator#SECONDARY
405      * @see Collator#TERTIARY
406      * @see Collator#QUATERNARY
407      * @see Collator#IDENTICAL
408      */
getBound(int boundType, int noOfLevels)409     public CollationKey getBound(int boundType, int noOfLevels)
410     {
411         // Scan the string until we skip enough of the key OR reach the end of
412         // the key
413         int offset = 0;
414         int keystrength = Collator.PRIMARY;
415 
416         if (noOfLevels > Collator.PRIMARY) {
417             while (offset < m_key_.length && m_key_[offset] != 0) {
418                 if (m_key_[offset ++]
419                         == Collation.LEVEL_SEPARATOR_BYTE) {
420                     keystrength ++;
421                     noOfLevels --;
422                     if (noOfLevels == Collator.PRIMARY
423                         || offset == m_key_.length || m_key_[offset] == 0) {
424                         offset --;
425                         break;
426                     }
427                 }
428             }
429         }
430 
431         if (noOfLevels > 0) {
432             throw new IllegalArgumentException(
433                                   "Source collation key has only "
434                                   + keystrength
435                                   + " strength level. Call getBound() again "
436                                   + " with noOfLevels < " + keystrength);
437         }
438 
439         // READ ME: this code assumes that the values for BoundMode variables
440         // will not change. They are set so that the enum value corresponds to
441         // the number of extra bytes each bound type needs.
442         byte resultkey[] = new byte[offset + boundType + 1];
443         System.arraycopy(m_key_, 0, resultkey, 0, offset);
444         switch (boundType) {
445             case BoundMode.LOWER: // = 0
446                 // Lower bound just gets terminated. No extra bytes
447                 break;
448             case BoundMode.UPPER: // = 1
449                 // Upper bound needs one extra byte
450                 resultkey[offset ++] = 2;
451                 break;
452             case BoundMode.UPPER_LONG: // = 2
453                 // Upper long bound needs two extra bytes
454                 resultkey[offset ++] = (byte)0xFF;
455                 resultkey[offset ++] = (byte)0xFF;
456                 break;
457             default:
458                 throw new IllegalArgumentException(
459                                                 "Illegal boundType argument");
460         }
461         resultkey[offset] = 0;
462         return new CollationKey(null, resultkey, offset);
463     }
464 
465 
466     /**
467      * Merges this CollationKey with another.
468      * The levels are merged with their corresponding counterparts
469      * (primaries with primaries, secondaries with secondaries etc.).
470      * Between the values from the same level a separator is inserted.
471      *
472      * <p>This is useful, for example, for combining sort keys from first and last names
473      * to sort such pairs.
474      * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
475      *
476      * <p>The recommended way to achieve "merged" sorting is by
477      * concatenating strings with U+FFFE between them.
478      * The concatenation has the same sort order as the merged sort keys,
479      * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2).
480      * Using strings with U+FFFE may yield shorter sort keys.
481      *
482      * <p>For details about Sort Key Features see
483      * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
484      *
485      * <p>It is possible to merge multiple sort keys by consecutively merging
486      * another one with the intermediate result.
487      *
488      * <p>Only the sort key bytes of the CollationKeys are merged.
489      * This API does not attempt to merge the
490      * String representations of the CollationKeys, hence null will be returned
491      * as the result's String representation.
492      *
493      * <p>Example (uncompressed):
494      * <pre>191B1D 01 050505 01 910505 00
495      * 1F2123 01 050505 01 910505 00</pre>
496      * will be merged as
497      * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
498      *
499      * @param source CollationKey to merge with
500      * @return a CollationKey that contains the valid merged sort keys
501      *         with a null String representation,
502      *         i.e. <tt>new CollationKey(null, merged_sort_keys)</tt>
503      * @exception IllegalArgumentException thrown if source CollationKey
504      *            argument is null or of 0 length.
505      */
merge(CollationKey source)506     public CollationKey merge(CollationKey source)
507     {
508         // check arguments
509         if (source == null || source.getLength() == 0) {
510             throw new IllegalArgumentException(
511                       "CollationKey argument can not be null or of 0 length");
512         }
513 
514         // 1 byte extra for the 02 separator at the end of the copy of this sort key,
515         // and 1 more for the terminating 00.
516         byte result[] = new byte[getLength() + source.getLength() + 2];
517 
518         // merge the sort keys with the same number of levels
519         int rindex = 0;
520         int index = 0;
521         int sourceindex = 0;
522         while (true) {
523             // copy level from src1 not including 00 or 01
524             // unsigned issues
525             while (m_key_[index] < 0 || m_key_[index] >= MERGE_SEPERATOR_) {
526                 result[rindex++] = m_key_[index++];
527             }
528 
529             // add a 02 merge separator
530             result[rindex++] = MERGE_SEPERATOR_;
531 
532             // copy level from src2 not including 00 or 01
533             while (source.m_key_[sourceindex] < 0
534                    || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) {
535                 result[rindex++] = source.m_key_[sourceindex++];
536             }
537 
538             // if both sort keys have another level, then add a 01 level
539             // separator and continue
540             if (m_key_[index] == Collation.LEVEL_SEPARATOR_BYTE
541                 && source.m_key_[sourceindex]
542                         == Collation.LEVEL_SEPARATOR_BYTE) {
543                 ++index;
544                 ++sourceindex;
545                 result[rindex++] = Collation.LEVEL_SEPARATOR_BYTE;
546             }
547             else {
548                 break;
549             }
550         }
551 
552         // here, at least one sort key is finished now, but the other one
553         // might have some contents left from containing more levels;
554         // that contents is just appended to the result
555         int remainingLength;
556         if ((remainingLength = m_length_ - index) > 0) {
557             System.arraycopy(m_key_, index, result, rindex, remainingLength);
558             rindex += remainingLength;
559         }
560         else if ((remainingLength = source.m_length_ - sourceindex) > 0) {
561             System.arraycopy(source.m_key_, sourceindex, result, rindex, remainingLength);
562             rindex += remainingLength;
563         }
564         result[rindex] = 0;
565 
566         assert rindex == result.length - 1;
567         return new CollationKey(null, result, rindex);
568     }
569 
570     // private data members -------------------------------------------------
571 
572     /**
573      * Sequence of bytes that represents the sort key
574      */
575     private byte m_key_[];
576 
577     /**
578      * Source string this CollationKey represents
579      */
580     private String m_source_;
581 
582     /**
583      * Hash code for the key
584      */
585     private int m_hashCode_;
586     /**
587      * Gets the length of this CollationKey
588      */
589     private int m_length_;
590     /**
591      * Collation key merge seperator
592      */
593     private static final int MERGE_SEPERATOR_ = 2;
594 
595     // private methods ------------------------------------------------------
596 
597     /**
598      * Gets the length of the CollationKey
599      * @return length of the CollationKey
600      */
getLength()601     private int getLength()
602     {
603         if (m_length_ >= 0) {
604             return m_length_;
605         }
606         int length = m_key_.length;
607         for (int index = 0; index < length; index ++) {
608             if (m_key_[index] == 0) {
609                 length = index;
610                 break;
611             }
612         }
613         m_length_ = length;
614         return m_length_;
615     }
616 }
617