• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 * Copyright (C) 2013-2015, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 * CollationSettings.java, ported from collationsettings.h/.cpp
10 *
11 * C++ version created on: 2013feb07
12 * created by: Markus W. Scherer
13 */
14 
15 package ohos.global.icu.impl.coll;
16 
17 import java.util.Arrays;
18 
19 import ohos.global.icu.text.Collator;
20 
21 /**
22  * Collation settings/options/attributes.
23  * These are the values that can be changed via API.
24  * @hide exposed on OHOS
25  */
26 public final class CollationSettings extends SharedObject {
27     /**
28      * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
29      */
30     public static final int CHECK_FCD = 1;
31     /**
32      * Options bit 1: Numeric collation.
33      * Also known as CODAN = COllate Digits As Numbers.
34      *
35      * Treat digit sequences as numbers with CE sequences in numeric order,
36      * rather than returning a normal CE for each digit.
37      */
38     public static final int NUMERIC = 2;
39     /**
40      * "Shifted" alternate handling, see ALTERNATE_MASK.
41      */
42     static final int SHIFTED = 4;
43     /**
44      * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
45      * Reserve values 8 and 0xc for shift-trimmed and blanked.
46      */
47     static final int ALTERNATE_MASK = 0xc;
48     /**
49      * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
50      */
51     static final int MAX_VARIABLE_SHIFT = 4;
52     /** maxVariable options bit mask before shifting. */
53     static final int MAX_VARIABLE_MASK = 0x70;
54     /** Options bit 7: Reserved/unused/0. */
55     /**
56      * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
57      */
58     static final int UPPER_FIRST = 0x100;
59     /**
60      * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
61      * unless case level is on (when they are *moved* into the separate case level).
62      * By default, the case bits are removed from the tertiary weight (ignored).
63      *
64      * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
65      * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
66      */
67     public static final int CASE_FIRST = 0x200;
68     /**
69      * Options bit mask for caseFirst and upperFirst, before shifting.
70      * Same value as caseFirst==upperFirst.
71      */
72     public static final int CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
73     /**
74      * Options bit 10: Insert the case level between the secondary and tertiary levels.
75      */
76     public static final int CASE_LEVEL = 0x400;
77     /**
78      * Options bit 11: Compare secondary weights backwards. ("French secondary")
79      */
80     public static final int BACKWARD_SECONDARY = 0x800;
81     /**
82      * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
83      * It is the top used bit field in the options. (No need to mask after shifting.)
84      */
85     static final int STRENGTH_SHIFT = 12;
86     /** Strength options bit mask before shifting. */
87     static final int STRENGTH_MASK = 0xf000;
88 
89     /** maxVariable values */
90     static final int MAX_VAR_SPACE = 0;
91     static final int MAX_VAR_PUNCT = 1;
92     static final int MAX_VAR_SYMBOL = 2;
93     static final int MAX_VAR_CURRENCY = 3;
94 
CollationSettings()95     CollationSettings() {}
96 
97     @Override
clone()98     public CollationSettings clone() {
99         CollationSettings newSettings = (CollationSettings)super.clone();
100         // Note: The reorderTable, reorderRanges, and reorderCodes need not be cloned
101         // because, in Java, they only get replaced but not modified.
102         newSettings.fastLatinPrimaries = fastLatinPrimaries.clone();
103         return newSettings;
104     }
105 
106     @Override
equals(Object other)107     public boolean equals(Object other) {
108         if(other == null) { return false; }
109         if(!this.getClass().equals(other.getClass())) { return false; }
110         CollationSettings o = (CollationSettings)other;
111         if(options != o.options) { return false; }
112         if((options & ALTERNATE_MASK) != 0 && variableTop != o.variableTop) { return false; }
113         if(!Arrays.equals(reorderCodes, o.reorderCodes)) { return false; }
114         return true;
115     }
116 
117     @Override
hashCode()118     public int hashCode() {
119         int h = options << 8;
120         if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
121         h ^= reorderCodes.length;
122         for(int i = 0; i < reorderCodes.length; ++i) {
123             h ^= (reorderCodes[i] << i);
124         }
125         return h;
126     }
127 
resetReordering()128     public void resetReordering() {
129         // When we turn off reordering, we want to set a null permutation
130         // rather than a no-op permutation.
131         reorderTable = null;
132         minHighNoReorder = 0;
133         reorderRanges = null;
134         reorderCodes = EMPTY_INT_ARRAY;
135     }
136 
aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table)137     void aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table) {
138         int[] codes;
139         if(codesLength == codesAndRanges.length) {
140             codes = codesAndRanges;
141         } else {
142             codes = Arrays.copyOf(codesAndRanges, codesLength);
143         }
144         int rangesStart = codesLength;
145         int rangesLimit = codesAndRanges.length;
146         int rangesLength = rangesLimit - rangesStart;
147         if(table != null &&
148                 (rangesLength == 0 ?
149                         !reorderTableHasSplitBytes(table) :
150                         rangesLength >= 2 &&
151                         // The first offset must be 0. The last offset must not be 0.
152                         (codesAndRanges[rangesStart] & 0xffff) == 0 &&
153                         (codesAndRanges[rangesLimit - 1] & 0xffff) != 0)) {
154             reorderTable = table;
155             reorderCodes = codes;
156             // Drop ranges before the first split byte. They are reordered by the table.
157             // This then speeds up reordering of the remaining ranges.
158             int firstSplitByteRangeIndex = rangesStart;
159             while(firstSplitByteRangeIndex < rangesLimit &&
160                     (codesAndRanges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
161                 // The second byte of the primary limit is 0.
162                 ++firstSplitByteRangeIndex;
163             }
164             if(firstSplitByteRangeIndex == rangesLimit) {
165                 assert(!reorderTableHasSplitBytes(table));
166                 minHighNoReorder = 0;
167                 reorderRanges = null;
168             } else {
169                 assert(table[codesAndRanges[firstSplitByteRangeIndex] >>> 24] == 0);
170                 minHighNoReorder = codesAndRanges[rangesLimit - 1] & 0xffff0000L;
171                 setReorderRanges(codesAndRanges, firstSplitByteRangeIndex,
172                         rangesLimit - firstSplitByteRangeIndex);
173             }
174             return;
175         }
176         // Regenerate missing data.
177         setReordering(data, codes);
178     }
179 
setReordering(CollationData data, int[] codes)180     public void setReordering(CollationData data, int[] codes) {
181         if(codes.length == 0 || (codes.length == 1 && codes[0] == Collator.ReorderCodes.NONE)) {
182             resetReordering();
183             return;
184         }
185         UVector32 rangesList = new UVector32();
186         data.makeReorderRanges(codes, rangesList);
187         int rangesLength = rangesList.size();
188         if(rangesLength == 0) {
189             resetReordering();
190             return;
191         }
192         int[] ranges = rangesList.getBuffer();
193         // ranges[] contains at least two (limit, offset) pairs.
194         // The first offset must be 0. The last offset must not be 0.
195         // Separators (at the low end) and trailing weights (at the high end)
196         // are never reordered.
197         assert(rangesLength >= 2);
198         assert((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
199         minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000L;
200 
201         // Write the lead byte permutation table.
202         // Set a 0 for each lead byte that has a range boundary in the middle.
203         byte[] table = new byte[256];
204         int b = 0;
205         int firstSplitByteRangeIndex = -1;
206         for(int i = 0; i < rangesLength; ++i) {
207             int pair = ranges[i];
208             int limit1 = pair >>> 24;
209             while(b < limit1) {
210                 table[b] = (byte)(b + pair);
211                 ++b;
212             }
213             // Check the second byte of the limit.
214             if((pair & 0xff0000) != 0) {
215                 table[limit1] = 0;
216                 b = limit1 + 1;
217                 if(firstSplitByteRangeIndex < 0) {
218                     firstSplitByteRangeIndex = i;
219                 }
220             }
221         }
222         while(b <= 0xff) {
223             table[b] = (byte)b;
224             ++b;
225         }
226         int rangesStart;
227         if(firstSplitByteRangeIndex < 0) {
228             // The lead byte permutation table alone suffices for reordering.
229             rangesStart = rangesLength = 0;
230         } else {
231             // Remove the ranges below the first split byte.
232             rangesStart = firstSplitByteRangeIndex;
233             rangesLength -= firstSplitByteRangeIndex;
234         }
235         setReorderArrays(codes, ranges, rangesStart, rangesLength, table);
236     }
237 
setReorderArrays(int[] codes, int[] ranges, int rangesStart, int rangesLength, byte[] table)238     private void setReorderArrays(int[] codes,
239             int[] ranges, int rangesStart, int rangesLength, byte[] table) {
240         // Very different from C++. See the comments after the reorderCodes declaration.
241         if(codes == null) {
242             codes = EMPTY_INT_ARRAY;
243         }
244         assert (codes.length == 0) == (table == null);
245         reorderTable = table;
246         reorderCodes = codes;
247         setReorderRanges(ranges, rangesStart, rangesLength);
248     }
249 
setReorderRanges(int[] ranges, int rangesStart, int rangesLength)250     private void setReorderRanges(int[] ranges, int rangesStart, int rangesLength) {
251         if(rangesLength == 0) {
252             reorderRanges = null;
253         } else {
254             reorderRanges = new long[rangesLength];
255             int i = 0;
256             do {
257                 reorderRanges[i++] = ranges[rangesStart++] & 0xffffffffL;
258             } while(i < rangesLength);
259         }
260     }
261 
copyReorderingFrom(CollationSettings other)262     public void copyReorderingFrom(CollationSettings other) {
263         if(!other.hasReordering()) {
264             resetReordering();
265             return;
266         }
267         minHighNoReorder = other.minHighNoReorder;
268         reorderTable = other.reorderTable;
269         reorderRanges = other.reorderRanges;
270         reorderCodes = other.reorderCodes;
271     }
272 
hasReordering()273     public boolean hasReordering() { return reorderTable != null; }
274 
reorderTableHasSplitBytes(byte[] table)275     private static boolean reorderTableHasSplitBytes(byte[] table) {
276         assert(table[0] == 0);
277         for(int i = 1; i < 256; ++i) {
278             if(table[i] == 0) {
279                 return true;
280             }
281         }
282         return false;
283     }
284 
reorder(long p)285     public long reorder(long p) {
286         byte b = reorderTable[(int)p >>> 24];
287         if(b != 0 || p <= Collation.NO_CE_PRIMARY) {
288             return ((b & 0xffL) << 24) | (p & 0xffffff);
289         } else {
290             return reorderEx(p);
291         }
292     }
293 
reorderEx(long p)294     private long reorderEx(long p) {
295         assert minHighNoReorder > 0;
296         if(p >= minHighNoReorder) { return p; }
297         // Round up p so that its lower 16 bits are >= any offset bits.
298         // Then compare q directly with (limit, offset) pairs.
299         long q = p | 0xffff;
300         long r;
301         int i = 0;
302         while(q >= (r = reorderRanges[i])) { ++i; }
303         return p + ((long)(short)r << 24);
304     }
305 
306     // In C++, we use enums for attributes and their values, with a special value for the default.
307     // Combined getter/setter methods handle many attributes.
308     // In Java, we have specific methods for getting, setting, and set-to-default,
309     // except that this class uses bits in its own bit set for simple values.
310 
setStrength(int value)311     public void setStrength(int value) {
312         int noStrength = options & ~STRENGTH_MASK;
313         switch(value) {
314         case Collator.PRIMARY:
315         case Collator.SECONDARY:
316         case Collator.TERTIARY:
317         case Collator.QUATERNARY:
318         case Collator.IDENTICAL:
319             options = noStrength | (value << STRENGTH_SHIFT);
320             break;
321         default:
322             throw new IllegalArgumentException("illegal strength value " + value);
323         }
324     }
325 
setStrengthDefault(int defaultOptions)326     public void setStrengthDefault(int defaultOptions) {
327         int noStrength = options & ~STRENGTH_MASK;
328         options = noStrength | (defaultOptions & STRENGTH_MASK);
329     }
330 
getStrength(int options)331     static int getStrength(int options) {
332         return options >> STRENGTH_SHIFT;
333     }
334 
getStrength()335     public int getStrength() {
336         return getStrength(options);
337     }
338 
339     /** Sets the options bit for an on/off attribute. */
setFlag(int bit, boolean value)340     public void setFlag(int bit, boolean value) {
341         if(value) {
342             options |= bit;
343         } else {
344             options &= ~bit;
345         }
346     }
347 
setFlagDefault(int bit, int defaultOptions)348     public void setFlagDefault(int bit, int defaultOptions) {
349         options = (options & ~bit) | (defaultOptions & bit);
350     }
351 
getFlag(int bit)352     public boolean getFlag(int bit) {
353         return (options & bit) != 0;
354     }
355 
setCaseFirst(int value)356     public void setCaseFirst(int value) {
357         assert value == 0 || value == CASE_FIRST || value == CASE_FIRST_AND_UPPER_MASK;
358         int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
359         options = noCaseFirst | value;
360     }
361 
setCaseFirstDefault(int defaultOptions)362     public void setCaseFirstDefault(int defaultOptions) {
363         int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
364         options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
365     }
366 
getCaseFirst()367     public int getCaseFirst() {
368         return options & CASE_FIRST_AND_UPPER_MASK;
369     }
370 
setAlternateHandlingShifted(boolean value)371     public void setAlternateHandlingShifted(boolean value) {
372         int noAlternate = options & ~ALTERNATE_MASK;
373         if(value) {
374             options = noAlternate | SHIFTED;
375         } else {
376             options = noAlternate;
377         }
378     }
379 
setAlternateHandlingDefault(int defaultOptions)380     public void setAlternateHandlingDefault(int defaultOptions) {
381         int noAlternate = options & ~ALTERNATE_MASK;
382         options = noAlternate | (defaultOptions & ALTERNATE_MASK);
383     }
384 
getAlternateHandling()385     public boolean getAlternateHandling() {
386         return (options & ALTERNATE_MASK) != 0;
387     }
388 
setMaxVariable(int value, int defaultOptions)389     public void setMaxVariable(int value, int defaultOptions) {
390         int noMax = options & ~MAX_VARIABLE_MASK;
391         switch(value) {
392         case MAX_VAR_SPACE:
393         case MAX_VAR_PUNCT:
394         case MAX_VAR_SYMBOL:
395         case MAX_VAR_CURRENCY:
396             options = noMax | (value << MAX_VARIABLE_SHIFT);
397             break;
398         case -1:
399             options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
400             break;
401         default:
402             throw new IllegalArgumentException("illegal maxVariable value " + value);
403         }
404     }
405 
getMaxVariable()406     public int getMaxVariable() {
407         return (options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT;
408     }
409 
410     /**
411      * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
412      */
isTertiaryWithCaseBits(int options)413     static boolean isTertiaryWithCaseBits(int options) {
414         return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
415     }
getTertiaryMask(int options)416     static int getTertiaryMask(int options) {
417         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
418         return isTertiaryWithCaseBits(options) ?
419                 Collation.CASE_AND_TERTIARY_MASK : Collation.ONLY_TERTIARY_MASK;
420     }
421 
sortsTertiaryUpperCaseFirst(int options)422     static boolean sortsTertiaryUpperCaseFirst(int options) {
423         // On tertiary level, consider case bits and sort uppercase first
424         // if caseLevel is off and caseFirst==upperFirst.
425         return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
426     }
427 
dontCheckFCD()428     public boolean dontCheckFCD() {
429         return (options & CHECK_FCD) == 0;
430     }
431 
hasBackwardSecondary()432     boolean hasBackwardSecondary() {
433         return (options & BACKWARD_SECONDARY) != 0;
434     }
435 
isNumeric()436     public boolean isNumeric() {
437         return (options & NUMERIC) != 0;
438     }
439 
440     /** CHECK_FCD etc. */
441     public int options = (Collator.TERTIARY << STRENGTH_SHIFT) |  // DEFAULT_STRENGTH
442             (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT);
443     /** Variable-top primary weight. */
444     public long variableTop;
445     /**
446      * 256-byte table for reordering permutation of primary lead bytes; null if no reordering.
447      * A 0 entry at a non-zero index means that the primary lead byte is "split"
448      * (there are different offsets for primaries that share that lead byte)
449      * and the reordering offset must be determined via the reorderRanges.
450      */
451     public byte[] reorderTable;
452     /** Limit of last reordered range. 0 if no reordering or no split bytes. */
453     long minHighNoReorder;
454     /**
455      * Primary-weight ranges for script reordering,
456      * to be used by reorder(p) for split-reordered primary lead bytes.
457      *
458      * <p>Each entry is a (limit, offset) pair.
459      * The upper 16 bits of the entry are the upper 16 bits of the
460      * exclusive primary limit of a range.
461      * Primaries between the previous limit and this one have their lead bytes
462      * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits.
463      *
464      * <p>CollationData.makeReorderRanges() writes a full list where the first range
465      * (at least for terminators and separators) has a 0 offset.
466      * The last range has a non-zero offset.
467      * minHighNoReorder is set to the limit of that last range.
468      *
469      * <p>In the settings object, the initial ranges before the first split lead byte
470      * are omitted for efficiency; they are handled by reorder(p) via the reorderTable.
471      * If there are no split-reordered lead bytes, then no ranges are needed.
472      */
473     long[] reorderRanges;
474     /** Array of reorder codes; ignored if length == 0. */
475     public int[] reorderCodes = EMPTY_INT_ARRAY;
476     // Note: In C++, we keep a memory block around for the reorder codes,
477     // the ranges, and the permutation table,
478     // and modify them for new codes.
479     // In Java, we simply copy references and then never modify the array contents.
480     // The caller must abandon the arrays.
481     // Reorder codes from the public setter API must be cloned.
482     private static final int[] EMPTY_INT_ARRAY = new int[0];
483 
484     /** Options for CollationFastLatin. Negative if disabled. */
485     public int fastLatinOptions = -1;
486     // fastLatinPrimaries.length must be equal to CollationFastLatin.LATIN_LIMIT,
487     // but we do not import CollationFastLatin to reduce circular dependencies.
488     public char[] fastLatinPrimaries = new char[0x180];  // mutable contents
489 }
490