1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2013-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * CollationSettings.java, ported from collationsettings.h/.cpp 10 * 11 * C++ version created on: 2013feb07 12 * created by: Markus W. Scherer 13 */ 14 15 package ohos.global.icu.impl.coll; 16 17 import java.util.Arrays; 18 19 import ohos.global.icu.text.Collator; 20 21 /** 22 * Collation settings/options/attributes. 23 * These are the values that can be changed via API. 24 * @hide exposed on OHOS 25 */ 26 public final class CollationSettings extends SharedObject { 27 /** 28 * Options bit 0: Perform the FCD check on the input text and deliver normalized text. 29 */ 30 public static final int CHECK_FCD = 1; 31 /** 32 * Options bit 1: Numeric collation. 33 * Also known as CODAN = COllate Digits As Numbers. 34 * 35 * Treat digit sequences as numbers with CE sequences in numeric order, 36 * rather than returning a normal CE for each digit. 37 */ 38 public static final int NUMERIC = 2; 39 /** 40 * "Shifted" alternate handling, see ALTERNATE_MASK. 41 */ 42 static final int SHIFTED = 4; 43 /** 44 * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable. 45 * Reserve values 8 and 0xc for shift-trimmed and blanked. 46 */ 47 static final int ALTERNATE_MASK = 0xc; 48 /** 49 * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value. 50 */ 51 static final int MAX_VARIABLE_SHIFT = 4; 52 /** maxVariable options bit mask before shifting. */ 53 static final int MAX_VARIABLE_MASK = 0x70; 54 /** Options bit 7: Reserved/unused/0. */ 55 /** 56 * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on. 57 */ 58 static final int UPPER_FIRST = 0x100; 59 /** 60 * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values) 61 * unless case level is on (when they are *moved* into the separate case level). 62 * By default, the case bits are removed from the tertiary weight (ignored). 63 * 64 * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to 65 * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST. 66 */ 67 public static final int CASE_FIRST = 0x200; 68 /** 69 * Options bit mask for caseFirst and upperFirst, before shifting. 70 * Same value as caseFirst==upperFirst. 71 */ 72 public static final int CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST; 73 /** 74 * Options bit 10: Insert the case level between the secondary and tertiary levels. 75 */ 76 public static final int CASE_LEVEL = 0x400; 77 /** 78 * Options bit 11: Compare secondary weights backwards. ("French secondary") 79 */ 80 public static final int BACKWARD_SECONDARY = 0x800; 81 /** 82 * Options bits 15..12: The 4-bit strength value bit field is shifted by this value. 83 * It is the top used bit field in the options. (No need to mask after shifting.) 84 */ 85 static final int STRENGTH_SHIFT = 12; 86 /** Strength options bit mask before shifting. */ 87 static final int STRENGTH_MASK = 0xf000; 88 89 /** maxVariable values */ 90 static final int MAX_VAR_SPACE = 0; 91 static final int MAX_VAR_PUNCT = 1; 92 static final int MAX_VAR_SYMBOL = 2; 93 static final int MAX_VAR_CURRENCY = 3; 94 CollationSettings()95 CollationSettings() {} 96 97 @Override clone()98 public CollationSettings clone() { 99 CollationSettings newSettings = (CollationSettings)super.clone(); 100 // Note: The reorderTable, reorderRanges, and reorderCodes need not be cloned 101 // because, in Java, they only get replaced but not modified. 102 newSettings.fastLatinPrimaries = fastLatinPrimaries.clone(); 103 return newSettings; 104 } 105 106 @Override equals(Object other)107 public boolean equals(Object other) { 108 if(other == null) { return false; } 109 if(!this.getClass().equals(other.getClass())) { return false; } 110 CollationSettings o = (CollationSettings)other; 111 if(options != o.options) { return false; } 112 if((options & ALTERNATE_MASK) != 0 && variableTop != o.variableTop) { return false; } 113 if(!Arrays.equals(reorderCodes, o.reorderCodes)) { return false; } 114 return true; 115 } 116 117 @Override hashCode()118 public int hashCode() { 119 int h = options << 8; 120 if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } 121 h ^= reorderCodes.length; 122 for(int i = 0; i < reorderCodes.length; ++i) { 123 h ^= (reorderCodes[i] << i); 124 } 125 return h; 126 } 127 resetReordering()128 public void resetReordering() { 129 // When we turn off reordering, we want to set a null permutation 130 // rather than a no-op permutation. 131 reorderTable = null; 132 minHighNoReorder = 0; 133 reorderRanges = null; 134 reorderCodes = EMPTY_INT_ARRAY; 135 } 136 aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table)137 void aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table) { 138 int[] codes; 139 if(codesLength == codesAndRanges.length) { 140 codes = codesAndRanges; 141 } else { 142 codes = Arrays.copyOf(codesAndRanges, codesLength); 143 } 144 int rangesStart = codesLength; 145 int rangesLimit = codesAndRanges.length; 146 int rangesLength = rangesLimit - rangesStart; 147 if(table != null && 148 (rangesLength == 0 ? 149 !reorderTableHasSplitBytes(table) : 150 rangesLength >= 2 && 151 // The first offset must be 0. The last offset must not be 0. 152 (codesAndRanges[rangesStart] & 0xffff) == 0 && 153 (codesAndRanges[rangesLimit - 1] & 0xffff) != 0)) { 154 reorderTable = table; 155 reorderCodes = codes; 156 // Drop ranges before the first split byte. They are reordered by the table. 157 // This then speeds up reordering of the remaining ranges. 158 int firstSplitByteRangeIndex = rangesStart; 159 while(firstSplitByteRangeIndex < rangesLimit && 160 (codesAndRanges[firstSplitByteRangeIndex] & 0xff0000) == 0) { 161 // The second byte of the primary limit is 0. 162 ++firstSplitByteRangeIndex; 163 } 164 if(firstSplitByteRangeIndex == rangesLimit) { 165 assert(!reorderTableHasSplitBytes(table)); 166 minHighNoReorder = 0; 167 reorderRanges = null; 168 } else { 169 assert(table[codesAndRanges[firstSplitByteRangeIndex] >>> 24] == 0); 170 minHighNoReorder = codesAndRanges[rangesLimit - 1] & 0xffff0000L; 171 setReorderRanges(codesAndRanges, firstSplitByteRangeIndex, 172 rangesLimit - firstSplitByteRangeIndex); 173 } 174 return; 175 } 176 // Regenerate missing data. 177 setReordering(data, codes); 178 } 179 setReordering(CollationData data, int[] codes)180 public void setReordering(CollationData data, int[] codes) { 181 if(codes.length == 0 || (codes.length == 1 && codes[0] == Collator.ReorderCodes.NONE)) { 182 resetReordering(); 183 return; 184 } 185 UVector32 rangesList = new UVector32(); 186 data.makeReorderRanges(codes, rangesList); 187 int rangesLength = rangesList.size(); 188 if(rangesLength == 0) { 189 resetReordering(); 190 return; 191 } 192 int[] ranges = rangesList.getBuffer(); 193 // ranges[] contains at least two (limit, offset) pairs. 194 // The first offset must be 0. The last offset must not be 0. 195 // Separators (at the low end) and trailing weights (at the high end) 196 // are never reordered. 197 assert(rangesLength >= 2); 198 assert((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); 199 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000L; 200 201 // Write the lead byte permutation table. 202 // Set a 0 for each lead byte that has a range boundary in the middle. 203 byte[] table = new byte[256]; 204 int b = 0; 205 int firstSplitByteRangeIndex = -1; 206 for(int i = 0; i < rangesLength; ++i) { 207 int pair = ranges[i]; 208 int limit1 = pair >>> 24; 209 while(b < limit1) { 210 table[b] = (byte)(b + pair); 211 ++b; 212 } 213 // Check the second byte of the limit. 214 if((pair & 0xff0000) != 0) { 215 table[limit1] = 0; 216 b = limit1 + 1; 217 if(firstSplitByteRangeIndex < 0) { 218 firstSplitByteRangeIndex = i; 219 } 220 } 221 } 222 while(b <= 0xff) { 223 table[b] = (byte)b; 224 ++b; 225 } 226 int rangesStart; 227 if(firstSplitByteRangeIndex < 0) { 228 // The lead byte permutation table alone suffices for reordering. 229 rangesStart = rangesLength = 0; 230 } else { 231 // Remove the ranges below the first split byte. 232 rangesStart = firstSplitByteRangeIndex; 233 rangesLength -= firstSplitByteRangeIndex; 234 } 235 setReorderArrays(codes, ranges, rangesStart, rangesLength, table); 236 } 237 setReorderArrays(int[] codes, int[] ranges, int rangesStart, int rangesLength, byte[] table)238 private void setReorderArrays(int[] codes, 239 int[] ranges, int rangesStart, int rangesLength, byte[] table) { 240 // Very different from C++. See the comments after the reorderCodes declaration. 241 if(codes == null) { 242 codes = EMPTY_INT_ARRAY; 243 } 244 assert (codes.length == 0) == (table == null); 245 reorderTable = table; 246 reorderCodes = codes; 247 setReorderRanges(ranges, rangesStart, rangesLength); 248 } 249 setReorderRanges(int[] ranges, int rangesStart, int rangesLength)250 private void setReorderRanges(int[] ranges, int rangesStart, int rangesLength) { 251 if(rangesLength == 0) { 252 reorderRanges = null; 253 } else { 254 reorderRanges = new long[rangesLength]; 255 int i = 0; 256 do { 257 reorderRanges[i++] = ranges[rangesStart++] & 0xffffffffL; 258 } while(i < rangesLength); 259 } 260 } 261 copyReorderingFrom(CollationSettings other)262 public void copyReorderingFrom(CollationSettings other) { 263 if(!other.hasReordering()) { 264 resetReordering(); 265 return; 266 } 267 minHighNoReorder = other.minHighNoReorder; 268 reorderTable = other.reorderTable; 269 reorderRanges = other.reorderRanges; 270 reorderCodes = other.reorderCodes; 271 } 272 hasReordering()273 public boolean hasReordering() { return reorderTable != null; } 274 reorderTableHasSplitBytes(byte[] table)275 private static boolean reorderTableHasSplitBytes(byte[] table) { 276 assert(table[0] == 0); 277 for(int i = 1; i < 256; ++i) { 278 if(table[i] == 0) { 279 return true; 280 } 281 } 282 return false; 283 } 284 reorder(long p)285 public long reorder(long p) { 286 byte b = reorderTable[(int)p >>> 24]; 287 if(b != 0 || p <= Collation.NO_CE_PRIMARY) { 288 return ((b & 0xffL) << 24) | (p & 0xffffff); 289 } else { 290 return reorderEx(p); 291 } 292 } 293 reorderEx(long p)294 private long reorderEx(long p) { 295 assert minHighNoReorder > 0; 296 if(p >= minHighNoReorder) { return p; } 297 // Round up p so that its lower 16 bits are >= any offset bits. 298 // Then compare q directly with (limit, offset) pairs. 299 long q = p | 0xffff; 300 long r; 301 int i = 0; 302 while(q >= (r = reorderRanges[i])) { ++i; } 303 return p + ((long)(short)r << 24); 304 } 305 306 // In C++, we use enums for attributes and their values, with a special value for the default. 307 // Combined getter/setter methods handle many attributes. 308 // In Java, we have specific methods for getting, setting, and set-to-default, 309 // except that this class uses bits in its own bit set for simple values. 310 setStrength(int value)311 public void setStrength(int value) { 312 int noStrength = options & ~STRENGTH_MASK; 313 switch(value) { 314 case Collator.PRIMARY: 315 case Collator.SECONDARY: 316 case Collator.TERTIARY: 317 case Collator.QUATERNARY: 318 case Collator.IDENTICAL: 319 options = noStrength | (value << STRENGTH_SHIFT); 320 break; 321 default: 322 throw new IllegalArgumentException("illegal strength value " + value); 323 } 324 } 325 setStrengthDefault(int defaultOptions)326 public void setStrengthDefault(int defaultOptions) { 327 int noStrength = options & ~STRENGTH_MASK; 328 options = noStrength | (defaultOptions & STRENGTH_MASK); 329 } 330 getStrength(int options)331 static int getStrength(int options) { 332 return options >> STRENGTH_SHIFT; 333 } 334 getStrength()335 public int getStrength() { 336 return getStrength(options); 337 } 338 339 /** Sets the options bit for an on/off attribute. */ setFlag(int bit, boolean value)340 public void setFlag(int bit, boolean value) { 341 if(value) { 342 options |= bit; 343 } else { 344 options &= ~bit; 345 } 346 } 347 setFlagDefault(int bit, int defaultOptions)348 public void setFlagDefault(int bit, int defaultOptions) { 349 options = (options & ~bit) | (defaultOptions & bit); 350 } 351 getFlag(int bit)352 public boolean getFlag(int bit) { 353 return (options & bit) != 0; 354 } 355 setCaseFirst(int value)356 public void setCaseFirst(int value) { 357 assert value == 0 || value == CASE_FIRST || value == CASE_FIRST_AND_UPPER_MASK; 358 int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; 359 options = noCaseFirst | value; 360 } 361 setCaseFirstDefault(int defaultOptions)362 public void setCaseFirstDefault(int defaultOptions) { 363 int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; 364 options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); 365 } 366 getCaseFirst()367 public int getCaseFirst() { 368 return options & CASE_FIRST_AND_UPPER_MASK; 369 } 370 setAlternateHandlingShifted(boolean value)371 public void setAlternateHandlingShifted(boolean value) { 372 int noAlternate = options & ~ALTERNATE_MASK; 373 if(value) { 374 options = noAlternate | SHIFTED; 375 } else { 376 options = noAlternate; 377 } 378 } 379 setAlternateHandlingDefault(int defaultOptions)380 public void setAlternateHandlingDefault(int defaultOptions) { 381 int noAlternate = options & ~ALTERNATE_MASK; 382 options = noAlternate | (defaultOptions & ALTERNATE_MASK); 383 } 384 getAlternateHandling()385 public boolean getAlternateHandling() { 386 return (options & ALTERNATE_MASK) != 0; 387 } 388 setMaxVariable(int value, int defaultOptions)389 public void setMaxVariable(int value, int defaultOptions) { 390 int noMax = options & ~MAX_VARIABLE_MASK; 391 switch(value) { 392 case MAX_VAR_SPACE: 393 case MAX_VAR_PUNCT: 394 case MAX_VAR_SYMBOL: 395 case MAX_VAR_CURRENCY: 396 options = noMax | (value << MAX_VARIABLE_SHIFT); 397 break; 398 case -1: 399 options = noMax | (defaultOptions & MAX_VARIABLE_MASK); 400 break; 401 default: 402 throw new IllegalArgumentException("illegal maxVariable value " + value); 403 } 404 } 405 getMaxVariable()406 public int getMaxVariable() { 407 return (options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT; 408 } 409 410 /** 411 * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off. 412 */ isTertiaryWithCaseBits(int options)413 static boolean isTertiaryWithCaseBits(int options) { 414 return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST; 415 } getTertiaryMask(int options)416 static int getTertiaryMask(int options) { 417 // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off. 418 return isTertiaryWithCaseBits(options) ? 419 Collation.CASE_AND_TERTIARY_MASK : Collation.ONLY_TERTIARY_MASK; 420 } 421 sortsTertiaryUpperCaseFirst(int options)422 static boolean sortsTertiaryUpperCaseFirst(int options) { 423 // On tertiary level, consider case bits and sort uppercase first 424 // if caseLevel is off and caseFirst==upperFirst. 425 return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK; 426 } 427 dontCheckFCD()428 public boolean dontCheckFCD() { 429 return (options & CHECK_FCD) == 0; 430 } 431 hasBackwardSecondary()432 boolean hasBackwardSecondary() { 433 return (options & BACKWARD_SECONDARY) != 0; 434 } 435 isNumeric()436 public boolean isNumeric() { 437 return (options & NUMERIC) != 0; 438 } 439 440 /** CHECK_FCD etc. */ 441 public int options = (Collator.TERTIARY << STRENGTH_SHIFT) | // DEFAULT_STRENGTH 442 (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT); 443 /** Variable-top primary weight. */ 444 public long variableTop; 445 /** 446 * 256-byte table for reordering permutation of primary lead bytes; null if no reordering. 447 * A 0 entry at a non-zero index means that the primary lead byte is "split" 448 * (there are different offsets for primaries that share that lead byte) 449 * and the reordering offset must be determined via the reorderRanges. 450 */ 451 public byte[] reorderTable; 452 /** Limit of last reordered range. 0 if no reordering or no split bytes. */ 453 long minHighNoReorder; 454 /** 455 * Primary-weight ranges for script reordering, 456 * to be used by reorder(p) for split-reordered primary lead bytes. 457 * 458 * <p>Each entry is a (limit, offset) pair. 459 * The upper 16 bits of the entry are the upper 16 bits of the 460 * exclusive primary limit of a range. 461 * Primaries between the previous limit and this one have their lead bytes 462 * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits. 463 * 464 * <p>CollationData.makeReorderRanges() writes a full list where the first range 465 * (at least for terminators and separators) has a 0 offset. 466 * The last range has a non-zero offset. 467 * minHighNoReorder is set to the limit of that last range. 468 * 469 * <p>In the settings object, the initial ranges before the first split lead byte 470 * are omitted for efficiency; they are handled by reorder(p) via the reorderTable. 471 * If there are no split-reordered lead bytes, then no ranges are needed. 472 */ 473 long[] reorderRanges; 474 /** Array of reorder codes; ignored if length == 0. */ 475 public int[] reorderCodes = EMPTY_INT_ARRAY; 476 // Note: In C++, we keep a memory block around for the reorder codes, 477 // the ranges, and the permutation table, 478 // and modify them for new codes. 479 // In Java, we simply copy references and then never modify the array contents. 480 // The caller must abandon the arrays. 481 // Reorder codes from the public setter API must be cloned. 482 private static final int[] EMPTY_INT_ARRAY = new int[0]; 483 484 /** Options for CollationFastLatin. Negative if disabled. */ 485 public int fastLatinOptions = -1; 486 // fastLatinPrimaries.length must be equal to CollationFastLatin.LATIN_LIMIT, 487 // but we do not import CollationFastLatin to reduce circular dependencies. 488 public char[] fastLatinPrimaries = new char[0x180]; // mutable contents 489 } 490