1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2010-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * Collation.java, ported from collation.h/.cpp 10 * 11 * C++ version created on: 2010oct27 12 * created by: Markus W. Scherer 13 */ 14 15 package ohos.global.icu.impl.coll; 16 17 /** 18 * Collation v2 basic definitions and static helper functions. 19 * 20 * Data structures except for expansion tables store 32-bit CEs which are 21 * either specials (see tags below) or are compact forms of 64-bit CEs. 22 * @hide exposed on OHOS 23 */ 24 public final class Collation { 25 /** UChar32 U_SENTINEL. 26 * TODO: Create a common, public constant? 27 */ 28 public static final int SENTINEL_CP = -1; 29 30 // ICU4C compare() API returns enum UCollationResult values (with UCOL_ prefix). 31 // ICU4J just returns int. We use these constants for ease of porting. 32 public static final int LESS = -1; 33 public static final int EQUAL = 0; 34 public static final int GREATER = 1; 35 36 // Special sort key bytes for all levels. 37 public static final int TERMINATOR_BYTE = 0; 38 public static final int LEVEL_SEPARATOR_BYTE = 1; 39 40 /** The secondary/tertiary lower limit for tailoring before any root elements. */ 41 static final int BEFORE_WEIGHT16 = 0x100; 42 43 /** 44 * Merge-sort-key separator. 45 * Same as the unique primary and identical-level weights of U+FFFE. 46 * Must not be used as primary compression low terminator. 47 * Otherwise usable. 48 */ 49 public static final int MERGE_SEPARATOR_BYTE = 2; 50 public static final long MERGE_SEPARATOR_PRIMARY = 0x02000000; // U+FFFE 51 static final int MERGE_SEPARATOR_CE32 = 0x02000505; // U+FFFE 52 53 /** 54 * Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE. 55 * Reserved value in primary second byte if the lead byte is compressible. 56 * Otherwise usable in all CE weight bytes. 57 */ 58 public static final int PRIMARY_COMPRESSION_LOW_BYTE = 3; 59 /** 60 * Primary compression high terminator. 61 * Reserved value in primary second byte if the lead byte is compressible. 62 * Otherwise usable in all CE weight bytes. 63 */ 64 public static final int PRIMARY_COMPRESSION_HIGH_BYTE = 0xff; 65 66 /** Default secondary/tertiary weight lead byte. */ 67 static final int COMMON_BYTE = 5; 68 public static final int COMMON_WEIGHT16 = 0x0500; 69 /** Middle 16 bits of a CE with a common secondary weight. */ 70 static final int COMMON_SECONDARY_CE = 0x05000000; 71 /** Lower 16 bits of a CE with a common tertiary weight. */ 72 static final int COMMON_TERTIARY_CE = 0x0500; 73 /** Lower 32 bits of a CE with common secondary and tertiary weights. */ 74 public static final int COMMON_SEC_AND_TER_CE = 0x05000500; 75 76 static final int SECONDARY_MASK = 0xffff0000; 77 public static final int CASE_MASK = 0xc000; 78 static final int SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK; 79 /** Only the 2*6 bits for the pure tertiary weight. */ 80 public static final int ONLY_TERTIARY_MASK = 0x3f3f; 81 /** Only the secondary & tertiary bits; no case, no quaternary. */ 82 static final int ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MASK; 83 /** Case bits and tertiary bits. */ 84 static final int CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MASK; 85 public static final int QUATERNARY_MASK = 0xc0; 86 /** Case bits and quaternary bits. */ 87 public static final int CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK; 88 89 static final int UNASSIGNED_IMPLICIT_BYTE = 0xfe; // compressible 90 /** 91 * First unassigned: AlphabeticIndex overflow boundary. 92 * We want a 3-byte primary so that it fits into the root elements table. 93 * 94 * This 3-byte primary will not collide with 95 * any unassigned-implicit 4-byte primaries because 96 * the first few hundred Unicode code points all have real mappings. 97 */ 98 static final long FIRST_UNASSIGNED_PRIMARY = 0xfe040200L; 99 100 static final int TRAIL_WEIGHT_BYTE = 0xff; // not compressible 101 static final long FIRST_TRAILING_PRIMARY = 0xff020200L; // [first trailing] 102 public static final long MAX_PRIMARY = 0xffff0000L; // U+FFFF 103 static final int MAX_REGULAR_CE32 = 0xffff0505; // U+FFFF 104 105 // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which behave like U+FFFD). 106 // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+). 107 public static final long FFFD_PRIMARY = MAX_PRIMARY - 0x20000; 108 static final int FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000; 109 110 /** 111 * A CE32 is special if its low byte is this or greater. 112 * Impossible case bits 11 mark special CE32s. 113 * This value itself is used to indicate a fallback to the base collator. 114 */ 115 static final int SPECIAL_CE32_LOW_BYTE = 0xc0; 116 static final int FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE; 117 /** 118 * Low byte of a long-primary special CE32. 119 */ 120 static final int LONG_PRIMARY_CE32_LOW_BYTE = 0xc1; // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG 121 122 static final int UNASSIGNED_CE32 = 0xffffffff; // Compute an unassigned-implicit CE. 123 124 static final int NO_CE32 = 1; 125 126 /** No CE: End of input. Only used in runtime code, not stored in data. */ 127 static final long NO_CE_PRIMARY = 1; // not a left-adjusted weight 128 static final int NO_CE_WEIGHT16 = 0x0100; // weight of LEVEL_SEPARATOR_BYTE 129 public static final long NO_CE = 0x101000100L; // NO_CE_PRIMARY, NO_CE_WEIGHT16, NO_CE_WEIGHT16 130 131 /** Sort key levels. */ 132 133 /** Unspecified level. */ 134 public static final int NO_LEVEL = 0; 135 public static final int PRIMARY_LEVEL = 1; 136 public static final int SECONDARY_LEVEL = 2; 137 public static final int CASE_LEVEL = 3; 138 public static final int TERTIARY_LEVEL = 4; 139 public static final int QUATERNARY_LEVEL = 5; 140 public static final int IDENTICAL_LEVEL = 6; 141 /** Beyond sort key bytes. */ 142 public static final int ZERO_LEVEL = 7; 143 144 /** 145 * Sort key level flags: xx_FLAG = 1 << xx_LEVEL. 146 * In Java, use enum Level with flag() getters, or use EnumSet rather than hand-made bit sets. 147 */ 148 static final int NO_LEVEL_FLAG = 1; 149 static final int PRIMARY_LEVEL_FLAG = 2; 150 static final int SECONDARY_LEVEL_FLAG = 4; 151 static final int CASE_LEVEL_FLAG = 8; 152 static final int TERTIARY_LEVEL_FLAG = 0x10; 153 static final int QUATERNARY_LEVEL_FLAG = 0x20; 154 static final int IDENTICAL_LEVEL_FLAG = 0x40; 155 static final int ZERO_LEVEL_FLAG = 0x80; 156 157 /** 158 * Special-CE32 tags, from bits 3..0 of a special 32-bit CE. 159 * Bits 31..8 are available for tag-specific data. 160 * Bits 5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0. 161 */ 162 163 /** 164 * Fall back to the base collator. 165 * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32. 166 * Bits 31..8: Unused, 0. 167 */ 168 static final int FALLBACK_TAG = 0; 169 /** 170 * Long-primary CE with COMMON_SEC_AND_TER_CE. 171 * Bits 31..8: Three-byte primary. 172 */ 173 static final int LONG_PRIMARY_TAG = 1; 174 /** 175 * Long-secondary CE with zero primary. 176 * Bits 31..16: Secondary weight. 177 * Bits 15.. 8: Tertiary weight. 178 */ 179 static final int LONG_SECONDARY_TAG = 2; 180 /** 181 * Unused. 182 * May be used in the future for single-byte secondary CEs (SHORT_SECONDARY_TAG), 183 * storing the secondary in bits 31..24, the ccc in bits 23..16, 184 * and the tertiary in bits 15..8. 185 */ 186 static final int RESERVED_TAG_3 = 3; 187 /** 188 * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05]. 189 * Bits 31..24: Single-byte primary weight pp of the first CE. 190 * Bits 23..16: Tertiary weight tt of the first CE. 191 * Bits 15.. 8: Secondary weight ss of the second CE. 192 */ 193 static final int LATIN_EXPANSION_TAG = 4; 194 /** 195 * Points to one or more simple/long-primary/long-secondary 32-bit CE32s. 196 * Bits 31..13: Index into int table. 197 * Bits 12.. 8: Length=1..31. 198 */ 199 static final int EXPANSION32_TAG = 5; 200 /** 201 * Points to one or more 64-bit CEs. 202 * Bits 31..13: Index into CE table. 203 * Bits 12.. 8: Length=1..31. 204 */ 205 static final int EXPANSION_TAG = 6; 206 /** 207 * Builder data, used only in the CollationDataBuilder, not in runtime data. 208 * 209 * If bit 8 is 0: Builder context, points to a list of context-sensitive mappings. 210 * Bits 31..13: Index to the builder's list of ConditionalCE32 for this character. 211 * Bits 12.. 9: Unused, 0. 212 * 213 * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value. 214 * The builder fetches the Jamo CE32 from the trie. 215 * Bits 31..13: Jamo code point. 216 * Bits 12.. 9: Unused, 0. 217 */ 218 static final int BUILDER_DATA_TAG = 7; 219 /** 220 * Points to prefix trie. 221 * Bits 31..13: Index into prefix/contraction data. 222 * Bits 12.. 8: Unused, 0. 223 */ 224 static final int PREFIX_TAG = 8; 225 /** 226 * Points to contraction data. 227 * Bits 31..13: Index into prefix/contraction data. 228 * Bits 12..11: Unused, 0. 229 * Bit 10: CONTRACT_TRAILING_CCC flag. 230 * Bit 9: CONTRACT_NEXT_CCC flag. 231 * Bit 8: CONTRACT_SINGLE_CP_NO_MATCH flag. 232 */ 233 static final int CONTRACTION_TAG = 9; 234 /** 235 * Decimal digit. 236 * Bits 31..13: Index into int table for non-numeric-collation CE32. 237 * Bit 12: Unused, 0. 238 * Bits 11.. 8: Digit value 0..9. 239 */ 240 static final int DIGIT_TAG = 10; 241 /** 242 * Tag for U+0000, for moving the NUL-termination handling 243 * from the regular fastpath into specials-handling code. 244 * Bits 31..8: Unused, 0. 245 */ 246 static final int U0000_TAG = 11; 247 /** 248 * Tag for a Hangul syllable. 249 * Bits 31..9: Unused, 0. 250 * Bit 8: HANGUL_NO_SPECIAL_JAMO flag. 251 */ 252 static final int HANGUL_TAG = 12; 253 /** 254 * Tag for a lead surrogate code unit. 255 * Optional optimization for UTF-16 string processing. 256 * Bits 31..10: Unused, 0. 257 * 9.. 8: =0: All associated supplementary code points are unassigned-implict. 258 * =1: All associated supplementary code points fall back to the base data. 259 * else: (Normally 2) Look up the data for the supplementary code point. 260 */ 261 static final int LEAD_SURROGATE_TAG = 13; 262 /** 263 * Tag for CEs with primary weights in code point order. 264 * Bits 31..13: Index into CE table, for one data "CE". 265 * Bits 12.. 8: Unused, 0. 266 * 267 * This data "CE" has the following bit fields: 268 * Bits 63..32: Three-byte primary pppppp00. 269 * 31.. 8: Start/base code point of the in-order range. 270 * 7: Flag isCompressible primary. 271 * 6.. 0: Per-code point primary-weight increment. 272 */ 273 static final int OFFSET_TAG = 14; 274 /** 275 * Implicit CE tag. Compute an unassigned-implicit CE. 276 * All bits are set (UNASSIGNED_CE32=0xffffffff). 277 */ 278 static final int IMPLICIT_TAG = 15; 279 isAssignedCE32(int ce32)280 static boolean isAssignedCE32(int ce32) { 281 return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32; 282 } 283 284 /** 285 * We limit the number of CEs in an expansion 286 * so that we can use a small number of length bits in the data structure, 287 * and so that an implementation can copy CEs at runtime without growing a destination buffer. 288 */ 289 static final int MAX_EXPANSION_LENGTH = 31; 290 static final int MAX_INDEX = 0x7ffff; 291 292 /** 293 * Set if there is no match for the single (no-suffix) character itself. 294 * This is only possible if there is a prefix. 295 * In this case, discontiguous contraction matching cannot add combining marks 296 * starting from an empty suffix. 297 * The default CE32 is used anyway if there is no suffix match. 298 */ 299 static final int CONTRACT_SINGLE_CP_NO_MATCH = 0x100; 300 /** Set if the first character of every contraction suffix has lccc!=0. */ 301 static final int CONTRACT_NEXT_CCC = 0x200; 302 /** Set if any contraction suffix ends with lccc!=0. */ 303 static final int CONTRACT_TRAILING_CCC = 0x400; 304 305 /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */ 306 static final int HANGUL_NO_SPECIAL_JAMO = 0x100; 307 308 static final int LEAD_ALL_UNASSIGNED = 0; 309 static final int LEAD_ALL_FALLBACK = 0x100; 310 static final int LEAD_MIXED = 0x200; 311 static final int LEAD_TYPE_MASK = 0x300; 312 makeLongPrimaryCE32(long p)313 static int makeLongPrimaryCE32(long p) { return (int)(p | LONG_PRIMARY_CE32_LOW_BYTE); } 314 315 /** Turns the long-primary CE32 into a primary weight pppppp00. */ primaryFromLongPrimaryCE32(int ce32)316 static long primaryFromLongPrimaryCE32(int ce32) { 317 return (long)ce32 & 0xffffff00L; 318 } ceFromLongPrimaryCE32(int ce32)319 static long ceFromLongPrimaryCE32(int ce32) { 320 return ((long)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE; 321 } 322 makeLongSecondaryCE32(int lower32)323 static int makeLongSecondaryCE32(int lower32) { 324 return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG; 325 } ceFromLongSecondaryCE32(int ce32)326 static long ceFromLongSecondaryCE32(int ce32) { 327 return (long)ce32 & 0xffffff00L; 328 } 329 330 /** Makes a special CE32 with tag, index and length. */ makeCE32FromTagIndexAndLength(int tag, int index, int length)331 static int makeCE32FromTagIndexAndLength(int tag, int index, int length) { 332 return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag; 333 } 334 /** Makes a special CE32 with only tag and index. */ makeCE32FromTagAndIndex(int tag, int index)335 static int makeCE32FromTagAndIndex(int tag, int index) { 336 return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag; 337 } 338 isSpecialCE32(int ce32)339 static boolean isSpecialCE32(int ce32) { 340 return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE; 341 } 342 tagFromCE32(int ce32)343 static int tagFromCE32(int ce32) { 344 return ce32 & 0xf; 345 } 346 hasCE32Tag(int ce32, int tag)347 static boolean hasCE32Tag(int ce32, int tag) { 348 return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag; 349 } 350 isLongPrimaryCE32(int ce32)351 static boolean isLongPrimaryCE32(int ce32) { 352 return hasCE32Tag(ce32, LONG_PRIMARY_TAG); 353 } 354 isSimpleOrLongCE32(int ce32)355 static boolean isSimpleOrLongCE32(int ce32) { 356 return !isSpecialCE32(ce32) || 357 tagFromCE32(ce32) == LONG_PRIMARY_TAG || 358 tagFromCE32(ce32) == LONG_SECONDARY_TAG; 359 } 360 361 /** 362 * @return true if the ce32 yields one or more CEs without further data lookups 363 */ isSelfContainedCE32(int ce32)364 static boolean isSelfContainedCE32(int ce32) { 365 return !isSpecialCE32(ce32) || 366 tagFromCE32(ce32) == LONG_PRIMARY_TAG || 367 tagFromCE32(ce32) == LONG_SECONDARY_TAG || 368 tagFromCE32(ce32) == LATIN_EXPANSION_TAG; 369 } 370 isPrefixCE32(int ce32)371 static boolean isPrefixCE32(int ce32) { 372 return hasCE32Tag(ce32, PREFIX_TAG); 373 } 374 isContractionCE32(int ce32)375 static boolean isContractionCE32(int ce32) { 376 return hasCE32Tag(ce32, CONTRACTION_TAG); 377 } 378 ce32HasContext(int ce32)379 static boolean ce32HasContext(int ce32) { 380 return isSpecialCE32(ce32) && 381 (tagFromCE32(ce32) == PREFIX_TAG || 382 tagFromCE32(ce32) == CONTRACTION_TAG); 383 } 384 385 /** 386 * Get the first of the two Latin-expansion CEs encoded in ce32. 387 * @see LATIN_EXPANSION_TAG 388 */ latinCE0FromCE32(int ce32)389 static long latinCE0FromCE32(int ce32) { 390 return ((long)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce32 & 0xff0000) >> 8); 391 } 392 393 /** 394 * Get the second of the two Latin-expansion CEs encoded in ce32. 395 * @see LATIN_EXPANSION_TAG 396 */ latinCE1FromCE32(int ce32)397 static long latinCE1FromCE32(int ce32) { 398 return (((long)ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE; 399 } 400 401 /** 402 * Returns the data index from a special CE32. 403 */ indexFromCE32(int ce32)404 static int indexFromCE32(int ce32) { 405 return ce32 >>> 13; 406 } 407 408 /** 409 * Returns the data length from a ce32. 410 */ lengthFromCE32(int ce32)411 static int lengthFromCE32(int ce32) { 412 return (ce32 >> 8) & 31; 413 } 414 415 /** 416 * Returns the digit value from a DIGIT_TAG ce32. 417 */ digitFromCE32(int ce32)418 static char digitFromCE32(int ce32) { 419 return (char)((ce32 >> 8) & 0xf); 420 } 421 422 /** Returns a 64-bit CE from a simple CE32 (not special). */ ceFromSimpleCE32(int ce32)423 static long ceFromSimpleCE32(int ce32) { 424 // normal form ppppsstt -> pppp0000ss00tt00 425 assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE; 426 return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | ((ce32 & 0xff) << 8); 427 } 428 429 /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */ 430 static long ceFromCE32(int ce32) { 431 int tertiary = ce32 & 0xff; 432 if(tertiary < SPECIAL_CE32_LOW_BYTE) { 433 // normal form ppppsstt -> pppp0000ss00tt00 434 return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (tertiary << 8); 435 } else { 436 ce32 -= tertiary; 437 if((tertiary & 0xf) == LONG_PRIMARY_TAG) { 438 // long-primary form ppppppC1 -> pppppp00050000500 439 return ((long)ce32 << 32) | COMMON_SEC_AND_TER_CE; 440 } else { 441 // long-secondary form ssssttC2 -> 00000000sssstt00 442 assert (tertiary & 0xf) == LONG_SECONDARY_TAG; 443 return ce32 & 0xffffffffL; 444 } 445 } 446 } 447 448 /** Creates a CE from a primary weight. */ 449 public static long makeCE(long p) { 450 return (p << 32) | COMMON_SEC_AND_TER_CE; 451 } 452 /** 453 * Creates a CE from a primary weight, 454 * 16-bit secondary/tertiary weights, and a 2-bit quaternary. 455 */ 456 static long makeCE(long p, int s, int t, int q) { 457 return (p << 32) | ((long)s << 16) | t | (q << 6); 458 } 459 460 /** 461 * Increments a 2-byte primary by a code point offset. 462 */ 463 public static long incTwoBytePrimaryByOffset(long basePrimary, boolean isCompressible, 464 int offset) { 465 // Extract the second byte, minus the minimum byte value, 466 // plus the offset, modulo the number of usable byte values, plus the minimum. 467 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 468 long primary; 469 if(isCompressible) { 470 offset += ((int)(basePrimary >> 16) & 0xff) - 4; 471 primary = ((offset % 251) + 4) << 16; 472 offset /= 251; 473 } else { 474 offset += ((int)(basePrimary >> 16) & 0xff) - 2; 475 primary = ((offset % 254) + 2) << 16; 476 offset /= 254; 477 } 478 // First byte, assume no further overflow. 479 return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24)); 480 } 481 482 /** 483 * Increments a 3-byte primary by a code point offset. 484 */ 485 public static long incThreeBytePrimaryByOffset(long basePrimary, boolean isCompressible, 486 int offset) { 487 // Extract the third byte, minus the minimum byte value, 488 // plus the offset, modulo the number of usable byte values, plus the minimum. 489 offset += ((int)(basePrimary >> 8) & 0xff) - 2; 490 long primary = ((offset % 254) + 2) << 8; 491 offset /= 254; 492 // Same with the second byte, 493 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 494 if(isCompressible) { 495 offset += ((int)(basePrimary >> 16) & 0xff) - 4; 496 primary |= ((offset % 251) + 4) << 16; 497 offset /= 251; 498 } else { 499 offset += ((int)(basePrimary >> 16) & 0xff) - 2; 500 primary |= ((offset % 254) + 2) << 16; 501 offset /= 254; 502 } 503 // First byte, assume no further overflow. 504 return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24)); 505 } 506 507 /** 508 * Decrements a 2-byte primary by one range step (1..0x7f). 509 */ 510 static long decTwoBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) { 511 // Extract the second byte, minus the minimum byte value, 512 // minus the step, modulo the number of usable byte values, plus the minimum. 513 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 514 // Assume no further underflow for the first byte. 515 assert(0 < step && step <= 0x7f); 516 int byte2 = ((int)(basePrimary >> 16) & 0xff) - step; 517 if(isCompressible) { 518 if(byte2 < 4) { 519 byte2 += 251; 520 basePrimary -= 0x1000000; 521 } 522 } else { 523 if(byte2 < 2) { 524 byte2 += 254; 525 basePrimary -= 0x1000000; 526 } 527 } 528 return (basePrimary & 0xff000000L) | (byte2 << 16); 529 } 530 531 /** 532 * Decrements a 3-byte primary by one range step (1..0x7f). 533 */ 534 static long decThreeBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) { 535 // Extract the third byte, minus the minimum byte value, 536 // minus the step, modulo the number of usable byte values, plus the minimum. 537 assert(0 < step && step <= 0x7f); 538 int byte3 = ((int)(basePrimary >> 8) & 0xff) - step; 539 if(byte3 >= 2) { 540 return (basePrimary & 0xffff0000L) | (byte3 << 8); 541 } 542 byte3 += 254; 543 // Same with the second byte, 544 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 545 int byte2 = ((int)(basePrimary >> 16) & 0xff) - 1; 546 if(isCompressible) { 547 if(byte2 < 4) { 548 byte2 = 0xfe; 549 basePrimary -= 0x1000000; 550 } 551 } else { 552 if(byte2 < 2) { 553 byte2 = 0xff; 554 basePrimary -= 0x1000000; 555 } 556 } 557 // First byte, assume no further underflow. 558 return (basePrimary & 0xff000000L) | (byte2 << 16) | (byte3 << 8); 559 } 560 561 /** 562 * Computes a 3-byte primary for c's OFFSET_TAG data "CE". 563 */ 564 static long getThreeBytePrimaryForOffsetData(int c, long dataCE) { 565 long p = dataCE >>> 32; // three-byte primary pppppp00 566 int lower32 = (int)dataCE; // base code point b & step s: bbbbbbss (bit 7: isCompressible) 567 int offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment 568 boolean isCompressible = (lower32 & 0x80) != 0; 569 return Collation.incThreeBytePrimaryByOffset(p, isCompressible, offset); 570 } 571 572 /** 573 * Returns the unassigned-character implicit primary weight for any valid code point c. 574 */ 575 static long unassignedPrimaryFromCodePoint(int c) { 576 // Create a gap before U+0000. Use c=-1 for [first unassigned]. 577 ++c; 578 // Fourth byte: 18 values, every 14th byte value (gap of 13). 579 long primary = 2 + (c % 18) * 14; 580 c /= 18; 581 // Third byte: 254 values. 582 primary |= (2 + (c % 254)) << 8; 583 c /= 254; 584 // Second byte: 251 values 04..FE excluding the primary compression bytes. 585 primary |= (4 + (c % 251)) << 16; 586 // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 587 return primary | ((long)UNASSIGNED_IMPLICIT_BYTE << 24); 588 } 589 590 static long unassignedCEFromCodePoint(int c) { 591 return makeCE(unassignedPrimaryFromCodePoint(c)); 592 } 593 594 // private Collation() // No instantiation. 595 } 596