1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2012-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * CollationKeys.java, ported from collationkeys.h/.cpp 10 * 11 * C++ version created on: 2012sep02 12 * created by: Markus W. Scherer 13 */ 14 15 package ohos.global.icu.impl.coll; 16 17 import ohos.global.icu.text.Collator; 18 19 /** 20 * @hide exposed on OHOS 21 */ 22 public final class CollationKeys /* all methods are static */ { 23 24 // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink, 25 // which is not available in Java. We don't need a super class created for implementing 26 // collation features. 27 /** 28 * @hide exposed on OHOS 29 */ 30 public static abstract class SortKeyByteSink { 31 protected byte[] buffer_; 32 // protected int capacity_; == buffer_.length 33 private int appended_ = 0; 34 // not used in Java -- private int ignore_ = 0; 35 SortKeyByteSink(byte[] dest)36 public SortKeyByteSink(byte[] dest) { 37 buffer_ = dest; 38 } 39 40 /** 41 * Needed in Java for when we write to the buffer directly. 42 * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that. 43 * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it? 44 * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that?? 45 */ setBufferAndAppended(byte[] dest, int app)46 public void setBufferAndAppended(byte[] dest, int app) { 47 buffer_ = dest; 48 appended_ = app; 49 } 50 51 /* not used in Java -- public void IgnoreBytes(int numIgnore) { 52 ignore_ = numIgnore; 53 } */ 54 55 /** 56 * @param bytes 57 * the array of byte 58 * @param n 59 * the length of bytes to be appended 60 */ Append(byte[] bytes, int n)61 public void Append(byte[] bytes, int n) { 62 if (n <= 0 || bytes == null) { 63 return; 64 } 65 66 /* not used in Java -- if (ignore_ > 0) { 67 int ignoreRest = ignore_ - n; 68 if (ignoreRest >= 0) { 69 ignore_ = ignoreRest; 70 return; 71 } else { 72 start = ignore_; 73 n = -ignoreRest; 74 ignore_ = 0; 75 } 76 } */ 77 78 int length = appended_; 79 appended_ += n; 80 81 int available = buffer_.length - length; 82 if (n <= available) { 83 System.arraycopy(bytes, 0, buffer_, length, n); 84 } else { 85 AppendBeyondCapacity(bytes, 0, n, length); 86 } 87 } 88 Append(int b)89 public void Append(int b) { 90 /* not used in Java -- if (ignore_ > 0) { 91 --ignore_; 92 } else */ { 93 if (appended_ < buffer_.length || Resize(1, appended_)) { 94 buffer_[appended_] = (byte) b; 95 } 96 ++appended_; 97 } 98 } 99 100 // Java porting note: This method is not used by collator implementation. 101 // 102 // virtual char *GetAppendBuffer(int min_capacity, 103 // int desired_capacity_hint, 104 // char *scratch, int scratch_capacity, 105 // int *result_capacity); 106 NumberOfBytesAppended()107 public int NumberOfBytesAppended() { 108 return appended_; 109 } 110 GetRemainingCapacity()111 public int GetRemainingCapacity() { 112 return /* not used in Java -- ignore_ + */ buffer_.length - appended_; 113 } 114 Overflowed()115 public boolean Overflowed() { 116 return appended_ > buffer_.length; 117 } 118 119 /* not used in Java -- public boolean IsOk() { 120 return true; 121 } */ 122 123 /** 124 * @param bytes 125 * the array of byte 126 * @param start 127 * the start index within the array to be appended 128 * @param n 129 * the length of bytes to be appended 130 * @param length 131 * the length of buffer required to store the entire data (i.e. already appended 132 * bytes + bytes to be appended by this method) 133 */ AppendBeyondCapacity(byte[] bytes, int start, int n, int length)134 protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length); 135 Resize(int appendCapacity, int length)136 protected abstract boolean Resize(int appendCapacity, int length); 137 } 138 139 /** 140 * @hide exposed on OHOS 141 */ 142 public static class LevelCallback { 143 /** 144 * @param level 145 * The next level about to be written to the ByteSink. 146 * @return true if the level is to be written (the base class implementation always returns 147 * true) 148 */ needToWrite(int level)149 boolean needToWrite(int level) { 150 return true; 151 } 152 } 153 public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback(); 154 155 private static final class SortKeyLevel { 156 private static final int INITIAL_CAPACITY = 40; 157 158 byte[] buffer = new byte[INITIAL_CAPACITY]; 159 int len = 0; 160 // not used in Java -- private static final boolean ok = true; // In C++ "ok" is reset when memory allocations fail. 161 SortKeyLevel()162 SortKeyLevel() { 163 } 164 165 /* not used in Java -- boolean isOk() { 166 return ok; 167 } */ 168 isEmpty()169 boolean isEmpty() { 170 return len == 0; 171 } 172 length()173 int length() { 174 return len; 175 } 176 177 // Java porting note: Java uses this instead of C++ operator [] overload 178 // uint8_t operator[](int index) getAt(int index)179 byte getAt(int index) { 180 return buffer[index]; 181 } 182 data()183 byte[] data() { 184 return buffer; 185 } 186 appendByte(int b)187 void appendByte(int b) { 188 if (len < buffer.length || ensureCapacity(1)) { 189 buffer[len++] = (byte) b; 190 } 191 } 192 appendWeight16(int w)193 void appendWeight16(int w) { 194 assert ((w & 0xffff) != 0); 195 byte b0 = (byte) (w >>> 8); 196 byte b1 = (byte) w; 197 int appendLength = (b1 == 0) ? 1 : 2; 198 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 199 buffer[len++] = b0; 200 if (b1 != 0) { 201 buffer[len++] = b1; 202 } 203 } 204 } 205 appendWeight32(long w)206 void appendWeight32(long w) { 207 assert (w != 0); 208 byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8), 209 (byte) w }; 210 int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4; 211 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 212 buffer[len++] = bytes[0]; 213 if (bytes[1] != 0) { 214 buffer[len++] = bytes[1]; 215 if (bytes[2] != 0) { 216 buffer[len++] = bytes[2]; 217 if (bytes[3] != 0) { 218 buffer[len++] = bytes[3]; 219 } 220 } 221 } 222 } 223 } 224 appendReverseWeight16(int w)225 void appendReverseWeight16(int w) { 226 assert ((w & 0xffff) != 0); 227 byte b0 = (byte) (w >>> 8); 228 byte b1 = (byte) w; 229 int appendLength = (b1 == 0) ? 1 : 2; 230 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 231 if (b1 == 0) { 232 buffer[len++] = b0; 233 } else { 234 buffer[len] = b1; 235 buffer[len + 1] = b0; 236 len += 2; 237 } 238 } 239 } 240 241 // Appends all but the last byte to the sink. The last byte should be the 01 terminator. appendTo(SortKeyByteSink sink)242 void appendTo(SortKeyByteSink sink) { 243 assert (len > 0 && buffer[len - 1] == 1); 244 sink.Append(buffer, len - 1); 245 } 246 ensureCapacity(int appendCapacity)247 private boolean ensureCapacity(int appendCapacity) { 248 /* not used in Java -- if (!ok) { 249 return false; 250 } */ 251 int newCapacity = 2 * buffer.length; 252 int altCapacity = len + 2 * appendCapacity; 253 if (newCapacity < altCapacity) { 254 newCapacity = altCapacity; 255 } 256 if (newCapacity < 200) { 257 newCapacity = 200; 258 } 259 byte[] newbuf = new byte[newCapacity]; 260 System.arraycopy(buffer, 0, newbuf, 0, len); 261 buffer = newbuf; 262 263 return true; 264 } 265 } 266 getSortKeyLevel(int levels, int level)267 private static SortKeyLevel getSortKeyLevel(int levels, int level) { 268 return (levels & level) != 0 ? new SortKeyLevel() : null; 269 } 270 CollationKeys()271 private CollationKeys() { 272 } // no instantiation 273 274 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. 275 private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE; 276 private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; 277 static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader 278 private static final int SEC_COMMON_MAX_COUNT = 0x21; 279 280 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. 281 private static final int CASE_LOWER_FIRST_COMMON_LOW = 1; 282 private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7; 283 private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13; 284 private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; 285 286 // Case level, upperFirst: Compress up to 13 common weights as 3..15. 287 private static final int CASE_UPPER_FIRST_COMMON_LOW = 3; 288 @SuppressWarnings("unused") 289 private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15; 290 private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; 291 292 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. 293 private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE; 294 private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; 295 private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; 296 private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61; 297 298 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. 299 private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE; 300 private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; 301 private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; 302 private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; 303 304 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. 305 private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80; 306 private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; 307 private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; 308 private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; 309 310 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. 311 private static final int QUAT_COMMON_LOW = 0x1c; 312 private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; 313 private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; 314 private static final int QUAT_COMMON_MAX_COUNT = 0x71; 315 // Primary weights shifted to quaternary level must be encoded with 316 // a lead byte below the common-weight compression range. 317 private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b 318 319 /** 320 * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that 321 * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding 322 * IDENTICAL_LEVEL which this function does not write. 323 */ 324 private static final int levelMasks[] = new int[] { 325 2, // UCOL_PRIMARY -> PRIMARY_LEVEL 326 6, // UCOL_SECONDARY -> up to SECONDARY_LEVEL 327 0x16, // UCOL_TERTIARY -> up to TERTIARY_LEVEL 328 0x36, // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL 329 0, 0, 0, 0, 330 0, 0, 0, 0, 331 0, 0, 0, 332 0x36 // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL 333 }; 334 335 /** 336 * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes 337 * the case level. Stops writing levels when callback.needToWrite(level) returns false. 338 * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE. 339 */ writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, boolean preflight)340 public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, 341 CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, 342 boolean preflight) { 343 344 int options = settings.options; 345 // Set of levels to process and write. 346 int levels = levelMasks[CollationSettings.getStrength(options)]; 347 if ((options & CollationSettings.CASE_LEVEL) != 0) { 348 levels |= Collation.CASE_LEVEL_FLAG; 349 } 350 // Minus the levels below minLevel. 351 levels &= ~((1 << minLevel) - 1); 352 if (levels == 0) { 353 return; 354 } 355 356 long variableTop; 357 if ((options & CollationSettings.ALTERNATE_MASK) == 0) { 358 variableTop = 0; 359 } else { 360 // +1 so that we can use "<" and primary ignorables test out early. 361 variableTop = settings.variableTop + 1; 362 } 363 364 int tertiaryMask = CollationSettings.getTertiaryMask(options); 365 366 byte[] p234 = new byte[3]; 367 SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG); 368 SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG); 369 SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG); 370 SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG); 371 372 long prevReorderedPrimary = 0; // 0==no compression 373 int commonCases = 0; 374 int commonSecondaries = 0; 375 int commonTertiaries = 0; 376 int commonQuaternaries = 0; 377 378 int prevSecondary = 0; 379 int secSegmentStart = 0; 380 381 for (;;) { 382 // No need to keep all CEs in the buffer when we write a sort key. 383 iter.clearCEsIfNoneRemaining(); 384 long ce = iter.nextCE(); 385 long p = ce >>> 32; 386 if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) { 387 // Variable CE, shift it to quaternary level. 388 // Ignore all following primary ignorables, and shift further variable CEs. 389 if (commonQuaternaries != 0) { 390 --commonQuaternaries; 391 while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { 392 quaternaries.appendByte(QUAT_COMMON_MIDDLE); 393 commonQuaternaries -= QUAT_COMMON_MAX_COUNT; 394 } 395 // Shifted primary weights are lower than the common weight. 396 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries); 397 commonQuaternaries = 0; 398 } 399 do { 400 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 401 if (settings.hasReordering()) { 402 p = settings.reorder(p); 403 } 404 if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) { 405 // Prevent shifted primary lead bytes from 406 // overlapping with the common compression range. 407 quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE); 408 } 409 quaternaries.appendWeight32(p); 410 } 411 do { 412 ce = iter.nextCE(); 413 p = ce >>> 32; 414 } while (p == 0); 415 } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY); 416 } 417 // ce could be primary ignorable, or NO_CE, or the merge separator, 418 // or a regular primary CE, but it is not variable. 419 // If ce==NO_CE, then write nothing for the primary level but 420 // terminate compression on all levels and then exit the loop. 421 if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) { 422 // Test the un-reordered primary for compressibility. 423 boolean isCompressible = compressibleBytes[(int) p >>> 24]; 424 if(settings.hasReordering()) { 425 p = settings.reorder(p); 426 } 427 int p1 = (int) p >>> 24; 428 if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) { 429 if (prevReorderedPrimary != 0) { 430 if (p < prevReorderedPrimary) { 431 // No primary compression terminator 432 // at the end of the level or merged segment. 433 if (p1 > Collation.MERGE_SEPARATOR_BYTE) { 434 sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE); 435 } 436 } else { 437 sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE); 438 } 439 } 440 sink.Append(p1); 441 if(isCompressible) { 442 prevReorderedPrimary = p; 443 } else { 444 prevReorderedPrimary = 0; 445 } 446 } 447 byte p2 = (byte) (p >>> 16); 448 if (p2 != 0) { 449 p234[0] = p2; 450 p234[1] = (byte) (p >>> 8); 451 p234[2] = (byte) p; 452 sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3); 453 } 454 // Optimization for internalNextSortKeyPart(): 455 // When the primary level overflows we can stop because we need not 456 // calculate (preflight) the whole sort key length. 457 if (!preflight && sink.Overflowed()) { 458 // not used in Java -- if (!sink.IsOk()) { 459 // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in 460 // C implementation. IsOk() in Java always returns true, so this 461 // is a dead code. 462 return; 463 } 464 } 465 466 int lower32 = (int) ce; 467 if (lower32 == 0) { 468 continue; 469 } // completely ignorable, no secondary/case/tertiary/quaternary 470 471 if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) { 472 int s = lower32 >>> 16; // 16 bits 473 if (s == 0) { 474 // secondary ignorable 475 } else if (s == Collation.COMMON_WEIGHT16 && 476 ((options & CollationSettings.BACKWARD_SECONDARY) == 0 || 477 p != Collation.MERGE_SEPARATOR_PRIMARY)) { 478 // s is a common secondary weight, and 479 // backwards-secondary is off or the ce is not the merge separator. 480 ++commonSecondaries; 481 } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) { 482 if (commonSecondaries != 0) { 483 --commonSecondaries; 484 while (commonSecondaries >= SEC_COMMON_MAX_COUNT) { 485 secondaries.appendByte(SEC_COMMON_MIDDLE); 486 commonSecondaries -= SEC_COMMON_MAX_COUNT; 487 } 488 int b; 489 if (s < Collation.COMMON_WEIGHT16) { 490 b = SEC_COMMON_LOW + commonSecondaries; 491 } else { 492 b = SEC_COMMON_HIGH - commonSecondaries; 493 } 494 secondaries.appendByte(b); 495 commonSecondaries = 0; 496 } 497 secondaries.appendWeight16(s); 498 } else { 499 if (commonSecondaries != 0) { 500 --commonSecondaries; 501 // Append reverse weights. The level will be re-reversed later. 502 int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT; 503 int b; 504 if (prevSecondary < Collation.COMMON_WEIGHT16) { 505 b = SEC_COMMON_LOW + remainder; 506 } else { 507 b = SEC_COMMON_HIGH - remainder; 508 } 509 secondaries.appendByte(b); 510 commonSecondaries -= remainder; 511 // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT. 512 while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT 513 secondaries.appendByte(SEC_COMMON_MIDDLE); 514 commonSecondaries -= SEC_COMMON_MAX_COUNT; 515 } 516 // commonSecondaries == 0 517 } 518 if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) { 519 // The backwards secondary level compares secondary weights backwards 520 // within segments separated by the merge separator (U+FFFE). 521 byte[] secs = secondaries.data(); 522 int last = secondaries.length() - 1; 523 while (secSegmentStart < last) { 524 byte b = secs[secSegmentStart]; 525 secs[secSegmentStart++] = secs[last]; 526 secs[last--] = b; 527 } 528 secondaries.appendByte(p == Collation.NO_CE_PRIMARY ? 529 Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE); 530 prevSecondary = 0; 531 secSegmentStart = secondaries.length(); 532 } else { 533 secondaries.appendReverseWeight16(s); 534 prevSecondary = s; 535 } 536 } 537 } 538 539 if ((levels & Collation.CASE_LEVEL_FLAG) != 0) { 540 if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0 541 : (lower32 >>> 16) == 0) { 542 // Primary+caseLevel: Ignore case level weights of primary ignorables. 543 // Otherwise: Ignore case level weights of secondary ignorables. 544 // For details see the comments in the CollationCompare class. 545 } else { 546 int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte 547 assert ((c & 0xc0) != 0xc0); 548 if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) { 549 ++commonCases; 550 } else { 551 if ((options & CollationSettings.UPPER_FIRST) == 0) { 552 // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, 553 // upper=15. 554 // If there are only common (=lowest) weights in the whole level, 555 // then we need not write anything. 556 // Level length differences are handled already on the next-higher level. 557 if (commonCases != 0 && 558 (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) { 559 --commonCases; 560 while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) { 561 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4); 562 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT; 563 } 564 int b; 565 if (c <= Collation.LEVEL_SEPARATOR_BYTE) { 566 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases; 567 } else { 568 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases; 569 } 570 cases.appendByte(b << 4); 571 commonCases = 0; 572 } 573 if (c > Collation.LEVEL_SEPARATOR_BYTE) { 574 c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15 575 } 576 } else { 577 // upperFirst: Compress common weights to nibbles 3..15, mixed=2, 578 // upper=1. 579 // The compressed common case weights only go up from the "low" value 580 // because with upperFirst the common weight is the highest one. 581 if (commonCases != 0) { 582 --commonCases; 583 while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) { 584 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4); 585 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT; 586 } 587 cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4); 588 commonCases = 0; 589 } 590 if (c > Collation.LEVEL_SEPARATOR_BYTE) { 591 c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1 592 } 593 } 594 // c is a separator byte 01, 595 // or a left-shifted nibble 0x10, 0x20, ... 0xf0. 596 cases.appendByte(c); 597 } 598 } 599 } 600 601 if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) { 602 int t = lower32 & tertiaryMask; 603 assert ((lower32 & 0xc000) != 0xc000); 604 if (t == Collation.COMMON_WEIGHT16) { 605 ++commonTertiaries; 606 } else if ((tertiaryMask & 0x8000) == 0) { 607 // Tertiary weights without case bits. 608 // Move lead bytes 06..3F to C6..FF for a large common-weight range. 609 if (commonTertiaries != 0) { 610 --commonTertiaries; 611 while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) { 612 tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE); 613 commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT; 614 } 615 int b; 616 if (t < Collation.COMMON_WEIGHT16) { 617 b = TER_ONLY_COMMON_LOW + commonTertiaries; 618 } else { 619 b = TER_ONLY_COMMON_HIGH - commonTertiaries; 620 } 621 tertiaries.appendByte(b); 622 commonTertiaries = 0; 623 } 624 if (t > Collation.COMMON_WEIGHT16) { 625 t += 0xc000; 626 } 627 tertiaries.appendWeight16(t); 628 } else if ((options & CollationSettings.UPPER_FIRST) == 0) { 629 // Tertiary weights with caseFirst=lowerFirst. 630 // Move lead bytes 06..BF to 46..FF for the common-weight range. 631 if (commonTertiaries != 0) { 632 --commonTertiaries; 633 while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) { 634 tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE); 635 commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT; 636 } 637 int b; 638 if (t < Collation.COMMON_WEIGHT16) { 639 b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries; 640 } else { 641 b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries; 642 } 643 tertiaries.appendByte(b); 644 commonTertiaries = 0; 645 } 646 if (t > Collation.COMMON_WEIGHT16) { 647 t += 0x4000; 648 } 649 tertiaries.appendWeight16(t); 650 } else { 651 // Tertiary weights with caseFirst=upperFirst. 652 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut), 653 // to keep tertiary CEs well-formed. 654 // Their case+tertiary weights must be greater than those of 655 // primary and secondary CEs. 656 // 657 // Separator 01 -> 01 (unchanged) 658 // Lowercase 02..04 -> 82..84 (includes uncased) 659 // Common weight 05 -> 85..C5 (common-weight compression range) 660 // Lowercase 06..3F -> C6..FF 661 // Mixed case 42..7F -> 42..7F 662 // Uppercase 82..BF -> 02..3F 663 // Tertiary CE 86..BF -> C6..FF 664 if (t <= Collation.NO_CE_WEIGHT16) { 665 // Keep separators unchanged. 666 } else if ((lower32 >>> 16) != 0) { 667 // Invert case bits of primary & secondary CEs. 668 t ^= 0xc000; 669 if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) { 670 t -= 0x4000; 671 } 672 } else { 673 // Keep uppercase bits of tertiary CEs. 674 assert (0x8600 <= t && t <= 0xbfff); 675 t += 0x4000; 676 } 677 if (commonTertiaries != 0) { 678 --commonTertiaries; 679 while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) { 680 tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE); 681 commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT; 682 } 683 int b; 684 if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) { 685 b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries; 686 } else { 687 b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries; 688 } 689 tertiaries.appendByte(b); 690 commonTertiaries = 0; 691 } 692 tertiaries.appendWeight16(t); 693 } 694 } 695 696 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 697 int q = lower32 & 0xffff; 698 if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) { 699 ++commonQuaternaries; 700 } else if (q == Collation.NO_CE_WEIGHT16 701 && (options & CollationSettings.ALTERNATE_MASK) == 0 702 && quaternaries.isEmpty()) { 703 // If alternate=non-ignorable and there are only common quaternary weights, 704 // then we need not write anything. 705 // The only weights greater than the merge separator and less than the common 706 // weight 707 // are shifted primary weights, which are not generated for 708 // alternate=non-ignorable. 709 // There are also exactly as many quaternary weights as tertiary weights, 710 // so level length differences are handled already on tertiary level. 711 // Any above-common quaternary weight will compare greater regardless. 712 quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE); 713 } else { 714 if (q == Collation.NO_CE_WEIGHT16) { 715 q = Collation.LEVEL_SEPARATOR_BYTE; 716 } else { 717 q = 0xfc + ((q >>> 6) & 3); 718 } 719 if (commonQuaternaries != 0) { 720 --commonQuaternaries; 721 while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { 722 quaternaries.appendByte(QUAT_COMMON_MIDDLE); 723 commonQuaternaries -= QUAT_COMMON_MAX_COUNT; 724 } 725 int b; 726 if (q < QUAT_COMMON_LOW) { 727 b = QUAT_COMMON_LOW + commonQuaternaries; 728 } else { 729 b = QUAT_COMMON_HIGH - commonQuaternaries; 730 } 731 quaternaries.appendByte(b); 732 commonQuaternaries = 0; 733 } 734 quaternaries.appendByte(q); 735 } 736 } 737 738 if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) { 739 break; 740 } // ce == NO_CE 741 } 742 743 // Append the beyond-primary levels. 744 // not used in Java -- boolean ok = true; 745 if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) { 746 if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) { 747 return; 748 } 749 // not used in Java -- ok &= secondaries.isOk(); 750 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 751 secondaries.appendTo(sink); 752 } 753 754 if ((levels & Collation.CASE_LEVEL_FLAG) != 0) { 755 if (!callback.needToWrite(Collation.CASE_LEVEL)) { 756 return; 757 } 758 // not used in Java -- ok &= cases.isOk(); 759 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 760 // Write pairs of nibbles as bytes, except separator bytes as themselves. 761 int length = cases.length() - 1; // Ignore the trailing NO_CE. 762 byte b = 0; 763 for (int i = 0; i < length; ++i) { 764 byte c = cases.getAt(i); 765 assert ((c & 0xf) == 0 && c != 0); 766 if (b == 0) { 767 b = c; 768 } else { 769 sink.Append(b | ((c >> 4) & 0xf)); 770 b = 0; 771 } 772 } 773 if (b != 0) { 774 sink.Append(b); 775 } 776 } 777 778 if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) { 779 if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) { 780 return; 781 } 782 // not used in Java -- ok &= tertiaries.isOk(); 783 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 784 tertiaries.appendTo(sink); 785 } 786 787 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 788 if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) { 789 return; 790 } 791 // not used in Java -- ok &= quaternaries.isOk(); 792 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 793 quaternaries.appendTo(sink); 794 } 795 796 // not used in Java -- if (!ok || !sink.IsOk()) { 797 // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in 798 // C implementation. IsOk() in Java always returns true, so this 799 // is a dead code. 800 } 801 } 802