1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2018 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 5 // created: 2018may04 Markus W. Scherer 6 7 package ohos.global.icu.util; 8 9 import java.io.DataOutputStream; 10 import java.io.IOException; 11 import java.io.OutputStream; 12 import java.nio.ByteBuffer; 13 import java.nio.ByteOrder; 14 15 import ohos.global.icu.impl.ICUBinary; 16 import ohos.global.icu.impl.Normalizer2Impl.UTF16Plus; 17 18 /** 19 * Immutable Unicode code point trie. 20 * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. 21 * For details see http://site.icu-project.org/design/struct/utrie 22 * 23 * <p>This class is not intended for public subclassing. 24 * 25 * @see MutableCodePointTrie 26 * @hide exposed on OHOS 27 */ 28 public abstract class CodePointTrie extends CodePointMap { 29 /** 30 * Selectors for the type of a CodePointTrie. 31 * Different trade-offs for size vs. speed. 32 * 33 * <p>Use null for {@link #fromBinary} to accept any type; 34 * {@link #getType} will return the actual type. 35 * 36 * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth) 37 * @see #fromBinary 38 * @see #getType 39 * @hide exposed on OHOS 40 */ 41 public enum Type { 42 /** 43 * Fast/simple/larger BMP data structure. 44 * The {@link Fast} subclasses have additional functions for lookup for BMP and supplementary code points. 45 * 46 * @see Fast 47 */ 48 FAST, 49 /** 50 * Small/slower BMP data structure. 51 * 52 * @see Small 53 */ 54 SMALL 55 } 56 57 /** 58 * Selectors for the number of bits in a CodePointTrie data value. 59 * 60 * <p>Use null for {@link #fromBinary} to accept any data value width; 61 * {@link #getValueWidth} will return the actual data value width. 62 * 63 * @hide exposed on OHOS 64 */ 65 public enum ValueWidth { 66 /** 67 * The trie stores 16 bits per data value. 
68 * It returns them as unsigned values 0..0xffff=65535. 69 */ 70 BITS_16, 71 /** 72 * The trie stores 32 bits per data value. 73 */ 74 BITS_32, 75 /** 76 * The trie stores 8 bits per data value. 77 * It returns them as unsigned values 0..0xff=255. 78 */ 79 BITS_8 80 } 81 CodePointTrie(char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset)82 private CodePointTrie(char[] index, Data data, int highStart, 83 int index3NullOffset, int dataNullOffset) { 84 this.ascii = new int[ASCII_LIMIT]; 85 this.index = index; 86 this.data = data; 87 this.dataLength = data.getDataLength(); 88 this.highStart = highStart; 89 this.index3NullOffset = index3NullOffset; 90 this.dataNullOffset = dataNullOffset; 91 92 for (int c = 0; c < ASCII_LIMIT; ++c) { 93 ascii[c] = data.getFromIndex(c); 94 } 95 96 int nullValueOffset = dataNullOffset; 97 if (nullValueOffset >= dataLength) { 98 nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; 99 } 100 nullValue = data.getFromIndex(nullValueOffset); 101 } 102 103 /** 104 * Creates a trie from its binary form, 105 * stored in the ByteBuffer starting at the current position. 106 * Advances the buffer position to just after the trie data. 107 * Inverse of {@link #toBinary(OutputStream)}. 108 * 109 * <p>The data is copied from the buffer; 110 * later modification of the buffer will not affect the trie. 
111 * 112 * @param type selects the trie type; this method throws an exception 113 * if the type does not match the binary data; 114 * use null to accept any type 115 * @param valueWidth selects the number of bits in a data value; this method throws an exception 116 * if the valueWidth does not match the binary data; 117 * use null to accept any data value width 118 * @param bytes a buffer containing the binary data of a CodePointTrie 119 * @return the trie 120 * @see MutableCodePointTrie#MutableCodePointTrie(int, int) 121 * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth) 122 * @see #toBinary(OutputStream) 123 */ fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes)124 public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) { 125 ByteOrder outerByteOrder = bytes.order(); 126 try { 127 // Enough data for a trie header? 128 if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) { 129 throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header"); 130 } 131 132 // struct UCPTrieHeader 133 /** "Tri3" in big-endian US-ASCII (0x54726933) */ 134 int signature = bytes.getInt(); 135 136 // Check the signature. 137 switch (signature) { 138 case 0x54726933: 139 // The buffer is already set to the trie data byte order. 140 break; 141 case 0x33697254: 142 // Temporarily reverse the byte order. 143 boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN; 144 bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); 145 signature = 0x54726933; 146 break; 147 default: 148 throw new ICUUncheckedIOException("Buffer does not contain a serialized CodePointTrie"); 149 } 150 151 // struct UCPTrieHeader continued 152 /** 153 * Options bit field: 154 * Bits 15..12: Data length bits 19..16. 155 * Bits 11..8: Data null block offset bits 19..16. 156 * Bits 7..6: UCPTrieType 157 * Bits 5..3: Reserved (0). 
158 * Bits 2..0: UCPTrieValueWidth 159 */ 160 int options = bytes.getChar(); 161 162 /** Total length of the index tables. */ 163 int indexLength = bytes.getChar(); 164 165 /** Data length bits 15..0. */ 166 int dataLength = bytes.getChar(); 167 168 /** Index-3 null block offset, 0x7fff or 0xffff if none. */ 169 int index3NullOffset = bytes.getChar(); 170 171 /** Data null block offset bits 15..0, 0xfffff if none. */ 172 int dataNullOffset = bytes.getChar(); 173 174 /** 175 * First code point of the single-value range ending with U+10ffff, 176 * rounded up and then shifted right by SHIFT_2. 177 */ 178 int shiftedHighStart = bytes.getChar(); 179 // struct UCPTrieHeader end 180 181 int typeInt = (options >> 6) & 3; 182 Type actualType; 183 switch (typeInt) { 184 case 0: actualType = Type.FAST; break; 185 case 1: actualType = Type.SMALL; break; 186 default: 187 throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported type"); 188 } 189 190 int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK; 191 ValueWidth actualValueWidth; 192 switch (valueWidthInt) { 193 case 0: actualValueWidth = ValueWidth.BITS_16; break; 194 case 1: actualValueWidth = ValueWidth.BITS_32; break; 195 case 2: actualValueWidth = ValueWidth.BITS_8; break; 196 default: 197 throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported value width"); 198 } 199 200 if ((options & OPTIONS_RESERVED_MASK) != 0) { 201 throw new ICUUncheckedIOException("CodePointTrie data header has unsupported options"); 202 } 203 204 if (type == null) { 205 type = actualType; 206 } 207 if (valueWidth == null) { 208 valueWidth = actualValueWidth; 209 } 210 if (type != actualType || valueWidth != actualValueWidth) { 211 throw new ICUUncheckedIOException("CodePointTrie data header has a different type or value width than required"); 212 } 213 214 // Get the length values and offsets. 
215 dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4); 216 dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8); 217 218 int highStart = shiftedHighStart << SHIFT_2; 219 220 // Calculate the actual length, minus the header. 221 int actualLength = indexLength * 2; 222 if (valueWidth == ValueWidth.BITS_16) { 223 actualLength += dataLength * 2; 224 } else if (valueWidth == ValueWidth.BITS_32) { 225 actualLength += dataLength * 4; 226 } else { 227 actualLength += dataLength; 228 } 229 if (bytes.remaining() < actualLength) { 230 throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data"); 231 } 232 233 char[] index = ICUBinary.getChars(bytes, indexLength, 0); 234 switch (valueWidth) { 235 case BITS_16: { 236 char[] data16 = ICUBinary.getChars(bytes, dataLength, 0); 237 return type == Type.FAST ? 238 new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) : 239 new Small16(index, data16, highStart, index3NullOffset, dataNullOffset); 240 } 241 case BITS_32: { 242 int[] data32 = ICUBinary.getInts(bytes, dataLength, 0); 243 return type == Type.FAST ? 244 new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) : 245 new Small32(index, data32, highStart, index3NullOffset, dataNullOffset); 246 } 247 case BITS_8: { 248 byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0); 249 return type == Type.FAST ? 250 new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) : 251 new Small8(index, data8, highStart, index3NullOffset, dataNullOffset); 252 } 253 default: 254 throw new AssertionError("should be unreachable"); 255 } 256 } finally { 257 bytes.order(outerByteOrder); 258 } 259 } 260 261 /** 262 * Returns the trie type. 263 * 264 * @return the trie type 265 */ getType()266 public abstract Type getType(); 267 /** 268 * Returns the number of bits in a trie data value. 
269 * 270 * @return the number of bits in a trie data value 271 */ getValueWidth()272 public final ValueWidth getValueWidth() { return data.getValueWidth(); } 273 274 /** 275 * {@inheritDoc} 276 */ 277 @Override get(int c)278 public int get(int c) { 279 return data.getFromIndex(cpIndex(c)); 280 } 281 282 /** 283 * Returns a trie value for an ASCII code point, without range checking. 284 * 285 * @param c the input code point; must be U+0000..U+007F 286 * @return The ASCII code point's trie value. 287 */ asciiGet(int c)288 public final int asciiGet(int c) { 289 return ascii[c]; 290 } 291 292 private static final int MAX_UNICODE = 0x10ffff; 293 294 private static final int ASCII_LIMIT = 0x80; 295 maybeFilterValue(int value, int trieNullValue, int nullValue, ValueFilter filter)296 private static final int maybeFilterValue(int value, int trieNullValue, int nullValue, 297 ValueFilter filter) { 298 if (value == trieNullValue) { 299 value = nullValue; 300 } else if (filter != null) { 301 value = filter.apply(value); 302 } 303 return value; 304 } 305 306 /** 307 * {@inheritDoc} 308 */ 309 @Override getRange(int start, ValueFilter filter, Range range)310 public final boolean getRange(int start, ValueFilter filter, Range range) { 311 if (start < 0 || MAX_UNICODE < start) { 312 return false; 313 } 314 if (start >= highStart) { 315 int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; 316 int value = data.getFromIndex(di); 317 if (filter != null) { value = filter.apply(value); } 318 range.set(start, MAX_UNICODE, value); 319 return true; 320 } 321 322 int nullValue = this.nullValue; 323 if (filter != null) { nullValue = filter.apply(nullValue); } 324 Type type = getType(); 325 326 int prevI3Block = -1; 327 int prevBlock = -1; 328 int c = start; 329 // Initialize to make compiler happy. Real value when haveValue is true. 
330 int trieValue = 0, value = 0; 331 boolean haveValue = false; 332 do { 333 int i3Block; 334 int i3; 335 int i3BlockLength; 336 int dataBlockLength; 337 if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) { 338 i3Block = 0; 339 i3 = c >> FAST_SHIFT; 340 i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH; 341 dataBlockLength = FAST_DATA_BLOCK_LENGTH; 342 } else { 343 // Use the multi-stage index. 344 int i1 = c >> SHIFT_1; 345 if (type == Type.FAST) { 346 assert(0xffff < c && c < highStart); 347 i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; 348 } else { 349 assert(c < highStart && highStart > SMALL_LIMIT); 350 i1 += SMALL_INDEX_LENGTH; 351 } 352 i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; 353 if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) { 354 // The index-3 block is the same as the previous one, and filled with value. 355 assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0); 356 c += CP_PER_INDEX_2_ENTRY; 357 continue; 358 } 359 prevI3Block = i3Block; 360 if (i3Block == index3NullOffset) { 361 // This is the index-3 null block. 362 if (haveValue) { 363 if (nullValue != value) { 364 range.set(start, c - 1, value); 365 return true; 366 } 367 } else { 368 trieValue = this.nullValue; 369 value = nullValue; 370 haveValue = true; 371 } 372 prevBlock = dataNullOffset; 373 c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1); 374 continue; 375 } 376 i3 = (c >> SHIFT_3) & INDEX_3_MASK; 377 i3BlockLength = INDEX_3_BLOCK_LENGTH; 378 dataBlockLength = SMALL_DATA_BLOCK_LENGTH; 379 } 380 // Enumerate data blocks for one index-3 block. 381 do { 382 int block; 383 if ((i3Block & 0x8000) == 0) { 384 block = index[i3Block + i3]; 385 } else { 386 // 18-bit indexes stored in groups of 9 entries per 8 indexes. 
387 int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); 388 int gi = i3 & 7; 389 block = (index[group++] << (2 + (2 * gi))) & 0x30000; 390 block |= index[group + gi]; 391 } 392 if (block == prevBlock && (c - start) >= dataBlockLength) { 393 // The block is the same as the previous one, and filled with value. 394 assert((c & (dataBlockLength - 1)) == 0); 395 c += dataBlockLength; 396 } else { 397 int dataMask = dataBlockLength - 1; 398 prevBlock = block; 399 if (block == dataNullOffset) { 400 // This is the data null block. 401 if (haveValue) { 402 if (nullValue != value) { 403 range.set(start, c - 1, value); 404 return true; 405 } 406 } else { 407 trieValue = this.nullValue; 408 value = nullValue; 409 haveValue = true; 410 } 411 c = (c + dataBlockLength) & ~dataMask; 412 } else { 413 int di = block + (c & dataMask); 414 int trieValue2 = data.getFromIndex(di); 415 if (haveValue) { 416 if (trieValue2 != trieValue) { 417 if (filter == null || 418 maybeFilterValue(trieValue2, this.nullValue, nullValue, 419 filter) != value) { 420 range.set(start, c - 1, value); 421 return true; 422 } 423 trieValue = trieValue2; // may or may not help 424 } 425 } else { 426 trieValue = trieValue2; 427 value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter); 428 haveValue = true; 429 } 430 while ((++c & dataMask) != 0) { 431 trieValue2 = data.getFromIndex(++di); 432 if (trieValue2 != trieValue) { 433 if (filter == null || 434 maybeFilterValue(trieValue2, this.nullValue, nullValue, 435 filter) != value) { 436 range.set(start, c - 1, value); 437 return true; 438 } 439 trieValue = trieValue2; // may or may not help 440 } 441 } 442 } 443 } 444 } while (++i3 < i3BlockLength); 445 } while (c < highStart); 446 assert(haveValue); 447 int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; 448 int highValue = data.getFromIndex(di); 449 if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) { 450 --c; 451 } else { 452 c = MAX_UNICODE; 453 } 454 range.set(start, 
c, value); 455 return true; 456 } 457 458 /** 459 * Writes a representation of the trie to the output stream. 460 * Inverse of {@link #fromBinary}. 461 * 462 * @param os the output stream 463 * @return the number of bytes written 464 */ toBinary(OutputStream os)465 public final int toBinary(OutputStream os) { 466 try { 467 DataOutputStream dos = new DataOutputStream(os); 468 469 // Write the UCPTrieHeader 470 dos.writeInt(0x54726933); // signature="Tri3" 471 dos.writeChar( // options 472 ((dataLength & 0xf0000) >> 4) | 473 ((dataNullOffset & 0xf0000) >> 8) | 474 (getType().ordinal() << 6) | 475 getValueWidth().ordinal()); 476 dos.writeChar(index.length); 477 dos.writeChar(dataLength); 478 dos.writeChar(index3NullOffset); 479 dos.writeChar(dataNullOffset); 480 dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart 481 int length = 16; // sizeof(UCPTrieHeader) 482 483 for (char i : index) { dos.writeChar(i); } 484 length += index.length * 2; 485 length += data.write(dos); 486 return length; 487 } catch (IOException e) { 488 throw new ICUUncheckedIOException(e); 489 } 490 } 491 492 /** @hide draft / provisional / internal are hidden on OHOS*/ 493 static final int FAST_SHIFT = 6; 494 495 /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ 496 static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT; 497 498 /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ 499 private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1; 500 501 /** @hide draft / provisional / internal are hidden on OHOS*/ 502 private static final int SMALL_MAX = 0xfff; 503 504 /** 505 * Offset from dataLength (to be subtracted) for fetching the 506 * value returned for out-of-range code points and ill-formed UTF-8/16. 
507 * @hide draft / provisional / internal are hidden on OHOS 508 */ 509 private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1; 510 /** 511 * Offset from dataLength (to be subtracted) for fetching the 512 * value returned for code points highStart..U+10FFFF. 513 * @hide draft / provisional / internal are hidden on OHOS 514 */ 515 private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2; 516 517 // ucptrie_impl.h 518 519 /** The length of the BMP index table. 1024=0x400 */ 520 private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT; 521 522 static final int SMALL_LIMIT = 0x1000; 523 private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT; 524 525 /** Shift size for getting the index-3 table offset. */ 526 static final int SHIFT_3 = 4; 527 528 /** Shift size for getting the index-2 table offset. */ 529 private static final int SHIFT_2 = 5 + SHIFT_3; 530 531 /** Shift size for getting the index-1 table offset. */ 532 private static final int SHIFT_1 = 5 + SHIFT_2; 533 534 /** 535 * Difference between two shift sizes, 536 * for getting an index-2 offset from an index-3 offset. 5=9-4 537 */ 538 static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3; 539 540 /** 541 * Difference between two shift sizes, 542 * for getting an index-1 offset from an index-2 offset. 5=14-9 543 */ 544 static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2; 545 546 /** 547 * Number of index-1 entries for the BMP. (4) 548 * This part of the index-1 table is omitted from the serialized form. 549 */ 550 private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1; 551 552 /** Number of entries in an index-2 block. 32=0x20 */ 553 static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2; 554 555 /** Mask for getting the lower bits for the in-index-2-block offset. */ 556 static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1; 557 558 /** Number of code points per index-2 table entry. 
512=0x200 */ 559 static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2; 560 561 /** Number of entries in an index-3 block. 32=0x20 */ 562 static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3; 563 564 /** Mask for getting the lower bits for the in-index-3-block offset. */ 565 private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1; 566 567 /** Number of entries in a small data block. 16=0x10 */ 568 static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3; 569 570 /** Mask for getting the lower bits for the in-small-data-block offset. */ 571 static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1; 572 573 // ucptrie_impl.h: Constants for use with UCPTrieHeader.options. 574 private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000; 575 private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00; 576 private static final int OPTIONS_RESERVED_MASK = 0x38; 577 private static final int OPTIONS_VALUE_BITS_MASK = 7; 578 /** 579 * Value for index3NullOffset which indicates that there is no index-3 null block. 580 * Bit 15 is unused for this value because this bit is used if the index-3 contains 581 * 18-bit indexes. 
582 */ 583 static final int NO_INDEX3_NULL_OFFSET = 0x7fff; 584 static final int NO_DATA_NULL_OFFSET = 0xfffff; 585 586 private static abstract class Data { getValueWidth()587 abstract ValueWidth getValueWidth(); getDataLength()588 abstract int getDataLength(); getFromIndex(int index)589 abstract int getFromIndex(int index); write(DataOutputStream dos)590 abstract int write(DataOutputStream dos) throws IOException; 591 } 592 593 private static final class Data16 extends Data { 594 char[] array; Data16(char[] a)595 Data16(char[] a) { array = a; } getValueWidth()596 @Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; } getDataLength()597 @Override int getDataLength() { return array.length; } getFromIndex(int index)598 @Override int getFromIndex(int index) { return array[index]; } write(DataOutputStream dos)599 @Override int write(DataOutputStream dos) throws IOException { 600 for (char v : array) { dos.writeChar(v); } 601 return array.length * 2; 602 } 603 } 604 605 private static final class Data32 extends Data { 606 int[] array; Data32(int[] a)607 Data32(int[] a) { array = a; } getValueWidth()608 @Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; } getDataLength()609 @Override int getDataLength() { return array.length; } getFromIndex(int index)610 @Override int getFromIndex(int index) { return array[index]; } write(DataOutputStream dos)611 @Override int write(DataOutputStream dos) throws IOException { 612 for (int v : array) { dos.writeInt(v); } 613 return array.length * 4; 614 } 615 } 616 617 private static final class Data8 extends Data { 618 byte[] array; Data8(byte[] a)619 Data8(byte[] a) { array = a; } getValueWidth()620 @Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; } getDataLength()621 @Override int getDataLength() { return array.length; } getFromIndex(int index)622 @Override int getFromIndex(int index) { return array[index] & 0xff; } write(DataOutputStream dos)623 @Override int write(DataOutputStream dos) 
throws IOException { 624 for (byte v : array) { dos.writeByte(v); } 625 return array.length; 626 } 627 } 628 629 /** @hide draft / provisional / internal are hidden on OHOS*/ 630 private final int[] ascii; 631 632 /** @hide draft / provisional / internal are hidden on OHOS*/ 633 private final char[] index; 634 635 /** 636 * @deprecated This API is ICU internal only. 637 * @hide draft / provisional / internal are hidden on OHOS 638 */ 639 @Deprecated 640 protected final Data data; 641 /** 642 * @deprecated This API is ICU internal only. 643 * @hide draft / provisional / internal are hidden on OHOS 644 */ 645 @Deprecated 646 protected final int dataLength; 647 /** 648 * Start of the last range which ends at U+10FFFF. 649 * @deprecated This API is ICU internal only. 650 * @hide draft / provisional / internal are hidden on OHOS 651 */ 652 @Deprecated 653 protected final int highStart; 654 655 /** 656 * Internal index-3 null block offset. 657 * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. 658 * @hide draft / provisional / internal are hidden on OHOS 659 */ 660 private final int index3NullOffset; 661 /** 662 * Internal data null block offset, not shifted. 663 * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. 664 * @hide draft / provisional / internal are hidden on OHOS 665 */ 666 private final int dataNullOffset; 667 /** @hide draft / provisional / internal are hidden on OHOS*/ 668 private final int nullValue; 669 670 /** 671 * @deprecated This API is ICU internal only. 672 * @hide draft / provisional / internal are hidden on OHOS 673 */ 674 @Deprecated fastIndex(int c)675 protected final int fastIndex(int c) { 676 return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK); 677 } 678 679 /** 680 * @deprecated This API is ICU internal only. 
681 * @hide draft / provisional / internal are hidden on OHOS 682 */ 683 @Deprecated smallIndex(Type type, int c)684 protected final int smallIndex(Type type, int c) { 685 // Split into two methods to make this part inline-friendly. 686 // In C, this part is a macro. 687 if (c >= highStart) { 688 return dataLength - HIGH_VALUE_NEG_DATA_OFFSET; 689 } 690 return internalSmallIndex(type, c); 691 } 692 internalSmallIndex(Type type, int c)693 private final int internalSmallIndex(Type type, int c) { 694 int i1 = c >> SHIFT_1; 695 if (type == Type.FAST) { 696 assert(0xffff < c && c < highStart); 697 i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; 698 } else { 699 assert(0 <= c && c < highStart && highStart > SMALL_LIMIT); 700 i1 += SMALL_INDEX_LENGTH; 701 } 702 int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; 703 int i3 = (c >> SHIFT_3) & INDEX_3_MASK; 704 int dataBlock; 705 if ((i3Block & 0x8000) == 0) { 706 // 16-bit indexes 707 dataBlock = index[i3Block + i3]; 708 } else { 709 // 18-bit indexes stored in groups of 9 entries per 8 indexes. 710 i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); 711 i3 &= 7; 712 dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000; 713 dataBlock |= index[i3Block + i3]; 714 } 715 return dataBlock + (c & SMALL_DATA_MASK); 716 } 717 718 /** 719 * @deprecated This API is ICU internal only. 720 * @hide draft / provisional / internal are hidden on OHOS 721 */ 722 @Deprecated cpIndex(int c)723 protected abstract int cpIndex(int c); 724 725 /** 726 * A CodePointTrie with {@link Type#FAST}. 727 * 728 * @hide exposed on OHOS 729 */ 730 public static abstract class Fast extends CodePointTrie { Fast(char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset)731 private Fast(char[] index, Data data, int highStart, 732 int index3NullOffset, int dataNullOffset) { 733 super(index, data, highStart, index3NullOffset, dataNullOffset); 734 } 735 736 /** 737 * Creates a trie from its binary form. 
738 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 739 * with {@link Type#FAST}. 740 * 741 * @param valueWidth selects the number of bits in a data value; this method throws an exception 742 * if the valueWidth does not match the binary data; 743 * use null to accept any data value width 744 * @param bytes a buffer containing the binary data of a CodePointTrie 745 * @return the trie 746 */ fromBinary(ValueWidth valueWidth, ByteBuffer bytes)747 public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { 748 return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes); 749 } 750 751 /** 752 * @return {@link Type#FAST} 753 */ 754 @Override getType()755 public final Type getType() { return Type.FAST; } 756 757 /** 758 * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. 759 * Can be used to look up a value for a UTF-16 code unit if other parts of 760 * the string processing check for surrogates. 761 * 762 * @param c the input code point, must be U+0000..U+FFFF 763 * @return The BMP code point's trie value. 764 */ bmpGet(int c)765 public abstract int bmpGet(int c); 766 767 /** 768 * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), 769 * without range checking. 770 * 771 * @param c the input code point, must be U+10000..U+10FFFF 772 * @return The supplementary code point's trie value. 773 */ suppGet(int c)774 public abstract int suppGet(int c); 775 776 /** 777 * @deprecated This API is ICU internal only. 
778 * @hide draft / provisional / internal are hidden on OHOS 779 */ 780 @Deprecated 781 @Override cpIndex(int c)782 protected final int cpIndex(int c) { 783 if (c >= 0) { 784 if (c <= 0xffff) { 785 return fastIndex(c); 786 } else if (c <= 0x10ffff) { 787 return smallIndex(Type.FAST, c); 788 } 789 } 790 return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; 791 } 792 793 /** 794 * {@inheritDoc} 795 */ 796 @Override stringIterator(CharSequence s, int sIndex)797 public final StringIterator stringIterator(CharSequence s, int sIndex) { 798 return new FastStringIterator(s, sIndex); 799 } 800 801 private final class FastStringIterator extends StringIterator { FastStringIterator(CharSequence s, int sIndex)802 private FastStringIterator(CharSequence s, int sIndex) { 803 super(s, sIndex); 804 } 805 806 @Override next()807 public boolean next() { 808 if (sIndex >= s.length()) { 809 return false; 810 } 811 char lead = s.charAt(sIndex++); 812 c = lead; 813 int dataIndex; 814 if (!Character.isSurrogate(lead)) { 815 dataIndex = fastIndex(c); 816 } else { 817 char trail; 818 if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && 819 Character.isLowSurrogate(trail = s.charAt(sIndex))) { 820 ++sIndex; 821 c = Character.toCodePoint(lead, trail); 822 dataIndex = smallIndex(Type.FAST, c); 823 } else { 824 dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; 825 } 826 } 827 value = data.getFromIndex(dataIndex); 828 return true; 829 } 830 831 @Override previous()832 public boolean previous() { 833 if (sIndex <= 0) { 834 return false; 835 } 836 char trail = s.charAt(--sIndex); 837 c = trail; 838 int dataIndex; 839 if (!Character.isSurrogate(trail)) { 840 dataIndex = fastIndex(c); 841 } else { 842 char lead; 843 if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && 844 Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { 845 --sIndex; 846 c = Character.toCodePoint(lead, trail); 847 dataIndex = smallIndex(Type.FAST, c); 848 } else { 849 dataIndex = dataLength - 
ERROR_VALUE_NEG_DATA_OFFSET; 850 } 851 } 852 value = data.getFromIndex(dataIndex); 853 return true; 854 } 855 } 856 } 857 858 /** 859 * A CodePointTrie with {@link Type#SMALL}. 860 * 861 * @hide exposed on OHOS 862 */ 863 public static abstract class Small extends CodePointTrie { Small(char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset)864 private Small(char[] index, Data data, int highStart, 865 int index3NullOffset, int dataNullOffset) { 866 super(index, data, highStart, index3NullOffset, dataNullOffset); 867 } 868 869 /** 870 * Creates a trie from its binary form. 871 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 872 * with {@link Type#SMALL}. 873 * 874 * @param valueWidth selects the number of bits in a data value; this method throws an exception 875 * if the valueWidth does not match the binary data; 876 * use null to accept any data value width 877 * @param bytes a buffer containing the binary data of a CodePointTrie 878 * @return the trie 879 */ fromBinary(ValueWidth valueWidth, ByteBuffer bytes)880 public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { 881 return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes); 882 } 883 884 /** 885 * @return {@link Type#SMALL} 886 */ 887 @Override getType()888 public final Type getType() { return Type.SMALL; } 889 890 /** 891 * @deprecated This API is ICU internal only. 
892 * @hide draft / provisional / internal are hidden on OHOS 893 */ 894 @Deprecated 895 @Override cpIndex(int c)896 protected final int cpIndex(int c) { 897 if (c >= 0) { 898 if (c <= SMALL_MAX) { 899 return fastIndex(c); 900 } else if (c <= 0x10ffff) { 901 return smallIndex(Type.SMALL, c); 902 } 903 } 904 return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; 905 } 906 907 /** 908 * {@inheritDoc} 909 */ 910 @Override stringIterator(CharSequence s, int sIndex)911 public final StringIterator stringIterator(CharSequence s, int sIndex) { 912 return new SmallStringIterator(s, sIndex); 913 } 914 915 private final class SmallStringIterator extends StringIterator { SmallStringIterator(CharSequence s, int sIndex)916 private SmallStringIterator(CharSequence s, int sIndex) { 917 super(s, sIndex); 918 } 919 920 @Override next()921 public boolean next() { 922 if (sIndex >= s.length()) { 923 return false; 924 } 925 char lead = s.charAt(sIndex++); 926 c = lead; 927 int dataIndex; 928 if (!Character.isSurrogate(lead)) { 929 dataIndex = cpIndex(c); 930 } else { 931 char trail; 932 if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && 933 Character.isLowSurrogate(trail = s.charAt(sIndex))) { 934 ++sIndex; 935 c = Character.toCodePoint(lead, trail); 936 dataIndex = smallIndex(Type.SMALL, c); 937 } else { 938 dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; 939 } 940 } 941 value = data.getFromIndex(dataIndex); 942 return true; 943 } 944 945 @Override previous()946 public boolean previous() { 947 if (sIndex <= 0) { 948 return false; 949 } 950 char trail = s.charAt(--sIndex); 951 c = trail; 952 int dataIndex; 953 if (!Character.isSurrogate(trail)) { 954 dataIndex = cpIndex(c); 955 } else { 956 char lead; 957 if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && 958 Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { 959 --sIndex; 960 c = Character.toCodePoint(lead, trail); 961 dataIndex = smallIndex(Type.SMALL, c); 962 } else { 963 dataIndex = dataLength - 
ERROR_VALUE_NEG_DATA_OFFSET; 964 } 965 } 966 value = data.getFromIndex(dataIndex); 967 return true; 968 } 969 } 970 } 971 972 /** 973 * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}. 974 * 975 * @hide exposed on OHOS 976 */ 977 public static final class Fast16 extends Fast { 978 private final char[] dataArray; 979 Fast16(char[] index, char[] data16, int highStart, int index3NullOffset, int dataNullOffset)980 Fast16(char[] index, char[] data16, int highStart, 981 int index3NullOffset, int dataNullOffset) { 982 super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); 983 this.dataArray = data16; 984 } 985 986 /** 987 * Creates a trie from its binary form. 988 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 989 * with {@link Type#FAST} and {@link ValueWidth#BITS_16}. 990 * 991 * @param bytes a buffer containing the binary data of a CodePointTrie 992 * @return the trie 993 */ fromBinary(ByteBuffer bytes)994 public static Fast16 fromBinary(ByteBuffer bytes) { 995 return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes); 996 } 997 998 /** 999 * {@inheritDoc} 1000 */ 1001 @Override get(int c)1002 public final int get(int c) { 1003 return dataArray[cpIndex(c)]; 1004 } 1005 1006 /** 1007 * {@inheritDoc} 1008 */ 1009 @Override bmpGet(int c)1010 public final int bmpGet(int c) { 1011 assert 0 <= c && c <= 0xffff; 1012 return dataArray[fastIndex(c)]; 1013 } 1014 1015 /** 1016 * {@inheritDoc} 1017 */ 1018 @Override suppGet(int c)1019 public final int suppGet(int c) { 1020 assert 0x10000 <= c && c <= 0x10ffff; 1021 return dataArray[smallIndex(Type.FAST, c)]; 1022 } 1023 } 1024 1025 /** 1026 * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}. 
1027 * 1028 * @hide exposed on OHOS 1029 */ 1030 public static final class Fast32 extends Fast { 1031 private final int[] dataArray; 1032 Fast32(char[] index, int[] data32, int highStart, int index3NullOffset, int dataNullOffset)1033 Fast32(char[] index, int[] data32, int highStart, 1034 int index3NullOffset, int dataNullOffset) { 1035 super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); 1036 this.dataArray = data32; 1037 } 1038 1039 /** 1040 * Creates a trie from its binary form. 1041 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 1042 * with {@link Type#FAST} and {@link ValueWidth#BITS_32}. 1043 * 1044 * @param bytes a buffer containing the binary data of a CodePointTrie 1045 * @return the trie 1046 */ fromBinary(ByteBuffer bytes)1047 public static Fast32 fromBinary(ByteBuffer bytes) { 1048 return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes); 1049 } 1050 1051 /** 1052 * {@inheritDoc} 1053 */ 1054 @Override get(int c)1055 public final int get(int c) { 1056 return dataArray[cpIndex(c)]; 1057 } 1058 1059 /** 1060 * {@inheritDoc} 1061 */ 1062 @Override bmpGet(int c)1063 public final int bmpGet(int c) { 1064 assert 0 <= c && c <= 0xffff; 1065 return dataArray[fastIndex(c)]; 1066 } 1067 1068 /** 1069 * {@inheritDoc} 1070 */ 1071 @Override suppGet(int c)1072 public final int suppGet(int c) { 1073 assert 0x10000 <= c && c <= 0x10ffff; 1074 return dataArray[smallIndex(Type.FAST, c)]; 1075 } 1076 } 1077 1078 /** 1079 * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}. 
1080 * 1081 * @hide exposed on OHOS 1082 */ 1083 public static final class Fast8 extends Fast { 1084 private final byte[] dataArray; 1085 Fast8(char[] index, byte[] data8, int highStart, int index3NullOffset, int dataNullOffset)1086 Fast8(char[] index, byte[] data8, int highStart, 1087 int index3NullOffset, int dataNullOffset) { 1088 super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); 1089 this.dataArray = data8; 1090 } 1091 1092 /** 1093 * Creates a trie from its binary form. 1094 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 1095 * with {@link Type#FAST} and {@link ValueWidth#BITS_8}. 1096 * 1097 * @param bytes a buffer containing the binary data of a CodePointTrie 1098 * @return the trie 1099 */ fromBinary(ByteBuffer bytes)1100 public static Fast8 fromBinary(ByteBuffer bytes) { 1101 return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes); 1102 } 1103 1104 /** 1105 * {@inheritDoc} 1106 */ 1107 @Override get(int c)1108 public final int get(int c) { 1109 return dataArray[cpIndex(c)] & 0xff; 1110 } 1111 1112 /** 1113 * {@inheritDoc} 1114 */ 1115 @Override bmpGet(int c)1116 public final int bmpGet(int c) { 1117 assert 0 <= c && c <= 0xffff; 1118 return dataArray[fastIndex(c)] & 0xff; 1119 } 1120 1121 /** 1122 * {@inheritDoc} 1123 */ 1124 @Override suppGet(int c)1125 public final int suppGet(int c) { 1126 assert 0x10000 <= c && c <= 0x10ffff; 1127 return dataArray[smallIndex(Type.FAST, c)] & 0xff; 1128 } 1129 } 1130 1131 /** 1132 * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}. 
1133 * 1134 * @hide exposed on OHOS 1135 */ 1136 public static final class Small16 extends Small { Small16(char[] index, char[] data16, int highStart, int index3NullOffset, int dataNullOffset)1137 Small16(char[] index, char[] data16, int highStart, 1138 int index3NullOffset, int dataNullOffset) { 1139 super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); 1140 } 1141 1142 /** 1143 * Creates a trie from its binary form. 1144 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 1145 * with {@link Type#SMALL} and {@link ValueWidth#BITS_16}. 1146 * 1147 * @param bytes a buffer containing the binary data of a CodePointTrie 1148 * @return the trie 1149 */ fromBinary(ByteBuffer bytes)1150 public static Small16 fromBinary(ByteBuffer bytes) { 1151 return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes); 1152 } 1153 } 1154 1155 /** 1156 * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}. 1157 * 1158 * @hide exposed on OHOS 1159 */ 1160 public static final class Small32 extends Small { Small32(char[] index, int[] data32, int highStart, int index3NullOffset, int dataNullOffset)1161 Small32(char[] index, int[] data32, int highStart, 1162 int index3NullOffset, int dataNullOffset) { 1163 super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); 1164 } 1165 1166 /** 1167 * Creates a trie from its binary form. 1168 * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} 1169 * with {@link Type#SMALL} and {@link ValueWidth#BITS_32}. 1170 * 1171 * @param bytes a buffer containing the binary data of a CodePointTrie 1172 * @return the trie 1173 */ fromBinary(ByteBuffer bytes)1174 public static Small32 fromBinary(ByteBuffer bytes) { 1175 return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes); 1176 } 1177 } 1178 1179 /** 1180 * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}. 
*
     * @hide exposed on OHOS
     */
    public static final class Small8 extends Small {
        Small8(char[] index, byte[] data8,
                int highStart, int index3NullOffset, int dataNullOffset) {
            // The values are handed to the superclass wrapped in a Data8.
            super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
        }

        /**
         * Creates a trie from its binary form.
         * Convenience wrapper: forwards to
         * {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
         * with {@link Type#SMALL} and {@link ValueWidth#BITS_8} and casts the result.
         *
         * @param bytes a buffer containing the binary data of a CodePointTrie
         * @return the trie
         */
        public static Small8 fromBinary(ByteBuffer bytes) {
            final CodePointTrie trie =
                    CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);
            return (Small8) trie;
        }
    }
}