• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License

// created: 2018may04 Markus W. Scherer
6 
7 package ohos.global.icu.util;
8 
9 import java.io.DataOutputStream;
10 import java.io.IOException;
11 import java.io.OutputStream;
12 import java.nio.ByteBuffer;
13 import java.nio.ByteOrder;
14 
15 import ohos.global.icu.impl.ICUBinary;
16 import ohos.global.icu.impl.Normalizer2Impl.UTF16Plus;
17 
/**
 * Immutable Unicode code point trie.
 * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
 * For details see http://site.icu-project.org/design/struct/utrie
 *
 * <p>This class is not intended for public subclassing.
 *
 * @see MutableCodePointTrie
 * @hide exposed on OHOS
 */
28 public abstract class CodePointTrie extends CodePointMap {
29     /**
30      * Selectors for the type of a CodePointTrie.
31      * Different trade-offs for size vs. speed.
32      *
33      * <p>Use null for {@link #fromBinary} to accept any type;
34      * {@link #getType} will return the actual type.
35      *
36      * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
37      * @see #fromBinary
38      * @see #getType
39      * @hide exposed on OHOS
40      */
41     public enum Type {
42         /**
43          * Fast/simple/larger BMP data structure.
44          * The {@link Fast} subclasses have additional functions for lookup for BMP and supplementary code points.
45          *
46          * @see Fast
47          */
48         FAST,
49         /**
50          * Small/slower BMP data structure.
51          *
52          * @see Small
53          */
54         SMALL
55     }
56 
57     /**
58      * Selectors for the number of bits in a CodePointTrie data value.
59      *
60      * <p>Use null for {@link #fromBinary} to accept any data value width;
61      * {@link #getValueWidth} will return the actual data value width.
62      *
63      * @hide exposed on OHOS
64      */
65     public enum ValueWidth {
66         /**
67          * The trie stores 16 bits per data value.
68          * It returns them as unsigned values 0..0xffff=65535.
69          */
70         BITS_16,
71         /**
72          * The trie stores 32 bits per data value.
73          */
74         BITS_32,
75         /**
76          * The trie stores 8 bits per data value.
77          * It returns them as unsigned values 0..0xff=255.
78          */
79         BITS_8
80     }
81 
CodePointTrie(char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset)82     private CodePointTrie(char[] index, Data data, int highStart,
83             int index3NullOffset, int dataNullOffset) {
84         this.ascii = new int[ASCII_LIMIT];
85         this.index = index;
86         this.data = data;
87         this.dataLength = data.getDataLength();
88         this.highStart = highStart;
89         this.index3NullOffset = index3NullOffset;
90         this.dataNullOffset = dataNullOffset;
91 
92         for (int c = 0; c < ASCII_LIMIT; ++c) {
93             ascii[c] = data.getFromIndex(c);
94         }
95 
96         int nullValueOffset = dataNullOffset;
97         if (nullValueOffset >= dataLength) {
98             nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
99         }
100         nullValue = data.getFromIndex(nullValueOffset);
101     }
102 
103     /**
104      * Creates a trie from its binary form,
105      * stored in the ByteBuffer starting at the current position.
106      * Advances the buffer position to just after the trie data.
107      * Inverse of {@link #toBinary(OutputStream)}.
108      *
109      * <p>The data is copied from the buffer;
110      * later modification of the buffer will not affect the trie.
111      *
112      * @param type selects the trie type; this method throws an exception
113      *             if the type does not match the binary data;
114      *             use null to accept any type
115      * @param valueWidth selects the number of bits in a data value; this method throws an exception
116      *                  if the valueWidth does not match the binary data;
117      *                  use null to accept any data value width
118      * @param bytes a buffer containing the binary data of a CodePointTrie
119      * @return the trie
120      * @see MutableCodePointTrie#MutableCodePointTrie(int, int)
121      * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
122      * @see #toBinary(OutputStream)
123      */
fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes)124     public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) {
125         ByteOrder outerByteOrder = bytes.order();
126         try {
127             // Enough data for a trie header?
128             if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) {
129                 throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header");
130             }
131 
132             // struct UCPTrieHeader
133             /** "Tri3" in big-endian US-ASCII (0x54726933) */
134             int signature = bytes.getInt();
135 
136             // Check the signature.
137             switch (signature) {
138             case 0x54726933:
139                 // The buffer is already set to the trie data byte order.
140                 break;
141             case 0x33697254:
142                 // Temporarily reverse the byte order.
143                 boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
144                 bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
145                 signature = 0x54726933;
146                 break;
147             default:
148                 throw new ICUUncheckedIOException("Buffer does not contain a serialized CodePointTrie");
149             }
150 
151             // struct UCPTrieHeader continued
152             /**
153              * Options bit field:
154              * Bits 15..12: Data length bits 19..16.
155              * Bits 11..8: Data null block offset bits 19..16.
156              * Bits 7..6: UCPTrieType
157              * Bits 5..3: Reserved (0).
158              * Bits 2..0: UCPTrieValueWidth
159              */
160             int options = bytes.getChar();
161 
162             /** Total length of the index tables. */
163             int indexLength = bytes.getChar();
164 
165             /** Data length bits 15..0. */
166             int dataLength = bytes.getChar();
167 
168             /** Index-3 null block offset, 0x7fff or 0xffff if none. */
169             int index3NullOffset = bytes.getChar();
170 
171             /** Data null block offset bits 15..0, 0xfffff if none. */
172             int dataNullOffset = bytes.getChar();
173 
174             /**
175              * First code point of the single-value range ending with U+10ffff,
176              * rounded up and then shifted right by SHIFT_2.
177              */
178             int shiftedHighStart = bytes.getChar();
179             // struct UCPTrieHeader end
180 
181             int typeInt = (options >> 6) & 3;
182             Type actualType;
183             switch (typeInt) {
184             case 0: actualType = Type.FAST; break;
185             case 1: actualType = Type.SMALL; break;
186             default:
187                 throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported type");
188             }
189 
190             int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK;
191             ValueWidth actualValueWidth;
192             switch (valueWidthInt) {
193             case 0: actualValueWidth = ValueWidth.BITS_16; break;
194             case 1: actualValueWidth = ValueWidth.BITS_32; break;
195             case 2: actualValueWidth = ValueWidth.BITS_8; break;
196             default:
197                 throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported value width");
198             }
199 
200             if ((options & OPTIONS_RESERVED_MASK) != 0) {
201                 throw new ICUUncheckedIOException("CodePointTrie data header has unsupported options");
202             }
203 
204             if (type == null) {
205                 type = actualType;
206             }
207             if (valueWidth == null) {
208                 valueWidth = actualValueWidth;
209             }
210             if (type != actualType || valueWidth != actualValueWidth) {
211                 throw new ICUUncheckedIOException("CodePointTrie data header has a different type or value width than required");
212             }
213 
214             // Get the length values and offsets.
215             dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4);
216             dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8);
217 
218             int highStart = shiftedHighStart << SHIFT_2;
219 
220             // Calculate the actual length, minus the header.
221             int actualLength = indexLength * 2;
222             if (valueWidth == ValueWidth.BITS_16) {
223                 actualLength += dataLength * 2;
224             } else if (valueWidth == ValueWidth.BITS_32) {
225                 actualLength += dataLength * 4;
226             } else {
227                 actualLength += dataLength;
228             }
229             if (bytes.remaining() < actualLength) {
230                 throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data");
231             }
232 
233             char[] index = ICUBinary.getChars(bytes, indexLength, 0);
234             switch (valueWidth) {
235             case BITS_16: {
236                 char[] data16 = ICUBinary.getChars(bytes, dataLength, 0);
237                 return type == Type.FAST ?
238                         new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) :
239                             new Small16(index, data16, highStart, index3NullOffset, dataNullOffset);
240             }
241             case BITS_32: {
242                 int[] data32 = ICUBinary.getInts(bytes, dataLength, 0);
243                 return type == Type.FAST ?
244                         new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) :
245                             new Small32(index, data32, highStart, index3NullOffset, dataNullOffset);
246             }
247             case BITS_8: {
248                 byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0);
249                 return type == Type.FAST ?
250                         new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) :
251                             new Small8(index, data8, highStart, index3NullOffset, dataNullOffset);
252             }
253             default:
254                 throw new AssertionError("should be unreachable");
255             }
256         } finally {
257             bytes.order(outerByteOrder);
258         }
259     }
260 
261     /**
262      * Returns the trie type.
263      *
264      * @return the trie type
265      */
getType()266     public abstract Type getType();
267     /**
268      * Returns the number of bits in a trie data value.
269      *
270      * @return the number of bits in a trie data value
271      */
getValueWidth()272     public final ValueWidth getValueWidth() { return data.getValueWidth(); }
273 
274     /**
275      * {@inheritDoc}
276      */
277     @Override
get(int c)278     public int get(int c) {
279         return data.getFromIndex(cpIndex(c));
280     }
281 
282     /**
283      * Returns a trie value for an ASCII code point, without range checking.
284      *
285      * @param c the input code point; must be U+0000..U+007F
286      * @return The ASCII code point's trie value.
287      */
asciiGet(int c)288     public final int asciiGet(int c) {
289         return ascii[c];
290     }
291 
292     private static final int MAX_UNICODE = 0x10ffff;
293 
294     private static final int ASCII_LIMIT = 0x80;
295 
maybeFilterValue(int value, int trieNullValue, int nullValue, ValueFilter filter)296     private static final int maybeFilterValue(int value, int trieNullValue, int nullValue,
297             ValueFilter filter) {
298         if (value == trieNullValue) {
299             value = nullValue;
300         } else if (filter != null) {
301             value = filter.apply(value);
302         }
303         return value;
304     }
305 
306     /**
307      * {@inheritDoc}
308      */
309     @Override
getRange(int start, ValueFilter filter, Range range)310     public final boolean getRange(int start, ValueFilter filter, Range range) {
311         if (start < 0 || MAX_UNICODE < start) {
312             return false;
313         }
314         if (start >= highStart) {
315             int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
316             int value = data.getFromIndex(di);
317             if (filter != null) { value = filter.apply(value); }
318             range.set(start, MAX_UNICODE, value);
319             return true;
320         }
321 
322         int nullValue = this.nullValue;
323         if (filter != null) { nullValue = filter.apply(nullValue); }
324         Type type = getType();
325 
326         int prevI3Block = -1;
327         int prevBlock = -1;
328         int c = start;
329         // Initialize to make compiler happy. Real value when haveValue is true.
330         int trieValue = 0, value = 0;
331         boolean haveValue = false;
332         do {
333             int i3Block;
334             int i3;
335             int i3BlockLength;
336             int dataBlockLength;
337             if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) {
338                 i3Block = 0;
339                 i3 = c >> FAST_SHIFT;
340                 i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH;
341                 dataBlockLength = FAST_DATA_BLOCK_LENGTH;
342             } else {
343                 // Use the multi-stage index.
344                 int i1 = c >> SHIFT_1;
345                 if (type == Type.FAST) {
346                     assert(0xffff < c && c < highStart);
347                     i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
348                 } else {
349                     assert(c < highStart && highStart > SMALL_LIMIT);
350                     i1 += SMALL_INDEX_LENGTH;
351                 }
352                 i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
353                 if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) {
354                     // The index-3 block is the same as the previous one, and filled with value.
355                     assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
356                     c += CP_PER_INDEX_2_ENTRY;
357                     continue;
358                 }
359                 prevI3Block = i3Block;
360                 if (i3Block == index3NullOffset) {
361                     // This is the index-3 null block.
362                     if (haveValue) {
363                         if (nullValue != value) {
364                             range.set(start, c - 1, value);
365                             return true;
366                         }
367                     } else {
368                         trieValue = this.nullValue;
369                         value = nullValue;
370                         haveValue = true;
371                     }
372                     prevBlock = dataNullOffset;
373                     c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1);
374                     continue;
375                 }
376                 i3 = (c >> SHIFT_3) & INDEX_3_MASK;
377                 i3BlockLength = INDEX_3_BLOCK_LENGTH;
378                 dataBlockLength = SMALL_DATA_BLOCK_LENGTH;
379             }
380             // Enumerate data blocks for one index-3 block.
381             do {
382                 int block;
383                 if ((i3Block & 0x8000) == 0) {
384                     block = index[i3Block + i3];
385                 } else {
386                     // 18-bit indexes stored in groups of 9 entries per 8 indexes.
387                     int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
388                     int gi = i3 & 7;
389                     block = (index[group++] << (2 + (2 * gi))) & 0x30000;
390                     block |= index[group + gi];
391                 }
392                 if (block == prevBlock && (c - start) >= dataBlockLength) {
393                     // The block is the same as the previous one, and filled with value.
394                     assert((c & (dataBlockLength - 1)) == 0);
395                     c += dataBlockLength;
396                 } else {
397                     int dataMask = dataBlockLength - 1;
398                     prevBlock = block;
399                     if (block == dataNullOffset) {
400                         // This is the data null block.
401                         if (haveValue) {
402                             if (nullValue != value) {
403                                 range.set(start, c - 1, value);
404                                 return true;
405                             }
406                         } else {
407                             trieValue = this.nullValue;
408                             value = nullValue;
409                             haveValue = true;
410                         }
411                         c = (c + dataBlockLength) & ~dataMask;
412                     } else {
413                         int di = block + (c & dataMask);
414                         int trieValue2 = data.getFromIndex(di);
415                         if (haveValue) {
416                             if (trieValue2 != trieValue) {
417                                 if (filter == null ||
418                                         maybeFilterValue(trieValue2, this.nullValue, nullValue,
419                                                 filter) != value) {
420                                     range.set(start, c - 1, value);
421                                     return true;
422                                 }
423                                 trieValue = trieValue2;  // may or may not help
424                             }
425                         } else {
426                             trieValue = trieValue2;
427                             value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter);
428                             haveValue = true;
429                         }
430                         while ((++c & dataMask) != 0) {
431                             trieValue2 = data.getFromIndex(++di);
432                             if (trieValue2 != trieValue) {
433                                 if (filter == null ||
434                                         maybeFilterValue(trieValue2, this.nullValue, nullValue,
435                                                 filter) != value) {
436                                     range.set(start, c - 1, value);
437                                     return true;
438                                 }
439                                 trieValue = trieValue2;  // may or may not help
440                             }
441                         }
442                     }
443                 }
444             } while (++i3 < i3BlockLength);
445         } while (c < highStart);
446         assert(haveValue);
447         int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
448         int highValue = data.getFromIndex(di);
449         if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) {
450             --c;
451         } else {
452             c = MAX_UNICODE;
453         }
454         range.set(start, c, value);
455         return true;
456     }
457 
458     /**
459      * Writes a representation of the trie to the output stream.
460      * Inverse of {@link #fromBinary}.
461      *
462      * @param os the output stream
463      * @return the number of bytes written
464      */
toBinary(OutputStream os)465     public final int toBinary(OutputStream os) {
466         try {
467             DataOutputStream dos = new DataOutputStream(os);
468 
469             // Write the UCPTrieHeader
470             dos.writeInt(0x54726933);  // signature="Tri3"
471             dos.writeChar(  // options
472                 ((dataLength & 0xf0000) >> 4) |
473                 ((dataNullOffset & 0xf0000) >> 8) |
474                 (getType().ordinal() << 6) |
475                 getValueWidth().ordinal());
476             dos.writeChar(index.length);
477             dos.writeChar(dataLength);
478             dos.writeChar(index3NullOffset);
479             dos.writeChar(dataNullOffset);
480             dos.writeChar(highStart >> SHIFT_2);  // shiftedHighStart
481             int length = 16;  // sizeof(UCPTrieHeader)
482 
483             for (char i : index) { dos.writeChar(i); }
484             length += index.length * 2;
485             length += data.write(dos);
486             return length;
487         } catch (IOException e) {
488             throw new ICUUncheckedIOException(e);
489         }
490     }
491 
492     /** @hide draft / provisional / internal are hidden on OHOS*/
493     static final int FAST_SHIFT = 6;
494 
495     /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
496     static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT;
497 
498     /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
499     private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1;
500 
501     /** @hide draft / provisional / internal are hidden on OHOS*/
502     private static final int SMALL_MAX = 0xfff;
503 
504     /**
505      * Offset from dataLength (to be subtracted) for fetching the
506      * value returned for out-of-range code points and ill-formed UTF-8/16.
507      * @hide draft / provisional / internal are hidden on OHOS
508      */
509     private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1;
510     /**
511      * Offset from dataLength (to be subtracted) for fetching the
512      * value returned for code points highStart..U+10FFFF.
513      * @hide draft / provisional / internal are hidden on OHOS
514      */
515     private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2;
516 
517     // ucptrie_impl.h
518 
519     /** The length of the BMP index table. 1024=0x400 */
520     private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT;
521 
522     static final int SMALL_LIMIT = 0x1000;
523     private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT;
524 
525     /** Shift size for getting the index-3 table offset. */
526     static final int SHIFT_3 = 4;
527 
528     /** Shift size for getting the index-2 table offset. */
529     private static final int SHIFT_2 = 5 + SHIFT_3;
530 
531     /** Shift size for getting the index-1 table offset. */
532     private static final int SHIFT_1 = 5 + SHIFT_2;
533 
534     /**
535      * Difference between two shift sizes,
536      * for getting an index-2 offset from an index-3 offset. 5=9-4
537      */
538     static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3;
539 
540     /**
541      * Difference between two shift sizes,
542      * for getting an index-1 offset from an index-2 offset. 5=14-9
543      */
544     static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2;
545 
546     /**
547      * Number of index-1 entries for the BMP. (4)
548      * This part of the index-1 table is omitted from the serialized form.
549      */
550     private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;
551 
552     /** Number of entries in an index-2 block. 32=0x20 */
553     static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;
554 
555     /** Mask for getting the lower bits for the in-index-2-block offset. */
556     static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;
557 
558     /** Number of code points per index-2 table entry. 512=0x200 */
559     static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2;
560 
561     /** Number of entries in an index-3 block. 32=0x20 */
562     static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3;
563 
564     /** Mask for getting the lower bits for the in-index-3-block offset. */
565     private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1;
566 
567     /** Number of entries in a small data block. 16=0x10 */
568     static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3;
569 
570     /** Mask for getting the lower bits for the in-small-data-block offset. */
571     static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1;
572 
573     // ucptrie_impl.h: Constants for use with UCPTrieHeader.options.
574     private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000;
575     private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;
576     private static final int OPTIONS_RESERVED_MASK = 0x38;
577     private static final int OPTIONS_VALUE_BITS_MASK = 7;
578     /**
579      * Value for index3NullOffset which indicates that there is no index-3 null block.
580      * Bit 15 is unused for this value because this bit is used if the index-3 contains
581      * 18-bit indexes.
582      */
583     static final int NO_INDEX3_NULL_OFFSET = 0x7fff;
584     static final int NO_DATA_NULL_OFFSET = 0xfffff;
585 
586     private static abstract class Data {
getValueWidth()587         abstract ValueWidth getValueWidth();
getDataLength()588         abstract int getDataLength();
getFromIndex(int index)589         abstract int getFromIndex(int index);
write(DataOutputStream dos)590         abstract int write(DataOutputStream dos) throws IOException;
591     }
592 
593     private static final class Data16 extends Data {
594         char[] array;
Data16(char[] a)595         Data16(char[] a) { array = a; }
getValueWidth()596         @Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; }
getDataLength()597         @Override int getDataLength() { return array.length; }
getFromIndex(int index)598         @Override int getFromIndex(int index) { return array[index]; }
write(DataOutputStream dos)599         @Override int write(DataOutputStream dos) throws IOException {
600             for (char v : array) { dos.writeChar(v); }
601             return array.length * 2;
602         }
603     }
604 
605     private static final class Data32 extends Data {
606         int[] array;
Data32(int[] a)607         Data32(int[] a) { array = a; }
getValueWidth()608         @Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; }
getDataLength()609         @Override int getDataLength() { return array.length; }
getFromIndex(int index)610         @Override int getFromIndex(int index) { return array[index]; }
write(DataOutputStream dos)611         @Override int write(DataOutputStream dos) throws IOException {
612             for (int v : array) { dos.writeInt(v); }
613             return array.length * 4;
614         }
615     }
616 
617     private static final class Data8 extends Data {
618         byte[] array;
Data8(byte[] a)619         Data8(byte[] a) { array = a; }
getValueWidth()620         @Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; }
getDataLength()621         @Override int getDataLength() { return array.length; }
getFromIndex(int index)622         @Override int getFromIndex(int index) { return array[index] & 0xff; }
write(DataOutputStream dos)623         @Override int write(DataOutputStream dos) throws IOException {
624             for (byte v : array) { dos.writeByte(v); }
625             return array.length;
626         }
627     }
628 
629     /** @hide draft / provisional / internal are hidden on OHOS*/
630     private final int[] ascii;
631 
632     /** @hide draft / provisional / internal are hidden on OHOS*/
633     private final char[] index;
634 
635     /**
636      * @deprecated This API is ICU internal only.
637      * @hide draft / provisional / internal are hidden on OHOS
638      */
639     @Deprecated
640     protected final Data data;
641     /**
642      * @deprecated This API is ICU internal only.
643      * @hide draft / provisional / internal are hidden on OHOS
644      */
645     @Deprecated
646     protected final int dataLength;
647     /**
648      * Start of the last range which ends at U+10FFFF.
649      * @deprecated This API is ICU internal only.
650      * @hide draft / provisional / internal are hidden on OHOS
651      */
652     @Deprecated
653     protected final int highStart;
654 
655     /**
656      * Internal index-3 null block offset.
657      * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
658      * @hide draft / provisional / internal are hidden on OHOS
659      */
660     private final int index3NullOffset;
661     /**
662      * Internal data null block offset, not shifted.
663      * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
664      * @hide draft / provisional / internal are hidden on OHOS
665      */
666     private final int dataNullOffset;
667     /** @hide draft / provisional / internal are hidden on OHOS*/
668     private final int nullValue;
669 
670     /**
671      * @deprecated This API is ICU internal only.
672      * @hide draft / provisional / internal are hidden on OHOS
673      */
674     @Deprecated
fastIndex(int c)675     protected final int fastIndex(int c) {
676         return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK);
677     }
678 
679     /**
680      * @deprecated This API is ICU internal only.
681      * @hide draft / provisional / internal are hidden on OHOS
682      */
683     @Deprecated
smallIndex(Type type, int c)684     protected final int smallIndex(Type type, int c) {
685         // Split into two methods to make this part inline-friendly.
686         // In C, this part is a macro.
687         if (c >= highStart) {
688             return dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
689         }
690         return internalSmallIndex(type, c);
691     }
692 
internalSmallIndex(Type type, int c)693     private final int internalSmallIndex(Type type, int c) {
694         int i1 = c >> SHIFT_1;
695         if (type == Type.FAST) {
696             assert(0xffff < c && c < highStart);
697             i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
698         } else {
699             assert(0 <= c && c < highStart && highStart > SMALL_LIMIT);
700             i1 += SMALL_INDEX_LENGTH;
701         }
702         int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
703         int i3 = (c >> SHIFT_3) & INDEX_3_MASK;
704         int dataBlock;
705         if ((i3Block & 0x8000) == 0) {
706             // 16-bit indexes
707             dataBlock = index[i3Block + i3];
708         } else {
709             // 18-bit indexes stored in groups of 9 entries per 8 indexes.
710             i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
711             i3 &= 7;
712             dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000;
713             dataBlock |= index[i3Block + i3];
714         }
715         return dataBlock + (c & SMALL_DATA_MASK);
716     }
717 
718     /**
719      * @deprecated This API is ICU internal only.
720      * @hide draft / provisional / internal are hidden on OHOS
721      */
722     @Deprecated
cpIndex(int c)723     protected abstract int cpIndex(int c);
724 
725     /**
726      * A CodePointTrie with {@link Type#FAST}.
727      *
728      * @hide exposed on OHOS
729      */
730     public static abstract class Fast extends CodePointTrie {
Fast(char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset)731         private Fast(char[] index, Data data, int highStart,
732                 int index3NullOffset, int dataNullOffset) {
733             super(index, data, highStart, index3NullOffset, dataNullOffset);
734         }
735 
736         /**
737          * Creates a trie from its binary form.
738          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
739          * with {@link Type#FAST}.
740          *
741          * @param valueWidth selects the number of bits in a data value; this method throws an exception
742          *                  if the valueWidth does not match the binary data;
743          *                  use null to accept any data value width
744          * @param bytes a buffer containing the binary data of a CodePointTrie
745          * @return the trie
746          */
fromBinary(ValueWidth valueWidth, ByteBuffer bytes)747         public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
748             return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes);
749         }
750 
751         /**
752          * @return {@link Type#FAST}
753          */
754         @Override
getType()755         public final Type getType() { return Type.FAST; }
756 
757         /**
758          * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
759          * Can be used to look up a value for a UTF-16 code unit if other parts of
760          * the string processing check for surrogates.
761          *
762          * @param c the input code point, must be U+0000..U+FFFF
763          * @return The BMP code point's trie value.
764          */
bmpGet(int c)765         public abstract int bmpGet(int c);
766 
767         /**
768          * Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
769          * without range checking.
770          *
771          * @param c the input code point, must be U+10000..U+10FFFF
772          * @return The supplementary code point's trie value.
773          */
suppGet(int c)774         public abstract int suppGet(int c);
775 
776         /**
777          * @deprecated This API is ICU internal only.
778          * @hide draft / provisional / internal are hidden on OHOS
779          */
780         @Deprecated
781         @Override
cpIndex(int c)782         protected final int cpIndex(int c) {
783             if (c >= 0) {
784                 if (c <= 0xffff) {
785                     return fastIndex(c);
786                 } else if (c <= 0x10ffff) {
787                     return smallIndex(Type.FAST, c);
788                 }
789             }
790             return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
791         }
792 
793         /**
794          * {@inheritDoc}
795          */
796         @Override
stringIterator(CharSequence s, int sIndex)797         public final StringIterator stringIterator(CharSequence s, int sIndex) {
798             return new FastStringIterator(s, sIndex);
799         }
800 
801         private final class FastStringIterator extends StringIterator {
FastStringIterator(CharSequence s, int sIndex)802             private FastStringIterator(CharSequence s, int sIndex) {
803                 super(s, sIndex);
804             }
805 
806             @Override
next()807             public boolean next() {
808                 if (sIndex >= s.length()) {
809                     return false;
810                 }
811                 char lead = s.charAt(sIndex++);
812                 c = lead;
813                 int dataIndex;
814                 if (!Character.isSurrogate(lead)) {
815                     dataIndex = fastIndex(c);
816                 } else {
817                     char trail;
818                     if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
819                             Character.isLowSurrogate(trail = s.charAt(sIndex))) {
820                         ++sIndex;
821                         c = Character.toCodePoint(lead, trail);
822                         dataIndex = smallIndex(Type.FAST, c);
823                     } else {
824                         dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
825                     }
826                 }
827                 value = data.getFromIndex(dataIndex);
828                 return true;
829             }
830 
831             @Override
previous()832             public boolean previous() {
833                 if (sIndex <= 0) {
834                     return false;
835                 }
836                 char trail = s.charAt(--sIndex);
837                 c = trail;
838                 int dataIndex;
839                 if (!Character.isSurrogate(trail)) {
840                     dataIndex = fastIndex(c);
841                 } else {
842                     char lead;
843                     if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
844                             Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
845                         --sIndex;
846                         c = Character.toCodePoint(lead, trail);
847                         dataIndex = smallIndex(Type.FAST, c);
848                     } else {
849                         dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
850                     }
851                 }
852                 value = data.getFromIndex(dataIndex);
853                 return true;
854             }
855         }
856     }
857 
858     /**
859      * A CodePointTrie with {@link Type#SMALL}.
860      *
861      * @hide exposed on OHOS
862      */
863     public static abstract class Small extends CodePointTrie {
Small(char[] index, Data data, int highStart, int index3NullOffset, int dataNullOffset)864         private Small(char[] index, Data data, int highStart,
865                 int index3NullOffset, int dataNullOffset) {
866             super(index, data, highStart, index3NullOffset, dataNullOffset);
867         }
868 
869         /**
870          * Creates a trie from its binary form.
871          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
872          * with {@link Type#SMALL}.
873          *
874          * @param valueWidth selects the number of bits in a data value; this method throws an exception
875          *                  if the valueWidth does not match the binary data;
876          *                  use null to accept any data value width
877          * @param bytes a buffer containing the binary data of a CodePointTrie
878          * @return the trie
879          */
fromBinary(ValueWidth valueWidth, ByteBuffer bytes)880         public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
881             return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes);
882         }
883 
884         /**
885          * @return {@link Type#SMALL}
886          */
887         @Override
getType()888         public final Type getType() { return Type.SMALL; }
889 
890         /**
891          * @deprecated This API is ICU internal only.
892          * @hide draft / provisional / internal are hidden on OHOS
893          */
894         @Deprecated
895         @Override
cpIndex(int c)896         protected final int cpIndex(int c) {
897             if (c >= 0) {
898                 if (c <= SMALL_MAX) {
899                     return fastIndex(c);
900                 } else if (c <= 0x10ffff) {
901                     return smallIndex(Type.SMALL, c);
902                 }
903             }
904             return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
905         }
906 
907         /**
908          * {@inheritDoc}
909          */
910         @Override
stringIterator(CharSequence s, int sIndex)911         public final StringIterator stringIterator(CharSequence s, int sIndex) {
912             return new SmallStringIterator(s, sIndex);
913         }
914 
915         private final class SmallStringIterator extends StringIterator {
SmallStringIterator(CharSequence s, int sIndex)916             private SmallStringIterator(CharSequence s, int sIndex) {
917                 super(s, sIndex);
918             }
919 
920             @Override
next()921             public boolean next() {
922                 if (sIndex >= s.length()) {
923                     return false;
924                 }
925                 char lead = s.charAt(sIndex++);
926                 c = lead;
927                 int dataIndex;
928                 if (!Character.isSurrogate(lead)) {
929                     dataIndex = cpIndex(c);
930                 } else {
931                     char trail;
932                     if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
933                             Character.isLowSurrogate(trail = s.charAt(sIndex))) {
934                         ++sIndex;
935                         c = Character.toCodePoint(lead, trail);
936                         dataIndex = smallIndex(Type.SMALL, c);
937                     } else {
938                         dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
939                     }
940                 }
941                 value = data.getFromIndex(dataIndex);
942                 return true;
943             }
944 
945             @Override
previous()946             public boolean previous() {
947                 if (sIndex <= 0) {
948                     return false;
949                 }
950                 char trail = s.charAt(--sIndex);
951                 c = trail;
952                 int dataIndex;
953                 if (!Character.isSurrogate(trail)) {
954                     dataIndex = cpIndex(c);
955                 } else {
956                     char lead;
957                     if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
958                             Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
959                         --sIndex;
960                         c = Character.toCodePoint(lead, trail);
961                         dataIndex = smallIndex(Type.SMALL, c);
962                     } else {
963                         dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
964                     }
965                 }
966                 value = data.getFromIndex(dataIndex);
967                 return true;
968             }
969         }
970     }
971 
972     /**
973      * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
974      *
975      * @hide exposed on OHOS
976      */
977     public static final class Fast16 extends Fast {
978         private final char[] dataArray;
979 
Fast16(char[] index, char[] data16, int highStart, int index3NullOffset, int dataNullOffset)980         Fast16(char[] index, char[] data16, int highStart,
981                 int index3NullOffset, int dataNullOffset) {
982             super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
983             this.dataArray = data16;
984         }
985 
986         /**
987          * Creates a trie from its binary form.
988          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
989          * with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
990          *
991          * @param bytes a buffer containing the binary data of a CodePointTrie
992          * @return the trie
993          */
fromBinary(ByteBuffer bytes)994         public static Fast16 fromBinary(ByteBuffer bytes) {
995             return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes);
996         }
997 
998         /**
999          * {@inheritDoc}
1000          */
1001         @Override
get(int c)1002         public final int get(int c) {
1003             return dataArray[cpIndex(c)];
1004         }
1005 
1006         /**
1007          * {@inheritDoc}
1008          */
1009         @Override
bmpGet(int c)1010         public final int bmpGet(int c) {
1011             assert 0 <= c && c <= 0xffff;
1012             return dataArray[fastIndex(c)];
1013         }
1014 
1015         /**
1016          * {@inheritDoc}
1017          */
1018         @Override
suppGet(int c)1019         public final int suppGet(int c) {
1020             assert 0x10000 <= c && c <= 0x10ffff;
1021             return dataArray[smallIndex(Type.FAST, c)];
1022         }
1023     }
1024 
1025     /**
1026      * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
1027      *
1028      * @hide exposed on OHOS
1029      */
1030     public static final class Fast32 extends Fast {
1031         private final int[] dataArray;
1032 
Fast32(char[] index, int[] data32, int highStart, int index3NullOffset, int dataNullOffset)1033         Fast32(char[] index, int[] data32, int highStart,
1034                 int index3NullOffset, int dataNullOffset) {
1035             super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
1036             this.dataArray = data32;
1037         }
1038 
1039         /**
1040          * Creates a trie from its binary form.
1041          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1042          * with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
1043          *
1044          * @param bytes a buffer containing the binary data of a CodePointTrie
1045          * @return the trie
1046          */
fromBinary(ByteBuffer bytes)1047         public static Fast32 fromBinary(ByteBuffer bytes) {
1048             return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes);
1049         }
1050 
1051         /**
1052          * {@inheritDoc}
1053          */
1054         @Override
get(int c)1055         public final int get(int c) {
1056             return dataArray[cpIndex(c)];
1057         }
1058 
1059         /**
1060          * {@inheritDoc}
1061          */
1062         @Override
bmpGet(int c)1063         public final int bmpGet(int c) {
1064             assert 0 <= c && c <= 0xffff;
1065             return dataArray[fastIndex(c)];
1066         }
1067 
1068         /**
1069          * {@inheritDoc}
1070          */
1071         @Override
suppGet(int c)1072         public final int suppGet(int c) {
1073             assert 0x10000 <= c && c <= 0x10ffff;
1074             return dataArray[smallIndex(Type.FAST, c)];
1075         }
1076     }
1077 
1078     /**
1079      * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
1080      *
1081      * @hide exposed on OHOS
1082      */
1083     public static final class Fast8 extends Fast {
1084         private final byte[] dataArray;
1085 
Fast8(char[] index, byte[] data8, int highStart, int index3NullOffset, int dataNullOffset)1086         Fast8(char[] index, byte[] data8, int highStart,
1087                 int index3NullOffset, int dataNullOffset) {
1088             super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
1089             this.dataArray = data8;
1090         }
1091 
1092         /**
1093          * Creates a trie from its binary form.
1094          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1095          * with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
1096          *
1097          * @param bytes a buffer containing the binary data of a CodePointTrie
1098          * @return the trie
1099          */
fromBinary(ByteBuffer bytes)1100         public static Fast8 fromBinary(ByteBuffer bytes) {
1101             return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes);
1102         }
1103 
1104         /**
1105          * {@inheritDoc}
1106          */
1107         @Override
get(int c)1108         public final int get(int c) {
1109             return dataArray[cpIndex(c)] & 0xff;
1110         }
1111 
1112         /**
1113          * {@inheritDoc}
1114          */
1115         @Override
bmpGet(int c)1116         public final int bmpGet(int c) {
1117             assert 0 <= c && c <= 0xffff;
1118             return dataArray[fastIndex(c)] & 0xff;
1119         }
1120 
1121         /**
1122          * {@inheritDoc}
1123          */
1124         @Override
suppGet(int c)1125         public final int suppGet(int c) {
1126             assert 0x10000 <= c && c <= 0x10ffff;
1127             return dataArray[smallIndex(Type.FAST, c)] & 0xff;
1128         }
1129     }
1130 
1131     /**
1132      * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
1133      *
1134      * @hide exposed on OHOS
1135      */
1136     public static final class Small16 extends Small {
Small16(char[] index, char[] data16, int highStart, int index3NullOffset, int dataNullOffset)1137         Small16(char[] index, char[] data16, int highStart,
1138                 int index3NullOffset, int dataNullOffset) {
1139             super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
1140         }
1141 
1142         /**
1143          * Creates a trie from its binary form.
1144          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1145          * with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
1146          *
1147          * @param bytes a buffer containing the binary data of a CodePointTrie
1148          * @return the trie
1149          */
fromBinary(ByteBuffer bytes)1150         public static Small16 fromBinary(ByteBuffer bytes) {
1151             return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes);
1152         }
1153     }
1154 
1155     /**
1156      * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
1157      *
1158      * @hide exposed on OHOS
1159      */
1160     public static final class Small32 extends Small {
Small32(char[] index, int[] data32, int highStart, int index3NullOffset, int dataNullOffset)1161         Small32(char[] index, int[] data32, int highStart,
1162                 int index3NullOffset, int dataNullOffset) {
1163             super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
1164         }
1165 
1166         /**
1167          * Creates a trie from its binary form.
1168          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1169          * with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
1170          *
1171          * @param bytes a buffer containing the binary data of a CodePointTrie
1172          * @return the trie
1173          */
fromBinary(ByteBuffer bytes)1174         public static Small32 fromBinary(ByteBuffer bytes) {
1175             return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes);
1176         }
1177     }
1178 
1179     /**
1180      * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
1181      *
1182      * @hide exposed on OHOS
1183      */
1184     public static final class Small8 extends Small {
Small8(char[] index, byte[] data8, int highStart, int index3NullOffset, int dataNullOffset)1185         Small8(char[] index, byte[] data8, int highStart,
1186                 int index3NullOffset, int dataNullOffset) {
1187             super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
1188         }
1189 
1190         /**
1191          * Creates a trie from its binary form.
1192          * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1193          * with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
1194          *
1195          * @param bytes a buffer containing the binary data of a CodePointTrie
1196          * @return the trie
1197          */
fromBinary(ByteBuffer bytes)1198         public static Small8 fromBinary(ByteBuffer bytes) {
1199             return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);
1200         }
1201     }
1202 }
1203