• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2009-2014, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.impl;
12 
13 import java.io.DataOutputStream;
14 import java.io.IOException;
15 import java.io.OutputStream;
16 import java.nio.ByteBuffer;
17 
18 
19 /**
20  * @author aheninger
21  *
22  * A read-only Trie2, holding 16 bit data values.
23  *
24  * A Trie2 is a highly optimized data structure for mapping from Unicode
25  * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
26  *
27  * See class Trie2 for descriptions of the API for accessing the contents of a trie.
28  *
29  * The fundamental data access methods are declared final in this class, with
30  * the intent that applications might gain a little extra performance, when compared
31  * with calling the same methods via the abstract UTrie2 base class.
32  * @hide exposed on OHOS
33  */
34 public final class Trie2_16 extends Trie2 {
35 
36 
37     /**
38      *  Internal constructor, not for general use.
39      */
Trie2_16()40     Trie2_16() {
41     }
42 
43 
44     /**
45      * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
46      * The serialized format is identical between ICU4C and ICU4J, so this function
47      * will work with serialized Trie2s from either.
48      *
49      * The serialized Trie2 in the bytes may be in either little or big endian byte order.
50      * This allows using serialized Tries from ICU4C without needing to consider the
51      * byte order of the system that created them.
52      *
53      * @param bytes a byte buffer to the serialized form of a UTrie2.
54      * @return An unserialized Trie2_16, ready for use.
55      * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2.
56      * @throws IOException if a read error occurs in the buffer.
57      * @throws ClassCastException if the bytes contain a serialized Trie2_32
58      */
createFromSerialized(ByteBuffer bytes)59     public static Trie2_16  createFromSerialized(ByteBuffer bytes) throws IOException {
60         return (Trie2_16) Trie2.createFromSerialized(bytes);
61     }
62 
63     /**
64      * Get the value for a code point as stored in the Trie2.
65      *
66      * @param codePoint the code point
67      * @return the value
68      */
69     @Override
get(int codePoint)70     public final int get(int codePoint) {
71         int value;
72         int ix;
73 
74         if (codePoint >= 0) {
75             if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
76                 // Ordinary BMP code point, excluding leading surrogates.
77                 // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
78                 // 16 bit data is stored in the index array itself.
79                 ix = index[codePoint >> UTRIE2_SHIFT_2];
80                 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
81                 value = index[ix];
82                 return value;
83             }
84             if (codePoint <= 0xffff) {
85                 // Lead Surrogate Code Point.  A Separate index section is stored for
86                 // lead surrogate code units and code points.
87                 //   The main index has the code unit data.
88                 //   For this function, we need the code point data.
89                 // Note: this expression could be refactored for slightly improved efficiency, but
90                 //       surrogate code points will be so rare in practice that it's not worth it.
91                 ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
92                 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
93                 value = index[ix];
94                 return value;
95             }
96             if (codePoint < highStart) {
97                 // Supplemental code point, use two-level lookup.
98                 ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
99                 ix = index[ix];
100                 ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
101                 ix = index[ix];
102                 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
103                 value = index[ix];
104                 return value;
105             }
106             if (codePoint <= 0x10ffff) {
107                 value = index[highValueIndex];
108                 return value;
109             }
110         }
111 
112         // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
113         return errorValue;
114     }
115 
116 
117     /**
118      * Get a Trie2 value for a UTF-16 code unit.
119      *
120      * This function returns the same value as get() if the input
121      * character is outside of the lead surrogate range
122      *
123      * There are two values stored in a Trie2 for inputs in the lead
124      * surrogate range.  This function returns the alternate value,
125      * while Trie2.get() returns the main value.
126      *
127      * @param codeUnit a 16 bit code unit or lead surrogate value.
128      * @return the value
129      */
130     @Override
getFromU16SingleLead(char codeUnit)131     public int getFromU16SingleLead(char codeUnit) {
132         int value;
133         int ix;
134 
135         // Because the input is a 16 bit char, we can skip the tests for it being in
136         // the BMP range.  It is.
137         ix = index[codeUnit >> UTRIE2_SHIFT_2];
138         ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
139         value = index[ix];
140         return value;
141     }
142 
143 
144     /**
145      * Serialize a Trie2_16 onto an OutputStream.
146      *
147      * A Trie2 can be serialized multiple times.
148      * The serialized data is compatible with ICU4C UTrie2 serialization.
149      * Trie2 serialization is unrelated to Java object serialization.
150      *
151      * @param os the stream to which the serialized Trie2 data will be written.
152      * @return the number of bytes written.
153      * @throw IOException on an error writing to the OutputStream.
154      */
serialize(OutputStream os)155     public int serialize(OutputStream os) throws IOException {
156         DataOutputStream dos = new DataOutputStream(os);
157         int  bytesWritten = 0;
158 
159         bytesWritten += serializeHeader(dos);
160         for (int i=0; i<dataLength; i++) {
161             dos.writeChar(index[data16+i]);
162         }
163         bytesWritten += dataLength*2;
164         return bytesWritten;
165     }
166 
167     /**
168      * @return the number of bytes of the serialized trie
169      */
getSerializedLength()170     public int getSerializedLength() {
171         return 16+(header.indexLength+dataLength)*2;
172     }
173 
174     /**
175      * Given a starting code point, find the last in a range of code points,
176      * all with the same value.
177      *
178      * This function is part of the implementation of iterating over the
179      * Trie2's contents.
180      * @param startingCP The code point at which to begin looking.
181      * @return The last code point with the same value as the starting code point.
182      */
183     @Override
rangeEnd(int startingCP, int limit, int value)184     int rangeEnd(int startingCP, int limit, int value) {
185         int   cp = startingCP;
186         int   block = 0;
187         int   index2Block = 0;
188 
189         // Loop runs once for each of
190         //   - a partial data block
191         //   - a reference to the null (default) data block.
192         //   - a reference to the index2 null block
193 
194       outerLoop:
195         for (;;) {
196             if (cp >= limit) {
197                 break;
198             }
199             if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
200                 // Ordinary BMP code point, excluding leading surrogates.
201                 // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
202                 // 16 bit data is stored in the index array itself.
203                 index2Block = 0;
204                 block       = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
205             } else if (cp < 0xffff) {
206                 // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
207                 index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
208                 block       = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
209             } else if (cp < highStart) {
210                 // Supplemental code point, use two-level lookup.
211                 int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
212                 index2Block = index[ix];
213                 block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
214             } else  {
215                 // Code point above highStart.
216                 if (value == index[highValueIndex]) {
217                     cp = limit;
218                 }
219                 break;
220             }
221 
222             if (index2Block == index2NullOffset) {
223                 if (value != initialValue) {
224                     break;
225                 }
226                 cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
227             } else if (block == dataNullOffset) {
228                 // The block at dataNullOffset has all values == initialValue.
229                 // Because Trie2 iteration always proceeds in ascending order, we will always
230                 //   encounter a null block at its beginning, and can skip over
231                 //   a number of code points equal to the length of the block.
232                 if (value != initialValue) {
233                     break;
234                 }
235                 cp += UTRIE2_DATA_BLOCK_LENGTH;
236             } else {
237                 // Current position refers to an ordinary data block.
238                 // Walk over the data entries, checking the values.
239                 int startIx = block + (cp & UTRIE2_DATA_MASK);
240                 int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
241                 for (int ix = startIx; ix<limitIx; ix++) {
242                     if (index[ix] != value) {
243                         // We came to an entry with a different value.
244                         //   We are done.
245                         cp += (ix - startIx);
246                         break outerLoop;
247                     }
248                 }
249                 // The ordinary data block contained our value until its end.
250                 //  Advance the current code point, and continue the outerloop.
251                 cp += limitIx - startIx;
252             }
253         }
254         if (cp > limit) {
255             cp = limit;
256         }
257 
258         return cp - 1;
259     }
260 }
261