• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2009-2014, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.impl;
12 
13 import java.io.DataOutputStream;
14 import java.io.IOException;
15 import java.io.OutputStream;
16 import java.nio.ByteBuffer;
17 
18 /**
19  * @author aheninger
20  *
21  * A read-only Trie2, holding 32 bit data values.
22  *
23  * A Trie2 is a highly optimized data structure for mapping from Unicode
24  * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
25  *
26  * See class Trie2 for descriptions of the API for accessing the contents of a trie.
27  *
28  * The fundamental data access methods are declared final in this class, with
29  * the intent that applications might gain a little extra performance, when compared
30  * with calling the same methods via the abstract UTrie2 base class.
31  * @hide exposed on OHOS
32  */
33 
34 public class Trie2_32 extends Trie2 {
35 
36     /**
37      * Internal constructor, not for general use.
38      */
Trie2_32()39     Trie2_32() {
40     }
41 
42 
43     /**
44      * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
45      * The serialized format is identical between ICU4C and ICU4J, so this function
46      * will work with serialized Trie2s from either.
47      *
48      * The serialized Trie2 in the bytes may be in either little or big endian byte order.
49      * This allows using serialized Tries from ICU4C without needing to consider the
50      * byte order of the system that created them.
51      *
52      * @param bytes a byte buffer to the serialized form of a UTrie2.
53      * @return An unserialized Trie_32, ready for use.
54      * @throws IllegalArgumentException if the stream does not contain a serialized Trie2.
55      * @throws IOException if a read error occurs in the buffer.
56      * @throws ClassCastException if the bytes contains a serialized Trie2_16
57      */
createFromSerialized(ByteBuffer bytes)58     public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException {
59         return (Trie2_32) Trie2.createFromSerialized(bytes);
60     }
61 
62     /**
63      * Get the value for a code point as stored in the Trie2.
64      *
65      * @param codePoint the code point
66      * @return the value
67      */
68     @Override
get(int codePoint)69     public final int get(int codePoint) {
70         int value;
71         int ix;
72 
73         if (codePoint >= 0) {
74             if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
75                 // Ordinary BMP code point, excluding leading surrogates.
76                 // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
77                 // 32 bit data is stored in the index array itself.
78                 ix = index[codePoint >> UTRIE2_SHIFT_2];
79                 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
80                 value = data32[ix];
81                 return value;
82             }
83             if (codePoint <= 0xffff) {
84                 // Lead Surrogate Code Point.  A Separate index section is stored for
85                 // lead surrogate code units and code points.
86                 //   The main index has the code unit data.
87                 //   For this function, we need the code point data.
88                 // Note: this expression could be refactored for slightly improved efficiency, but
89                 //       surrogate code points will be so rare in practice that it's not worth it.
90                 ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
91                 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
92                 value = data32[ix];
93                 return value;
94             }
95             if (codePoint < highStart) {
96                 // Supplemental code point, use two-level lookup.
97                 ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
98                 ix = index[ix];
99                 ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
100                 ix = index[ix];
101                 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
102                 value = data32[ix];
103                 return value;
104             }
105             if (codePoint <= 0x10ffff) {
106                 value = data32[highValueIndex];
107                 return value;
108             }
109         }
110 
111         // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
112         return errorValue;
113     }
114 
115 
116     /**
117      * Get a Trie2 value for a UTF-16 code unit.
118      *
119      * This function returns the same value as get() if the input
120      * character is outside of the lead surrogate range
121      *
122      * There are two values stored in a Trie2 for inputs in the lead
123      * surrogate range.  This function returns the alternate value,
124      * while Trie2.get() returns the main value.
125      *
126      * @param codeUnit a 16 bit code unit or lead surrogate value.
127      * @return the value
128      */
129     @Override
getFromU16SingleLead(char codeUnit)130     public int getFromU16SingleLead(char codeUnit){
131         int value;
132         int ix;
133 
134         ix = index[codeUnit >> UTRIE2_SHIFT_2];
135         ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
136         value = data32[ix];
137         return value;
138 
139     }
140 
141     /**
142      * Serialize a Trie2_32 onto an OutputStream.
143      *
144      * A Trie2 can be serialized multiple times.
145      * The serialized data is compatible with ICU4C UTrie2 serialization.
146      * Trie2 serialization is unrelated to Java object serialization.
147      *
148      * @param os the stream to which the serialized Trie2 data will be written.
149      * @return the number of bytes written.
150      * @throw IOException on an error writing to the OutputStream.
151      */
serialize(OutputStream os)152     public int serialize(OutputStream os) throws IOException {
153         DataOutputStream dos = new DataOutputStream(os);
154         int  bytesWritten = 0;
155 
156         bytesWritten += serializeHeader(dos);
157         for (int i=0; i<dataLength; i++) {
158             dos.writeInt(data32[i]);
159         }
160         bytesWritten += dataLength*4;
161         return bytesWritten;
162     }
163 
164     /**
165      * @return the number of bytes of the serialized trie
166      */
getSerializedLength()167     public int getSerializedLength() {
168         return 16+header.indexLength*2+dataLength*4;
169     }
170 
171     /**
172      * Given a starting code point, find the last in a range of code points,
173      * all with the same value.
174      *
175      * This function is part of the implementation of iterating over the
176      * Trie2's contents.
177      * @param startingCP The code point at which to begin looking.
178      * @return The last code point with the same value as the starting code point.
179      */
180     @Override
rangeEnd(int startingCP, int limit, int value)181     int rangeEnd(int startingCP, int limit, int value) {
182         int   cp = startingCP;
183         int   block = 0;
184         int   index2Block = 0;
185 
186         // Loop runs once for each of
187         //   - a partial data block
188         //   - a reference to the null (default) data block.
189         //   - a reference to the index2 null block
190 
191       outerLoop:
192         for (;;) {
193             if (cp >= limit) {
194                 break;
195             }
196             if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
197                 // Ordinary BMP code point, excluding leading surrogates.
198                 // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
199                 // 16 bit data is stored in the index array itself.
200                 index2Block = 0;
201                 block       = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
202             } else if (cp < 0xffff) {
203                 // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
204                 index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
205                 block       = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
206             } else if (cp < highStart) {
207                 // Supplemental code point, use two-level lookup.
208                 int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
209                 index2Block = index[ix];
210                 block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
211             } else  {
212                 // Code point above highStart.
213                 if (value == data32[highValueIndex]) {
214                     cp = limit;
215                 }
216                 break;
217             }
218 
219             if (index2Block == index2NullOffset) {
220                 if (value != initialValue) {
221                     break;
222                 }
223                 cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
224             } else if (block == dataNullOffset) {
225                 // The block at dataNullOffset has all values == initialValue.
226                 // Because Trie2 iteration always proceeds in ascending order, we will always
227                 //   encounter a null block at its beginning, and can skip over
228                 //   a number of code points equal to the length of the block.
229                 if (value != initialValue) {
230                     break;
231                 }
232                 cp += UTRIE2_DATA_BLOCK_LENGTH;
233             } else {
234                 // Current position refers to an ordinary data block.
235                 // Walk over the data entries, checking the values.
236                 int startIx = block + (cp & UTRIE2_DATA_MASK);
237                 int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
238                 for (int ix = startIx; ix<limitIx; ix++) {
239                     if (data32[ix] != value) {
240                         // We came to an entry with a different value.
241                         //   We are done.
242                         cp += (ix - startIx);
243                         break outerLoop;
244                     }
245                 }
246                 // The ordinary data block contained our value until its end.
247                 //  Advance the current code point, and continue the outer loop.
248                 cp += limitIx - startIx;
249             }
250         }
251         if (cp > limit) {
252             cp = limit;
253         }
254 
255         return cp - 1;
256     }
257 
258 }
259 
260