1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2009-2014, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.impl; 12 13 import java.io.DataOutputStream; 14 import java.io.IOException; 15 import java.io.OutputStream; 16 import java.nio.ByteBuffer; 17 18 /** 19 * @author aheninger 20 * 21 * A read-only Trie2, holding 32 bit data values. 22 * 23 * A Trie2 is a highly optimized data structure for mapping from Unicode 24 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. 25 * 26 * See class Trie2 for descriptions of the API for accessing the contents of a trie. 27 * 28 * The fundamental data access methods are declared final in this class, with 29 * the intent that applications might gain a little extra performance, when compared 30 * with calling the same methods via the abstract UTrie2 base class. 31 * @hide exposed on OHOS 32 */ 33 34 public class Trie2_32 extends Trie2 { 35 36 /** 37 * Internal constructor, not for general use. 38 */ Trie2_32()39 Trie2_32() { 40 } 41 42 43 /** 44 * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). 45 * The serialized format is identical between ICU4C and ICU4J, so this function 46 * will work with serialized Trie2s from either. 47 * 48 * The serialized Trie2 in the bytes may be in either little or big endian byte order. 49 * This allows using serialized Tries from ICU4C without needing to consider the 50 * byte order of the system that created them. 51 * 52 * @param bytes a byte buffer to the serialized form of a UTrie2. 53 * @return An unserialized Trie_32, ready for use. 54 * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. 55 * @throws IOException if a read error occurs in the buffer. 56 * @throws ClassCastException if the bytes contains a serialized Trie2_16 57 */ createFromSerialized(ByteBuffer bytes)58 public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException { 59 return (Trie2_32) Trie2.createFromSerialized(bytes); 60 } 61 62 /** 63 * Get the value for a code point as stored in the Trie2. 64 * 65 * @param codePoint the code point 66 * @return the value 67 */ 68 @Override get(int codePoint)69 public final int get(int codePoint) { 70 int value; 71 int ix; 72 73 if (codePoint >= 0) { 74 if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { 75 // Ordinary BMP code point, excluding leading surrogates. 76 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 77 // 32 bit data is stored in the index array itself. 78 ix = index[codePoint >> UTRIE2_SHIFT_2]; 79 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 80 value = data32[ix]; 81 return value; 82 } 83 if (codePoint <= 0xffff) { 84 // Lead Surrogate Code Point. A Separate index section is stored for 85 // lead surrogate code units and code points. 86 // The main index has the code unit data. 87 // For this function, we need the code point data. 88 // Note: this expression could be refactored for slightly improved efficiency, but 89 // surrogate code points will be so rare in practice that it's not worth it. 90 ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; 91 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 92 value = data32[ix]; 93 return value; 94 } 95 if (codePoint < highStart) { 96 // Supplemental code point, use two-level lookup. 97 ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); 98 ix = index[ix]; 99 ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; 100 ix = index[ix]; 101 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 102 value = data32[ix]; 103 return value; 104 } 105 if (codePoint <= 0x10ffff) { 106 value = data32[highValueIndex]; 107 return value; 108 } 109 } 110 111 // Fall through. The code point is outside of the legal range of 0..0x10ffff. 112 return errorValue; 113 } 114 115 116 /** 117 * Get a Trie2 value for a UTF-16 code unit. 118 * 119 * This function returns the same value as get() if the input 120 * character is outside of the lead surrogate range 121 * 122 * There are two values stored in a Trie2 for inputs in the lead 123 * surrogate range. This function returns the alternate value, 124 * while Trie2.get() returns the main value. 125 * 126 * @param codeUnit a 16 bit code unit or lead surrogate value. 127 * @return the value 128 */ 129 @Override getFromU16SingleLead(char codeUnit)130 public int getFromU16SingleLead(char codeUnit){ 131 int value; 132 int ix; 133 134 ix = index[codeUnit >> UTRIE2_SHIFT_2]; 135 ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); 136 value = data32[ix]; 137 return value; 138 139 } 140 141 /** 142 * Serialize a Trie2_32 onto an OutputStream. 143 * 144 * A Trie2 can be serialized multiple times. 145 * The serialized data is compatible with ICU4C UTrie2 serialization. 146 * Trie2 serialization is unrelated to Java object serialization. 147 * 148 * @param os the stream to which the serialized Trie2 data will be written. 149 * @return the number of bytes written. 150 * @throw IOException on an error writing to the OutputStream. 151 */ serialize(OutputStream os)152 public int serialize(OutputStream os) throws IOException { 153 DataOutputStream dos = new DataOutputStream(os); 154 int bytesWritten = 0; 155 156 bytesWritten += serializeHeader(dos); 157 for (int i=0; i<dataLength; i++) { 158 dos.writeInt(data32[i]); 159 } 160 bytesWritten += dataLength*4; 161 return bytesWritten; 162 } 163 164 /** 165 * @return the number of bytes of the serialized trie 166 */ getSerializedLength()167 public int getSerializedLength() { 168 return 16+header.indexLength*2+dataLength*4; 169 } 170 171 /** 172 * Given a starting code point, find the last in a range of code points, 173 * all with the same value. 174 * 175 * This function is part of the implementation of iterating over the 176 * Trie2's contents. 177 * @param startingCP The code point at which to begin looking. 178 * @return The last code point with the same value as the starting code point. 179 */ 180 @Override rangeEnd(int startingCP, int limit, int value)181 int rangeEnd(int startingCP, int limit, int value) { 182 int cp = startingCP; 183 int block = 0; 184 int index2Block = 0; 185 186 // Loop runs once for each of 187 // - a partial data block 188 // - a reference to the null (default) data block. 189 // - a reference to the index2 null block 190 191 outerLoop: 192 for (;;) { 193 if (cp >= limit) { 194 break; 195 } 196 if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { 197 // Ordinary BMP code point, excluding leading surrogates. 198 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 199 // 16 bit data is stored in the index array itself. 200 index2Block = 0; 201 block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; 202 } else if (cp < 0xffff) { 203 // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 204 index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; 205 block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; 206 } else if (cp < highStart) { 207 // Supplemental code point, use two-level lookup. 208 int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); 209 index2Block = index[ix]; 210 block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; 211 } else { 212 // Code point above highStart. 213 if (value == data32[highValueIndex]) { 214 cp = limit; 215 } 216 break; 217 } 218 219 if (index2Block == index2NullOffset) { 220 if (value != initialValue) { 221 break; 222 } 223 cp += UTRIE2_CP_PER_INDEX_1_ENTRY; 224 } else if (block == dataNullOffset) { 225 // The block at dataNullOffset has all values == initialValue. 226 // Because Trie2 iteration always proceeds in ascending order, we will always 227 // encounter a null block at its beginning, and can skip over 228 // a number of code points equal to the length of the block. 229 if (value != initialValue) { 230 break; 231 } 232 cp += UTRIE2_DATA_BLOCK_LENGTH; 233 } else { 234 // Current position refers to an ordinary data block. 235 // Walk over the data entries, checking the values. 236 int startIx = block + (cp & UTRIE2_DATA_MASK); 237 int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; 238 for (int ix = startIx; ix<limitIx; ix++) { 239 if (data32[ix] != value) { 240 // We came to an entry with a different value. 241 // We are done. 242 cp += (ix - startIx); 243 break outerLoop; 244 } 245 } 246 // The ordinary data block contained our value until its end. 247 // Advance the current code point, and continue the outer loop. 248 cp += limitIx - startIx; 249 } 250 } 251 if (cp > limit) { 252 cp = limit; 253 } 254 255 return cp - 1; 256 } 257 258 } 259 260