1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2009-2014, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.impl; 12 13 import java.io.DataOutputStream; 14 import java.io.IOException; 15 import java.io.OutputStream; 16 import java.nio.ByteBuffer; 17 18 19 /** 20 * @author aheninger 21 * 22 * A read-only Trie2, holding 16 bit data values. 23 * 24 * A Trie2 is a highly optimized data structure for mapping from Unicode 25 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. 26 * 27 * See class Trie2 for descriptions of the API for accessing the contents of a trie. 28 * 29 * The fundamental data access methods are declared final in this class, with 30 * the intent that applications might gain a little extra performance, when compared 31 * with calling the same methods via the abstract UTrie2 base class. 32 * @hide exposed on OHOS 33 */ 34 public final class Trie2_16 extends Trie2 { 35 36 37 /** 38 * Internal constructor, not for general use. 39 */ Trie2_16()40 Trie2_16() { 41 } 42 43 44 /** 45 * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). 46 * The serialized format is identical between ICU4C and ICU4J, so this function 47 * will work with serialized Trie2s from either. 48 * 49 * The serialized Trie2 in the bytes may be in either little or big endian byte order. 50 * This allows using serialized Tries from ICU4C without needing to consider the 51 * byte order of the system that created them. 52 * 53 * @param bytes a byte buffer to the serialized form of a UTrie2. 54 * @return An unserialized Trie2_16, ready for use. 55 * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. 56 * @throws IOException if a read error occurs in the buffer. 57 * @throws ClassCastException if the bytes contain a serialized Trie2_32 58 */ createFromSerialized(ByteBuffer bytes)59 public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { 60 return (Trie2_16) Trie2.createFromSerialized(bytes); 61 } 62 63 /** 64 * Get the value for a code point as stored in the Trie2. 65 * 66 * @param codePoint the code point 67 * @return the value 68 */ 69 @Override get(int codePoint)70 public final int get(int codePoint) { 71 int value; 72 int ix; 73 74 if (codePoint >= 0) { 75 if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { 76 // Ordinary BMP code point, excluding leading surrogates. 77 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 78 // 16 bit data is stored in the index array itself. 79 ix = index[codePoint >> UTRIE2_SHIFT_2]; 80 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 81 value = index[ix]; 82 return value; 83 } 84 if (codePoint <= 0xffff) { 85 // Lead Surrogate Code Point. A Separate index section is stored for 86 // lead surrogate code units and code points. 87 // The main index has the code unit data. 88 // For this function, we need the code point data. 89 // Note: this expression could be refactored for slightly improved efficiency, but 90 // surrogate code points will be so rare in practice that it's not worth it. 91 ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; 92 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 93 value = index[ix]; 94 return value; 95 } 96 if (codePoint < highStart) { 97 // Supplemental code point, use two-level lookup. 98 ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); 99 ix = index[ix]; 100 ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; 101 ix = index[ix]; 102 ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); 103 value = index[ix]; 104 return value; 105 } 106 if (codePoint <= 0x10ffff) { 107 value = index[highValueIndex]; 108 return value; 109 } 110 } 111 112 // Fall through. The code point is outside of the legal range of 0..0x10ffff. 113 return errorValue; 114 } 115 116 117 /** 118 * Get a Trie2 value for a UTF-16 code unit. 119 * 120 * This function returns the same value as get() if the input 121 * character is outside of the lead surrogate range 122 * 123 * There are two values stored in a Trie2 for inputs in the lead 124 * surrogate range. This function returns the alternate value, 125 * while Trie2.get() returns the main value. 126 * 127 * @param codeUnit a 16 bit code unit or lead surrogate value. 128 * @return the value 129 */ 130 @Override getFromU16SingleLead(char codeUnit)131 public int getFromU16SingleLead(char codeUnit) { 132 int value; 133 int ix; 134 135 // Because the input is a 16 bit char, we can skip the tests for it being in 136 // the BMP range. It is. 137 ix = index[codeUnit >> UTRIE2_SHIFT_2]; 138 ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); 139 value = index[ix]; 140 return value; 141 } 142 143 144 /** 145 * Serialize a Trie2_16 onto an OutputStream. 146 * 147 * A Trie2 can be serialized multiple times. 148 * The serialized data is compatible with ICU4C UTrie2 serialization. 149 * Trie2 serialization is unrelated to Java object serialization. 150 * 151 * @param os the stream to which the serialized Trie2 data will be written. 152 * @return the number of bytes written. 153 * @throw IOException on an error writing to the OutputStream. 154 */ serialize(OutputStream os)155 public int serialize(OutputStream os) throws IOException { 156 DataOutputStream dos = new DataOutputStream(os); 157 int bytesWritten = 0; 158 159 bytesWritten += serializeHeader(dos); 160 for (int i=0; i<dataLength; i++) { 161 dos.writeChar(index[data16+i]); 162 } 163 bytesWritten += dataLength*2; 164 return bytesWritten; 165 } 166 167 /** 168 * @return the number of bytes of the serialized trie 169 */ getSerializedLength()170 public int getSerializedLength() { 171 return 16+(header.indexLength+dataLength)*2; 172 } 173 174 /** 175 * Given a starting code point, find the last in a range of code points, 176 * all with the same value. 177 * 178 * This function is part of the implementation of iterating over the 179 * Trie2's contents. 180 * @param startingCP The code point at which to begin looking. 181 * @return The last code point with the same value as the starting code point. 182 */ 183 @Override rangeEnd(int startingCP, int limit, int value)184 int rangeEnd(int startingCP, int limit, int value) { 185 int cp = startingCP; 186 int block = 0; 187 int index2Block = 0; 188 189 // Loop runs once for each of 190 // - a partial data block 191 // - a reference to the null (default) data block. 192 // - a reference to the index2 null block 193 194 outerLoop: 195 for (;;) { 196 if (cp >= limit) { 197 break; 198 } 199 if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { 200 // Ordinary BMP code point, excluding leading surrogates. 201 // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. 202 // 16 bit data is stored in the index array itself. 203 index2Block = 0; 204 block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; 205 } else if (cp < 0xffff) { 206 // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 207 index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; 208 block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; 209 } else if (cp < highStart) { 210 // Supplemental code point, use two-level lookup. 211 int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); 212 index2Block = index[ix]; 213 block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; 214 } else { 215 // Code point above highStart. 216 if (value == index[highValueIndex]) { 217 cp = limit; 218 } 219 break; 220 } 221 222 if (index2Block == index2NullOffset) { 223 if (value != initialValue) { 224 break; 225 } 226 cp += UTRIE2_CP_PER_INDEX_1_ENTRY; 227 } else if (block == dataNullOffset) { 228 // The block at dataNullOffset has all values == initialValue. 229 // Because Trie2 iteration always proceeds in ascending order, we will always 230 // encounter a null block at its beginning, and can skip over 231 // a number of code points equal to the length of the block. 232 if (value != initialValue) { 233 break; 234 } 235 cp += UTRIE2_DATA_BLOCK_LENGTH; 236 } else { 237 // Current position refers to an ordinary data block. 238 // Walk over the data entries, checking the values. 239 int startIx = block + (cp & UTRIE2_DATA_MASK); 240 int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; 241 for (int ix = startIx; ix<limitIx; ix++) { 242 if (index[ix] != value) { 243 // We came to an entry with a different value. 244 // We are done. 245 cp += (ix - startIx); 246 break outerLoop; 247 } 248 } 249 // The ordinary data block contained our value until its end. 250 // Advance the current code point, and continue the outerloop. 251 cp += limitIx - startIx; 252 } 253 } 254 if (cp > limit) { 255 cp = limit; 256 } 257 258 return cp - 1; 259 } 260 } 261