/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 ******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 ******************************************************************************
 */

package ohos.global.icu.impl;

import java.nio.ByteBuffer;
import java.util.Arrays;

import ohos.global.icu.text.UTF16;

/**
 * Trie implementation which stores data in char, 16 bits.
 * @author synwee
 * @see ohos.global.icu.impl.Trie
 * @hide exposed on OHOS
 */

// note that i need to handle the block calculations later, since chartrie
// in icu4c uses the same index array.
public class CharTrie extends Trie
{
    // public constructors ---------------------------------------------

    /**
     * <p>Creates a new Trie with the settings for the trie data.</p>
     * <p>Unserialize the 32-bit-aligned input buffer and use the data for the
     * trie.</p>
     * @param bytes data of an ICU data file, containing the trie
     * @param dataManipulate object which provides methods to parse the char
     *                        data
     * @throws IllegalArgumentException if the data does not belong to a char
     *                                  trie
     */
    public CharTrie(ByteBuffer bytes, DataManipulate dataManipulate) {
        super(bytes, dataManipulate);

        if (!isCharTrie()) {
            throw new IllegalArgumentException(
                               "Data given does not belong to a char trie.");
        }
    }

    /**
     * Make a dummy CharTrie.
     * A dummy trie is an empty runtime trie, used when a real data trie cannot
     * be loaded.
     *
     * The trie always returns the initialValue,
     * or the leadUnitValue for lead surrogate code points.
     * The Latin-1 part is always set up to be linear.
     *
     * @param initialValue the initial value that is set for all code points
     * @param leadUnitValue the value for lead surrogate code _units_ that do not
     *                      have associated supplementary data
     * @param dataManipulate object which provides methods to parse the char data
     */
    @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
    public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
        super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);

        /* calculate the actual size of the dummy trie data */

        /* max(Latin-1, block 0) */
        final int latin1Length = INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
        /* a second data block is only needed when lead units get their own value */
        final boolean hasLeadBlock = leadUnitValue != initialValue;
        final int dataLength = hasLeadBlock ? latin1Length + DATA_BLOCK_LENGTH : latin1Length;

        m_data_ = new char[dataLength];
        m_dataLength_ = dataLength;

        m_initialValue_ = (char)initialValue;

        /* fill the index and data arrays */

        /* indexes are preset to 0 (block 0) */

        /* Latin-1 data */
        Arrays.fill(m_data_, 0, latin1Length, (char)initialValue);

        if (hasLeadBlock) {
            /* indexes for lead surrogate code units to the block after Latin-1 */
            final char block = (char)(latin1Length >> INDEX_STAGE_2_SHIFT_);
            Arrays.fill(m_index_,
                        0xd800 >> INDEX_STAGE_1_SHIFT_,
                        0xdc00 >> INDEX_STAGE_1_SHIFT_,
                        block);

            /* data for lead surrogate code units */
            Arrays.fill(m_data_, latin1Length, latin1Length + DATA_BLOCK_LENGTH,
                        (char)leadUnitValue);
        }
    }

    // public methods --------------------------------------------------

    /**
     * Gets the value associated with the codepoint.
     * If no value is associated with the codepoint, a default value will be
     * returned.
     * @param ch codepoint
     * @return trie data value associated with the codepoint, or the initial
     *         value if no data offset could be determined
     */
    public final char getCodePointValue(int ch)
    {
        int offset;

        // fastpath for U+0000..U+D7FF
        if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            // copy of getRawOffset()
            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
                    + (ch & INDEX_STAGE_3_MASK_);
            return m_data_[offset];
        }

        // handle U+D800..U+10FFFF
        offset = getCodePointOffset(ch);

        // a negative offset signals an error; in this case we return the
        // default value: m_initialValue_
        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
    }

    /**
     * Gets the value to the data which this lead surrogate character points
     * to.
     * Returned data may contain folding offset information for the next
     * trailing surrogate character.
     * This method does not guarantee correct results for trail surrogates.
     * @param ch lead surrogate character
     * @return data value
     */
    public final char getLeadValue(char ch)
    {
        return m_data_[getLeadOffset(ch)];
    }

    /**
     * Get the value associated with the BMP code point.
     * Lead surrogate code points are treated as normal code points, with
     * unfolded values that may differ from getLeadValue() results.
     * @param ch the input BMP code point
     * @return trie data value associated with the BMP codepoint
     */
    public final char getBMPValue(char ch)
    {
        return m_data_[getBMPOffset(ch)];
    }

    /**
     * Get the value associated with a pair of surrogates.
     * @param lead a lead surrogate
     * @param trail a trail surrogate
     * @return trie data value associated with the surrogate pair, or the
     *         initial value if no data offset could be determined
     */
    public final char getSurrogateValue(char lead, char trail)
    {
        int offset = getSurrogateOffset(lead, trail);
        // getSurrogateOffset() reports failure with -1, so every non-negative
        // offset is a real data offset (same test as in getCodePointValue()).
        if (offset >= 0) {
            return m_data_[offset];
        }
        return m_initialValue_;
    }

    /**
     * <p>Get a value from a folding offset (from the value of a lead surrogate)
     * and a trail surrogate.</p>
     * <p>If the folding offset derived from the lead value is not positive,
     * the initial value is returned.</p>
     * @param leadvalue value associated with the lead surrogate which contains
     *        the folding offset
     * @param trail surrogate
     * @return trie data value associated with the trail character
     * @throws NullPointerException if this trie has no DataManipulate
     */
    public final char getTrailValue(int leadvalue, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }
        int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
        if (offset > 0) {
            return m_data_[getRawOffset(offset,
                                        (char)(trail & SURROGATE_MASK_))];
        }
        return m_initialValue_;
    }

    /**
     * <p>Gets the latin 1 fast path value.</p>
     * <p>Note this only works if latin 1 characters have their own linear
     * array.</p>
     * @param ch latin 1 characters
     * @return value associated with latin character
     */
    public final char getLatin1LinearValue(char ch)
    {
        return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
    }

    /**
     * Checks if the argument Trie has the same data as this Trie
     * @param other Trie to check
     * @return true if the argument Trie has the same data as this Trie, false
     *         otherwise
     */
    ///CLOVER:OFF
    @Override
    public boolean equals(Object other)
    {
        // the superclass comparison must succeed first; on top of that only
        // the initial value is compared here
        return super.equals(other)
                && other instanceof CharTrie
                && m_initialValue_ == ((CharTrie)other).m_initialValue_;
    }

    /**
     * Not designed for use: fails an assertion when assertions are enabled,
     * otherwise returns a constant.
     * @return the constant 42
     */
    @Override
    public int hashCode() {
        assert false : "hashCode not designed";
        return 42;
    }
    ///CLOVER:ON

    // protected methods -----------------------------------------------

    /**
     * <p>Parses the byte buffer and stores its trie content into a index and
     * data array</p>
     * <p>Index and data are read as one char array which both m_index_ and
     * m_data_ refer to; the initial value is the entry at m_dataOffset_.</p>
     * @param bytes buffer containing trie data
     */
    @Override
    protected final void unserialize(ByteBuffer bytes)
    {
        int indexDataLength = m_dataOffset_ + m_dataLength_;
        m_index_ = ICUBinary.getChars(bytes, indexDataLength, 0);
        m_data_ = m_index_;
        m_initialValue_ = m_data_[m_dataOffset_];
    }

    /**
     * Gets the offset to the data which the surrogate pair points to.
     * @param lead lead surrogate
     * @param trail trailing surrogate
     * @return offset to data, or -1 if the lead value carries no positive
     *         folding offset
     * @throws NullPointerException if this trie has no DataManipulate
     */
    @Override
    protected final int getSurrogateOffset(char lead, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }

        // get fold position for the next trail surrogate
        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));

        // get the real data from the folded lead/trail units
        if (offset > 0) {
            return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
        }

        // return -1 if there is an error, in this case the callers fall back
        // to the default value: m_initialValue_
        return -1;
    }

    /**
     * Gets the value at the argument index.
     * For use internally in TrieIterator.
     * @param index value at index will be retrieved
     * @return the 16 bit data value at index, widened to int
     * @see ohos.global.icu.impl.TrieIterator
     */
    @Override
    protected final int getValue(int index)
    {
        return m_data_[index];
    }

    /**
     * Gets the default initial value
     * @return the 16 bit initial value, widened to int
     */
    @Override
    protected final int getInitialValue()
    {
        return m_initialValue_;
    }

    // private data members --------------------------------------------

    /**
     * Default value
     */
    private char m_initialValue_;
    /**
     * Array of char data
     */
    private char[] m_data_;
}