// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 ******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 ******************************************************************************
 */

package com.ibm.icu.impl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;

import com.ibm.icu.text.UTF16;

/**
 * A {@link Trie} whose data values are 32-bit ints.
 * 2015-sep-03: Used only in CharsetSelector which could be switched to {@link Trie2_32}
 * as long as that does not load ICU4C selector data.
 *
 * @author synwee
 * @see com.ibm.icu.impl.Trie
 * @since release 2.1, Jan 01 2002
 */
public class IntTrie extends Trie
{
    // public constructors ---------------------------------------------

    /**
     * <p>Builds a trie by unserializing the 32-bit-aligned data found in the
     * given buffer.</p>
     * @param bytes file buffer to a ICU data file, containing the trie
     * @param dataManipulate object which provides methods to parse the char
     *                        data
     * @throws IOException thrown when data reading fails
     * @throws IllegalArgumentException if the data read is not int trie data
     */
    public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate)
                                                    throws IOException
    {
        super(bytes, dataManipulate);
        if (isIntTrie()) {
            return;
        }
        throw new IllegalArgumentException(
                           "Data given does not belong to a int trie.");
    }

    /**
     * Builds a dummy IntTrie: an empty runtime trie for use when a real data
     * trie cannot be loaded.
     *
     * Every code point maps to initialValue, except that lead surrogate code
     * units map to leadUnitValue. The trie is flagged as having a linear
     * Latin-1 range.
     *
     * @param initialValue the initial value that is set for all code points
     * @param leadUnitValue the value for lead surrogate code _units_ that do not
     *                      have associated supplementary data
     * @param dataManipulate object which provides methods to parse the char data
     */
    @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
    public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
        super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);

        /* size of the first data block: max(Latin-1, block 0) */
        final int latin1Length = INDEX_STAGE_1_SHIFT_ <= 8 ? 256 : DATA_BLOCK_LENGTH;

        /* a second block is only needed when lead units carry their own value */
        final boolean needsLeadBlock = leadUnitValue != initialValue;
        final int totalLength = needsLeadBlock
                ? latin1Length + DATA_BLOCK_LENGTH
                : latin1Length;

        m_data_ = new int[totalLength];
        m_dataLength_ = totalLength;
        m_initialValue_ = initialValue;

        /* the index entries are already 0, i.e. they all refer to block 0 */

        /* Latin-1 data */
        Arrays.fill(m_data_, 0, latin1Length, initialValue);

        if (needsLeadBlock) {
            /* point the lead surrogate index entries at the block after Latin-1 */
            final char leadBlock = (char) (latin1Length >> INDEX_STAGE_2_SHIFT_);
            final int indexEnd = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
            for (int idx = 0xd800 >> INDEX_STAGE_1_SHIFT_; idx < indexEnd; ++idx) {
                m_index_[idx] = leadBlock;
            }

            /* data for lead surrogate code units */
            Arrays.fill(m_data_, latin1Length, totalLength, leadUnitValue);
        }
    }

    // public methods --------------------------------------------------

    /**
     * Looks up the value stored for a code point.
     * The initial value is returned when the offset lookup reports no data
     * (a negative offset).
     * @param ch codepoint
     * @return trie data value for the code point, or the initial value
     */
    public final int getCodePointValue(int ch)
    {
        // fastpath for U+0000..U+D7FF, an inlined copy of getRawOffset()
        if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            int blockStart = m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
            return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
        }

        // U+D800..U+10FFFF go through the general offset lookup
        int dataOffset = getCodePointOffset(ch);
        if (dataOffset < 0) {
            return m_initialValue_;
        }
        return m_data_[dataOffset];
    }

    /**
     * Returns the data value that a lead surrogate character points to.
     * The value may carry folding offset information for the following
     * trail surrogate character.
     * Results are not guaranteed to be correct for trail surrogates.
     * @param ch lead surrogate character
     * @return data value
     */
    public final int getLeadValue(char ch)
    {
        int leadOffset = getLeadOffset(ch);
        return m_data_[leadOffset];
    }

    /**
     * Returns the value associated with a BMP code point.
     * Lead surrogate code points are handled like any other code point, so
     * the unfolded value returned here may differ from what getLeadValue()
     * returns.
     * @param ch the input BMP code point
     * @return trie data value associated with the BMP codepoint
     */
    public final int getBMPValue(char ch)
    {
        int bmpOffset = getBMPOffset(ch);
        return m_data_[bmpOffset];
    }

    /**
     * Returns the value associated with a pair of surrogates.
     * @param lead a lead surrogate
     * @param trail a trail surrogate
     * @return the data value for the pair, or the initial value when the
     *         surrogate offset lookup does not yield a positive offset
     * @throws IllegalArgumentException if lead is not a lead surrogate or
     *         trail is not a trail surrogate
     */
    public final int getSurrogateValue(char lead, char trail)
    {
        if (!(UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail))) {
            throw new IllegalArgumentException(
                "Argument characters do not form a supplementary character");
        }

        // resolve the folded lead/trail units to a data offset
        int pairOffset = getSurrogateOffset(lead, trail);

        // a non-positive offset means there is no data: use the initial value
        return pairOffset > 0 ? m_data_[pairOffset] : m_initialValue_;
    }

    /**
     * Returns the value for a trail surrogate, given the value of its lead
     * surrogate, which contains the folding offset.
     * @param leadvalue the value of a lead surrogate that contains the
     *                  folding offset
     * @param trail surrogate
     * @return trie data value associated with the trail character, or the
     *         initial value when the folding offset is not positive
     * @throws NullPointerException if this trie has no DataManipulate
     */
    public final int getTrailValue(int leadvalue, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }

        int foldingOffset = m_dataManipulate_.getFoldingOffset(leadvalue);
        if (foldingOffset <= 0) {
            return m_initialValue_;
        }

        char trailBits = (char)(trail & SURROGATE_MASK_);
        return m_data_[getRawOffset(foldingOffset, trailBits)];
    }

    /**
     * <p>Latin-1 fast path lookup.</p>
     * <p>Note this only works if latin 1 characters have their own linear
     * array.</p>
     * @param ch latin 1 characters
     * @return value associated with latin character
     */
    public final int getLatin1LinearValue(char ch)
    {
        // the linear Latin-1 values are read starting at INDEX_STAGE_3_MASK_ + 1
        int linearStart = INDEX_STAGE_3_MASK_ + 1;
        return m_data_[linearStart + ch];
    }

    /**
     * Checks whether the argument Trie holds the same data as this Trie.
     * @param other Trie to check
     * @return true if the argument Trie has the same data as this Trie, false
     *         otherwise
     */
    ///CLOVER:OFF
    @Override
    public boolean equals(Object other)
    {
        if (!super.equals(other) || !(other instanceof IntTrie)) {
            return false;
        }
        IntTrie that = (IntTrie)other;
        return m_initialValue_ == that.m_initialValue_
                && Arrays.equals(m_data_, that.m_data_);
    }

    /**
     * Not designed for use: trips an assertion when assertions are enabled,
     * and otherwise returns a constant.
     * @return a constant value
     */
    @Override
    public int hashCode() {
        assert false : "hashCode not designed";
        return 42;
    }
    ///CLOVER:ON

    // protected methods -----------------------------------------------

    /**
     * <p>Reads the trie content from the buffer into the index and data
     * arrays.</p>
     * @param bytes data buffer containing trie data
     */
    @Override
    protected final void unserialize(ByteBuffer bytes)
    {
        super.unserialize(bytes);
        m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0);
        // the first data entry doubles as the initial value
        m_initialValue_ = m_data_[0];
    }

    /**
     * Computes the offset to the data which the surrogate pair points to.
     * @param lead lead surrogate
     * @param trail trailing surrogate
     * @return offset to data, or -1 when the folding offset is not positive
     * @throws NullPointerException if this trie has no DataManipulate
     */
    @Override
    protected final int getSurrogateOffset(char lead, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }

        // the lead unit's value encodes the fold position for the trail unit
        int foldingOffset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
        if (foldingOffset <= 0) {
            // signals "no data"; callers substitute m_initialValue_
            return -1;
        }

        char trailBits = (char)(trail & SURROGATE_MASK_);
        return getRawOffset(foldingOffset, trailBits);
    }

    /**
     * Returns the data value stored at the argument index.
     * For use internally in TrieIterator
     * @param index value at index will be retrieved
     * @return 32 bit value
     * @see com.ibm.icu.impl.TrieIterator
     */
    @Override
    protected final int getValue(int index)
    {
        return m_data_[index];
    }

    /**
     * Returns the default initial value.
     * @return 32 bit value
     */
    @Override
    protected final int getInitialValue()
    {
        return m_initialValue_;
    }

    // package private methods -----------------------------------------

    /**
     * Internal constructor for builder use. The arrays are stored as given,
     * without copying.
     * @param index the index array to be slotted into this trie
     * @param data the data array to be slotted into this trie
     * @param initialvalue the initial value for this trie
     * @param options trie options to use
     * @param datamanipulate folding implementation
     */
    IntTrie(char index[], int data[], int initialvalue, int options,
            DataManipulate datamanipulate)
    {
        super(index, options, datamanipulate);
        m_data_ = data;
        m_dataLength_ = data.length;
        m_initialValue_ = initialvalue;
    }

    // private data members --------------------------------------------

    // Both fields are deliberately left without initializers: they are
    // assigned from the constructors and from unserialize().

    /**
     * Default value
     */
    private int m_initialValue_;
    /**
     * Array of 32-bit trie data values
     */
    private int[] m_data_;
}