1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * created on: 2011jan05 9 * created by: Markus W. Scherer 10 * ported from ICU4C bytestriebuilder.h/.cpp 11 */ 12 13 package com.ibm.icu.util; 14 15 import java.nio.ByteBuffer; 16 17 /** 18 * Builder class for BytesTrie. 19 * 20 * <p>This class is not intended for public subclassing. 21 * 22 * @stable ICU 4.8 23 * @author Markus W. Scherer 24 */ 25 public final class BytesTrieBuilder extends StringTrieBuilder { 26 /** 27 * Constructs an empty builder. 28 * @stable ICU 4.8 29 */ BytesTrieBuilder()30 public BytesTrieBuilder() {} 31 32 // Used in add() to wrap the bytes into a CharSequence for StringTrieBuilder.addImpl(). 33 private static final class BytesAsCharSequence implements CharSequence { BytesAsCharSequence(byte[] sequence, int length)34 public BytesAsCharSequence(byte[] sequence, int length) { 35 s=sequence; 36 len=length; 37 } 38 @Override charAt(int i)39 public char charAt(int i) { return (char)(s[i]&0xff); } 40 @Override length()41 public int length() { return len; } 42 @Override subSequence(int start, int end)43 public CharSequence subSequence(int start, int end) { return null; } 44 45 private byte[] s; 46 private int len; 47 } 48 49 /** 50 * Adds a (byte sequence, value) pair. 51 * The byte sequence must be unique. 52 * Bytes 0..length-1 will be copied; the builder does not keep 53 * a reference to the input array. 54 * @param sequence The array that contains the byte sequence, starting at index 0. 55 * @param length The length of the byte sequence. 56 * @param value The value associated with this byte sequence. 57 * @return this 58 * @stable ICU 4.8 59 */ add(byte[] sequence, int length, int value)60 public BytesTrieBuilder add(byte[] sequence, int length, int value) { 61 addImpl(new BytesAsCharSequence(sequence, length), value); 62 return this; 63 } 64 65 /** 66 * Builds a BytesTrie for the add()ed data. 67 * Once built, no further data can be add()ed until clear() is called. 68 * 69 * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair 70 * must have been add()ed. 71 * 72 * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers 73 * which share the builder's byte array, without rebuilding. 74 * <em>The byte array must not be modified via the buildByteBuffer() result object.</em> 75 * After clear() has been called, a new array will be used. 76 * @param buildOption Build option, see StringTrieBuilder.Option. 77 * @return A new BytesTrie for the add()ed data. 78 * @stable ICU 4.8 79 */ build(StringTrieBuilder.Option buildOption)80 public BytesTrie build(StringTrieBuilder.Option buildOption) { 81 buildBytes(buildOption); 82 return new BytesTrie(bytes, bytes.length-bytesLength); 83 } 84 85 /** 86 * Builds a BytesTrie for the add()ed data and byte-serializes it. 87 * Once built, no further data can be add()ed until clear() is called. 88 * 89 * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair 90 * must have been add()ed. 91 * 92 * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers 93 * which share the builder's byte array, without rebuilding. 94 * <em>Do not modify the bytes in the buffer!</em> 95 * After clear() has been called, a new array will be used. 96 * 97 * <p>The serialized BytesTrie is accessible via the buffer's 98 * array()/arrayOffset()+position() or remaining()/get(byte[]) etc. 99 * @param buildOption Build option, see StringTrieBuilder.Option. 100 * @return A ByteBuffer with the byte-serialized BytesTrie for the add()ed data. 101 * The buffer is not read-only and array() can be called. 102 * @stable ICU 4.8 103 */ buildByteBuffer(StringTrieBuilder.Option buildOption)104 public ByteBuffer buildByteBuffer(StringTrieBuilder.Option buildOption) { 105 buildBytes(buildOption); 106 return ByteBuffer.wrap(bytes, bytes.length-bytesLength, bytesLength); 107 } 108 buildBytes(StringTrieBuilder.Option buildOption)109 private void buildBytes(StringTrieBuilder.Option buildOption) { 110 // Create and byte-serialize the trie for the elements. 111 if(bytes==null) { 112 bytes=new byte[1024]; 113 } 114 buildImpl(buildOption); 115 } 116 117 /** 118 * Removes all (byte sequence, value) pairs. 119 * New data can then be add()ed and a new trie can be built. 120 * @return this 121 * @stable ICU 4.8 122 */ clear()123 public BytesTrieBuilder clear() { 124 clearImpl(); 125 bytes=null; 126 bytesLength=0; 127 return this; 128 } 129 130 /** 131 * {@inheritDoc} 132 * @internal 133 * @deprecated This API is ICU internal only. 134 */ 135 @Deprecated 136 @Override matchNodesCanHaveValues()137 protected boolean matchNodesCanHaveValues() /*const*/ { return false; } 138 139 /** 140 * {@inheritDoc} 141 * @internal 142 * @deprecated This API is ICU internal only. 143 */ 144 @Deprecated 145 @Override getMaxBranchLinearSubNodeLength()146 protected int getMaxBranchLinearSubNodeLength() /*const*/ { return BytesTrie.kMaxBranchLinearSubNodeLength; } 147 /** 148 * {@inheritDoc} 149 * @internal 150 * @deprecated This API is ICU internal only. 151 */ 152 @Deprecated 153 @Override getMinLinearMatch()154 protected int getMinLinearMatch() /*const*/ { return BytesTrie.kMinLinearMatch; } 155 /** 156 * {@inheritDoc} 157 * @internal 158 * @deprecated This API is ICU internal only. 159 */ 160 @Deprecated 161 @Override getMaxLinearMatchLength()162 protected int getMaxLinearMatchLength() /*const*/ { return BytesTrie.kMaxLinearMatchLength; } 163 ensureCapacity(int length)164 private void ensureCapacity(int length) { 165 if(length>bytes.length) { 166 int newCapacity=bytes.length; 167 do { 168 newCapacity*=2; 169 } while(newCapacity<=length); 170 byte[] newBytes=new byte[newCapacity]; 171 System.arraycopy(bytes, bytes.length-bytesLength, 172 newBytes, newBytes.length-bytesLength, bytesLength); 173 bytes=newBytes; 174 } 175 } 176 /** 177 * {@inheritDoc} 178 * @internal 179 * @deprecated This API is ICU internal only. 180 */ 181 @Deprecated 182 @Override write(int b)183 protected int write(int b) { 184 int newLength=bytesLength+1; 185 ensureCapacity(newLength); 186 bytesLength=newLength; 187 bytes[bytes.length-bytesLength]=(byte)b; 188 return bytesLength; 189 } 190 /** 191 * {@inheritDoc} 192 * @internal 193 * @deprecated This API is ICU internal only. 194 */ 195 @Deprecated 196 @Override write(int offset, int length)197 protected int write(int offset, int length) { 198 int newLength=bytesLength+length; 199 ensureCapacity(newLength); 200 bytesLength=newLength; 201 int bytesOffset=bytes.length-bytesLength; 202 while(length>0) { 203 bytes[bytesOffset++]=(byte)strings.charAt(offset++); 204 --length; 205 } 206 return bytesLength; 207 } write(byte[] b, int length)208 private int write(byte[] b, int length) { 209 int newLength=bytesLength+length; 210 ensureCapacity(newLength); 211 bytesLength=newLength; 212 System.arraycopy(b, 0, bytes, bytes.length-bytesLength, length); 213 return bytesLength; 214 } 215 216 // For writeValueAndFinal() and writeDeltaTo(). 217 private final byte[] intBytes=new byte[5]; 218 219 /** 220 * {@inheritDoc} 221 * @internal 222 * @deprecated This API is ICU internal only. 223 */ 224 @Deprecated 225 @Override writeValueAndFinal(int i, boolean isFinal)226 protected int writeValueAndFinal(int i, boolean isFinal) { 227 if(0<=i && i<=BytesTrie.kMaxOneByteValue) { 228 return write(((BytesTrie.kMinOneByteValueLead+i)<<1)|(isFinal?1:0)); 229 } 230 int length=1; 231 if(i<0 || i>0xffffff) { 232 intBytes[0]=(byte)BytesTrie.kFiveByteValueLead; 233 intBytes[1]=(byte)(i>>24); 234 intBytes[2]=(byte)(i>>16); 235 intBytes[3]=(byte)(i>>8); 236 intBytes[4]=(byte)i; 237 length=5; 238 // } else if(i<=BytesTrie.kMaxOneByteValue) { 239 // intBytes[0]=(byte)(BytesTrie.kMinOneByteValueLead+i); 240 } else { 241 if(i<=BytesTrie.kMaxTwoByteValue) { 242 intBytes[0]=(byte)(BytesTrie.kMinTwoByteValueLead+(i>>8)); 243 } else { 244 if(i<=BytesTrie.kMaxThreeByteValue) { 245 intBytes[0]=(byte)(BytesTrie.kMinThreeByteValueLead+(i>>16)); 246 } else { 247 intBytes[0]=(byte)BytesTrie.kFourByteValueLead; 248 intBytes[1]=(byte)(i>>16); 249 length=2; 250 } 251 intBytes[length++]=(byte)(i>>8); 252 } 253 intBytes[length++]=(byte)i; 254 } 255 intBytes[0]=(byte)((intBytes[0]<<1)|(isFinal?1:0)); 256 return write(intBytes, length); 257 } 258 /** 259 * {@inheritDoc} 260 * @internal 261 * @deprecated This API is ICU internal only. 262 */ 263 @Deprecated 264 @Override writeValueAndType(boolean hasValue, int value, int node)265 protected int writeValueAndType(boolean hasValue, int value, int node) { 266 int offset=write(node); 267 if(hasValue) { 268 offset=writeValueAndFinal(value, false); 269 } 270 return offset; 271 } 272 /** 273 * {@inheritDoc} 274 * @internal 275 * @deprecated This API is ICU internal only. 276 */ 277 @Deprecated 278 @Override writeDeltaTo(int jumpTarget)279 protected int writeDeltaTo(int jumpTarget) { 280 int i=bytesLength-jumpTarget; 281 assert(i>=0); 282 if(i<=BytesTrie.kMaxOneByteDelta) { 283 return write(i); 284 } else { 285 return write(intBytes, internalEncodeDelta(i, intBytes)); 286 } 287 } 288 /** 289 * @internal 290 * @deprecated This API is ICU internal only. 291 */ 292 @Deprecated internalEncodeDelta(int i, byte[] intBytes)293 public static final int internalEncodeDelta(int i, byte[] intBytes) { 294 assert(i>=0); 295 if(i<=BytesTrie.kMaxOneByteDelta) { 296 intBytes[0]=(byte)i; 297 return 1; 298 } 299 int length=1; 300 if(i<=BytesTrie.kMaxTwoByteDelta) { 301 intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8)); 302 } else { 303 if(i<=BytesTrie.kMaxThreeByteDelta) { 304 intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16)); 305 } else { 306 if(i<=0xffffff) { 307 intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead; 308 } else { 309 intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead; 310 intBytes[1]=(byte)(i>>24); 311 length=2; 312 } 313 intBytes[length++]=(byte)(i>>16); 314 } 315 intBytes[length++]=(byte)(i>>8); 316 } 317 intBytes[length++]=(byte)i; 318 return length; 319 } 320 321 // Byte serialization of the trie. 322 // Grows from the back: bytesLength measures from the end of the buffer! 323 private byte[] bytes; 324 private int bytesLength; 325 } 326