• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2011-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   created on: 2011jan05
9 *   created by: Markus W. Scherer
10 *   ported from ICU4C bytestriebuilder.h/.cpp
11 */
12 
13 package com.ibm.icu.util;
14 
15 import java.nio.ByteBuffer;
16 
17 /**
18  * Builder class for BytesTrie.
19  *
20  * <p>This class is not intended for public subclassing.
21  *
22  * @stable ICU 4.8
23  * @author Markus W. Scherer
24  */
25 public final class BytesTrieBuilder extends StringTrieBuilder {
26     /**
27      * Constructs an empty builder.
28      * @stable ICU 4.8
29      */
BytesTrieBuilder()30     public BytesTrieBuilder() {}
31 
32     // Used in add() to wrap the bytes into a CharSequence for StringTrieBuilder.addImpl().
33     private static final class BytesAsCharSequence implements CharSequence {
BytesAsCharSequence(byte[] sequence, int length)34         public BytesAsCharSequence(byte[] sequence, int length) {
35             s=sequence;
36             len=length;
37         }
38         @Override
charAt(int i)39         public char charAt(int i) { return (char)(s[i]&0xff); }
40         @Override
length()41         public int length() { return len; }
42         @Override
subSequence(int start, int end)43         public CharSequence subSequence(int start, int end) { return null; }
44 
45         private byte[] s;
46         private int len;
47     }
48 
49     /**
50      * Adds a (byte sequence, value) pair.
51      * The byte sequence must be unique.
52      * Bytes 0..length-1 will be copied; the builder does not keep
53      * a reference to the input array.
54      * @param sequence The array that contains the byte sequence, starting at index 0.
55      * @param length The length of the byte sequence.
56      * @param value The value associated with this byte sequence.
57      * @return this
58      * @stable ICU 4.8
59      */
add(byte[] sequence, int length, int value)60     public BytesTrieBuilder add(byte[] sequence, int length, int value) {
61         addImpl(new BytesAsCharSequence(sequence, length), value);
62         return this;
63     }
64 
65     /**
66      * Builds a BytesTrie for the add()ed data.
67      * Once built, no further data can be add()ed until clear() is called.
68      *
69      * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair
70      * must have been add()ed.
71      *
72      * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers
73      * which share the builder's byte array, without rebuilding.
74      * <em>The byte array must not be modified via the buildByteBuffer() result object.</em>
75      * After clear() has been called, a new array will be used.
76      * @param buildOption Build option, see StringTrieBuilder.Option.
77      * @return A new BytesTrie for the add()ed data.
78      * @stable ICU 4.8
79      */
build(StringTrieBuilder.Option buildOption)80     public BytesTrie build(StringTrieBuilder.Option buildOption) {
81         buildBytes(buildOption);
82         return new BytesTrie(bytes, bytes.length-bytesLength);
83     }
84 
85     /**
86      * Builds a BytesTrie for the add()ed data and byte-serializes it.
87      * Once built, no further data can be add()ed until clear() is called.
88      *
89      * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair
90      * must have been add()ed.
91      *
92      * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers
93      * which share the builder's byte array, without rebuilding.
94      * <em>Do not modify the bytes in the buffer!</em>
95      * After clear() has been called, a new array will be used.
96      *
97      * <p>The serialized BytesTrie is accessible via the buffer's
98      * array()/arrayOffset()+position() or remaining()/get(byte[]) etc.
99      * @param buildOption Build option, see StringTrieBuilder.Option.
100      * @return A ByteBuffer with the byte-serialized BytesTrie for the add()ed data.
101      *         The buffer is not read-only and array() can be called.
102      * @stable ICU 4.8
103      */
buildByteBuffer(StringTrieBuilder.Option buildOption)104     public ByteBuffer buildByteBuffer(StringTrieBuilder.Option buildOption) {
105         buildBytes(buildOption);
106         return ByteBuffer.wrap(bytes, bytes.length-bytesLength, bytesLength);
107     }
108 
buildBytes(StringTrieBuilder.Option buildOption)109     private void buildBytes(StringTrieBuilder.Option buildOption) {
110         // Create and byte-serialize the trie for the elements.
111         if(bytes==null) {
112             bytes=new byte[1024];
113         }
114         buildImpl(buildOption);
115     }
116 
117     /**
118      * Removes all (byte sequence, value) pairs.
119      * New data can then be add()ed and a new trie can be built.
120      * @return this
121      * @stable ICU 4.8
122      */
clear()123     public BytesTrieBuilder clear() {
124         clearImpl();
125         bytes=null;
126         bytesLength=0;
127         return this;
128     }
129 
130     /**
131      * {@inheritDoc}
132      * @internal
133      * @deprecated This API is ICU internal only.
134      */
135     @Deprecated
136     @Override
matchNodesCanHaveValues()137     protected boolean matchNodesCanHaveValues() /*const*/ { return false; }
138 
139     /**
140      * {@inheritDoc}
141      * @internal
142      * @deprecated This API is ICU internal only.
143      */
144     @Deprecated
145     @Override
getMaxBranchLinearSubNodeLength()146     protected int getMaxBranchLinearSubNodeLength() /*const*/ { return BytesTrie.kMaxBranchLinearSubNodeLength; }
147     /**
148      * {@inheritDoc}
149      * @internal
150      * @deprecated This API is ICU internal only.
151      */
152     @Deprecated
153     @Override
getMinLinearMatch()154     protected int getMinLinearMatch() /*const*/ { return BytesTrie.kMinLinearMatch; }
155     /**
156      * {@inheritDoc}
157      * @internal
158      * @deprecated This API is ICU internal only.
159      */
160     @Deprecated
161     @Override
getMaxLinearMatchLength()162     protected int getMaxLinearMatchLength() /*const*/ { return BytesTrie.kMaxLinearMatchLength; }
163 
ensureCapacity(int length)164     private void ensureCapacity(int length) {
165         if(length>bytes.length) {
166             int newCapacity=bytes.length;
167             do {
168                 newCapacity*=2;
169             } while(newCapacity<=length);
170             byte[] newBytes=new byte[newCapacity];
171             System.arraycopy(bytes, bytes.length-bytesLength,
172                              newBytes, newBytes.length-bytesLength, bytesLength);
173             bytes=newBytes;
174         }
175     }
176     /**
177      * {@inheritDoc}
178      * @internal
179      * @deprecated This API is ICU internal only.
180      */
181     @Deprecated
182     @Override
write(int b)183     protected int write(int b) {
184         int newLength=bytesLength+1;
185         ensureCapacity(newLength);
186         bytesLength=newLength;
187         bytes[bytes.length-bytesLength]=(byte)b;
188         return bytesLength;
189     }
190     /**
191      * {@inheritDoc}
192      * @internal
193      * @deprecated This API is ICU internal only.
194      */
195     @Deprecated
196     @Override
write(int offset, int length)197     protected int write(int offset, int length) {
198         int newLength=bytesLength+length;
199         ensureCapacity(newLength);
200         bytesLength=newLength;
201         int bytesOffset=bytes.length-bytesLength;
202         while(length>0) {
203             bytes[bytesOffset++]=(byte)strings.charAt(offset++);
204             --length;
205         }
206         return bytesLength;
207     }
write(byte[] b, int length)208     private int write(byte[] b, int length) {
209         int newLength=bytesLength+length;
210         ensureCapacity(newLength);
211         bytesLength=newLength;
212         System.arraycopy(b, 0, bytes, bytes.length-bytesLength, length);
213         return bytesLength;
214     }
215 
216     // For writeValueAndFinal() and writeDeltaTo().
217     private final byte[] intBytes=new byte[5];
218 
219     /**
220      * {@inheritDoc}
221      * @internal
222      * @deprecated This API is ICU internal only.
223      */
224     @Deprecated
225     @Override
writeValueAndFinal(int i, boolean isFinal)226     protected int writeValueAndFinal(int i, boolean isFinal) {
227         if(0<=i && i<=BytesTrie.kMaxOneByteValue) {
228             return write(((BytesTrie.kMinOneByteValueLead+i)<<1)|(isFinal?1:0));
229         }
230         int length=1;
231         if(i<0 || i>0xffffff) {
232             intBytes[0]=(byte)BytesTrie.kFiveByteValueLead;
233             intBytes[1]=(byte)(i>>24);
234             intBytes[2]=(byte)(i>>16);
235             intBytes[3]=(byte)(i>>8);
236             intBytes[4]=(byte)i;
237             length=5;
238         // } else if(i<=BytesTrie.kMaxOneByteValue) {
239         //     intBytes[0]=(byte)(BytesTrie.kMinOneByteValueLead+i);
240         } else {
241             if(i<=BytesTrie.kMaxTwoByteValue) {
242                 intBytes[0]=(byte)(BytesTrie.kMinTwoByteValueLead+(i>>8));
243             } else {
244                 if(i<=BytesTrie.kMaxThreeByteValue) {
245                     intBytes[0]=(byte)(BytesTrie.kMinThreeByteValueLead+(i>>16));
246                 } else {
247                     intBytes[0]=(byte)BytesTrie.kFourByteValueLead;
248                     intBytes[1]=(byte)(i>>16);
249                     length=2;
250                 }
251                 intBytes[length++]=(byte)(i>>8);
252             }
253             intBytes[length++]=(byte)i;
254         }
255         intBytes[0]=(byte)((intBytes[0]<<1)|(isFinal?1:0));
256         return write(intBytes, length);
257     }
258     /**
259      * {@inheritDoc}
260      * @internal
261      * @deprecated This API is ICU internal only.
262      */
263     @Deprecated
264     @Override
writeValueAndType(boolean hasValue, int value, int node)265     protected int writeValueAndType(boolean hasValue, int value, int node) {
266         int offset=write(node);
267         if(hasValue) {
268             offset=writeValueAndFinal(value, false);
269         }
270         return offset;
271     }
272     /**
273      * {@inheritDoc}
274      * @internal
275      * @deprecated This API is ICU internal only.
276      */
277     @Deprecated
278     @Override
writeDeltaTo(int jumpTarget)279     protected int writeDeltaTo(int jumpTarget) {
280         int i=bytesLength-jumpTarget;
281         assert(i>=0);
282         if(i<=BytesTrie.kMaxOneByteDelta) {
283             return write(i);
284         } else {
285             return write(intBytes, internalEncodeDelta(i, intBytes));
286         }
287     }
288     /**
289      * @internal
290      * @deprecated This API is ICU internal only.
291      */
292     @Deprecated
internalEncodeDelta(int i, byte[] intBytes)293     public static final int internalEncodeDelta(int i, byte[] intBytes) {
294         assert(i>=0);
295         if(i<=BytesTrie.kMaxOneByteDelta) {
296             intBytes[0]=(byte)i;
297             return 1;
298         }
299         int length=1;
300         if(i<=BytesTrie.kMaxTwoByteDelta) {
301             intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8));
302         } else {
303             if(i<=BytesTrie.kMaxThreeByteDelta) {
304                 intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16));
305             } else {
306                 if(i<=0xffffff) {
307                     intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead;
308                 } else {
309                     intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead;
310                     intBytes[1]=(byte)(i>>24);
311                     length=2;
312                 }
313                 intBytes[length++]=(byte)(i>>16);
314             }
315             intBytes[length++]=(byte)(i>>8);
316         }
317         intBytes[length++]=(byte)i;
318         return length;
319     }
320 
321     // Byte serialization of the trie.
322     // Grows from the back: bytesLength measures from the end of the buffer!
323     private byte[] bytes;
324     private int bytesLength;
325 }
326