• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 *   Copyright (C) 2010-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 *   created on: 2010nov23
10 *   created by: Markus W. Scherer
11 *   ported from ICU4C bytestrie.h/.cpp
12 */
13 package ohos.global.icu.util;
14 
15 import java.io.IOException;
16 import java.nio.ByteBuffer;
17 import java.util.ArrayList;
18 import java.util.NoSuchElementException;
19 
20 /**
21  * Light-weight, non-const reader class for a BytesTrie.
22  * Traverses a byte-serialized data structure with minimal state,
23  * for mapping byte sequences to non-negative integer values.
24  *
25  * <p>This class is not intended for public subclassing.
26  *
27  * @author Markus W. Scherer
28  * @hide exposed on OHOS
29  */
30 public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
31     /**
32      * Constructs a BytesTrie reader instance.
33      *
34      * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
35      * with the offset indicating the first byte of that sequence.
36      * The BytesTrie object will not read more bytes than
37      * the BytesTrieBuilder generated in the corresponding build() call.
38      *
39      * <p>The array is not copied/cloned and must not be modified while
40      * the BytesTrie object is in use.
41      *
42      * @param trieBytes Bytes array that contains the serialized trie.
43      * @param offset Root offset of the trie in the array.
44      */
BytesTrie(byte[] trieBytes, int offset)45     public BytesTrie(byte[] trieBytes, int offset) {
46         bytes_=trieBytes;
47         pos_=root_=offset;
48         remainingMatchLength_=-1;
49     }
50 
51     /**
52      * Copy constructor.
53      * Makes a shallow copy of the other trie reader object and its state.
54      * Does not copy the byte array which will be shared.
55      * Same as clone() but without the throws clause.
56      */
BytesTrie(BytesTrie other)57     public BytesTrie(BytesTrie other) {
58         bytes_ = other.bytes_;
59         root_ = other.root_;
60         pos_ = other.pos_;
61         remainingMatchLength_ = other.remainingMatchLength_;
62     }
63 
64     /**
65      * Clones this trie reader object and its state,
66      * but not the byte array which will be shared.
67      * @return A shallow clone of this trie.
68      */
69     @Override
clone()70     public BytesTrie clone() throws CloneNotSupportedException {
71         return (BytesTrie) super.clone();  // A shallow copy is just what we need.
72     }
73 
74     /**
75      * Resets this trie to its initial state.
76      * @return this
77      */
reset()78     public BytesTrie reset() {
79         pos_=root_;
80         remainingMatchLength_=-1;
81         return this;
82     }
83 
84     /**
85      * Returns the state of this trie as a 64-bit integer.
86      * The state value is never 0.
87      *
88      * @return opaque state value
89      * @see #resetToState64
90      */
getState64()91     public long getState64() {
92         return ((long)remainingMatchLength_ << 32) | pos_;
93     }
94 
95     /**
96      * Resets this trie to the saved state.
97      * Unlike {@link #resetToState(State)}, the 64-bit state value
98      * must be from {@link #getState64()} from the same trie object or
99      * from one initialized the exact same way.
100      * Because of no validation, this method is faster.
101      *
102      * @param state The opaque trie state value from getState64().
103      * @return this
104      * @see #getState64
105      * @see #resetToState
106      * @see #reset
107      */
resetToState64(long state)108     public BytesTrie resetToState64(long state) {
109         remainingMatchLength_ = (int)(state >> 32);
110         pos_ = (int)state;
111         return this;
112     }
113 
114     /**
115      * BytesTrie state object, for saving a trie's current state
116      * and resetting the trie back to this state later.
117      * @hide exposed on OHOS
118      */
119     public static final class State {
120         /**
121          * Constructs an empty State.
122          */
State()123         public State() {}
124         private byte[] bytes;
125         private int root;
126         private int pos;
127         private int remainingMatchLength;
128     }
129 
130     /**
131      * Saves the state of this trie.
132      * @param state The State object to hold the trie's state.
133      * @return this
134      * @see #resetToState
135      */
saveState(State state)136     public BytesTrie saveState(State state) /*const*/ {
137         state.bytes=bytes_;
138         state.root=root_;
139         state.pos=pos_;
140         state.remainingMatchLength=remainingMatchLength_;
141         return this;
142     }
143 
144     /**
145      * Resets this trie to the saved state.
146      * Slower than {@link #resetToState64(long)} which does not validate the state value.
147      *
148      * @param state The State object which holds a saved trie state.
149      * @return this
150      * @throws IllegalArgumentException if the state object contains no state,
151      *         or the state of a different trie
152      * @see #saveState
153      * @see #reset
154      */
resetToState(State state)155     public BytesTrie resetToState(State state) {
156         if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
157             pos_=state.pos;
158             remainingMatchLength_=state.remainingMatchLength;
159         } else {
160             throw new IllegalArgumentException("incompatible trie state");
161         }
162         return this;
163     }
164 
165     /**
166      * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
167      * @hide exposed on OHOS
168      */
169     public enum Result {
170         /**
171          * The input unit(s) did not continue a matching string.
172          * Once current()/next() return NO_MATCH,
173          * all further calls to current()/next() will also return NO_MATCH,
174          * until the trie is reset to its original state or to a saved state.
175          */
176         NO_MATCH,
177         /**
178          * The input unit(s) continued a matching string
179          * but there is no value for the string so far.
180          * (It is a prefix of a longer string.)
181          */
182         NO_VALUE,
183         /**
184          * The input unit(s) continued a matching string
185          * and there is a value for the string so far.
186          * This value will be returned by getValue().
187          * No further input byte/unit can continue a matching string.
188          */
189         FINAL_VALUE,
190         /**
191          * The input unit(s) continued a matching string
192          * and there is a value for the string so far.
193          * This value will be returned by getValue().
194          * Another input byte/unit can continue a matching string.
195          */
196         INTERMEDIATE_VALUE;
197 
198         // Note: The following methods assume the particular order
199         // of enum constants, treating the ordinal() values like bit sets.
200         // Do not reorder the enum constants!
201 
202         /**
203          * Same as (result!=NO_MATCH).
204          * @return true if the input bytes/units so far are part of a matching string/byte sequence.
205          */
matches()206         public boolean matches() { return this!=NO_MATCH; }
207 
208         /**
209          * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
210          * @return true if there is a value for the input bytes/units so far.
211          * @see #getValue
212          */
hasValue()213         public boolean hasValue() { return ordinal()>=2; }
214 
215         /**
216          * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
217          * @return true if another input byte/unit can continue a matching string.
218          */
hasNext()219         public boolean hasNext() { return (ordinal()&1)!=0; }
220     }
221 
222     /**
223      * Determines whether the byte sequence so far matches, whether it has a value,
224      * and whether another input byte can continue a matching byte sequence.
225      * @return The match/value Result.
226      */
current()227     public Result current() /*const*/ {
228         int pos=pos_;
229         if(pos<0) {
230             return Result.NO_MATCH;
231         } else {
232             int node;
233             return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
234                     valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
235         }
236     }
237 
238     /**
239      * Traverses the trie from the initial state for this input byte.
240      * Equivalent to reset().next(inByte).
241      * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
242      *               Values below -0x100 and above 0xff will never match.
243      * @return The match/value Result.
244      */
first(int inByte)245     public Result first(int inByte) {
246         remainingMatchLength_=-1;
247         if(inByte<0) {
248             inByte+=0x100;
249         }
250         return nextImpl(root_, inByte);
251     }
252 
253     /**
254      * Traverses the trie from the current state for this input byte.
255      * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
256      *               Values below -0x100 and above 0xff will never match.
257      * @return The match/value Result.
258      */
next(int inByte)259     public Result next(int inByte) {
260         int pos=pos_;
261         if(pos<0) {
262             return Result.NO_MATCH;
263         }
264         if(inByte<0) {
265             inByte+=0x100;
266         }
267         int length=remainingMatchLength_;  // Actual remaining match length minus 1.
268         if(length>=0) {
269             // Remaining part of a linear-match node.
270             if(inByte==(bytes_[pos++]&0xff)) {
271                 remainingMatchLength_=--length;
272                 pos_=pos;
273                 int node;
274                 return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
275                         valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
276             } else {
277                 stop();
278                 return Result.NO_MATCH;
279             }
280         }
281         return nextImpl(pos, inByte);
282     }
283 
284     /**
285      * Traverses the trie from the current state for this byte sequence.
286      * Equivalent to
287      * <pre>
288      * Result result=current();
289      * for(each c in s)
290      *   if(!result.hasNext()) return Result.NO_MATCH;
291      *   result=next(c);
292      * return result;
293      * </pre>
294      * @param s Contains a string or byte sequence.
295      * @param sIndex The start index of the byte sequence in s.
296      * @param sLimit The (exclusive) end index of the byte sequence in s.
297      * @return The match/value Result.
298      */
next(byte[] s, int sIndex, int sLimit)299     public Result next(byte[] s, int sIndex, int sLimit) {
300         if(sIndex>=sLimit) {
301             // Empty input.
302             return current();
303         }
304         int pos=pos_;
305         if(pos<0) {
306             return Result.NO_MATCH;
307         }
308         int length=remainingMatchLength_;  // Actual remaining match length minus 1.
309         for(;;) {
310             // Fetch the next input byte, if there is one.
311             // Continue a linear-match node.
312             byte inByte;
313             for(;;) {
314                 if(sIndex==sLimit) {
315                     remainingMatchLength_=length;
316                     pos_=pos;
317                     int node;
318                     return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
319                             valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
320                 }
321                 inByte=s[sIndex++];
322                 if(length<0) {
323                     remainingMatchLength_=length;
324                     break;
325                 }
326                 if(inByte!=bytes_[pos]) {
327                     stop();
328                     return Result.NO_MATCH;
329                 }
330                 ++pos;
331                 --length;
332             }
333             for(;;) {
334                 int node=bytes_[pos++]&0xff;
335                 if(node<kMinLinearMatch) {
336                     Result result=branchNext(pos, node, inByte&0xff);
337                     if(result==Result.NO_MATCH) {
338                         return Result.NO_MATCH;
339                     }
340                     // Fetch the next input byte, if there is one.
341                     if(sIndex==sLimit) {
342                         return result;
343                     }
344                     if(result==Result.FINAL_VALUE) {
345                         // No further matching bytes.
346                         stop();
347                         return Result.NO_MATCH;
348                     }
349                     inByte=s[sIndex++];
350                     pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
351                 } else if(node<kMinValueLead) {
352                     // Match length+1 bytes.
353                     length=node-kMinLinearMatch;  // Actual match length minus 1.
354                     if(inByte!=bytes_[pos]) {
355                         stop();
356                         return Result.NO_MATCH;
357                     }
358                     ++pos;
359                     --length;
360                     break;
361                 } else if((node&kValueIsFinal)!=0) {
362                     // No further matching bytes.
363                     stop();
364                     return Result.NO_MATCH;
365                 } else {
366                     // Skip intermediate value.
367                     pos=skipValue(pos, node);
368                     // The next node must not also be a value node.
369                     assert((bytes_[pos]&0xff)<kMinValueLead);
370                 }
371             }
372         }
373     }
374 
375     /**
376      * Returns a matching byte sequence's value if called immediately after
377      * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
378      * getValue() can be called multiple times.
379      *
380      * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
381      * @return The value for the byte sequence so far.
382      */
383     public int getValue() /*const*/ {
384         int pos=pos_;
385         int leadByte=bytes_[pos++]&0xff;
386         assert(leadByte>=kMinValueLead);
387         return readValue(bytes_, pos, leadByte>>1);
388     }
389 
390     /**
391      * Determines whether all byte sequences reachable from the current state
392      * map to the same value, and if so, returns that value.
393      * @return The unique value in bits 32..1 with bit 0 set,
394      *         if all byte sequences reachable from the current state
395      *         map to the same value; otherwise returns 0.
396      */
getUniqueValue()397     public long getUniqueValue() /*const*/ {
398         int pos=pos_;
399         if(pos<0) {
400             return 0;
401         }
402         // Skip the rest of a pending linear-match node.
403         long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
404         // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
405         return (uniqueValue<<31)>>31;
406     }
407 
408     /**
409      * Finds each byte which continues the byte sequence from the current state.
410      * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
411      * @param out Each next byte is 0-extended to a char and appended to this object.
412      *            (Only uses the out.append(c) method.)
413      * @return The number of bytes which continue the byte sequence from here.
414      */
getNextBytes(Appendable out)415     public int getNextBytes(Appendable out) /*const*/ {
416         int pos=pos_;
417         if(pos<0) {
418             return 0;
419         }
420         if(remainingMatchLength_>=0) {
421             append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
422             return 1;
423         }
424         int node=bytes_[pos++]&0xff;
425         if(node>=kMinValueLead) {
426             if((node&kValueIsFinal)!=0) {
427                 return 0;
428             } else {
429                 pos=skipValue(pos, node);
430                 node=bytes_[pos++]&0xff;
431                 assert(node<kMinValueLead);
432             }
433         }
434         if(node<kMinLinearMatch) {
435             if(node==0) {
436                 node=bytes_[pos++]&0xff;
437             }
438             getNextBranchBytes(bytes_, pos, ++node, out);
439             return node;
440         } else {
441             // First byte of the linear-match node.
442             append(out, bytes_[pos]&0xff);
443             return 1;
444         }
445     }
446 
447     /**
448      * Iterates from the current state of this trie.
449      * @return A new BytesTrie.Iterator.
450      */
451     @Override
452     public Iterator iterator() {
453         return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
454     }
455 
456     /**
457      * Iterates from the current state of this trie.
458      * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
459      *                        Otherwise, the iterator returns strings with this maximum length.
460      * @return A new BytesTrie.Iterator.
461      */
462     public Iterator iterator(int maxStringLength) {
463         return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
464     }
465 
466     /**
467      * Iterates from the root of a byte-serialized BytesTrie.
468      * @param trieBytes Bytes array that contains the serialized trie.
469      * @param offset Root offset of the trie in the array.
470      * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
471      *                        Otherwise, the iterator returns strings with this maximum length.
472      * @return A new BytesTrie.Iterator.
473      */
474     public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
475         return new Iterator(trieBytes, offset, -1, maxStringLength);
476     }
477 
478     /**
479      * Return value type for the Iterator.
480      * @hide exposed on OHOS
481      */
482     public static final class Entry {
483         private Entry(int capacity) {
484             bytes=new byte[capacity];
485         }
486 
487         /**
488          * @return The length of the byte sequence.
489          */
490         public int bytesLength() { return length; }
491         /**
492          * Returns a byte of the byte sequence.
493          * @param index An index into the byte sequence.
494          * @return The index-th byte sequence byte.
495          */
496         public byte byteAt(int index) { return bytes[index]; }
497         /**
498          * Copies the byte sequence into a byte array.
499          * @param dest Destination byte array.
500          * @param destOffset Starting offset to where in dest the byte sequence is copied.
501          */
502         public void copyBytesTo(byte[] dest, int destOffset) {
503             System.arraycopy(bytes, 0, dest, destOffset, length);
504         }
505         /**
506          * @return The byte sequence as a read-only ByteBuffer.
507          */
508         public ByteBuffer bytesAsByteBuffer() {
509             return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
510         }
511 
512         /**
513          * The value associated with the byte sequence.
514          */
515         public int value;
516 
517         private void ensureCapacity(int len) {
518             if(bytes.length<len) {
519                 byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
520                 System.arraycopy(bytes, 0, newBytes, 0, length);
521                 bytes=newBytes;
522             }
523         }
524         private void append(byte b) {
525             ensureCapacity(length+1);
526             bytes[length++]=b;
527         }
528         private void append(byte[] b, int off, int len) {
529             ensureCapacity(length+len);
530             System.arraycopy(b, off, bytes, length, len);
531             length+=len;
532         }
533         private void truncateString(int newLength) { length=newLength; }
534 
535         private byte[] bytes;
536         private int length;
537     }
538 
539     /**
540      * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
541      * @hide exposed on OHOS
542      */
543     public static final class Iterator implements java.util.Iterator<Entry> {
544         private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
545             bytes_=trieBytes;
546             pos_=initialPos_=offset;
547             remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
548             maxLength_=maxStringLength;
549             entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
550             int length=remainingMatchLength_;  // Actual remaining match length minus 1.
551             if(length>=0) {
552                 // Pending linear-match node, append remaining bytes to entry_.
553                 ++length;
554                 if(maxLength_>0 && length>maxLength_) {
555                     length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
556                 }
557                 entry_.append(bytes_, pos_, length);
558                 pos_+=length;
559                 remainingMatchLength_-=length;
560             }
561         }
562 
563         /**
564          * Resets this iterator to its initial state.
565          * @return this
566          */
reset()567         public Iterator reset() {
568             pos_=initialPos_;
569             remainingMatchLength_=initialRemainingMatchLength_;
570             int length=remainingMatchLength_+1;  // Remaining match length.
571             if(maxLength_>0 && length>maxLength_) {
572                 length=maxLength_;
573             }
574             entry_.truncateString(length);
575             pos_+=length;
576             remainingMatchLength_-=length;
577             stack_.clear();
578             return this;
579         }
580 
581         /**
582          * @return true if there are more elements.
583          */
584         @Override
hasNext()585         public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
586 
587         /**
588          * Finds the next (byte sequence, value) pair if there is one.
589          *
590          * If the byte sequence is truncated to the maximum length and does not
591          * have a real value, then the value is set to -1.
592          * In this case, this "not a real value" is indistinguishable from
593          * a real value of -1.
594          * @return An Entry with the string and value of the next element.
595          * @throws NoSuchElementException - iteration has no more elements.
596          */
597         @Override
next()598         public Entry next() {
599             int pos=pos_;
600             if(pos<0) {
601                 if(stack_.isEmpty()) {
602                     throw new NoSuchElementException();
603                 }
604                 // Pop the state off the stack and continue with the next outbound edge of
605                 // the branch node.
606                 long top=stack_.remove(stack_.size()-1);
607                 int length=(int)top;
608                 pos=(int)(top>>32);
609                 entry_.truncateString(length&0xffff);
610                 length>>>=16;
611                 if(length>1) {
612                     pos=branchNext(pos, length);
613                     if(pos<0) {
614                         return entry_;  // Reached a final value.
615                     }
616                 } else {
617                     entry_.append(bytes_[pos++]);
618                 }
619             }
620             if(remainingMatchLength_>=0) {
621                 // We only get here if we started in a pending linear-match node
622                 // with more than maxLength remaining bytes.
623                 return truncateAndStop();
624             }
625             for(;;) {
626                 int node=bytes_[pos++]&0xff;
627                 if(node>=kMinValueLead) {
628                     // Deliver value for the byte sequence so far.
629                     boolean isFinal=(node&kValueIsFinal)!=0;
630                     entry_.value=readValue(bytes_, pos, node>>1);
631                     if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
632                         pos_=-1;
633                     } else {
634                         pos_=skipValue(pos, node);
635                     }
636                     return entry_;
637                 }
638                 if(maxLength_>0 && entry_.length==maxLength_) {
639                     return truncateAndStop();
640                 }
641                 if(node<kMinLinearMatch) {
642                     if(node==0) {
643                         node=bytes_[pos++]&0xff;
644                     }
645                     pos=branchNext(pos, node+1);
646                     if(pos<0) {
647                         return entry_;  // Reached a final value.
648                     }
649                 } else {
650                     // Linear-match node, append length bytes to entry_.
651                     int length=node-kMinLinearMatch+1;
652                     if(maxLength_>0 && entry_.length+length>maxLength_) {
653                         entry_.append(bytes_, pos, maxLength_-entry_.length);
654                         return truncateAndStop();
655                     }
656                     entry_.append(bytes_, pos, length);
657                     pos+=length;
658                 }
659             }
660         }
661 
662         /**
663          * Iterator.remove() is not supported.
664          * @throws UnsupportedOperationException (always)
665          */
666         @Override
remove()667         public void remove() {
668             throw new UnsupportedOperationException();
669         }
670 
truncateAndStop()671         private Entry truncateAndStop() {
672             pos_=-1;
673             entry_.value=-1;  // no real value for str
674             return entry_;
675         }
676 
branchNext(int pos, int length)677         private int branchNext(int pos, int length) {
678             while(length>kMaxBranchLinearSubNodeLength) {
679                 ++pos;  // ignore the comparison byte
680                 // Push state for the greater-or-equal edge.
681                 stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
682                 // Follow the less-than edge.
683                 length>>=1;
684                 pos=jumpByDelta(bytes_, pos);
685             }
686             // List of key-value pairs where values are either final values or jump deltas.
687             // Read the first (key, value) pair.
688             byte trieByte=bytes_[pos++];
689             int node=bytes_[pos++]&0xff;
690             boolean isFinal=(node&kValueIsFinal)!=0;
691             int value=readValue(bytes_, pos, node>>1);
692             pos=skipValue(pos, node);
693             stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
694             entry_.append(trieByte);
695             if(isFinal) {
696                 pos_=-1;
697                 entry_.value=value;
698                 return -1;
699             } else {
700                 return pos+value;
701             }
702         }
703 
704         private byte[] bytes_;
705         private int pos_;
706         private int initialPos_;
707         private int remainingMatchLength_;
708         private int initialRemainingMatchLength_;
709 
710         private int maxLength_;
711         private Entry entry_;
712 
713         // The stack stores longs for backtracking to another
714         // outbound edge of a branch node.
715         // Each long has the offset from bytes_ in bits 62..32,
716         // the entry_.stringLength() from before the node in bits 15..0,
717         // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
718         // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
719         // but the code looks more confusing that way.)
720         private ArrayList<Long> stack_=new ArrayList<>();
721     }
722 
stop()723     private void stop() {
724         pos_=-1;
725     }
726 
727     // Reads a compact 32-bit integer.
728     // pos is already after the leadByte, and the lead byte is already shifted right by 1.
readValue(byte[] bytes, int pos, int leadByte)729     private static int readValue(byte[] bytes, int pos, int leadByte) {
730         int value;
731         if(leadByte<kMinTwoByteValueLead) {
732             value=leadByte-kMinOneByteValueLead;
733         } else if(leadByte<kMinThreeByteValueLead) {
734             value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
735         } else if(leadByte<kFourByteValueLead) {
736             value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
737         } else if(leadByte==kFourByteValueLead) {
738             value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
739         } else {
740             value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
741         }
742         return value;
743     }
skipValue(int pos, int leadByte)744     private static int skipValue(int pos, int leadByte) {
745         assert(leadByte>=kMinValueLead);
746         if(leadByte>=(kMinTwoByteValueLead<<1)) {
747             if(leadByte<(kMinThreeByteValueLead<<1)) {
748                 ++pos;
749             } else if(leadByte<(kFourByteValueLead<<1)) {
750                 pos+=2;
751             } else {
752                 pos+=3+((leadByte>>1)&1);
753             }
754         }
755         return pos;
756     }
skipValue(byte[] bytes, int pos)757     private static int skipValue(byte[] bytes, int pos) {
758         int leadByte=bytes[pos++]&0xff;
759         return skipValue(pos, leadByte);
760     }
761 
762     // Reads a jump delta and jumps.
jumpByDelta(byte[] bytes, int pos)763     private static int jumpByDelta(byte[] bytes, int pos) {
764         int delta=bytes[pos++]&0xff;
765         if(delta<kMinTwoByteDeltaLead) {
766             // nothing to do
767         } else if(delta<kMinThreeByteDeltaLead) {
768             delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
769         } else if(delta<kFourByteDeltaLead) {
770             delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
771             pos+=2;
772         } else if(delta==kFourByteDeltaLead) {
773             delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
774             pos+=3;
775         } else {
776             delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
777             pos+=4;
778         }
779         return pos+delta;
780     }
781 
skipDelta(byte[] bytes, int pos)782     private static int skipDelta(byte[] bytes, int pos) {
783         int delta=bytes[pos++]&0xff;
784         if(delta>=kMinTwoByteDeltaLead) {
785             if(delta<kMinThreeByteDeltaLead) {
786                 ++pos;
787             } else if(delta<kFourByteDeltaLead) {
788                 pos+=2;
789             } else {
790                 pos+=3+(delta&1);
791             }
792         }
793         return pos;
794     }
795 
796     private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
797 
798     // Handles a branch node for both next(byte) and next(string).
branchNext(int pos, int length, int inByte)799     private Result branchNext(int pos, int length, int inByte) {
800         // Branch according to the current byte.
801         if(length==0) {
802             length=bytes_[pos++]&0xff;
803         }
804         ++length;
805         // The length of the branch is the number of bytes to select from.
806         // The data structure encodes a binary search.
807         while(length>kMaxBranchLinearSubNodeLength) {
808             if(inByte<(bytes_[pos++]&0xff)) {
809                 length>>=1;
810                 pos=jumpByDelta(bytes_, pos);
811             } else {
812                 length=length-(length>>1);
813                 pos=skipDelta(bytes_, pos);
814             }
815         }
816         // Drop down to linear search for the last few bytes.
817         // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
818         // and divides length by 2.
819         do {
820             if(inByte==(bytes_[pos++]&0xff)) {
821                 Result result;
822                 int node=bytes_[pos]&0xff;
823                 assert(node>=kMinValueLead);
824                 if((node&kValueIsFinal)!=0) {
825                     // Leave the final value for getValue() to read.
826                     result=Result.FINAL_VALUE;
827                 } else {
828                     // Use the non-final value as the jump delta.
829                     ++pos;
830                     // int delta=readValue(pos, node>>1);
831                     node>>=1;
832                     int delta;
833                     if(node<kMinTwoByteValueLead) {
834                         delta=node-kMinOneByteValueLead;
835                     } else if(node<kMinThreeByteValueLead) {
836                         delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
837                     } else if(node<kFourByteValueLead) {
838                         delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
839                         pos+=2;
840                     } else if(node==kFourByteValueLead) {
841                         delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
842                         pos+=3;
843                     } else {
844                         delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
845                         pos+=4;
846                     }
847                     // end readValue()
848                     pos+=delta;
849                     node=bytes_[pos]&0xff;
850                     result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
851                 }
852                 pos_=pos;
853                 return result;
854             }
855             --length;
856             pos=skipValue(bytes_, pos);
857         } while(length>1);
858         if(inByte==(bytes_[pos++]&0xff)) {
859             pos_=pos;
860             int node=bytes_[pos]&0xff;
861             return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
862         } else {
863             stop();
864             return Result.NO_MATCH;
865         }
866     }
867 
868     // Requires remainingLength_<0.
nextImpl(int pos, int inByte)869     private Result nextImpl(int pos, int inByte) {
870         for(;;) {
871             int node=bytes_[pos++]&0xff;
872             if(node<kMinLinearMatch) {
873                 return branchNext(pos, node, inByte);
874             } else if(node<kMinValueLead) {
875                 // Match the first of length+1 bytes.
876                 int length=node-kMinLinearMatch;  // Actual match length minus 1.
877                 if(inByte==(bytes_[pos++]&0xff)) {
878                     remainingMatchLength_=--length;
879                     pos_=pos;
880                     return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
881                             valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
882                 } else {
883                     // No match.
884                     break;
885                 }
886             } else if((node&kValueIsFinal)!=0) {
887                 // No further matching bytes.
888                 break;
889             } else {
890                 // Skip intermediate value.
891                 pos=skipValue(pos, node);
892                 // The next node must not also be a value node.
893                 assert((bytes_[pos]&0xff)<kMinValueLead);
894             }
895         }
896         stop();
897         return Result.NO_MATCH;
898     }
899 
900     // Helper functions for getUniqueValue().
901     // Recursively finds a unique value (or whether there is not a unique one)
902     // from a branch.
903     // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
904     // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
905     private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
906                                                   long uniqueValue) {
907         while(length>kMaxBranchLinearSubNodeLength) {
908             ++pos;  // ignore the comparison byte
909             uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
910             if(uniqueValue==0) {
911                 return 0;
912             }
913             length=length-(length>>1);
914             pos=skipDelta(bytes, pos);
915         }
916         do {
917             ++pos;  // ignore a comparison byte
918             // handle its value
919             int node=bytes[pos++]&0xff;
920             boolean isFinal=(node&kValueIsFinal)!=0;
921             int value=readValue(bytes, pos, node>>1);
922             pos=skipValue(pos, node);
923             if(isFinal) {
924                 if(uniqueValue!=0) {
925                     if(value!=(int)(uniqueValue>>1)) {
926                         return 0;
927                     }
928                 } else {
929                     uniqueValue=((long)value<<1)|1;
930                 }
931             } else {
932                 uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
933                 if(uniqueValue==0) {
934                     return 0;
935                 }
936             }
937         } while(--length>1);
938         // ignore the last comparison byte
939         return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
940     }
941     // Recursively finds a unique value (or whether there is not a unique one)
942     // starting from a position on a node lead byte.
943     // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
944     // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
findUniqueValue(byte[] bytes, int pos, long uniqueValue)945     private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
946         for(;;) {
947             int node=bytes[pos++]&0xff;
948             if(node<kMinLinearMatch) {
949                 if(node==0) {
950                     node=bytes[pos++]&0xff;
951                 }
952                 uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
953                 if(uniqueValue==0) {
954                     return 0;
955                 }
956                 pos=(int)(uniqueValue>>>33);
957             } else if(node<kMinValueLead) {
958                 // linear-match node
959                 pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
960             } else {
961                 boolean isFinal=(node&kValueIsFinal)!=0;
962                 int value=readValue(bytes, pos, node>>1);
963                 if(uniqueValue!=0) {
964                     if(value!=(int)(uniqueValue>>1)) {
965                         return 0;
966                     }
967                 } else {
968                     uniqueValue=((long)value<<1)|1;
969                 }
970                 if(isFinal) {
971                     return uniqueValue;
972                 }
973                 pos=skipValue(pos, node);
974             }
975         }
976     }
977 
978     // Helper functions for getNextBytes().
979     // getNextBytes() when pos is on a branch node.
getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out)980     private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
981         while(length>kMaxBranchLinearSubNodeLength) {
982             ++pos;  // ignore the comparison byte
983             getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
984             length=length-(length>>1);
985             pos=skipDelta(bytes, pos);
986         }
987         do {
988             append(out, bytes[pos++]&0xff);
989             pos=skipValue(bytes, pos);
990         } while(--length>1);
991         append(out, bytes[pos]&0xff);
992     }
append(Appendable out, int c)993     private static void append(Appendable out, int c) {
994         try {
995             out.append((char)c);
996         } catch(IOException e) {
997             throw new ICUUncheckedIOException(e);
998         }
999     }
1000 
1001     // BytesTrie data structure
1002     //
1003     // The trie consists of a series of byte-serialized nodes for incremental
1004     // string/byte sequence matching. The root node is at the beginning of the trie data.
1005     //
1006     // Types of nodes are distinguished by their node lead byte ranges.
1007     // After each node, except a final-value node, another node follows to
1008     // encode match values or continue matching further bytes.
1009     //
1010     // Node types:
1011     //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
1012     //    The value is for the string/byte sequence so far.
1013     //    One node bit indicates whether the value is final or whether
1014     //    matching continues with the next node.
1015     //  - Linear-match node: Matches a number of bytes.
1016     //  - Branch node: Branches to other nodes according to the current input byte.
1017     //    The node byte is the length of the branch (number of bytes to select from)
1018     //    minus 1. It is followed by a sub-node:
1019     //    - If the length is at most kMaxBranchLinearSubNodeLength, then
1020     //      there are length-1 (key, value) pairs and then one more comparison byte.
1021     //      If one of the key bytes matches, then the value is either a final value for
1022     //      the string/byte sequence so far, or a "jump" delta to the next node.
1023     //      If the last byte matches, then matching continues with the next node.
1024     //      (Values have the same encoding as value nodes.)
1025     //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
1026     //      there is one byte and one "jump" delta.
1027     //      If the input byte is less than the sub-node byte, then "jump" by delta to
1028     //      the next sub-node which will have a length of length/2.
1029     //      (The delta has its own compact encoding.)
1030     //      Otherwise, skip the "jump" delta to the next sub-node
1031     //      which will have a length of length-length/2.
1032 
1033     // Node lead byte values.
1034 
1035     // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
1036     // the length is one more than the next byte.
1037 
1038     // For a branch sub-node with at most this many entries, we drop down
1039     // to a linear search.
1040     /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
1041 
1042     // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
1043     /*package*/ static final int kMinLinearMatch=0x10;
1044     /*package*/ static final int kMaxLinearMatchLength=0x10;
1045 
1046     // 20..ff: Variable-length value node.
1047     // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
1048     // Then shift-right by 1 bit.
1049     // The remaining lead byte value indicates the number of following bytes (0..4)
1050     // and contains the value's top bits.
1051     /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
1052     // It is a final value if bit 0 is set.
1053     private static final int kValueIsFinal=1;
1054 
1055     // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1056     /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1057     /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1058 
1059     /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1060     /*package*/ static final int kMaxTwoByteValue=0x1aff;
1061 
1062     /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1063     /*package*/ static final int kFourByteValueLead=0x7e;
1064 
1065     // A little more than Unicode code points. (0x11ffff)
1066     /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1067 
1068     /*package*/ static final int kFiveByteValueLead=0x7f;
1069 
1070     // Compact delta integers.
1071     /*package*/ static final int kMaxOneByteDelta=0xbf;
1072     /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1073     /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1074     /*package*/ static final int kFourByteDeltaLead=0xfe;
1075     /*package*/ static final int kFiveByteDeltaLead=0xff;
1076 
1077     /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1078     /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1079 
1080     // Fixed value referencing the BytesTrie bytes.
1081     private byte[] bytes_;
1082     private int root_;
1083 
1084     // Iterator variables.
1085 
1086     // Index of next trie byte to read. Negative if no more matches.
1087     private int pos_;
1088     // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1089     private int remainingMatchLength_;
1090 };
1091