• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 /*
5 *******************************************************************************
6 *   Copyright (C) 2010-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 *   created on: 2010nov23
10 *   created by: Markus W. Scherer
11 *   ported from ICU4C bytestrie.h/.cpp
12 */
13 package android.icu.util;
14 
15 import java.io.IOException;
16 import java.nio.ByteBuffer;
17 import java.util.ArrayList;
18 import java.util.NoSuchElementException;
19 
20 /**
21  * Light-weight, non-const reader class for a BytesTrie.
22  * Traverses a byte-serialized data structure with minimal state,
23  * for mapping byte sequences to non-negative integer values.
24  *
25  * <p>This class is not intended for public subclassing.
26  *
27  * @author Markus W. Scherer
28  * @hide Only a subset of ICU is exposed in Android
29  */
30 public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
31     /**
32      * Constructs a BytesTrie reader instance.
33      *
34      * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
35      * with the offset indicating the first byte of that sequence.
36      * The BytesTrie object will not read more bytes than
37      * the BytesTrieBuilder generated in the corresponding build() call.
38      *
39      * <p>The array is not copied/cloned and must not be modified while
40      * the BytesTrie object is in use.
41      *
42      * @param trieBytes Bytes array that contains the serialized trie.
43      * @param offset Root offset of the trie in the array.
44      */
BytesTrie(byte[] trieBytes, int offset)45     public BytesTrie(byte[] trieBytes, int offset) {
46         bytes_=trieBytes;
47         pos_=root_=offset;
48         remainingMatchLength_=-1;
49     }
50 
51     /**
52      * Copy constructor.
53      * Makes a shallow copy of the other trie reader object and its state.
54      * Does not copy the byte array which will be shared.
55      * Same as clone() but without the throws clause.
56      */
BytesTrie(BytesTrie other)57     public BytesTrie(BytesTrie other) {
58         bytes_ = other.bytes_;
59         root_ = other.root_;
60         pos_ = other.pos_;
61         remainingMatchLength_ = other.remainingMatchLength_;
62     }
63 
64     /**
65      * Clones this trie reader object and its state,
66      * but not the byte array which will be shared.
67      * @return A shallow clone of this trie.
68      */
69     @Override
clone()70     public BytesTrie clone() throws CloneNotSupportedException {
71         return (BytesTrie) super.clone();  // A shallow copy is just what we need.
72     }
73 
74     /**
75      * Resets this trie to its initial state.
76      * @return this
77      */
reset()78     public BytesTrie reset() {
79         pos_=root_;
80         remainingMatchLength_=-1;
81         return this;
82     }
83 
84     /**
85      * Returns the state of this trie as a 64-bit integer.
86      * The state value is never 0.
87      *
88      * @return opaque state value
89      * @see #resetToState64
90      */
getState64()91     public long getState64() {
92         return ((long)remainingMatchLength_ << 32) | pos_;
93     }
94 
95     /**
96      * Resets this trie to the saved state.
97      * Unlike {@link #resetToState(State)}, the 64-bit state value
98      * must be from {@link #getState64()} from the same trie object or
99      * from one initialized the exact same way.
100      * Because of no validation, this method is faster.
101      *
102      * @param state The opaque trie state value from getState64().
103      * @return this
104      * @see #getState64
105      * @see #resetToState
106      * @see #reset
107      */
resetToState64(long state)108     public BytesTrie resetToState64(long state) {
109         remainingMatchLength_ = (int)(state >> 32);
110         pos_ = (int)state;
111         return this;
112     }
113 
114     /**
115      * BytesTrie state object, for saving a trie's current state
116      * and resetting the trie back to this state later.
117      * @hide Only a subset of ICU is exposed in Android
118      */
119     public static final class State {
120         /**
121          * Constructs an empty State.
122          */
State()123         public State() {}
124         private byte[] bytes;
125         private int root;
126         private int pos;
127         private int remainingMatchLength;
128     }
129 
130     /**
131      * Saves the state of this trie.
132      * @param state The State object to hold the trie's state.
133      * @return this
134      * @see #resetToState
135      */
saveState(State state)136     public BytesTrie saveState(State state) /*const*/ {
137         state.bytes=bytes_;
138         state.root=root_;
139         state.pos=pos_;
140         state.remainingMatchLength=remainingMatchLength_;
141         return this;
142     }
143 
144     /**
145      * Resets this trie to the saved state.
146      * Slower than {@link #resetToState64(long)} which does not validate the state value.
147      *
148      * @param state The State object which holds a saved trie state.
149      * @return this
150      * @throws IllegalArgumentException if the state object contains no state,
151      *         or the state of a different trie
152      * @see #saveState
153      * @see #reset
154      */
resetToState(State state)155     public BytesTrie resetToState(State state) {
156         if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
157             pos_=state.pos;
158             remainingMatchLength_=state.remainingMatchLength;
159         } else {
160             throw new IllegalArgumentException("incompatible trie state");
161         }
162         return this;
163     }
164 
165     /**
166      * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
167      * @hide Only a subset of ICU is exposed in Android
168      */
169     public enum Result {
170         /**
171          * The input unit(s) did not continue a matching string.
172          * Once current()/next() return NO_MATCH,
173          * all further calls to current()/next() will also return NO_MATCH,
174          * until the trie is reset to its original state or to a saved state.
175          */
176         NO_MATCH,
177         /**
178          * The input unit(s) continued a matching string
179          * but there is no value for the string so far.
180          * (It is a prefix of a longer string.)
181          */
182         NO_VALUE,
183         /**
184          * The input unit(s) continued a matching string
185          * and there is a value for the string so far.
186          * This value will be returned by getValue().
187          * No further input byte/unit can continue a matching string.
188          */
189         FINAL_VALUE,
190         /**
191          * The input unit(s) continued a matching string
192          * and there is a value for the string so far.
193          * This value will be returned by getValue().
194          * Another input byte/unit can continue a matching string.
195          */
196         INTERMEDIATE_VALUE;
197 
198         // Note: The following methods assume the particular order
199         // of enum constants, treating the ordinal() values like bit sets.
200         // Do not reorder the enum constants!
201 
202         /**
203          * Same as (result!=NO_MATCH).
204          * @return true if the input bytes/units so far are part of a matching string/byte sequence.
205          */
matches()206         public boolean matches() { return this!=NO_MATCH; }
207 
208         /**
209          * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
210          * @return true if there is a value for the input bytes/units so far.
211          * @see #getValue
212          */
hasValue()213         public boolean hasValue() { return ordinal()>=2; }
214 
215         /**
216          * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
217          * @return true if another input byte/unit can continue a matching string.
218          */
hasNext()219         public boolean hasNext() { return (ordinal()&1)!=0; }
220     }
221 
222     /**
223      * Determines whether the byte sequence so far matches, whether it has a value,
224      * and whether another input byte can continue a matching byte sequence.
225      * @return The match/value Result.
226      */
current()227     public Result current() /*const*/ {
228         int pos=pos_;
229         if(pos<0) {
230             return Result.NO_MATCH;
231         } else {
232             int node;
233             return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
234                     valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
235         }
236     }
237 
238     /**
239      * Traverses the trie from the initial state for this input byte.
240      * Equivalent to reset().next(inByte).
241      * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
242      *               Values below -0x100 and above 0xff will never match.
243      * @return The match/value Result.
244      */
first(int inByte)245     public Result first(int inByte) {
246         remainingMatchLength_=-1;
247         if(inByte<0) {
248             inByte+=0x100;
249         }
250         return nextImpl(root_, inByte);
251     }
252 
253     /**
254      * Traverses the trie from the current state for this input byte.
255      * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
256      *               Values below -0x100 and above 0xff will never match.
257      * @return The match/value Result.
258      */
next(int inByte)259     public Result next(int inByte) {
260         int pos=pos_;
261         if(pos<0) {
262             return Result.NO_MATCH;
263         }
264         if(inByte<0) {
265             inByte+=0x100;
266         }
267         int length=remainingMatchLength_;  // Actual remaining match length minus 1.
268         if(length>=0) {
269             // Remaining part of a linear-match node.
270             if(inByte==(bytes_[pos++]&0xff)) {
271                 remainingMatchLength_=--length;
272                 pos_=pos;
273                 int node;
274                 return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
275                         valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
276             } else {
277                 stop();
278                 return Result.NO_MATCH;
279             }
280         }
281         return nextImpl(pos, inByte);
282     }
283 
284     /**
285      * Traverses the trie from the current state for this byte sequence.
286      * Equivalent to
287      * <pre>
288      * Result result=current();
289      * for(each c in s)
290      *   if(!result.hasNext()) return Result.NO_MATCH;
291      *   result=next(c);
292      * return result;
293      * </pre>
294      * @param s Contains a string or byte sequence.
295      * @param sIndex The start index of the byte sequence in s.
296      * @param sLimit The (exclusive) end index of the byte sequence in s.
297      * @return The match/value Result.
298      */
next(byte[] s, int sIndex, int sLimit)299     public Result next(byte[] s, int sIndex, int sLimit) {
300         if(sIndex>=sLimit) {
301             // Empty input.
302             return current();
303         }
304         int pos=pos_;
305         if(pos<0) {
306             return Result.NO_MATCH;
307         }
308         int length=remainingMatchLength_;  // Actual remaining match length minus 1.
309         for(;;) {
310             // Fetch the next input byte, if there is one.
311             // Continue a linear-match node.
312             byte inByte;
313             for(;;) {
314                 if(sIndex==sLimit) {
315                     remainingMatchLength_=length;
316                     pos_=pos;
317                     int node;
318                     return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
319                             valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
320                 }
321                 inByte=s[sIndex++];
322                 if(length<0) {
323                     remainingMatchLength_=length;
324                     break;
325                 }
326                 if(inByte!=bytes_[pos]) {
327                     stop();
328                     return Result.NO_MATCH;
329                 }
330                 ++pos;
331                 --length;
332             }
333             for(;;) {
334                 int node=bytes_[pos++]&0xff;
335                 if(node<kMinLinearMatch) {
336                     Result result=branchNext(pos, node, inByte&0xff);
337                     if(result==Result.NO_MATCH) {
338                         return Result.NO_MATCH;
339                     }
340                     // Fetch the next input byte, if there is one.
341                     if(sIndex==sLimit) {
342                         return result;
343                     }
344                     if(result==Result.FINAL_VALUE) {
345                         // No further matching bytes.
346                         stop();
347                         return Result.NO_MATCH;
348                     }
349                     inByte=s[sIndex++];
350                     pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
351                 } else if(node<kMinValueLead) {
352                     // Match length+1 bytes.
353                     length=node-kMinLinearMatch;  // Actual match length minus 1.
354                     if(inByte!=bytes_[pos]) {
355                         stop();
356                         return Result.NO_MATCH;
357                     }
358                     ++pos;
359                     --length;
360                     break;
361                 } else if((node&kValueIsFinal)!=0) {
362                     // No further matching bytes.
363                     stop();
364                     return Result.NO_MATCH;
365                 } else {
366                     // Skip intermediate value.
367                     pos=skipValue(pos, node);
368                     // The next node must not also be a value node.
369                     assert((bytes_[pos]&0xff)<kMinValueLead);
370                 }
371             }
372         }
373     }
374 
375     /**
376      * Returns a matching byte sequence's value if called immediately after
377      * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
378      * getValue() can be called multiple times.
379      *
380      * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
381      * @return The value for the byte sequence so far.
382      */
383     public int getValue() /*const*/ {
384         int pos=pos_;
385         int leadByte=bytes_[pos++]&0xff;
386         assert(leadByte>=kMinValueLead);
387         return readValue(bytes_, pos, leadByte>>1);
388     }
389 
390     /**
391      * Determines whether all byte sequences reachable from the current state
392      * map to the same value, and if so, returns that value.
393      * @return The unique value in bits 32..1 with bit 0 set,
394      *         if all byte sequences reachable from the current state
395      *         map to the same value; otherwise returns 0.
396      */
getUniqueValue()397     public long getUniqueValue() /*const*/ {
398         int pos=pos_;
399         if(pos<0) {
400             return 0;
401         }
402         // Skip the rest of a pending linear-match node.
403         long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
404         // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
405         return (uniqueValue<<31)>>31;
406     }
407 
408     /**
409      * Finds each byte which continues the byte sequence from the current state.
410      * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
411      * @param out Each next byte is 0-extended to a char and appended to this object.
412      *            (Only uses the out.append(c) method.)
413      * @return The number of bytes which continue the byte sequence from here.
414      */
getNextBytes(Appendable out)415     public int getNextBytes(Appendable out) /*const*/ {
416         int pos=pos_;
417         if(pos<0) {
418             return 0;
419         }
420         if(remainingMatchLength_>=0) {
421             append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
422             return 1;
423         }
424         int node=bytes_[pos++]&0xff;
425         if(node>=kMinValueLead) {
426             if((node&kValueIsFinal)!=0) {
427                 return 0;
428             } else {
429                 pos=skipValue(pos, node);
430                 node=bytes_[pos++]&0xff;
431                 assert(node<kMinValueLead);
432             }
433         }
434         if(node<kMinLinearMatch) {
435             if(node==0) {
436                 node=bytes_[pos++]&0xff;
437             }
438             getNextBranchBytes(bytes_, pos, ++node, out);
439             return node;
440         } else {
441             // First byte of the linear-match node.
442             append(out, bytes_[pos]&0xff);
443             return 1;
444         }
445     }
446 
447     /**
448      * Iterates from the current state of this trie.
449      * @return A new BytesTrie.Iterator.
450      */
451     @Override
452     public Iterator iterator() {
453         return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
454     }
455 
456     /**
457      * Iterates from the current state of this trie.
458      * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
459      *                        Otherwise, the iterator returns strings with this maximum length.
460      * @return A new BytesTrie.Iterator.
461      */
462     public Iterator iterator(int maxStringLength) {
463         return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
464     }
465 
466     /**
467      * Iterates from the root of a byte-serialized BytesTrie.
468      * @param trieBytes Bytes array that contains the serialized trie.
469      * @param offset Root offset of the trie in the array.
470      * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
471      *                        Otherwise, the iterator returns strings with this maximum length.
472      * @return A new BytesTrie.Iterator.
473      */
474     public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
475         return new Iterator(trieBytes, offset, -1, maxStringLength);
476     }
477 
478     /**
479      * Return value type for the Iterator.
480      * @hide Only a subset of ICU is exposed in Android
481      */
482     public static final class Entry {
483         private Entry(int capacity) {
484             bytes=new byte[capacity];
485         }
486 
487         /**
488          * @return The length of the byte sequence.
489          */
490         public int bytesLength() { return length; }
491         /**
492          * Returns a byte of the byte sequence.
493          * @param index An index into the byte sequence.
494          * @return The index-th byte sequence byte.
495          */
496         public byte byteAt(int index) { return bytes[index]; }
497         /**
498          * Copies the byte sequence into a byte array.
499          * @param dest Destination byte array.
500          * @param destOffset Starting offset to where in dest the byte sequence is copied.
501          */
502         public void copyBytesTo(byte[] dest, int destOffset) {
503             System.arraycopy(bytes, 0, dest, destOffset, length);
504         }
505         /**
506          * @return The byte sequence as a read-only ByteBuffer.
507          */
508         public ByteBuffer bytesAsByteBuffer() {
509             return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
510         }
511 
512         /**
513          * The value associated with the byte sequence.
514          */
515         public int value;
516 
517         private void ensureCapacity(int len) {
518             if(bytes.length<len) {
519                 byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
520                 System.arraycopy(bytes, 0, newBytes, 0, length);
521                 bytes=newBytes;
522             }
523         }
524         private void append(byte b) {
525             ensureCapacity(length+1);
526             bytes[length++]=b;
527         }
528         private void append(byte[] b, int off, int len) {
529             ensureCapacity(length+len);
530             System.arraycopy(b, off, bytes, length, len);
531             length+=len;
532         }
533         private void truncateString(int newLength) { length=newLength; }
534 
535         private byte[] bytes;
536         private int length;
537     }
538 
539     /**
540      * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
541      * @hide Only a subset of ICU is exposed in Android
542      */
543     public static final class Iterator implements java.util.Iterator<Entry> {
544         private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
545             bytes_=trieBytes;
546             pos_=initialPos_=offset;
547             remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
548             maxLength_=maxStringLength;
549             entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
550             int length=remainingMatchLength_;  // Actual remaining match length minus 1.
551             if(length>=0) {
552                 // Pending linear-match node, append remaining bytes to entry_.
553                 ++length;
554                 if(maxLength_>0 && length>maxLength_) {
555                     length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
556                 }
557                 entry_.append(bytes_, pos_, length);
558                 pos_+=length;
559                 remainingMatchLength_-=length;
560             }
561         }
562 
563         /**
564          * Resets this iterator to its initial state.
565          * @return this
566          */
reset()567         public Iterator reset() {
568             pos_=initialPos_;
569             remainingMatchLength_=initialRemainingMatchLength_;
570             int length=remainingMatchLength_+1;  // Remaining match length.
571             if(maxLength_>0 && length>maxLength_) {
572                 length=maxLength_;
573             }
574             entry_.truncateString(length);
575             pos_+=length;
576             remainingMatchLength_-=length;
577             stack_.clear();
578             return this;
579         }
580 
581         /**
582          * @return true if there are more elements.
583          */
584         @Override
hasNext()585         public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
586 
587         /**
588          * Finds the next (byte sequence, value) pair if there is one.
589          *
590          * If the byte sequence is truncated to the maximum length and does not
591          * have a real value, then the value is set to -1.
592          * In this case, this "not a real value" is indistinguishable from
593          * a real value of -1.
594          * @return An Entry with the string and value of the next element.
595          * @throws NoSuchElementException - iteration has no more elements.
596          */
597         @Override
next()598         public Entry next() {
599             int pos=pos_;
600             if(pos<0) {
601                 if(stack_.isEmpty()) {
602                     throw new NoSuchElementException();
603                 }
604                 // Pop the state off the stack and continue with the next outbound edge of
605                 // the branch node.
606                 long top=stack_.remove(stack_.size()-1);
607                 int length=(int)top;
608                 pos=(int)(top>>32);
609                 entry_.truncateString(length&0xffff);
610                 length>>>=16;
611                 if(length>1) {
612                     pos=branchNext(pos, length);
613                     if(pos<0) {
614                         return entry_;  // Reached a final value.
615                     }
616                 } else {
617                     entry_.append(bytes_[pos++]);
618                 }
619             }
620             if(remainingMatchLength_>=0) {
621                 // We only get here if we started in a pending linear-match node
622                 // with more than maxLength remaining bytes.
623                 return truncateAndStop();
624             }
625             for(;;) {
626                 int node=bytes_[pos++]&0xff;
627                 if(node>=kMinValueLead) {
628                     // Deliver value for the byte sequence so far.
629                     boolean isFinal=(node&kValueIsFinal)!=0;
630                     entry_.value=readValue(bytes_, pos, node>>1);
631                     if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
632                         pos_=-1;
633                     } else {
634                         pos_=skipValue(pos, node);
635                     }
636                     return entry_;
637                 }
638                 if(maxLength_>0 && entry_.length==maxLength_) {
639                     return truncateAndStop();
640                 }
641                 if(node<kMinLinearMatch) {
642                     if(node==0) {
643                         node=bytes_[pos++]&0xff;
644                     }
645                     pos=branchNext(pos, node+1);
646                     if(pos<0) {
647                         return entry_;  // Reached a final value.
648                     }
649                 } else {
650                     // Linear-match node, append length bytes to entry_.
651                     int length=node-kMinLinearMatch+1;
652                     if(maxLength_>0 && entry_.length+length>maxLength_) {
653                         entry_.append(bytes_, pos, maxLength_-entry_.length);
654                         return truncateAndStop();
655                     }
656                     entry_.append(bytes_, pos, length);
657                     pos+=length;
658                 }
659             }
660         }
661 
662         /**
663          * Iterator.remove() is not supported.
664          * @throws UnsupportedOperationException (always)
665          */
666         @Override
remove()667         public void remove() {
668             throw new UnsupportedOperationException();
669         }
670 
truncateAndStop()671         private Entry truncateAndStop() {
672             pos_=-1;
673             entry_.value=-1;  // no real value for str
674             return entry_;
675         }
676 
branchNext(int pos, int length)677         private int branchNext(int pos, int length) {
678             while(length>kMaxBranchLinearSubNodeLength) {
679                 ++pos;  // ignore the comparison byte
680                 // Push state for the greater-or-equal edge.
681                 stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
682                 // Follow the less-than edge.
683                 length>>=1;
684                 pos=jumpByDelta(bytes_, pos);
685             }
686             // List of key-value pairs where values are either final values or jump deltas.
687             // Read the first (key, value) pair.
688             byte trieByte=bytes_[pos++];
689             int node=bytes_[pos++]&0xff;
690             boolean isFinal=(node&kValueIsFinal)!=0;
691             int value=readValue(bytes_, pos, node>>1);
692             pos=skipValue(pos, node);
693             stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
694             entry_.append(trieByte);
695             if(isFinal) {
696                 pos_=-1;
697                 entry_.value=value;
698                 return -1;
699             } else {
700                 return pos+value;
701             }
702         }
703 
704         private byte[] bytes_;
705         private int pos_;
706         private int initialPos_;
707         private int remainingMatchLength_;
708         private int initialRemainingMatchLength_;
709 
710         private int maxLength_;
711         private Entry entry_;
712 
713         // The stack stores longs for backtracking to another
714         // outbound edge of a branch node.
715         // Each long has the offset from bytes_ in bits 62..32,
716         // the entry_.stringLength() from before the node in bits 15..0,
717         // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
718         // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
719         // but the code looks more confusing that way.)
720         private ArrayList<Long> stack_=new ArrayList<>();
721     }
722 
stop()723     private void stop() {
724         pos_=-1;
725     }
726 
727     // Reads a compact 32-bit integer.
728     // pos is already after the leadByte, and the lead byte is already shifted right by 1.
readValue(byte[] bytes, int pos, int leadByte)729     private static int readValue(byte[] bytes, int pos, int leadByte) {
730         int value;
731         if(leadByte<kMinTwoByteValueLead) {
732             value=leadByte-kMinOneByteValueLead;
733         } else if(leadByte<kMinThreeByteValueLead) {
734             value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
735         } else if(leadByte<kFourByteValueLead) {
736             value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
737         } else if(leadByte==kFourByteValueLead) {
738             value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
739         } else {
740             value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
741         }
742         return value;
743     }
skipValue(int pos, int leadByte)744     private static int skipValue(int pos, int leadByte) {
745         assert(leadByte>=kMinValueLead);
746         if(leadByte>=(kMinTwoByteValueLead<<1)) {
747             if(leadByte<(kMinThreeByteValueLead<<1)) {
748                 ++pos;
749             } else if(leadByte<(kFourByteValueLead<<1)) {
750                 pos+=2;
751             } else {
752                 pos+=3+((leadByte>>1)&1);
753             }
754         }
755         return pos;
756     }
skipValue(byte[] bytes, int pos)757     private static int skipValue(byte[] bytes, int pos) {
758         int leadByte=bytes[pos++]&0xff;
759         return skipValue(pos, leadByte);
760     }
761 
762     /**
763      * Reads a jump delta and jumps.
764      * @deprecated This API is ICU internal only.
765      * @hide draft / provisional / internal are hidden on Android
766      */
767     @Deprecated
jumpByDelta(byte[] bytes, int pos)768     public static int jumpByDelta(byte[] bytes, int pos) {
769         int delta=bytes[pos++]&0xff;
770         if(delta<kMinTwoByteDeltaLead) {
771             // nothing to do
772         } else if(delta<kMinThreeByteDeltaLead) {
773             delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
774         } else if(delta<kFourByteDeltaLead) {
775             delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
776             pos+=2;
777         } else if(delta==kFourByteDeltaLead) {
778             delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
779             pos+=3;
780         } else {
781             delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
782             pos+=4;
783         }
784         return pos+delta;
785     }
786 
skipDelta(byte[] bytes, int pos)787     private static int skipDelta(byte[] bytes, int pos) {
788         int delta=bytes[pos++]&0xff;
789         if(delta>=kMinTwoByteDeltaLead) {
790             if(delta<kMinThreeByteDeltaLead) {
791                 ++pos;
792             } else if(delta<kFourByteDeltaLead) {
793                 pos+=2;
794             } else {
795                 pos+=3+(delta&1);
796             }
797         }
798         return pos;
799     }
800 
801     private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
802 
803     // Handles a branch node for both next(byte) and next(string).
branchNext(int pos, int length, int inByte)804     private Result branchNext(int pos, int length, int inByte) {
805         // Branch according to the current byte.
806         if(length==0) {
807             length=bytes_[pos++]&0xff;
808         }
809         ++length;
810         // The length of the branch is the number of bytes to select from.
811         // The data structure encodes a binary search.
812         while(length>kMaxBranchLinearSubNodeLength) {
813             if(inByte<(bytes_[pos++]&0xff)) {
814                 length>>=1;
815                 pos=jumpByDelta(bytes_, pos);
816             } else {
817                 length=length-(length>>1);
818                 pos=skipDelta(bytes_, pos);
819             }
820         }
821         // Drop down to linear search for the last few bytes.
822         // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
823         // and divides length by 2.
824         do {
825             if(inByte==(bytes_[pos++]&0xff)) {
826                 Result result;
827                 int node=bytes_[pos]&0xff;
828                 assert(node>=kMinValueLead);
829                 if((node&kValueIsFinal)!=0) {
830                     // Leave the final value for getValue() to read.
831                     result=Result.FINAL_VALUE;
832                 } else {
833                     // Use the non-final value as the jump delta.
834                     ++pos;
835                     // int delta=readValue(pos, node>>1);
836                     node>>=1;
837                     int delta;
838                     if(node<kMinTwoByteValueLead) {
839                         delta=node-kMinOneByteValueLead;
840                     } else if(node<kMinThreeByteValueLead) {
841                         delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
842                     } else if(node<kFourByteValueLead) {
843                         delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
844                         pos+=2;
845                     } else if(node==kFourByteValueLead) {
846                         delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
847                         pos+=3;
848                     } else {
849                         delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
850                         pos+=4;
851                     }
852                     // end readValue()
853                     pos+=delta;
854                     node=bytes_[pos]&0xff;
855                     result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
856                 }
857                 pos_=pos;
858                 return result;
859             }
860             --length;
861             pos=skipValue(bytes_, pos);
862         } while(length>1);
863         if(inByte==(bytes_[pos++]&0xff)) {
864             pos_=pos;
865             int node=bytes_[pos]&0xff;
866             return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
867         } else {
868             stop();
869             return Result.NO_MATCH;
870         }
871     }
872 
873     // Requires remainingLength_<0.
nextImpl(int pos, int inByte)874     private Result nextImpl(int pos, int inByte) {
875         for(;;) {
876             int node=bytes_[pos++]&0xff;
877             if(node<kMinLinearMatch) {
878                 return branchNext(pos, node, inByte);
879             } else if(node<kMinValueLead) {
880                 // Match the first of length+1 bytes.
881                 int length=node-kMinLinearMatch;  // Actual match length minus 1.
882                 if(inByte==(bytes_[pos++]&0xff)) {
883                     remainingMatchLength_=--length;
884                     pos_=pos;
885                     return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
886                             valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
887                 } else {
888                     // No match.
889                     break;
890                 }
891             } else if((node&kValueIsFinal)!=0) {
892                 // No further matching bytes.
893                 break;
894             } else {
895                 // Skip intermediate value.
896                 pos=skipValue(pos, node);
897                 // The next node must not also be a value node.
898                 assert((bytes_[pos]&0xff)<kMinValueLead);
899             }
900         }
901         stop();
902         return Result.NO_MATCH;
903     }
904 
905     // Helper functions for getUniqueValue().
906     // Recursively finds a unique value (or whether there is not a unique one)
907     // from a branch.
908     // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
909     // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
910     private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
911                                                   long uniqueValue) {
912         while(length>kMaxBranchLinearSubNodeLength) {
913             ++pos;  // ignore the comparison byte
914             uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
915             if(uniqueValue==0) {
916                 return 0;
917             }
918             length=length-(length>>1);
919             pos=skipDelta(bytes, pos);
920         }
921         do {
922             ++pos;  // ignore a comparison byte
923             // handle its value
924             int node=bytes[pos++]&0xff;
925             boolean isFinal=(node&kValueIsFinal)!=0;
926             int value=readValue(bytes, pos, node>>1);
927             pos=skipValue(pos, node);
928             if(isFinal) {
929                 if(uniqueValue!=0) {
930                     if(value!=(int)(uniqueValue>>1)) {
931                         return 0;
932                     }
933                 } else {
934                     uniqueValue=((long)value<<1)|1;
935                 }
936             } else {
937                 uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
938                 if(uniqueValue==0) {
939                     return 0;
940                 }
941             }
942         } while(--length>1);
943         // ignore the last comparison byte
944         return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
945     }
946     // Recursively finds a unique value (or whether there is not a unique one)
947     // starting from a position on a node lead byte.
948     // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
949     // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
findUniqueValue(byte[] bytes, int pos, long uniqueValue)950     private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
951         for(;;) {
952             int node=bytes[pos++]&0xff;
953             if(node<kMinLinearMatch) {
954                 if(node==0) {
955                     node=bytes[pos++]&0xff;
956                 }
957                 uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
958                 if(uniqueValue==0) {
959                     return 0;
960                 }
961                 pos=(int)(uniqueValue>>>33);
962             } else if(node<kMinValueLead) {
963                 // linear-match node
964                 pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
965             } else {
966                 boolean isFinal=(node&kValueIsFinal)!=0;
967                 int value=readValue(bytes, pos, node>>1);
968                 if(uniqueValue!=0) {
969                     if(value!=(int)(uniqueValue>>1)) {
970                         return 0;
971                     }
972                 } else {
973                     uniqueValue=((long)value<<1)|1;
974                 }
975                 if(isFinal) {
976                     return uniqueValue;
977                 }
978                 pos=skipValue(pos, node);
979             }
980         }
981     }
982 
983     // Helper functions for getNextBytes().
984     // getNextBytes() when pos is on a branch node.
getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out)985     private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
986         while(length>kMaxBranchLinearSubNodeLength) {
987             ++pos;  // ignore the comparison byte
988             getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
989             length=length-(length>>1);
990             pos=skipDelta(bytes, pos);
991         }
992         do {
993             append(out, bytes[pos++]&0xff);
994             pos=skipValue(bytes, pos);
995         } while(--length>1);
996         append(out, bytes[pos]&0xff);
997     }
append(Appendable out, int c)998     private static void append(Appendable out, int c) {
999         try {
1000             out.append((char)c);
1001         } catch(IOException e) {
1002             throw new ICUUncheckedIOException(e);
1003         }
1004     }
1005 
1006     // BytesTrie data structure
1007     //
1008     // The trie consists of a series of byte-serialized nodes for incremental
1009     // string/byte sequence matching. The root node is at the beginning of the trie data.
1010     //
1011     // Types of nodes are distinguished by their node lead byte ranges.
1012     // After each node, except a final-value node, another node follows to
1013     // encode match values or continue matching further bytes.
1014     //
1015     // Node types:
1016     //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
1017     //    The value is for the string/byte sequence so far.
1018     //    One node bit indicates whether the value is final or whether
1019     //    matching continues with the next node.
1020     //  - Linear-match node: Matches a number of bytes.
1021     //  - Branch node: Branches to other nodes according to the current input byte.
1022     //    The node byte is the length of the branch (number of bytes to select from)
1023     //    minus 1. It is followed by a sub-node:
1024     //    - If the length is at most kMaxBranchLinearSubNodeLength, then
1025     //      there are length-1 (key, value) pairs and then one more comparison byte.
1026     //      If one of the key bytes matches, then the value is either a final value for
1027     //      the string/byte sequence so far, or a "jump" delta to the next node.
1028     //      If the last byte matches, then matching continues with the next node.
1029     //      (Values have the same encoding as value nodes.)
1030     //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
1031     //      there is one byte and one "jump" delta.
1032     //      If the input byte is less than the sub-node byte, then "jump" by delta to
1033     //      the next sub-node which will have a length of length/2.
1034     //      (The delta has its own compact encoding.)
1035     //      Otherwise, skip the "jump" delta to the next sub-node
1036     //      which will have a length of length-length/2.
1037 
1038     // Node lead byte values.
1039 
1040     // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
1041     // the length is one more than the next byte.
1042 
1043     // For a branch sub-node with at most this many entries, we drop down
1044     // to a linear search.
1045     /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
1046 
1047     // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
1048     /*package*/ static final int kMinLinearMatch=0x10;
1049     /*package*/ static final int kMaxLinearMatchLength=0x10;
1050 
1051     // 20..ff: Variable-length value node.
1052     // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
1053     // Then shift-right by 1 bit.
1054     // The remaining lead byte value indicates the number of following bytes (0..4)
1055     // and contains the value's top bits.
1056     /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
1057     // It is a final value if bit 0 is set.
1058     private static final int kValueIsFinal=1;
1059 
1060     // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1061     /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1062     /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1063 
1064     /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1065     /*package*/ static final int kMaxTwoByteValue=0x1aff;
1066 
1067     /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1068     /*package*/ static final int kFourByteValueLead=0x7e;
1069 
1070     // A little more than Unicode code points. (0x11ffff)
1071     /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1072 
1073     /*package*/ static final int kFiveByteValueLead=0x7f;
1074 
1075     // Compact delta integers.
1076     /*package*/ static final int kMaxOneByteDelta=0xbf;
1077     /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1078     /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1079     /*package*/ static final int kFourByteDeltaLead=0xfe;
1080     /*package*/ static final int kFiveByteDeltaLead=0xff;
1081 
1082     /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1083     /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1084 
1085     // Fixed value referencing the BytesTrie bytes.
1086     private byte[] bytes_;
1087     private int root_;
1088 
1089     // Iterator variables.
1090 
1091     // Index of next trie byte to read. Negative if no more matches.
1092     private int pos_;
1093     // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1094     private int remainingMatchLength_;
1095 };
1096