• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2011-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   created on: 2011jan05
9 *   created by: Markus W. Scherer
10 *   ported from ICU4C stringtriebuilder.h/.cpp
11 */
12 package com.ibm.icu.util;
13 
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 
17 /**
18  * Base class for string trie builder classes.
19  *
20  * <p>This class is not intended for public subclassing.
21  *
22  * @author Markus W. Scherer
23  * @stable ICU 4.8
24  */
25 public abstract class StringTrieBuilder {
26     /**
27      * Build options for BytesTrieBuilder and CharsTrieBuilder.
28      * @stable ICU 4.8
29      */
30     public enum Option {
31         /**
32          * Builds a trie quickly.
33          * @stable ICU 4.8
34          */
35         FAST,
36         /**
37          * Builds a trie more slowly, attempting to generate
38          * a shorter but equivalent serialization.
39          * This build option also uses more memory.
40          *
41          * <p>This option can be effective when many integer values are the same
42          * and string/byte sequence suffixes can be shared.
43          * Runtime speed is not expected to improve.
44          * @stable ICU 4.8
45          */
46         SMALL
47     }
48 
49     /**
50      * @internal
51      * @deprecated This API is ICU internal only.
52      */
53     @Deprecated
StringTrieBuilder()54     protected StringTrieBuilder() {}
55 
56     /**
57      * @internal
58      * @deprecated This API is ICU internal only.
59      */
60     @Deprecated
addImpl(CharSequence s, int value)61     protected void addImpl(CharSequence s, int value) {
62         if(state!=State.ADDING) {
63             // Cannot add elements after building.
64             throw new IllegalStateException("Cannot add (string, value) pairs after build().");
65         }
66         if(s.length()>0xffff) {
67             // Too long: Limited by iterator internals, and by builder recursion depth.
68             throw new IndexOutOfBoundsException("The maximum string length is 0xffff.");
69         }
70         if(root==null) {
71             root=createSuffixNode(s, 0, value);
72         } else {
73             root=root.add(this, s, 0, value);
74         }
75     }
76 
77     /**
78      * @internal
79      * @deprecated This API is ICU internal only.
80      */
81     @Deprecated
buildImpl(Option buildOption)82     protected final void buildImpl(Option buildOption) {
83         switch(state) {
84         case ADDING:
85             if(root==null) {
86                 throw new IndexOutOfBoundsException("No (string, value) pairs were added.");
87             }
88             if(buildOption==Option.FAST) {
89                 state=State.BUILDING_FAST;
90                 // Building "fast" is somewhat faster (25..50% in some test)
91                 // because it makes registerNode() return the input node
92                 // rather than checking for duplicates.
93                 // As a result, we sometimes write larger trie serializations.
94                 //
95                 // In either case we need to fix-up linear-match nodes (for their maximum length)
96                 // and branch nodes (turning dynamic branch nodes into trees of
97                 // runtime-equivalent nodes), but the HashMap/hashCode()/equals() are omitted for
98                 // nodes other than final values.
99             } else {
100                 state=State.BUILDING_SMALL;
101             }
102             break;
103         case BUILDING_FAST:
104         case BUILDING_SMALL:
105             // Building must have failed.
106             throw new IllegalStateException("Builder failed and must be clear()ed.");
107         case BUILT:
108             return;  // Nothing more to do.
109         }
110         // Implementation note:
111         // We really build three versions of the trie.
112         // The first is a fully dynamic trie, built successively by addImpl().
113         // Then we call root.register() to turn it into a tree of nodes
114         // which is 1:1 equivalent to the runtime data structure.
115         // Finally, root.markRightEdgesFirst() and root.write() write that serialized form.
116         root=root.register(this);
117         root.markRightEdgesFirst(-1);
118         root.write(this);
119         state=State.BUILT;
120     }
121 
122     /**
123      * @internal
124      * @deprecated This API is ICU internal only.
125      */
126     @Deprecated
clearImpl()127     protected void clearImpl() {
128         strings.setLength(0);
129         nodes.clear();
130         root=null;
131         state=State.ADDING;
132     }
133 
134     /**
135      * Makes sure that there is only one unique node registered that is
136      * equivalent to newNode, unless BUILDING_FAST.
137      * @param newNode Input node. The builder takes ownership.
138      * @return newNode if it is the first of its kind, or
139      *         an equivalent node if newNode is a duplicate.
140      */
registerNode(Node newNode)141     private final Node registerNode(Node newNode) {
142         if(state==State.BUILDING_FAST) {
143             return newNode;
144         }
145         // BUILDING_SMALL
146         Node oldNode=nodes.get(newNode);
147         if(oldNode!=null) {
148             return oldNode;
149         }
150         // If put() returns a non-null value from an equivalent, previously
151         // registered node, then get() failed to find that and we will leak newNode.
152         oldNode=nodes.put(newNode, newNode);
153         assert(oldNode==null);
154         return newNode;
155     }
156 
157     /**
158      * Makes sure that there is only one unique FinalValueNode registered
159      * with this value.
160      * Avoids creating a node if the value is a duplicate.
161      * @param value A final value.
162      * @return A FinalValueNode with the given value.
163      */
registerFinalValue(int value)164     private final ValueNode registerFinalValue(int value) {
165         // We always register final values because while ADDING
166         // we do not know yet whether we will build fast or small.
167         lookupFinalValueNode.setFinalValue(value);
168         Node oldNode=nodes.get(lookupFinalValueNode);
169         if(oldNode!=null) {
170             return (ValueNode)oldNode;
171         }
172         ValueNode newNode=new ValueNode(value);
173         // If put() returns a non-null value from an equivalent, previously
174         // registered node, then get() failed to find that and we will leak newNode.
175         oldNode=nodes.put(newNode, newNode);
176         assert(oldNode==null);
177         return newNode;
178     }
179 
180     private static abstract class Node {
Node()181         public Node() {
182             offset=0;
183         }
184         // hashCode() and equals() for use with registerNode() and the nodes hash.
185         @Override
hashCode()186         public abstract int hashCode() /*const*/;
187         // Base class equals() compares the actual class types.
188         @Override
equals(Object other)189         public boolean equals(Object other) {
190             return this==other || this.getClass()==other.getClass();
191         }
192         /**
193          * Recursive method for adding a new (string, value) pair.
194          * Matches the remaining part of s from start,
195          * and adds a new node where there is a mismatch.
196          * @return this or a replacement Node
197          */
add(StringTrieBuilder builder, CharSequence s, int start, int sValue)198         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
199             return this;
200         }
201         /**
202          * Recursive method for registering unique nodes,
203          * after all (string, value) pairs have been added.
204          * Final-value nodes are pre-registered while add()ing (string, value) pairs.
205          * Other nodes created while add()ing registerNode() themselves later
206          * and might replace themselves with new types of nodes for write()ing.
207          * @return The registered version of this node which implements write().
208          */
register(StringTrieBuilder builder)209         public Node register(StringTrieBuilder builder) { return this; }
210         /**
211          * Traverses the Node graph and numbers branch edges, with rightmost edges first.
212          * This is to avoid writing a duplicate node twice.
213          *
214          * Branch nodes in this trie data structure are not symmetric.
215          * Most branch edges "jump" to other nodes but the rightmost branch edges
216          * just continue without a jump.
217          * Therefore, write() must write the rightmost branch edge last
218          * (trie units are written backwards), and must write it at that point even if
219          * it is a duplicate of a node previously written elsewhere.
220          *
221          * This function visits and marks right branch edges first.
222          * Edges are numbered with increasingly negative values because we share the
223          * offset field which gets positive values when nodes are written.
224          * A branch edge also remembers the first number for any of its edges.
225          *
226          * When a further-left branch edge has a number in the range of the rightmost
227          * edge's numbers, then it will be written as part of the required right edge
228          * and we can avoid writing it first.
229          *
230          * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
231          * edge numbers.
232          *
233          * @param edgeNumber The first edge number for this node and its sub-nodes.
234          * @return An edge number that is at least the maximum-negative
235          *         of the input edge number and the numbers of this node and all of its sub-nodes.
236          */
markRightEdgesFirst(int edgeNumber)237         public int markRightEdgesFirst(int edgeNumber) {
238             if(offset==0) {
239                 offset=edgeNumber;
240             }
241             return edgeNumber;
242         }
243         // write() must set the offset to a positive value.
write(StringTrieBuilder builder)244         public abstract void write(StringTrieBuilder builder);
245         // See markRightEdgesFirst.
writeUnlessInsideRightEdge(int firstRight, int lastRight, StringTrieBuilder builder)246         public final void writeUnlessInsideRightEdge(int firstRight, int lastRight,
247                                                StringTrieBuilder builder) {
248             // Note: Edge numbers are negative, lastRight<=firstRight.
249             // If offset>0 then this node and its sub-nodes have been written already
250             // and we need not write them again.
251             // If this node is part of the unwritten right branch edge,
252             // then we wait until that is written.
253             if(offset<0 && (offset<lastRight || firstRight<offset)) {
254                 write(builder);
255             }
256         }
getOffset()257         public final int getOffset() /*const*/ { return offset; }
258 
259         protected int offset;
260     }
261 
262     // Used directly for final values, and as as a superclass for
263     // match nodes with intermediate values.
264     private static class ValueNode extends Node {
ValueNode()265         public ValueNode() {}
ValueNode(int v)266         public ValueNode(int v) {
267             hasValue=true;
268             value=v;
269         }
setValue(int v)270         public final void setValue(int v) {
271             assert(!hasValue);
272             hasValue=true;
273             value=v;
274         }
setFinalValue(int v)275         private void setFinalValue(int v) {
276             hasValue=true;
277             value=v;
278         }
279         @Override
hashCode()280         public int hashCode() /*const*/ {
281             int hash=0x111111;
282             if(hasValue) {
283                 hash=hash*37+value;
284             }
285             return hash;
286         }
287         @Override
equals(Object other)288         public boolean equals(Object other) {
289             if(this==other) {
290                 return true;
291             }
292             if(!super.equals(other)) {
293                 return false;
294             }
295             ValueNode o=(ValueNode)other;
296             return hasValue==o.hasValue && (!hasValue || value==o.value);
297         }
298         @Override
add(StringTrieBuilder builder, CharSequence s, int start, int sValue)299         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
300             if(start==s.length()) {
301                 throw new IllegalArgumentException("Duplicate string.");
302             }
303             // Replace self with a node for the remaining string suffix and value.
304             ValueNode node=builder.createSuffixNode(s, start, sValue);
305             node.setValue(value);
306             return node;
307         }
308         @Override
write(StringTrieBuilder builder)309         public void write(StringTrieBuilder builder) {
310             offset=builder.writeValueAndFinal(value, true);
311         }
312 
313         protected boolean hasValue;
314         protected int value;
315     }
316 
317     private static final class IntermediateValueNode extends ValueNode {
IntermediateValueNode(int v, Node nextNode)318         public IntermediateValueNode(int v, Node nextNode) {
319             next=nextNode;
320             setValue(v);
321         }
322         @Override
hashCode()323         public int hashCode() /*const*/ {
324             return (0x222222*37+value)*37+next.hashCode();
325         }
326         @Override
equals(Object other)327         public boolean equals(Object other) {
328             if(this==other) {
329                 return true;
330             }
331             if(!super.equals(other)) {
332                 return false;
333             }
334             IntermediateValueNode o=(IntermediateValueNode)other;
335             return next==o.next;
336         }
337         @Override
markRightEdgesFirst(int edgeNumber)338         public int markRightEdgesFirst(int edgeNumber) {
339             if(offset==0) {
340                 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber);
341             }
342             return edgeNumber;
343         }
344         @Override
write(StringTrieBuilder builder)345         public void write(StringTrieBuilder builder) {
346             next.write(builder);
347             offset=builder.writeValueAndFinal(value, false);
348         }
349 
350         private Node next;
351     }
352 
353     private static final class LinearMatchNode extends ValueNode {
LinearMatchNode(CharSequence builderStrings, int sOffset, int len, Node nextNode)354         public LinearMatchNode(CharSequence builderStrings, int sOffset, int len, Node nextNode) {
355             strings=builderStrings;
356             stringOffset=sOffset;
357             length=len;
358             next=nextNode;
359         }
360         @Override
hashCode()361         public int hashCode() /*const*/ { return hash; }
362         @Override
equals(Object other)363         public boolean equals(Object other) {
364             if(this==other) {
365                 return true;
366             }
367             if(!super.equals(other)) {
368                 return false;
369             }
370             LinearMatchNode o=(LinearMatchNode)other;
371             if(length!=o.length || next!=o.next) {
372                 return false;
373             }
374             for(int i=stringOffset, j=o.stringOffset, limit=stringOffset+length; i<limit; ++i, ++j) {
375                 if(strings.charAt(i)!=strings.charAt(j)) {
376                     return false;
377                 }
378             }
379             return true;
380         }
381         @Override
add(StringTrieBuilder builder, CharSequence s, int start, int sValue)382         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
383             if(start==s.length()) {
384                 if(hasValue) {
385                     throw new IllegalArgumentException("Duplicate string.");
386                 } else {
387                     setValue(sValue);
388                     return this;
389                 }
390             }
391             int limit=stringOffset+length;
392             for(int i=stringOffset; i<limit; ++i, ++start) {
393                 if(start==s.length()) {
394                     // s is a prefix with a new value. Split self into two linear-match nodes.
395                     int prefixLength=i-stringOffset;
396                     LinearMatchNode suffixNode=new LinearMatchNode(strings, i, length-prefixLength, next);
397                     suffixNode.setValue(sValue);
398                     length=prefixLength;
399                     next=suffixNode;
400                     return this;
401                 }
402                 char thisChar=strings.charAt(i);
403                 char newChar=s.charAt(start);
404                 if(thisChar!=newChar) {
405                     // Mismatch, insert a branch node.
406                     DynamicBranchNode branchNode=new DynamicBranchNode();
407                     // Reuse this node for one of the remaining substrings, if any.
408                     Node result, thisSuffixNode;
409                     if(i==stringOffset) {
410                         // Mismatch on first character, turn this node into a suffix.
411                         if(hasValue) {
412                             // Move the value for prefix length "start" to the new node.
413                             branchNode.setValue(value);
414                             value=0;
415                             hasValue=false;
416                         }
417                         ++stringOffset;
418                         --length;
419                         thisSuffixNode= length>0 ? this : next;
420                         // C++: if(length==0) { delete this; }
421                         result=branchNode;
422                     } else if(i==limit-1) {
423                         // Mismatch on last character, keep this node for the prefix.
424                         --length;
425                         thisSuffixNode=next;
426                         next=branchNode;
427                         result=this;
428                     } else {
429                         // Mismatch on intermediate character, keep this node for the prefix.
430                         int prefixLength=i-stringOffset;
431                         ++i;  // Suffix start offset (after thisChar).
432                         thisSuffixNode=new LinearMatchNode(
433                                 strings, i, length-(prefixLength+1), next);
434                         length=prefixLength;
435                         next=branchNode;
436                         result=this;
437                     }
438                     ValueNode newSuffixNode=builder.createSuffixNode(s, start+1, sValue);
439                     branchNode.add(thisChar, thisSuffixNode);
440                     branchNode.add(newChar, newSuffixNode);
441                     return result;
442                 }
443             }
444             // s matches all of this node's characters.
445             next=next.add(builder, s, start, sValue);
446             return this;
447         }
448         @Override
register(StringTrieBuilder builder)449         public Node register(StringTrieBuilder builder) {
450             next=next.register(builder);
451             // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
452             int maxLinearMatchLength=builder.getMaxLinearMatchLength();
453             while(length>maxLinearMatchLength) {
454                 int nextOffset=stringOffset+length-maxLinearMatchLength;
455                 length-=maxLinearMatchLength;
456                 LinearMatchNode suffixNode=
457                     new LinearMatchNode(strings, nextOffset, maxLinearMatchLength, next);
458                 suffixNode.setHashCode();
459                 next=builder.registerNode(suffixNode);
460             }
461             Node result;
462             if(hasValue && !builder.matchNodesCanHaveValues()) {
463                 int intermediateValue=value;
464                 value=0;
465                 hasValue=false;
466                 setHashCode();
467                 result=new IntermediateValueNode(intermediateValue, builder.registerNode(this));
468             } else {
469                 setHashCode();
470                 result=this;
471             }
472             return builder.registerNode(result);
473         }
474         @Override
markRightEdgesFirst(int edgeNumber)475         public int markRightEdgesFirst(int edgeNumber) {
476             if(offset==0) {
477                 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber);
478             }
479             return edgeNumber;
480         }
481         @Override
write(StringTrieBuilder builder)482         public void write(StringTrieBuilder builder) {
483             next.write(builder);
484             builder.write(stringOffset, length);
485             offset=builder.writeValueAndType(hasValue, value, builder.getMinLinearMatch()+length-1);
486         }
487 
488         // Must be called just before registerNode(this).
setHashCode()489         private void setHashCode() /*const*/ {
490             hash=(0x333333*37+length)*37+next.hashCode();
491             if(hasValue) {
492                 hash=hash*37+value;
493             }
494             for(int i=stringOffset, limit=stringOffset+length; i<limit; ++i) {
495                 hash=hash*37+strings.charAt(i);
496             }
497         }
498 
499         private CharSequence strings;
500         private int stringOffset;
501         private int length;
502         private Node next;
503         private int hash;
504     }
505 
506     private static final class DynamicBranchNode extends ValueNode {
DynamicBranchNode()507         public DynamicBranchNode() {}
508         // c must not be in chars yet.
add(char c, Node node)509         public void add(char c, Node node) {
510             int i=find(c);
511             chars.insert(i, c);
512             equal.add(i, node);
513         }
514         @Override
add(StringTrieBuilder builder, CharSequence s, int start, int sValue)515         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
516             if(start==s.length()) {
517                 if(hasValue) {
518                     throw new IllegalArgumentException("Duplicate string.");
519                 } else {
520                     setValue(sValue);
521                     return this;
522                 }
523             }
524             char c=s.charAt(start++);
525             int i=find(c);
526             if(i<chars.length() && c==chars.charAt(i)) {
527                 equal.set(i, equal.get(i).add(builder, s, start, sValue));
528             } else {
529                 chars.insert(i, c);
530                 equal.add(i, builder.createSuffixNode(s, start, sValue));
531             }
532             return this;
533         }
534         @Override
register(StringTrieBuilder builder)535         public Node register(StringTrieBuilder builder) {
536             Node subNode=register(builder, 0, chars.length());
537             BranchHeadNode head=new BranchHeadNode(chars.length(), subNode);
538             Node result=head;
539             if(hasValue) {
540                 if(builder.matchNodesCanHaveValues()) {
541                     head.setValue(value);
542                 } else {
543                     result=new IntermediateValueNode(value, builder.registerNode(head));
544                 }
545             }
546             return builder.registerNode(result);
547         }
register(StringTrieBuilder builder, int start, int limit)548         private Node register(StringTrieBuilder builder, int start, int limit) {
549             int length=limit-start;
550             if(length>builder.getMaxBranchLinearSubNodeLength()) {
551                 // Branch on the middle unit.
552                 int middle=start+length/2;
553                 return builder.registerNode(
554                         new SplitBranchNode(
555                                 chars.charAt(middle),
556                                 register(builder, start, middle),
557                                 register(builder, middle, limit)));
558             }
559             ListBranchNode listNode=new ListBranchNode(length);
560             do {
561                 char c=chars.charAt(start);
562                 Node node=equal.get(start);
563                 if(node.getClass()==ValueNode.class) {
564                     // Final value.
565                     listNode.add(c, ((ValueNode)node).value);
566                 } else {
567                     listNode.add(c, node.register(builder));
568                 }
569             } while(++start<limit);
570             return builder.registerNode(listNode);
571         }
572 
find(char c)573         private int find(char c) {
574             int start=0;
575             int limit=chars.length();
576             while(start<limit) {
577                 int i=(start+limit)/2;
578                 char middleChar=chars.charAt(i);
579                 if(c<middleChar) {
580                     limit=i;
581                 } else if(c==middleChar) {
582                     return i;
583                 } else {
584                     start=i+1;
585                 }
586             }
587             return start;
588         }
589 
590         private StringBuilder chars=new StringBuilder();
591         private ArrayList<Node> equal=new ArrayList<Node>();
592     }
593 
594     private static abstract class BranchNode extends Node {
BranchNode()595         public BranchNode() {}
596         @Override
hashCode()597         public int hashCode() /*const*/ { return hash; }
598 
599         protected int hash;
600         protected int firstEdgeNumber;
601     }
602 
603     private static final class ListBranchNode extends BranchNode {
ListBranchNode(int capacity)604         public ListBranchNode(int capacity) {
605             hash=0x444444*37+capacity;
606             equal=new Node[capacity];
607             values=new int[capacity];
608             units=new char[capacity];
609         }
610         @Override
equals(Object other)611         public boolean equals(Object other) {
612             if(this==other) {
613                 return true;
614             }
615             if(!super.equals(other)) {
616                 return false;
617             }
618             ListBranchNode o=(ListBranchNode)other;
619             for(int i=0; i<length; ++i) {
620                 if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
621                     return false;
622                 }
623             }
624             return true;
625         }
626         @Override
hashCode()627         public int hashCode() {
628             return super.hashCode();
629         }
630         @Override
markRightEdgesFirst(int edgeNumber)631         public int markRightEdgesFirst(int edgeNumber) {
632             if(offset==0) {
633                 firstEdgeNumber=edgeNumber;
634                 int step=0;
635                 int i=length;
636                 do {
637                     Node edge=equal[--i];
638                     if(edge!=null) {
639                         edgeNumber=edge.markRightEdgesFirst(edgeNumber-step);
640                     }
641                     // For all but the rightmost edge, decrement the edge number.
642                     step=1;
643                 } while(i>0);
644                 offset=edgeNumber;
645             }
646             return edgeNumber;
647         }
648         @Override
write(StringTrieBuilder builder)649         public void write(StringTrieBuilder builder) {
650             // Write the sub-nodes in reverse order: The jump lengths are deltas from
651             // after their own positions, so if we wrote the minUnit sub-node first,
652             // then its jump delta would be larger.
653             // Instead we write the minUnit sub-node last, for a shorter delta.
654             int unitNumber=length-1;
655             Node rightEdge=equal[unitNumber];
656             int rightEdgeNumber= rightEdge==null ? firstEdgeNumber : rightEdge.getOffset();
657             do {
658                 --unitNumber;
659                 if(equal[unitNumber]!=null) {
660                     equal[unitNumber].writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
661                 }
662             } while(unitNumber>0);
663             // The maxUnit sub-node is written as the very last one because we do
664             // not jump for it at all.
665             unitNumber=length-1;
666             if(rightEdge==null) {
667                 builder.writeValueAndFinal(values[unitNumber], true);
668             } else {
669                 rightEdge.write(builder);
670             }
671             offset=builder.write(units[unitNumber]);
672             // Write the rest of this node's unit-value pairs.
673             while(--unitNumber>=0) {
674                 int value;
675                 boolean isFinal;
676                 if(equal[unitNumber]==null) {
677                     // Write the final value for the one string ending with this unit.
678                     value=values[unitNumber];
679                     isFinal=true;
680                 } else {
681                     // Write the delta to the start position of the sub-node.
682                     assert(equal[unitNumber].getOffset()>0);
683                     value=offset-equal[unitNumber].getOffset();
684                     isFinal=false;
685                 }
686                 builder.writeValueAndFinal(value, isFinal);
687                 offset=builder.write(units[unitNumber]);
688             }
689         }
690         // Adds a unit with a final value.
add(int c, int value)691         public void add(int c, int value) {
692             units[length]=(char)c;
693             equal[length]=null;
694             values[length]=value;
695             ++length;
696             hash=(hash*37+c)*37+value;
697         }
698         // Adds a unit which leads to another match node.
add(int c, Node node)699         public void add(int c, Node node) {
700             units[length]=(char)c;
701             equal[length]=node;
702             values[length]=0;
703             ++length;
704             hash=(hash*37+c)*37+node.hashCode();
705         }
706 
707         // Note: We could try to reduce memory allocations
708         // by replacing these per-node arrays with per-builder ArrayLists and
709         // (for units) a StringBuilder (or even use its strings for the units too).
710         // It remains to be seen whether that would improve performance.
711         private Node[] equal;  // null means "has final value".
712         private int length;
713         private int[] values;
714         private char[] units;
715     }
716 
717     private static final class SplitBranchNode extends BranchNode {
SplitBranchNode(char middleUnit, Node lessThanNode, Node greaterOrEqualNode)718         public SplitBranchNode(char middleUnit, Node lessThanNode, Node greaterOrEqualNode) {
719             hash=((0x555555*37+middleUnit)*37+
720                     lessThanNode.hashCode())*37+greaterOrEqualNode.hashCode();
721             unit=middleUnit;
722             lessThan=lessThanNode;
723             greaterOrEqual=greaterOrEqualNode;
724         }
725         @Override
equals(Object other)726         public boolean equals(Object other) {
727             if(this==other) {
728                 return true;
729             }
730             if(!super.equals(other)) {
731                 return false;
732             }
733             SplitBranchNode o=(SplitBranchNode)other;
734             return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
735         }
736         @Override
hashCode()737         public int hashCode() {
738             return super.hashCode();
739         }
740         @Override
markRightEdgesFirst(int edgeNumber)741         public int markRightEdgesFirst(int edgeNumber) {
742             if(offset==0) {
743                 firstEdgeNumber=edgeNumber;
744                 edgeNumber=greaterOrEqual.markRightEdgesFirst(edgeNumber);
745                 offset=edgeNumber=lessThan.markRightEdgesFirst(edgeNumber-1);
746             }
747             return edgeNumber;
748         }
749         @Override
write(StringTrieBuilder builder)750         public void write(StringTrieBuilder builder) {
751             // Encode the less-than branch first.
752             lessThan.writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual.getOffset(), builder);
753             // Encode the greater-or-equal branch last because we do not jump for it at all.
754             greaterOrEqual.write(builder);
755             // Write this node.
756             assert(lessThan.getOffset()>0);
757             builder.writeDeltaTo(lessThan.getOffset());  // less-than
758             offset=builder.write(unit);
759         }
760 
761         private char unit;
762         private Node lessThan;
763         private Node greaterOrEqual;
764     }
765 
766     // Branch head node, for writing the actual node lead unit.
767     private static final class BranchHeadNode extends ValueNode {
BranchHeadNode(int len, Node subNode)768         public BranchHeadNode(int len, Node subNode) {
769             length=len;
770             next=subNode;
771         }
772         @Override
hashCode()773         public int hashCode() /*const*/ {
774             return (0x666666*37+length)*37+next.hashCode();
775         }
776         @Override
equals(Object other)777         public boolean equals(Object other) {
778             if(this==other) {
779                 return true;
780             }
781             if(!super.equals(other)) {
782                 return false;
783             }
784             BranchHeadNode o=(BranchHeadNode)other;
785             return length==o.length && next==o.next;
786         }
787         @Override
markRightEdgesFirst(int edgeNumber)788         public int markRightEdgesFirst(int edgeNumber) {
789             if(offset==0) {
790                 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber);
791             }
792             return edgeNumber;
793         }
794         @Override
write(StringTrieBuilder builder)795         public void write(StringTrieBuilder builder) {
796             next.write(builder);
797             if(length<=builder.getMinLinearMatch()) {
798                 offset=builder.writeValueAndType(hasValue, value, length-1);
799             } else {
800                 builder.write(length-1);
801                 offset=builder.writeValueAndType(hasValue, value, 0);
802             }
803         }
804 
805         private int length;
806         private Node next;  // A branch sub-node.
807     }
808 
createSuffixNode(CharSequence s, int start, int sValue)809     private ValueNode createSuffixNode(CharSequence s, int start, int sValue) {
810         ValueNode node=registerFinalValue(sValue);
811         if(start<s.length()) {
812             int offset=strings.length();
813             strings.append(s, start, s.length());
814             node=new LinearMatchNode(strings, offset, s.length()-start, node);
815         }
816         return node;
817     }
818 
819     /**
820      * @internal
821      * @deprecated This API is ICU internal only.
822      */
823     @Deprecated
matchNodesCanHaveValues()824     protected abstract boolean matchNodesCanHaveValues() /*const*/;
825 
826     /**
827      * @internal
828      * @deprecated This API is ICU internal only.
829      */
830     @Deprecated
getMaxBranchLinearSubNodeLength()831     protected abstract int getMaxBranchLinearSubNodeLength() /*const*/;
832     /**
833      * @internal
834      * @deprecated This API is ICU internal only.
835      */
836     @Deprecated
getMinLinearMatch()837     protected abstract int getMinLinearMatch() /*const*/;
838     /**
839      * @internal
840      * @deprecated This API is ICU internal only.
841      */
842     @Deprecated
getMaxLinearMatchLength()843     protected abstract int getMaxLinearMatchLength() /*const*/;
844 
845     /**
846      * @internal
847      * @deprecated This API is ICU internal only.
848      */
849     @Deprecated
write(int unit)850     protected abstract int write(int unit);
851     /**
852      * @internal
853      * @deprecated This API is ICU internal only.
854      */
855     @Deprecated
write(int offset, int length)856     protected abstract int write(int offset, int length);
857     /**
858      * @internal
859      * @deprecated This API is ICU internal only.
860      */
861     @Deprecated
writeValueAndFinal(int i, boolean isFinal)862     protected abstract int writeValueAndFinal(int i, boolean isFinal);
863     /**
864      * @internal
865      * @deprecated This API is ICU internal only.
866      */
867     @Deprecated
writeValueAndType(boolean hasValue, int value, int node)868     protected abstract int writeValueAndType(boolean hasValue, int value, int node);
869     /**
870      * @internal
871      * @deprecated This API is ICU internal only.
872      */
873     @Deprecated
writeDeltaTo(int jumpTarget)874     protected abstract int writeDeltaTo(int jumpTarget);
875 
876     private enum State {
877         ADDING, BUILDING_FAST, BUILDING_SMALL, BUILT
878     }
879     private State state=State.ADDING;
880 
881     // Strings and sub-strings for linear-match nodes.
882     /**
883      * @internal
884      * @deprecated This API is ICU internal only.
885      */
886     @Deprecated
887     protected StringBuilder strings=new StringBuilder();
888     private Node root;
889 
890     // Hash set of nodes, maps from nodes to integer 1.
891     private HashMap<Node, Node> nodes=new HashMap<Node, Node>();
892     private ValueNode lookupFinalValueNode=new ValueNode();
893 }
894