1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * created on: 2011jan05 9 * created by: Markus W. Scherer 10 * ported from ICU4C stringtriebuilder.h/.cpp 11 */ 12 package com.ibm.icu.util; 13 14 import java.util.ArrayList; 15 import java.util.HashMap; 16 17 /** 18 * Base class for string trie builder classes. 19 * 20 * <p>This class is not intended for public subclassing. 21 * 22 * @author Markus W. Scherer 23 * @stable ICU 4.8 24 */ 25 public abstract class StringTrieBuilder { 26 /** 27 * Build options for BytesTrieBuilder and CharsTrieBuilder. 28 * @stable ICU 4.8 29 */ 30 public enum Option { 31 /** 32 * Builds a trie quickly. 33 * @stable ICU 4.8 34 */ 35 FAST, 36 /** 37 * Builds a trie more slowly, attempting to generate 38 * a shorter but equivalent serialization. 39 * This build option also uses more memory. 40 * 41 * <p>This option can be effective when many integer values are the same 42 * and string/byte sequence suffixes can be shared. 43 * Runtime speed is not expected to improve. 44 * @stable ICU 4.8 45 */ 46 SMALL 47 } 48 49 /** 50 * @internal 51 * @deprecated This API is ICU internal only. 52 */ 53 @Deprecated StringTrieBuilder()54 protected StringTrieBuilder() {} 55 56 /** 57 * @internal 58 * @deprecated This API is ICU internal only. 59 */ 60 @Deprecated addImpl(CharSequence s, int value)61 protected void addImpl(CharSequence s, int value) { 62 if(state!=State.ADDING) { 63 // Cannot add elements after building. 64 throw new IllegalStateException("Cannot add (string, value) pairs after build()."); 65 } 66 if(s.length()>0xffff) { 67 // Too long: Limited by iterator internals, and by builder recursion depth. 68 throw new IndexOutOfBoundsException("The maximum string length is 0xffff."); 69 } 70 if(root==null) { 71 root=createSuffixNode(s, 0, value); 72 } else { 73 root=root.add(this, s, 0, value); 74 } 75 } 76 77 /** 78 * @internal 79 * @deprecated This API is ICU internal only. 80 */ 81 @Deprecated buildImpl(Option buildOption)82 protected final void buildImpl(Option buildOption) { 83 switch(state) { 84 case ADDING: 85 if(root==null) { 86 throw new IndexOutOfBoundsException("No (string, value) pairs were added."); 87 } 88 if(buildOption==Option.FAST) { 89 state=State.BUILDING_FAST; 90 // Building "fast" is somewhat faster (25..50% in some test) 91 // because it makes registerNode() return the input node 92 // rather than checking for duplicates. 93 // As a result, we sometimes write larger trie serializations. 94 // 95 // In either case we need to fix-up linear-match nodes (for their maximum length) 96 // and branch nodes (turning dynamic branch nodes into trees of 97 // runtime-equivalent nodes), but the HashMap/hashCode()/equals() are omitted for 98 // nodes other than final values. 99 } else { 100 state=State.BUILDING_SMALL; 101 } 102 break; 103 case BUILDING_FAST: 104 case BUILDING_SMALL: 105 // Building must have failed. 106 throw new IllegalStateException("Builder failed and must be clear()ed."); 107 case BUILT: 108 return; // Nothing more to do. 109 } 110 // Implementation note: 111 // We really build three versions of the trie. 112 // The first is a fully dynamic trie, built successively by addImpl(). 113 // Then we call root.register() to turn it into a tree of nodes 114 // which is 1:1 equivalent to the runtime data structure. 115 // Finally, root.markRightEdgesFirst() and root.write() write that serialized form. 116 root=root.register(this); 117 root.markRightEdgesFirst(-1); 118 root.write(this); 119 state=State.BUILT; 120 } 121 122 /** 123 * @internal 124 * @deprecated This API is ICU internal only. 125 */ 126 @Deprecated clearImpl()127 protected void clearImpl() { 128 strings.setLength(0); 129 nodes.clear(); 130 root=null; 131 state=State.ADDING; 132 } 133 134 /** 135 * Makes sure that there is only one unique node registered that is 136 * equivalent to newNode, unless BUILDING_FAST. 137 * @param newNode Input node. The builder takes ownership. 138 * @return newNode if it is the first of its kind, or 139 * an equivalent node if newNode is a duplicate. 140 */ registerNode(Node newNode)141 private final Node registerNode(Node newNode) { 142 if(state==State.BUILDING_FAST) { 143 return newNode; 144 } 145 // BUILDING_SMALL 146 Node oldNode=nodes.get(newNode); 147 if(oldNode!=null) { 148 return oldNode; 149 } 150 // If put() returns a non-null value from an equivalent, previously 151 // registered node, then get() failed to find that and we will leak newNode. 152 oldNode=nodes.put(newNode, newNode); 153 assert(oldNode==null); 154 return newNode; 155 } 156 157 /** 158 * Makes sure that there is only one unique FinalValueNode registered 159 * with this value. 160 * Avoids creating a node if the value is a duplicate. 161 * @param value A final value. 162 * @return A FinalValueNode with the given value. 163 */ registerFinalValue(int value)164 private final ValueNode registerFinalValue(int value) { 165 // We always register final values because while ADDING 166 // we do not know yet whether we will build fast or small. 167 lookupFinalValueNode.setFinalValue(value); 168 Node oldNode=nodes.get(lookupFinalValueNode); 169 if(oldNode!=null) { 170 return (ValueNode)oldNode; 171 } 172 ValueNode newNode=new ValueNode(value); 173 // If put() returns a non-null value from an equivalent, previously 174 // registered node, then get() failed to find that and we will leak newNode. 175 oldNode=nodes.put(newNode, newNode); 176 assert(oldNode==null); 177 return newNode; 178 } 179 180 private static abstract class Node { Node()181 public Node() { 182 offset=0; 183 } 184 // hashCode() and equals() for use with registerNode() and the nodes hash. 185 @Override hashCode()186 public abstract int hashCode() /*const*/; 187 // Base class equals() compares the actual class types. 188 @Override equals(Object other)189 public boolean equals(Object other) { 190 return this==other || this.getClass()==other.getClass(); 191 } 192 /** 193 * Recursive method for adding a new (string, value) pair. 194 * Matches the remaining part of s from start, 195 * and adds a new node where there is a mismatch. 196 * @return this or a replacement Node 197 */ add(StringTrieBuilder builder, CharSequence s, int start, int sValue)198 public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) { 199 return this; 200 } 201 /** 202 * Recursive method for registering unique nodes, 203 * after all (string, value) pairs have been added. 204 * Final-value nodes are pre-registered while add()ing (string, value) pairs. 205 * Other nodes created while add()ing registerNode() themselves later 206 * and might replace themselves with new types of nodes for write()ing. 207 * @return The registered version of this node which implements write(). 208 */ register(StringTrieBuilder builder)209 public Node register(StringTrieBuilder builder) { return this; } 210 /** 211 * Traverses the Node graph and numbers branch edges, with rightmost edges first. 212 * This is to avoid writing a duplicate node twice. 213 * 214 * Branch nodes in this trie data structure are not symmetric. 215 * Most branch edges "jump" to other nodes but the rightmost branch edges 216 * just continue without a jump. 217 * Therefore, write() must write the rightmost branch edge last 218 * (trie units are written backwards), and must write it at that point even if 219 * it is a duplicate of a node previously written elsewhere. 220 * 221 * This function visits and marks right branch edges first. 222 * Edges are numbered with increasingly negative values because we share the 223 * offset field which gets positive values when nodes are written. 224 * A branch edge also remembers the first number for any of its edges. 225 * 226 * When a further-left branch edge has a number in the range of the rightmost 227 * edge's numbers, then it will be written as part of the required right edge 228 * and we can avoid writing it first. 229 * 230 * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative 231 * edge numbers. 232 * 233 * @param edgeNumber The first edge number for this node and its sub-nodes. 234 * @return An edge number that is at least the maximum-negative 235 * of the input edge number and the numbers of this node and all of its sub-nodes. 236 */ markRightEdgesFirst(int edgeNumber)237 public int markRightEdgesFirst(int edgeNumber) { 238 if(offset==0) { 239 offset=edgeNumber; 240 } 241 return edgeNumber; 242 } 243 // write() must set the offset to a positive value. write(StringTrieBuilder builder)244 public abstract void write(StringTrieBuilder builder); 245 // See markRightEdgesFirst. writeUnlessInsideRightEdge(int firstRight, int lastRight, StringTrieBuilder builder)246 public final void writeUnlessInsideRightEdge(int firstRight, int lastRight, 247 StringTrieBuilder builder) { 248 // Note: Edge numbers are negative, lastRight<=firstRight. 249 // If offset>0 then this node and its sub-nodes have been written already 250 // and we need not write them again. 251 // If this node is part of the unwritten right branch edge, 252 // then we wait until that is written. 253 if(offset<0 && (offset<lastRight || firstRight<offset)) { 254 write(builder); 255 } 256 } getOffset()257 public final int getOffset() /*const*/ { return offset; } 258 259 protected int offset; 260 } 261 262 // Used directly for final values, and as as a superclass for 263 // match nodes with intermediate values. 264 private static class ValueNode extends Node { ValueNode()265 public ValueNode() {} ValueNode(int v)266 public ValueNode(int v) { 267 hasValue=true; 268 value=v; 269 } setValue(int v)270 public final void setValue(int v) { 271 assert(!hasValue); 272 hasValue=true; 273 value=v; 274 } setFinalValue(int v)275 private void setFinalValue(int v) { 276 hasValue=true; 277 value=v; 278 } 279 @Override hashCode()280 public int hashCode() /*const*/ { 281 int hash=0x111111; 282 if(hasValue) { 283 hash=hash*37+value; 284 } 285 return hash; 286 } 287 @Override equals(Object other)288 public boolean equals(Object other) { 289 if(this==other) { 290 return true; 291 } 292 if(!super.equals(other)) { 293 return false; 294 } 295 ValueNode o=(ValueNode)other; 296 return hasValue==o.hasValue && (!hasValue || value==o.value); 297 } 298 @Override add(StringTrieBuilder builder, CharSequence s, int start, int sValue)299 public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) { 300 if(start==s.length()) { 301 throw new IllegalArgumentException("Duplicate string."); 302 } 303 // Replace self with a node for the remaining string suffix and value. 304 ValueNode node=builder.createSuffixNode(s, start, sValue); 305 node.setValue(value); 306 return node; 307 } 308 @Override write(StringTrieBuilder builder)309 public void write(StringTrieBuilder builder) { 310 offset=builder.writeValueAndFinal(value, true); 311 } 312 313 protected boolean hasValue; 314 protected int value; 315 } 316 317 private static final class IntermediateValueNode extends ValueNode { IntermediateValueNode(int v, Node nextNode)318 public IntermediateValueNode(int v, Node nextNode) { 319 next=nextNode; 320 setValue(v); 321 } 322 @Override hashCode()323 public int hashCode() /*const*/ { 324 return (0x222222*37+value)*37+next.hashCode(); 325 } 326 @Override equals(Object other)327 public boolean equals(Object other) { 328 if(this==other) { 329 return true; 330 } 331 if(!super.equals(other)) { 332 return false; 333 } 334 IntermediateValueNode o=(IntermediateValueNode)other; 335 return next==o.next; 336 } 337 @Override markRightEdgesFirst(int edgeNumber)338 public int markRightEdgesFirst(int edgeNumber) { 339 if(offset==0) { 340 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber); 341 } 342 return edgeNumber; 343 } 344 @Override write(StringTrieBuilder builder)345 public void write(StringTrieBuilder builder) { 346 next.write(builder); 347 offset=builder.writeValueAndFinal(value, false); 348 } 349 350 private Node next; 351 } 352 353 private static final class LinearMatchNode extends ValueNode { LinearMatchNode(CharSequence builderStrings, int sOffset, int len, Node nextNode)354 public LinearMatchNode(CharSequence builderStrings, int sOffset, int len, Node nextNode) { 355 strings=builderStrings; 356 stringOffset=sOffset; 357 length=len; 358 next=nextNode; 359 } 360 @Override hashCode()361 public int hashCode() /*const*/ { return hash; } 362 @Override equals(Object other)363 public boolean equals(Object other) { 364 if(this==other) { 365 return true; 366 } 367 if(!super.equals(other)) { 368 return false; 369 } 370 LinearMatchNode o=(LinearMatchNode)other; 371 if(length!=o.length || next!=o.next) { 372 return false; 373 } 374 for(int i=stringOffset, j=o.stringOffset, limit=stringOffset+length; i<limit; ++i, ++j) { 375 if(strings.charAt(i)!=strings.charAt(j)) { 376 return false; 377 } 378 } 379 return true; 380 } 381 @Override add(StringTrieBuilder builder, CharSequence s, int start, int sValue)382 public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) { 383 if(start==s.length()) { 384 if(hasValue) { 385 throw new IllegalArgumentException("Duplicate string."); 386 } else { 387 setValue(sValue); 388 return this; 389 } 390 } 391 int limit=stringOffset+length; 392 for(int i=stringOffset; i<limit; ++i, ++start) { 393 if(start==s.length()) { 394 // s is a prefix with a new value. Split self into two linear-match nodes. 395 int prefixLength=i-stringOffset; 396 LinearMatchNode suffixNode=new LinearMatchNode(strings, i, length-prefixLength, next); 397 suffixNode.setValue(sValue); 398 length=prefixLength; 399 next=suffixNode; 400 return this; 401 } 402 char thisChar=strings.charAt(i); 403 char newChar=s.charAt(start); 404 if(thisChar!=newChar) { 405 // Mismatch, insert a branch node. 406 DynamicBranchNode branchNode=new DynamicBranchNode(); 407 // Reuse this node for one of the remaining substrings, if any. 408 Node result, thisSuffixNode; 409 if(i==stringOffset) { 410 // Mismatch on first character, turn this node into a suffix. 411 if(hasValue) { 412 // Move the value for prefix length "start" to the new node. 413 branchNode.setValue(value); 414 value=0; 415 hasValue=false; 416 } 417 ++stringOffset; 418 --length; 419 thisSuffixNode= length>0 ? this : next; 420 // C++: if(length==0) { delete this; } 421 result=branchNode; 422 } else if(i==limit-1) { 423 // Mismatch on last character, keep this node for the prefix. 424 --length; 425 thisSuffixNode=next; 426 next=branchNode; 427 result=this; 428 } else { 429 // Mismatch on intermediate character, keep this node for the prefix. 430 int prefixLength=i-stringOffset; 431 ++i; // Suffix start offset (after thisChar). 432 thisSuffixNode=new LinearMatchNode( 433 strings, i, length-(prefixLength+1), next); 434 length=prefixLength; 435 next=branchNode; 436 result=this; 437 } 438 ValueNode newSuffixNode=builder.createSuffixNode(s, start+1, sValue); 439 branchNode.add(thisChar, thisSuffixNode); 440 branchNode.add(newChar, newSuffixNode); 441 return result; 442 } 443 } 444 // s matches all of this node's characters. 445 next=next.add(builder, s, start, sValue); 446 return this; 447 } 448 @Override register(StringTrieBuilder builder)449 public Node register(StringTrieBuilder builder) { 450 next=next.register(builder); 451 // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. 452 int maxLinearMatchLength=builder.getMaxLinearMatchLength(); 453 while(length>maxLinearMatchLength) { 454 int nextOffset=stringOffset+length-maxLinearMatchLength; 455 length-=maxLinearMatchLength; 456 LinearMatchNode suffixNode= 457 new LinearMatchNode(strings, nextOffset, maxLinearMatchLength, next); 458 suffixNode.setHashCode(); 459 next=builder.registerNode(suffixNode); 460 } 461 Node result; 462 if(hasValue && !builder.matchNodesCanHaveValues()) { 463 int intermediateValue=value; 464 value=0; 465 hasValue=false; 466 setHashCode(); 467 result=new IntermediateValueNode(intermediateValue, builder.registerNode(this)); 468 } else { 469 setHashCode(); 470 result=this; 471 } 472 return builder.registerNode(result); 473 } 474 @Override markRightEdgesFirst(int edgeNumber)475 public int markRightEdgesFirst(int edgeNumber) { 476 if(offset==0) { 477 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber); 478 } 479 return edgeNumber; 480 } 481 @Override write(StringTrieBuilder builder)482 public void write(StringTrieBuilder builder) { 483 next.write(builder); 484 builder.write(stringOffset, length); 485 offset=builder.writeValueAndType(hasValue, value, builder.getMinLinearMatch()+length-1); 486 } 487 488 // Must be called just before registerNode(this). setHashCode()489 private void setHashCode() /*const*/ { 490 hash=(0x333333*37+length)*37+next.hashCode(); 491 if(hasValue) { 492 hash=hash*37+value; 493 } 494 for(int i=stringOffset, limit=stringOffset+length; i<limit; ++i) { 495 hash=hash*37+strings.charAt(i); 496 } 497 } 498 499 private CharSequence strings; 500 private int stringOffset; 501 private int length; 502 private Node next; 503 private int hash; 504 } 505 506 private static final class DynamicBranchNode extends ValueNode { DynamicBranchNode()507 public DynamicBranchNode() {} 508 // c must not be in chars yet. add(char c, Node node)509 public void add(char c, Node node) { 510 int i=find(c); 511 chars.insert(i, c); 512 equal.add(i, node); 513 } 514 @Override add(StringTrieBuilder builder, CharSequence s, int start, int sValue)515 public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) { 516 if(start==s.length()) { 517 if(hasValue) { 518 throw new IllegalArgumentException("Duplicate string."); 519 } else { 520 setValue(sValue); 521 return this; 522 } 523 } 524 char c=s.charAt(start++); 525 int i=find(c); 526 if(i<chars.length() && c==chars.charAt(i)) { 527 equal.set(i, equal.get(i).add(builder, s, start, sValue)); 528 } else { 529 chars.insert(i, c); 530 equal.add(i, builder.createSuffixNode(s, start, sValue)); 531 } 532 return this; 533 } 534 @Override register(StringTrieBuilder builder)535 public Node register(StringTrieBuilder builder) { 536 Node subNode=register(builder, 0, chars.length()); 537 BranchHeadNode head=new BranchHeadNode(chars.length(), subNode); 538 Node result=head; 539 if(hasValue) { 540 if(builder.matchNodesCanHaveValues()) { 541 head.setValue(value); 542 } else { 543 result=new IntermediateValueNode(value, builder.registerNode(head)); 544 } 545 } 546 return builder.registerNode(result); 547 } register(StringTrieBuilder builder, int start, int limit)548 private Node register(StringTrieBuilder builder, int start, int limit) { 549 int length=limit-start; 550 if(length>builder.getMaxBranchLinearSubNodeLength()) { 551 // Branch on the middle unit. 552 int middle=start+length/2; 553 return builder.registerNode( 554 new SplitBranchNode( 555 chars.charAt(middle), 556 register(builder, start, middle), 557 register(builder, middle, limit))); 558 } 559 ListBranchNode listNode=new ListBranchNode(length); 560 do { 561 char c=chars.charAt(start); 562 Node node=equal.get(start); 563 if(node.getClass()==ValueNode.class) { 564 // Final value. 565 listNode.add(c, ((ValueNode)node).value); 566 } else { 567 listNode.add(c, node.register(builder)); 568 } 569 } while(++start<limit); 570 return builder.registerNode(listNode); 571 } 572 find(char c)573 private int find(char c) { 574 int start=0; 575 int limit=chars.length(); 576 while(start<limit) { 577 int i=(start+limit)/2; 578 char middleChar=chars.charAt(i); 579 if(c<middleChar) { 580 limit=i; 581 } else if(c==middleChar) { 582 return i; 583 } else { 584 start=i+1; 585 } 586 } 587 return start; 588 } 589 590 private StringBuilder chars=new StringBuilder(); 591 private ArrayList<Node> equal=new ArrayList<Node>(); 592 } 593 594 private static abstract class BranchNode extends Node { BranchNode()595 public BranchNode() {} 596 @Override hashCode()597 public int hashCode() /*const*/ { return hash; } 598 599 protected int hash; 600 protected int firstEdgeNumber; 601 } 602 603 private static final class ListBranchNode extends BranchNode { ListBranchNode(int capacity)604 public ListBranchNode(int capacity) { 605 hash=0x444444*37+capacity; 606 equal=new Node[capacity]; 607 values=new int[capacity]; 608 units=new char[capacity]; 609 } 610 @Override equals(Object other)611 public boolean equals(Object other) { 612 if(this==other) { 613 return true; 614 } 615 if(!super.equals(other)) { 616 return false; 617 } 618 ListBranchNode o=(ListBranchNode)other; 619 for(int i=0; i<length; ++i) { 620 if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { 621 return false; 622 } 623 } 624 return true; 625 } 626 @Override hashCode()627 public int hashCode() { 628 return super.hashCode(); 629 } 630 @Override markRightEdgesFirst(int edgeNumber)631 public int markRightEdgesFirst(int edgeNumber) { 632 if(offset==0) { 633 firstEdgeNumber=edgeNumber; 634 int step=0; 635 int i=length; 636 do { 637 Node edge=equal[--i]; 638 if(edge!=null) { 639 edgeNumber=edge.markRightEdgesFirst(edgeNumber-step); 640 } 641 // For all but the rightmost edge, decrement the edge number. 642 step=1; 643 } while(i>0); 644 offset=edgeNumber; 645 } 646 return edgeNumber; 647 } 648 @Override write(StringTrieBuilder builder)649 public void write(StringTrieBuilder builder) { 650 // Write the sub-nodes in reverse order: The jump lengths are deltas from 651 // after their own positions, so if we wrote the minUnit sub-node first, 652 // then its jump delta would be larger. 653 // Instead we write the minUnit sub-node last, for a shorter delta. 654 int unitNumber=length-1; 655 Node rightEdge=equal[unitNumber]; 656 int rightEdgeNumber= rightEdge==null ? firstEdgeNumber : rightEdge.getOffset(); 657 do { 658 --unitNumber; 659 if(equal[unitNumber]!=null) { 660 equal[unitNumber].writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); 661 } 662 } while(unitNumber>0); 663 // The maxUnit sub-node is written as the very last one because we do 664 // not jump for it at all. 665 unitNumber=length-1; 666 if(rightEdge==null) { 667 builder.writeValueAndFinal(values[unitNumber], true); 668 } else { 669 rightEdge.write(builder); 670 } 671 offset=builder.write(units[unitNumber]); 672 // Write the rest of this node's unit-value pairs. 673 while(--unitNumber>=0) { 674 int value; 675 boolean isFinal; 676 if(equal[unitNumber]==null) { 677 // Write the final value for the one string ending with this unit. 678 value=values[unitNumber]; 679 isFinal=true; 680 } else { 681 // Write the delta to the start position of the sub-node. 682 assert(equal[unitNumber].getOffset()>0); 683 value=offset-equal[unitNumber].getOffset(); 684 isFinal=false; 685 } 686 builder.writeValueAndFinal(value, isFinal); 687 offset=builder.write(units[unitNumber]); 688 } 689 } 690 // Adds a unit with a final value. add(int c, int value)691 public void add(int c, int value) { 692 units[length]=(char)c; 693 equal[length]=null; 694 values[length]=value; 695 ++length; 696 hash=(hash*37+c)*37+value; 697 } 698 // Adds a unit which leads to another match node. add(int c, Node node)699 public void add(int c, Node node) { 700 units[length]=(char)c; 701 equal[length]=node; 702 values[length]=0; 703 ++length; 704 hash=(hash*37+c)*37+node.hashCode(); 705 } 706 707 // Note: We could try to reduce memory allocations 708 // by replacing these per-node arrays with per-builder ArrayLists and 709 // (for units) a StringBuilder (or even use its strings for the units too). 710 // It remains to be seen whether that would improve performance. 711 private Node[] equal; // null means "has final value". 712 private int length; 713 private int[] values; 714 private char[] units; 715 } 716 717 private static final class SplitBranchNode extends BranchNode { SplitBranchNode(char middleUnit, Node lessThanNode, Node greaterOrEqualNode)718 public SplitBranchNode(char middleUnit, Node lessThanNode, Node greaterOrEqualNode) { 719 hash=((0x555555*37+middleUnit)*37+ 720 lessThanNode.hashCode())*37+greaterOrEqualNode.hashCode(); 721 unit=middleUnit; 722 lessThan=lessThanNode; 723 greaterOrEqual=greaterOrEqualNode; 724 } 725 @Override equals(Object other)726 public boolean equals(Object other) { 727 if(this==other) { 728 return true; 729 } 730 if(!super.equals(other)) { 731 return false; 732 } 733 SplitBranchNode o=(SplitBranchNode)other; 734 return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; 735 } 736 @Override hashCode()737 public int hashCode() { 738 return super.hashCode(); 739 } 740 @Override markRightEdgesFirst(int edgeNumber)741 public int markRightEdgesFirst(int edgeNumber) { 742 if(offset==0) { 743 firstEdgeNumber=edgeNumber; 744 edgeNumber=greaterOrEqual.markRightEdgesFirst(edgeNumber); 745 offset=edgeNumber=lessThan.markRightEdgesFirst(edgeNumber-1); 746 } 747 return edgeNumber; 748 } 749 @Override write(StringTrieBuilder builder)750 public void write(StringTrieBuilder builder) { 751 // Encode the less-than branch first. 752 lessThan.writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual.getOffset(), builder); 753 // Encode the greater-or-equal branch last because we do not jump for it at all. 754 greaterOrEqual.write(builder); 755 // Write this node. 756 assert(lessThan.getOffset()>0); 757 builder.writeDeltaTo(lessThan.getOffset()); // less-than 758 offset=builder.write(unit); 759 } 760 761 private char unit; 762 private Node lessThan; 763 private Node greaterOrEqual; 764 } 765 766 // Branch head node, for writing the actual node lead unit. 767 private static final class BranchHeadNode extends ValueNode { BranchHeadNode(int len, Node subNode)768 public BranchHeadNode(int len, Node subNode) { 769 length=len; 770 next=subNode; 771 } 772 @Override hashCode()773 public int hashCode() /*const*/ { 774 return (0x666666*37+length)*37+next.hashCode(); 775 } 776 @Override equals(Object other)777 public boolean equals(Object other) { 778 if(this==other) { 779 return true; 780 } 781 if(!super.equals(other)) { 782 return false; 783 } 784 BranchHeadNode o=(BranchHeadNode)other; 785 return length==o.length && next==o.next; 786 } 787 @Override markRightEdgesFirst(int edgeNumber)788 public int markRightEdgesFirst(int edgeNumber) { 789 if(offset==0) { 790 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber); 791 } 792 return edgeNumber; 793 } 794 @Override write(StringTrieBuilder builder)795 public void write(StringTrieBuilder builder) { 796 next.write(builder); 797 if(length<=builder.getMinLinearMatch()) { 798 offset=builder.writeValueAndType(hasValue, value, length-1); 799 } else { 800 builder.write(length-1); 801 offset=builder.writeValueAndType(hasValue, value, 0); 802 } 803 } 804 805 private int length; 806 private Node next; // A branch sub-node. 807 } 808 createSuffixNode(CharSequence s, int start, int sValue)809 private ValueNode createSuffixNode(CharSequence s, int start, int sValue) { 810 ValueNode node=registerFinalValue(sValue); 811 if(start<s.length()) { 812 int offset=strings.length(); 813 strings.append(s, start, s.length()); 814 node=new LinearMatchNode(strings, offset, s.length()-start, node); 815 } 816 return node; 817 } 818 819 /** 820 * @internal 821 * @deprecated This API is ICU internal only. 822 */ 823 @Deprecated matchNodesCanHaveValues()824 protected abstract boolean matchNodesCanHaveValues() /*const*/; 825 826 /** 827 * @internal 828 * @deprecated This API is ICU internal only. 829 */ 830 @Deprecated getMaxBranchLinearSubNodeLength()831 protected abstract int getMaxBranchLinearSubNodeLength() /*const*/; 832 /** 833 * @internal 834 * @deprecated This API is ICU internal only. 835 */ 836 @Deprecated getMinLinearMatch()837 protected abstract int getMinLinearMatch() /*const*/; 838 /** 839 * @internal 840 * @deprecated This API is ICU internal only. 841 */ 842 @Deprecated getMaxLinearMatchLength()843 protected abstract int getMaxLinearMatchLength() /*const*/; 844 845 /** 846 * @internal 847 * @deprecated This API is ICU internal only. 848 */ 849 @Deprecated write(int unit)850 protected abstract int write(int unit); 851 /** 852 * @internal 853 * @deprecated This API is ICU internal only. 854 */ 855 @Deprecated write(int offset, int length)856 protected abstract int write(int offset, int length); 857 /** 858 * @internal 859 * @deprecated This API is ICU internal only. 860 */ 861 @Deprecated writeValueAndFinal(int i, boolean isFinal)862 protected abstract int writeValueAndFinal(int i, boolean isFinal); 863 /** 864 * @internal 865 * @deprecated This API is ICU internal only. 866 */ 867 @Deprecated writeValueAndType(boolean hasValue, int value, int node)868 protected abstract int writeValueAndType(boolean hasValue, int value, int node); 869 /** 870 * @internal 871 * @deprecated This API is ICU internal only. 872 */ 873 @Deprecated writeDeltaTo(int jumpTarget)874 protected abstract int writeDeltaTo(int jumpTarget); 875 876 private enum State { 877 ADDING, BUILDING_FAST, BUILDING_SMALL, BUILT 878 } 879 private State state=State.ADDING; 880 881 // Strings and sub-strings for linear-match nodes. 882 /** 883 * @internal 884 * @deprecated This API is ICU internal only. 885 */ 886 @Deprecated 887 protected StringBuilder strings=new StringBuilder(); 888 private Node root; 889 890 // Hash set of nodes, maps from nodes to integer 1. 891 private HashMap<Node, Node> nodes=new HashMap<Node, Node>(); 892 private ValueNode lookupFinalValueNode=new ValueNode(); 893 } 894