1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 10 package com.ibm.icu.text; 11 12 import java.util.ArrayList; 13 import java.util.Arrays; 14 import java.util.Collection; 15 import java.util.HashSet; 16 import java.util.List; 17 import java.util.Set; 18 import java.util.SortedSet; 19 import java.util.TreeSet; 20 21 import com.ibm.icu.impl.Assert; 22 import com.ibm.icu.impl.RBBIDataWrapper; 23 import com.ibm.icu.text.RBBIRuleBuilder.IntPair; 24 25 /** 26 * This class is part of the RBBI rule compiler. 27 * It builds the state transition table used by the RBBI runtime 28 * from the expression syntax tree generated by the rule scanner. 29 * 30 * This class is part of the RBBI implementation only. 31 * There is no user-visible public API here. 32 */ 33 class RBBITableBuilder { 34 35 // 36 // RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors, 37 // one for each state. 38 static class RBBIStateDescriptor { 39 boolean fMarked; 40 int fAccepting; 41 int fLookAhead; 42 SortedSet<Integer> fTagVals; 43 int fTagsIdx; 44 Set<RBBINode> fPositions; // Set of parse tree positions associated 45 // with this state. Unordered (it's a set). 46 // UVector contents are RBBINode * 47 48 int[] fDtran; // Transitions out of this state. 49 // indexed by input character 50 // contents is int index of dest state 51 // in RBBITableBuilder.fDStates 52 RBBIStateDescriptor(int maxInputSymbol)53 RBBIStateDescriptor(int maxInputSymbol) { 54 fTagVals = new TreeSet<>(); 55 fPositions = new HashSet<>(); 56 fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized. 57 // It is indexed by input symbols, and will 58 // hold the next state number for each 59 // symbol. 60 } 61 } 62 63 64 private RBBIRuleBuilder fRB; 65 66 /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */ 67 private int fRootIx; 68 69 /** D states (Aho's terminology). Index is state number. */ 70 private List<RBBIStateDescriptor> fDStates; 71 72 /** Synthesized safe table, a List of row arrays. */ 73 private List<short[]> fSafeTable; 74 75 private static final int MAX_STATE_FOR_8BITS_TABLE = 255; 76 77 /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */ 78 int[] fLookAheadRuleMap; 79 80 /** Counter used when assigning lookahead rule numbers. 81 * Contains the last look-ahead number already in use. 82 * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved 83 * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */ 84 int fLASlotsInUse = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL; 85 86 //----------------------------------------------------------------------------- 87 // 88 // Constructor for RBBITableBuilder. 89 // 90 // rootNode is an index into the array of root nodes that is held by 91 // the overall RBBIRuleBuilder. 92 //----------------------------------------------------------------------------- RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx)93 RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) { 94 fRootIx = rootNodeIx; 95 fRB = rb; 96 fDStates = new ArrayList<>(); 97 } 98 99 100 101 102 //----------------------------------------------------------------------------- 103 // 104 // RBBITableBuilder::buildForwardTable - This is the main function for building 105 // the DFA state transition table from the RBBI rules parse tree. 106 // 107 //----------------------------------------------------------------------------- buildForwardTable()108 void buildForwardTable() { 109 // If there were no rules, just return. This situation can easily arise 110 // for the reverse rules. 111 if (fRB.fTreeRoots[fRootIx]==null) { 112 return; 113 } 114 115 // 116 // Walk through the tree, replacing any references to $variables with a copy of the 117 // parse tree for the substitution expression. 118 // 119 fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(0); 120 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) { 121 System.out.println("Parse tree after flattening variable references."); 122 fRB.fTreeRoots[fRootIx].printTree(true); 123 } 124 125 // 126 // If the rules contained any references to {bof} 127 // add a {bof} <cat> <former root of tree> to the 128 // tree. Means that all matches must start out with the 129 // {bof} fake character. 130 // 131 if (fRB.fSetBuilder.sawBOF()) { 132 RBBINode bofTop = new RBBINode(RBBINode.opCat); 133 RBBINode bofLeaf = new RBBINode(RBBINode.leafChar); 134 bofTop.fLeftChild = bofLeaf; 135 bofTop.fRightChild = fRB.fTreeRoots[fRootIx]; 136 bofLeaf.fParent = bofTop; 137 bofLeaf.fVal = 2; // Reserved value for {bof}. 138 fRB.fTreeRoots[fRootIx] = bofTop; 139 } 140 141 // 142 // Add a unique right-end marker to the expression. 143 // Appears as a cat-node, left child being the original tree, 144 // right child being the end marker. 145 // 146 RBBINode cn = new RBBINode(RBBINode.opCat); 147 cn.fLeftChild = fRB.fTreeRoots[fRootIx]; 148 fRB.fTreeRoots[fRootIx].fParent = cn; 149 RBBINode endMarkerNode = cn.fRightChild = new RBBINode(RBBINode.endMark); 150 cn.fRightChild.fParent = cn; 151 fRB.fTreeRoots[fRootIx] = cn; 152 153 // 154 // Replace all references to UnicodeSets with the tree for the equivalent 155 // expression. 156 // 157 fRB.fTreeRoots[fRootIx].flattenSets(); 158 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) { 159 System.out.println("Parse tree after flattening Unicode Set references."); 160 fRB.fTreeRoots[fRootIx].printTree(true); 161 } 162 163 164 // 165 // calculate the functions nullable, firstpos, lastpos and followpos on 166 // nodes in the parse tree. 167 // See the algorithm description in Aho. 168 // Understanding how this works by looking at the code alone will be 169 // nearly impossible. 170 // 171 calcNullable(fRB.fTreeRoots[fRootIx]); 172 calcFirstPos(fRB.fTreeRoots[fRootIx]); 173 calcLastPos(fRB.fTreeRoots[fRootIx]); 174 calcFollowPos(fRB.fTreeRoots[fRootIx]); 175 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) { 176 System.out.print("\n"); 177 printPosSets(fRB.fTreeRoots[fRootIx]); 178 } 179 180 // 181 // For "chained" rules, modify the followPos sets 182 // 183 if (fRB.fChainRules) { 184 calcChainedFollowPos(fRB.fTreeRoots[fRootIx], endMarkerNode); 185 } 186 187 // 188 // BOF (start of input) test fixup. 189 // 190 if (fRB.fSetBuilder.sawBOF()) { 191 bofFixup(); 192 } 193 194 // 195 // Build the DFA state transition tables. 196 // 197 buildStateTable(); 198 mapLookAheadRules(); 199 flagAcceptingStates(); 200 flagLookAheadStates(); 201 flagTaggedStates(); 202 203 // 204 // Update the global table of rule status {tag} values 205 // The rule builder has a global vector of status values that are common 206 // for all tables. Merge the ones from this table into the global set. 207 // 208 mergeRuleStatusVals(); 209 } 210 211 212 213 //----------------------------------------------------------------------------- 214 // 215 // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 216 // 217 //----------------------------------------------------------------------------- calcNullable(RBBINode n)218 void calcNullable(RBBINode n) { 219 if (n == null) { 220 return; 221 } 222 if (n.fType == RBBINode.setRef || 223 n.fType == RBBINode.endMark ) { 224 // These are non-empty leaf node types. 225 n.fNullable = false; 226 return; 227 } 228 229 if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) { 230 // Lookahead marker node. It's a leaf, so no recursion on children. 231 // It's nullable because it does not match any literal text from the input stream. 232 n.fNullable = true; 233 return; 234 } 235 236 237 // The node is not a leaf. 238 // Calculate nullable on its children. 239 calcNullable(n.fLeftChild); 240 calcNullable(n.fRightChild); 241 242 // Apply functions from table 3.40 in Aho 243 if (n.fType == RBBINode.opOr) { 244 n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable; 245 } 246 else if (n.fType == RBBINode.opCat) { 247 n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable; 248 } 249 else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) { 250 n.fNullable = true; 251 } 252 else { 253 n.fNullable = false; 254 } 255 } 256 257 258 259 260 //----------------------------------------------------------------------------- 261 // 262 // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 263 // 264 //----------------------------------------------------------------------------- calcFirstPos(RBBINode n)265 void calcFirstPos(RBBINode n) { 266 if (n == null) { 267 return; 268 } 269 if (n.fType == RBBINode.leafChar || 270 n.fType == RBBINode.endMark || 271 n.fType == RBBINode.lookAhead || 272 n.fType == RBBINode.tag) { 273 // These are non-empty leaf node types. 274 n.fFirstPosSet.add(n); 275 return; 276 } 277 278 // The node is not a leaf. 279 // Calculate firstPos on its children. 280 calcFirstPos(n.fLeftChild); 281 calcFirstPos(n.fRightChild); 282 283 // Apply functions from table 3.40 in Aho 284 if (n.fType == RBBINode.opOr) { 285 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 286 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 287 } 288 else if (n.fType == RBBINode.opCat) { 289 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 290 if (n.fLeftChild.fNullable) { 291 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 292 } 293 } 294 else if (n.fType == RBBINode.opStar || 295 n.fType == RBBINode.opQuestion || 296 n.fType == RBBINode.opPlus) { 297 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 298 } 299 } 300 301 302 303 //----------------------------------------------------------------------------- 304 // 305 // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 306 // 307 //----------------------------------------------------------------------------- calcLastPos(RBBINode n)308 void calcLastPos(RBBINode n) { 309 if (n == null) { 310 return; 311 } 312 if (n.fType == RBBINode.leafChar || 313 n.fType == RBBINode.endMark || 314 n.fType == RBBINode.lookAhead || 315 n.fType == RBBINode.tag) { 316 // These are non-empty leaf node types. 317 n.fLastPosSet.add(n); 318 return; 319 } 320 321 // The node is not a leaf. 322 // Calculate lastPos on its children. 323 calcLastPos(n.fLeftChild); 324 calcLastPos(n.fRightChild); 325 326 // Apply functions from table 3.40 in Aho 327 if (n.fType == RBBINode.opOr) { 328 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 329 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 330 } 331 else if (n.fType == RBBINode.opCat) { 332 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 333 if (n.fRightChild.fNullable) { 334 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 335 } 336 } 337 else if (n.fType == RBBINode.opStar || 338 n.fType == RBBINode.opQuestion || 339 n.fType == RBBINode.opPlus) { 340 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 341 } 342 } 343 344 345 346 //----------------------------------------------------------------------------- 347 // 348 // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 349 // 350 //----------------------------------------------------------------------------- calcFollowPos(RBBINode n)351 void calcFollowPos(RBBINode n) { 352 if (n == null || 353 n.fType == RBBINode.leafChar || 354 n.fType == RBBINode.endMark) { 355 return; 356 } 357 358 calcFollowPos(n.fLeftChild); 359 calcFollowPos(n.fRightChild); 360 361 // Aho rule #1 362 if (n.fType == RBBINode.opCat) { 363 for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) { 364 i.fFollowPos.addAll(n.fRightChild.fFirstPosSet); 365 } 366 } 367 368 // Aho rule #2 369 if (n.fType == RBBINode.opStar || 370 n.fType == RBBINode.opPlus) { 371 for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) { 372 i.fFollowPos.addAll(n.fFirstPosSet); 373 } 374 } 375 } 376 377 //----------------------------------------------------------------------------- 378 // 379 // addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged 380 // as roots of a rule to a destination vector. 381 // 382 //----------------------------------------------------------------------------- addRuleRootNodes(List<RBBINode> dest, RBBINode node)383 void addRuleRootNodes(List<RBBINode> dest, RBBINode node) { 384 if (node == null) { 385 return; 386 } 387 if (node.fRuleRoot) { 388 dest.add(node); 389 // Note: rules cannot nest. If we found a rule start node, 390 // no child node can also be a start node. 391 return; 392 } 393 addRuleRootNodes(dest, node.fLeftChild); 394 addRuleRootNodes(dest, node.fRightChild); 395 } 396 397 //----------------------------------------------------------------------------- 398 // 399 // calcChainedFollowPos. Modify the previously calculated followPos sets 400 // to implement rule chaining. NOT described by Aho 401 // 402 //----------------------------------------------------------------------------- calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode)403 void calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode) { 404 405 List<RBBINode> leafNodes = new ArrayList<>(); 406 407 // get a list all leaf nodes 408 tree.findNodes(leafNodes, RBBINode.leafChar); 409 410 // Collect all leaf nodes that can start matches for rules 411 // with inbound chaining enabled, which is the union of the 412 // firstPosition sets from each of the rule root nodes. 413 414 List<RBBINode> ruleRootNodes = new ArrayList<>(); 415 addRuleRootNodes(ruleRootNodes, tree); 416 417 Set<RBBINode> matchStartNodes = new HashSet<>(); 418 for (RBBINode node: ruleRootNodes) { 419 if (node.fChainIn) { 420 matchStartNodes.addAll(node.fFirstPosSet); 421 } 422 } 423 424 // Iterate over all leaf nodes, 425 // 426 for (RBBINode endNode : leafNodes) { 427 428 // Identify leaf nodes that correspond to overall rule match positions. 429 // These include the endMarkNode in their followPos sets. 430 // 431 // Note: do not consider other end marker nodes, those that are added to 432 // look-ahead rules. These can't chain; a match immediately stops 433 // further matching. This leaves exactly one end marker node, the one 434 // at the end of the complete tree. 435 436 if (!endNode.fFollowPos.contains(endMarkNode)) { 437 continue; 438 } 439 440 // We've got a node that can end a match. 441 442 // Now iterate over the nodes that can start a match, looking for ones 443 // with the same char class as our ending node. 444 for (RBBINode startNode : matchStartNodes) { 445 if (startNode.fType != RBBINode.leafChar) { 446 continue; 447 } 448 449 if (endNode.fVal == startNode.fVal) { 450 // The end val (character class) of one possible match is the 451 // same as the start of another. 452 453 // Add all nodes from the followPos of the start node to the 454 // followPos set of the end node, which will have the effect of 455 // letting matches transition from a match state at endNode 456 // to the second char of a match starting with startNode. 457 endNode.fFollowPos.addAll(startNode.fFollowPos); 458 } 459 } 460 } 461 } 462 463 464 //----------------------------------------------------------------------------- 465 // 466 // bofFixup. Fixup for state tables that include {bof} beginning of input testing. 467 // Do an swizzle similar to chaining, modifying the followPos set of 468 // the bofNode to include the followPos nodes from other {bot} nodes 469 // scattered through the tree. 470 // 471 // This function has much in common with calcChainedFollowPos(). 472 // 473 //----------------------------------------------------------------------------- bofFixup()474 void bofFixup() { 475 // 476 // The parse tree looks like this ... 477 // fTree root --. <cat> 478 // / \ 479 // <cat> <#end node> 480 // / \ 481 // <bofNode> rest 482 // of tree 483 // 484 // We will be adding things to the followPos set of the <bofNode> 485 // 486 RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild; 487 Assert.assrt(bofNode.fType == RBBINode.leafChar); 488 Assert.assrt(bofNode.fVal == 2); 489 490 // Get all nodes that can be the start a match of the user-written rules 491 // (excluding the fake bofNode) 492 // We want the nodes that can start a match in the 493 // part labeled "rest of tree" 494 // 495 Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet; 496 for (RBBINode startNode : matchStartNodes) { 497 if (startNode.fType != RBBINode.leafChar) { 498 continue; 499 } 500 501 if (startNode.fVal == bofNode.fVal) { 502 // We found a leaf node corresponding to a {bof} that was 503 // explicitly written into a rule. 504 // Add everything from the followPos set of this node to the 505 // followPos set of the fake bofNode at the start of the tree. 506 // 507 bofNode.fFollowPos.addAll(startNode.fFollowPos); 508 } 509 } 510 } 511 512 //----------------------------------------------------------------------------- 513 // 514 // buildStateTable() Determine the set of runtime DFA states and the 515 // transition tables for these states, by the algorithm 516 // of fig. 3.44 in Aho. 517 // 518 // Most of the comments are quotes of Aho's psuedo-code. 519 // 520 //----------------------------------------------------------------------------- buildStateTable()521 void buildStateTable() { 522 // 523 // Add a dummy state 0 - the stop state. Not from Aho. 524 int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1; 525 RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol); 526 fDStates.add(failState); 527 528 // initially, the only unmarked state in Dstates is firstpos(root), 529 // where toot is the root of the syntax tree for (r)#; 530 RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol); 531 initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet); 532 fDStates.add(initialState); 533 534 // while there is an unmarked state T in Dstates do begin 535 for (;;) { 536 RBBIStateDescriptor T = null; 537 int tx; 538 for (tx=1; tx<fDStates.size(); tx++) { 539 RBBIStateDescriptor temp = fDStates.get(tx); 540 if (temp.fMarked == false) { 541 T = temp; 542 break; 543 } 544 } 545 if (T == null) { 546 break; 547 } 548 549 // mark T; 550 T.fMarked = true; 551 552 // for each input symbol a do begin 553 int a; 554 for (a = 1; a<=lastInputSymbol; a++) { 555 // let U be the set of positions that are in followpos(p) 556 // for some position p in T 557 // such that the symbol at position p is a; 558 Set<RBBINode> U = null; 559 for (RBBINode p : T.fPositions) { 560 if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) { 561 if (U == null) { 562 U = new HashSet<>(); 563 } 564 U.addAll(p.fFollowPos); 565 } 566 } 567 568 // if U is not empty and not in DStates then 569 int ux = 0; 570 boolean UinDstates = false; 571 if (U != null) { 572 Assert.assrt(U.size() > 0); 573 int ix; 574 for (ix=0; ix<fDStates.size(); ix++) { 575 RBBIStateDescriptor temp2; 576 temp2 = fDStates.get(ix); 577 if (U.equals(temp2.fPositions)) { 578 U = temp2.fPositions; 579 ux = ix; 580 UinDstates = true; 581 break; 582 } 583 } 584 585 // Add U as an unmarked state to Dstates 586 if (!UinDstates) 587 { 588 RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol); 589 newState.fPositions = U; 590 fDStates.add(newState); 591 ux = fDStates.size()-1; 592 } 593 594 // Dtran[T, a] := U; 595 T.fDtran[a] = ux; 596 } 597 } 598 } 599 } 600 601 /** 602 * mapLookAheadRules 603 * 604 */ mapLookAheadRules()605 void mapLookAheadRules() { 606 fLookAheadRuleMap = new int[fRB.fScanner.numRules() + 1]; 607 608 for (RBBIStateDescriptor sd: fDStates) { 609 int laSlotForState = 0; 610 611 // Establish the look-ahead slot for this state, if the state covers 612 // any look-ahead nodes - corresponding to the '/' in look-ahead rules. 613 614 // If any of the look-ahead nodes already have a slot assigned, use it, 615 // otherwise assign a new one. 616 617 boolean sawLookAheadNode = false; 618 for (RBBINode node: sd.fPositions) { 619 if (node.fType != RBBINode.lookAhead) { 620 continue; 621 } 622 sawLookAheadNode = true; 623 int ruleNum = node.fVal; // Set when rule was originally parsed. 624 assert(ruleNum < fLookAheadRuleMap.length); 625 assert(ruleNum > 0); 626 int laSlot = fLookAheadRuleMap[ruleNum]; 627 if (laSlot != 0) { 628 if (laSlotForState == 0) { 629 laSlotForState = laSlot; 630 } else { 631 // TODO: figure out if this can fail, change to setting an error code if so. 632 assert(laSlot == laSlotForState); 633 } 634 } 635 } 636 if (!sawLookAheadNode) { 637 continue; 638 } 639 640 if (laSlotForState == 0) { 641 laSlotForState = ++fLASlotsInUse; 642 } 643 644 // For each look ahead node covered by this state, 645 // set the mapping from the node's rule number to the look ahead slot. 646 // There can be multiple nodes/rule numbers going to the same la slot. 647 648 for (RBBINode node: sd.fPositions) { 649 if (node.fType != RBBINode.lookAhead) { 650 continue; 651 } 652 int ruleNum = node.fVal; // Set when rule was originally parsed. 653 int existingVal = fLookAheadRuleMap[ruleNum]; 654 assert(existingVal == 0 || existingVal == laSlotForState); 655 fLookAheadRuleMap[ruleNum] = laSlotForState; 656 } 657 } 658 659 } 660 661 //----------------------------------------------------------------------------- 662 // 663 // flagAcceptingStates Identify accepting states. 664 // First get a list of all of the end marker nodes. 665 // Then, for each state s, 666 // if s contains one of the end marker nodes in its list of tree positions then 667 // s is an accepting state. 668 // 669 //----------------------------------------------------------------------------- flagAcceptingStates()670 void flagAcceptingStates() { 671 List<RBBINode> endMarkerNodes = new ArrayList<>(); 672 RBBINode endMarker; 673 int i; 674 int n; 675 676 fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark); 677 678 for (i=0; i<endMarkerNodes.size(); i++) { 679 endMarker = endMarkerNodes.get(i); 680 for (n=0; n<fDStates.size(); n++) { 681 RBBIStateDescriptor sd = fDStates.get(n); 682 if (sd.fPositions.contains(endMarker)) { 683 // Any non-zero value for fAccepting means this is an accepting node. 684 // The value is what will be returned to the user as the break status. 685 // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1). 686 687 if (sd.fAccepting==0) { 688 // State hasn't been marked as accepting yet. Do it now. 689 sd.fAccepting = fLookAheadRuleMap[endMarker.fVal]; 690 if (sd.fAccepting == 0) { 691 sd.fAccepting = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL; 692 } 693 } 694 if (sd.fAccepting==RBBIDataWrapper.ACCEPTING_UNCONDITIONAL && endMarker.fVal != 0) { 695 // Both lookahead and non-lookahead accepting for this state. 696 // Favor the look-ahead, because a look-ahead match needs to 697 // immediately stop the run-time engine. First match, not longest. 698 sd.fAccepting = fLookAheadRuleMap[endMarker.fVal]; 699 } 700 // implicit else: 701 // if sd.fAccepting already had a value other than 0 or 1, leave it be. 702 } 703 } 704 } 705 } 706 707 708 //----------------------------------------------------------------------------- 709 // 710 // flagLookAheadStates Very similar to flagAcceptingStates, above. 711 // 712 //----------------------------------------------------------------------------- flagLookAheadStates()713 void flagLookAheadStates() { 714 List<RBBINode> lookAheadNodes = new ArrayList<>(); 715 RBBINode lookAheadNode; 716 int i; 717 int n; 718 719 fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead); 720 for (i=0; i<lookAheadNodes.size(); i++) { 721 lookAheadNode = lookAheadNodes.get(i); 722 for (n=0; n<fDStates.size(); n++) { 723 RBBIStateDescriptor sd = fDStates.get(n); 724 if (sd.fPositions.contains(lookAheadNode)) { 725 int lookaheadSlot = fLookAheadRuleMap[lookAheadNode.fVal]; 726 assert(sd.fLookAhead == 0 || sd.fLookAhead == lookaheadSlot); 727 sd.fLookAhead = lookaheadSlot; 728 } 729 } 730 } 731 } 732 733 734 735 736 //----------------------------------------------------------------------------- 737 // 738 // flagTaggedStates 739 // 740 //----------------------------------------------------------------------------- flagTaggedStates()741 void flagTaggedStates() { 742 List<RBBINode> tagNodes = new ArrayList<>(); 743 RBBINode tagNode; 744 int i; 745 int n; 746 747 fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag); 748 for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) 749 tagNode = tagNodes.get(i); 750 751 for (n=0; n<fDStates.size(); n++) { // For each state s (row in the state table) 752 RBBIStateDescriptor sd = fDStates.get(n); 753 if (sd.fPositions.contains(tagNode)) { // if s include the tag node t 754 sd.fTagVals.add(tagNode.fVal); 755 } 756 } 757 } 758 } 759 760 761 762 //----------------------------------------------------------------------------- 763 // 764 // mergeRuleStatusVals 765 // 766 // Allocate positions in the global array of rule status {tag} values 767 // 768 // The RBBI runtime uses an array of {sets of status values} that can 769 // be returned for boundaries. Each accepting state that has non-zero 770 // status includes an index into this array. The format of the array 771 // is 772 // Num of status values in group 1 773 // status val 774 // status val 775 // ... 776 // Num of status vals in group 2 777 // status val 778 // status val 779 // ... 780 // etc. 781 // 782 // 783 //----------------------------------------------------------------------------- 784 mergeRuleStatusVals()785 void mergeRuleStatusVals() { 786 // 787 // The basic outline of what happens here is this... 788 // 789 // for each state in this state table 790 // if the status tag list for this state is in the global statuses list 791 // record where and 792 // continue with the next state 793 // else 794 // add the tag list for this state to the global list. 795 // 796 int n; 797 798 // Pre-load a single tag of {0} into the table. 799 // We will need this as a default, for rule sets with no explicit tagging, 800 // or with explicit tagging of {0}. 801 if (fRB.fRuleStatusVals.size() == 0) { 802 fRB.fRuleStatusVals.add(1); // Num of statuses in group 803 fRB.fRuleStatusVals.add(0); // and our single status of zero 804 805 SortedSet<Integer> s0 = new TreeSet<>(); // mapping for rules with no explicit tagging 806 fRB.fStatusSets.put(s0, 0); // (key is an empty set). 807 808 SortedSet<Integer> s1 = new TreeSet<>(); // mapping for rules with explicit tagging of {0} 809 s1.add(0); 810 fRB.fStatusSets.put(s1, 0); 811 } 812 813 // For each state, check whether the state's status tag values are 814 // already entered into the status values array, and add them if not. 815 for (n=0; n<fDStates.size(); n++) { 816 RBBIStateDescriptor sd = fDStates.get(n); 817 Set<Integer> statusVals = sd.fTagVals; 818 Integer arrayIndexI = fRB.fStatusSets.get(statusVals); 819 if (arrayIndexI == null) { 820 // This is the first encounter of this set of status values. 821 // Add them to the statusSets map, This map associates 822 // the set of status values with an index in the runtime status 823 // values array. 824 arrayIndexI = fRB.fRuleStatusVals.size(); 825 fRB.fStatusSets.put(statusVals, arrayIndexI); 826 827 // Add the new set of status values to the vector of values that 828 // will eventually become the array used by the runtime engine. 829 fRB.fRuleStatusVals.add(statusVals.size()); 830 fRB.fRuleStatusVals.addAll(statusVals); 831 } 832 833 // Save the runtime array index back into the state descriptor. 834 sd.fTagsIdx = arrayIndexI.intValue(); 835 } 836 } 837 838 839 840 841 842 843 844 //----------------------------------------------------------------------------- 845 // 846 // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos 847 // for each node in the tree. 848 // 849 //----------------------------------------------------------------------------- 850 printPosSets(RBBINode n)851 void printPosSets(RBBINode n) { 852 if (n==null) { 853 return; 854 } 855 RBBINode.printNode(n); 856 System.out.print(" Nullable: " + n.fNullable); 857 858 System.out.print(" firstpos: "); 859 printSet(n.fFirstPosSet); 860 861 System.out.print(" lastpos: "); 862 printSet(n.fLastPosSet); 863 864 System.out.print(" followpos: "); 865 printSet(n.fFollowPos); 866 867 printPosSets(n.fLeftChild); 868 printPosSets(n.fRightChild); 869 } 870 871 872 873 /** 874 * Find duplicate (redundant) character classes. Begin looking with categories.first. 875 * Duplicates, if found are returned in the categories parameter. 876 * This is an iterator-like function, used to identify character classes 877 * (state table columns) that can be eliminated. 878 * @param categories in/out parameter, specifies where to start looking for duplicates, 879 * and returns the first pair of duplicates found, if any. 880 * @return true if duplicate char classes were found, false otherwise. 881 * @internal 882 */ findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories)883 boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) { 884 int numStates = fDStates.size(); 885 int numCols = fRB.fSetBuilder.getNumCharCategories(); 886 887 int table_base = 0; 888 int table_dupl = 0; 889 for (; categories.first < numCols-1; ++categories.first) { 890 // Note: dictionary & non-dictionary columns cannot be merged. 891 // The limitSecond value prevents considering mixed pairs. 892 // Dictionary categories are >= DictCategoriesStart. 893 // Non dict categories are < DictCategoriesStart. 894 int limitSecond = categories.first < fRB.fSetBuilder.getDictCategoriesStart() ? 895 fRB.fSetBuilder.getDictCategoriesStart() : numCols; 896 for (categories.second=categories.first+1; categories.second < limitSecond; ++categories.second) { 897 for (int state=0; state<numStates; state++) { 898 RBBIStateDescriptor sd = fDStates.get(state); 899 table_base = sd.fDtran[categories.first]; 900 table_dupl = sd.fDtran[categories.second]; 901 if (table_base != table_dupl) { 902 break; 903 } 904 } 905 if (table_base == table_dupl) { 906 return true; 907 } 908 } 909 } 910 return false; 911 } 912 913 /** 914 * Remove a column from the state table. Used when two character categories 915 * have been found equivalent, and merged together, to eliminate the unneeded table column. 916 */ 917 void removeColumn(int column) { 918 int numStates = fDStates.size(); 919 for (int state=0; state<numStates; state++) { 920 RBBIStateDescriptor sd = fDStates.get(state); 921 assert(column < sd.fDtran.length); 922 int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1); 923 System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column); 924 sd.fDtran = newArray; 925 } 926 } 927 928 929 /** 930 * Find duplicate (redundant) states, beginning at the specified pair, 931 * within this state table. This is an iterator-like function, used to 932 * identify states (state table rows) that can be eliminated. 933 * @param states in/out parameter, specifies where to start looking for duplicates, 934 * and returns the first pair of duplicates found, if any. 935 * @return true if duplicate states were found, false otherwise. 936 * @internal 937 */ 938 boolean findDuplicateState(RBBIRuleBuilder.IntPair states) { 939 int numStates = fDStates.size(); 940 int numCols = fRB.fSetBuilder.getNumCharCategories(); 941 942 for (; states.first<numStates-1; ++states.first) { 943 RBBIStateDescriptor firstSD = fDStates.get(states.first); 944 for (states.second=states.first+1; states.second<numStates; ++states.second) { 945 RBBIStateDescriptor duplSD = fDStates.get(states.second); 946 if (firstSD.fAccepting != duplSD.fAccepting || 947 firstSD.fLookAhead != duplSD.fLookAhead || 948 firstSD.fTagsIdx != duplSD.fTagsIdx) { 949 continue; 950 } 951 boolean rowsMatch = true; 952 for (int col=0; col < numCols; ++col) { 953 int firstVal = firstSD.fDtran[col]; 954 int duplVal = duplSD.fDtran[col]; 955 if (!((firstVal == duplVal) || 956 ((firstVal == states.first || firstVal == states.second) && 957 (duplVal == states.first || duplVal == states.second)))) { 958 rowsMatch = false; 959 break; 960 } 961 } 962 if (rowsMatch) { 963 return true; 964 } 965 } 966 } 967 return false; 968 } 969 970 /** 971 * Find the next duplicate state in the safe reverse table. An iterator function. 972 * @param states in/out parameter, specifies where to start looking for duplicates, 973 * and returns the first pair of duplicates found, if any. 974 * @return true if duplicate states were found, false otherwise. 975 * @internal 976 */ 977 boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) { 978 int numStates = fSafeTable.size(); 979 980 for (; states.first<numStates-1; ++states.first) { 981 short[] firstRow = fSafeTable.get(states.first); 982 for (states.second=states.first+1; states.second<numStates; ++states.second) { 983 short[] duplRow = fSafeTable.get(states.second); 984 boolean rowsMatch = true; 985 int numCols = firstRow.length; 986 for (int col=0; col < numCols; ++col) { 987 int firstVal = firstRow[col]; 988 int duplVal = duplRow[col]; 989 if (!((firstVal == duplVal) || 990 ((firstVal == states.first || firstVal == states.second) && 991 (duplVal == states.first || duplVal == states.second)))) { 992 rowsMatch = false; 993 break; 994 } 995 } 996 if (rowsMatch) { 997 return true; 998 } 999 } 1000 } 1001 return false; 1002 } 1003 1004 /** 1005 * Remove a duplicate state (row) from the state table. All references to the deleted (second) state 1006 * are redirected to first state. 1007 * @param duplStates The duplicate pair of states. 1008 * @internal 1009 */ 1010 void removeState(IntPair duplStates) { 1011 final int keepState = duplStates.first; 1012 final int duplState = duplStates.second; 1013 assert(keepState < duplState); 1014 assert(duplState < fDStates.size()); 1015 1016 fDStates.remove(duplState); 1017 1018 int numStates = fDStates.size(); 1019 int numCols = fRB.fSetBuilder.getNumCharCategories(); 1020 for (int state=0; state<numStates; ++state) { 1021 RBBIStateDescriptor sd = fDStates.get(state); 1022 for (int col=0; col<numCols; col++) { 1023 int existingVal = sd.fDtran[col]; 1024 int newVal = existingVal; 1025 if (existingVal == duplState) { 1026 newVal = keepState; 1027 } else if (existingVal > duplState) { 1028 newVal = existingVal - 1; 1029 } 1030 sd.fDtran[col] = newVal; 1031 } 1032 } 1033 } 1034 1035 /** 1036 * Remove a duplicate state from the safe table. 1037 * @param duplStates The duplicate pair of states. The first is kept, the second is removed. 1038 * All references to the second in the state table are retargeted 1039 * to the first. 1040 * @internal 1041 */ 1042 void removeSafeState(IntPair duplStates) { 1043 final int keepState = duplStates.first; 1044 final int duplState = duplStates.second; 1045 assert(keepState < duplState); 1046 assert(duplState < fSafeTable.size()); 1047 1048 fSafeTable.remove(duplState); 1049 int numStates = fSafeTable.size(); 1050 for (int state=0; state<numStates; ++state) { 1051 short[] row = fSafeTable.get(state); 1052 for (int col=0; col<row.length; col++) { 1053 int existingVal = row[col]; 1054 int newVal = existingVal; 1055 if (existingVal == duplState) { 1056 newVal = keepState; 1057 } else if (existingVal > duplState) { 1058 newVal = existingVal - 1; 1059 } 1060 row[col] = (short)newVal; 1061 } 1062 } 1063 } 1064 1065 1066 /** 1067 * Check for, and remove duplicate states (table rows). 1068 * @return the number of states removed. 1069 * @internal 1070 */ 1071 int removeDuplicateStates() { 1072 IntPair dupls = new IntPair(3, 0); 1073 int numStatesRemoved = 0; 1074 1075 while (findDuplicateState(dupls)) { 1076 // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); 1077 removeState(dupls); 1078 ++numStatesRemoved; 1079 } 1080 return numStatesRemoved; 1081 } 1082 1083 1084 /** 1085 * Calculate the size in bytes of the serialized form of this state transition table, 1086 * which is identical to the ICU4C runtime form. 1087 * Refer to common/rbbidata.h from ICU4C for the declarations of the structures 1088 * being matched by this calculation. 1089 */ 1090 int getTableSize() { 1091 if (fRB.fTreeRoots[fRootIx] == null) { 1092 return 0; 1093 } 1094 int size = RBBIDataWrapper.RBBIStateTable.fHeaderSize; // The header, with no rows to the table. 1095 int numRows = fDStates.size(); 1096 int numCols = fRB.fSetBuilder.getNumCharCategories(); 1097 boolean use8Bits = numRows <= MAX_STATE_FOR_8BITS_TABLE; 1098 int rowSize = (use8Bits ? 1 : 2 ) * (RBBIDataWrapper.NEXTSTATES + numCols); 1099 size += numRows * rowSize; 1100 size = (size + 7) & ~7; // round up to a multiple of 8 bytes 1101 return size; 1102 } 1103 1104 1105 1106 /** 1107 * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table. 1108 * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, 1109 * in common/rbbidata.h 1110 */ 1111 RBBIDataWrapper.RBBIStateTable exportTable() { 1112 int state; 1113 int col; 1114 1115 RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); 1116 if (fRB.fTreeRoots[fRootIx] == null) { 1117 return table; 1118 } 1119 1120 Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff && 1121 fDStates.size() < 0x7fff); 1122 table.fNumStates = fDStates.size(); 1123 table.fDictCategoriesStart = fRB.fSetBuilder.getDictCategoriesStart(); 1124 table.fLookAheadResultsSize = 1125 fLASlotsInUse == RBBIDataWrapper.ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1; 1126 boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE; 1127 1128 // Size of table size in shorts. 1129 int rowLen = RBBIDataWrapper.NEXTSTATES + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts. 1130 int tableSize; 1131 if (use8Bits) { 1132 tableSize = (getTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize); // fTable length in bytes. 1133 table.fTable = new char[tableSize]; 1134 table.fRowLen = rowLen; // Row length in bytes. 1135 } else { 1136 tableSize = (getTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize) / 2; // fTable length in shorts. 1137 table.fTable = new char[tableSize]; 1138 table.fRowLen = rowLen * 2; // Row length in bytes. 1139 } 1140 1141 if (fRB.fLookAheadHardBreak) { 1142 table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; 1143 } 1144 if (fRB.fSetBuilder.sawBOF()) { 1145 table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; 1146 } 1147 if (use8Bits) { 1148 table.fFlags |= RBBIDataWrapper.RBBI_8BITS_ROWS; 1149 } 1150 1151 int numCharCategories = fRB.fSetBuilder.getNumCharCategories(); 1152 for (state=0; state<table.fNumStates; state++) { 1153 RBBIStateDescriptor sd = fDStates.get(state); 1154 int row = state*rowLen; 1155 if (use8Bits) { 1156 Assert.assrt (0 <= sd.fAccepting && sd.fAccepting <= 255); 1157 Assert.assrt (0 <= sd.fLookAhead && sd.fLookAhead <= 255); 1158 } else { 1159 Assert.assrt (0 <= sd.fAccepting && sd.fAccepting <= 0xffff); 1160 Assert.assrt (0 <= sd.fLookAhead && sd.fLookAhead <= 0xffff); 1161 } 1162 table.fTable[row + RBBIDataWrapper.ACCEPTING] = (char)sd.fAccepting; 1163 table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (char)sd.fLookAhead; 1164 table.fTable[row + RBBIDataWrapper.TAGSIDX] = (char)sd.fTagsIdx; 1165 for (col=0; col<numCharCategories; col++) { 1166 if (use8Bits) { 1167 Assert.assrt (0 <= sd.fDtran[col] && sd.fDtran[col] <= MAX_STATE_FOR_8BITS_TABLE); 1168 } 1169 table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)sd.fDtran[col]; 1170 } 1171 } 1172 return table; 1173 } 1174 1175 /** 1176 * Synthesize a safe state table from the main state table. 1177 */ 1178 void buildSafeReverseTable() { 1179 // Safe Reverse table construction is described in more detail in the corresponding 1180 // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because 1181 // it is too likely to get out of sync. 1182 1183 // Each safe pair is stored as two chars in the safePair stringBuilder. 1184 StringBuilder safePairs = new StringBuilder(); 1185 1186 int numCharClasses = fRB.fSetBuilder.getNumCharCategories(); 1187 int numStates = fDStates.size(); 1188 1189 for (int c1=0; c1<numCharClasses; ++c1) { 1190 for (int c2=0; c2 < numCharClasses; ++c2) { 1191 int wantedEndState = -1; 1192 int endState = 0; 1193 for (int startState = 1; startState < numStates; ++startState) { 1194 RBBIStateDescriptor startStateD = fDStates.get(startState); 1195 int s2 = startStateD.fDtran[c1]; 1196 RBBIStateDescriptor s2StateD = fDStates.get(s2); 1197 endState = s2StateD.fDtran[c2]; 1198 if (wantedEndState < 0) { 1199 wantedEndState = endState; 1200 } else { 1201 if (wantedEndState != endState) { 1202 break; 1203 } 1204 } 1205 } 1206 if (wantedEndState == endState) { 1207 safePairs.append((char)c1); 1208 safePairs.append((char)c2); 1209 // System.out.printf("(%d, %d) ", c1, c2); 1210 } 1211 } 1212 // System.out.printf("\n"); 1213 } 1214 1215 // Populate the initial safe table. 1216 // The table as a whole is a List<short[]> 1217 // Row 0 is the stop state. 1218 // Row 1 is the start sate. 1219 // Row 2 and beyond are other states, initially one per char class, but 1220 // after initial construction, many of the states will be combined, compacting the table.) 1221 // The String holds the nextState data only. The four leading fields of a row, fAccepting, 1222 // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building. 1223 1224 assert(fSafeTable == null); 1225 fSafeTable = new ArrayList<>(); 1226 for (int row=0; row<numCharClasses + 2; ++row) { 1227 fSafeTable.add(new short[numCharClasses]); 1228 } 1229 1230 // From the start state, each input char class transitions to the state for that input. 1231 short[] startState = fSafeTable.get(1); 1232 for (int charClass=0; charClass < numCharClasses; ++charClass) { 1233 // Note: +2 to skip the start & stop state rows. 1234 startState[charClass] = (short)(charClass+2); 1235 } 1236 1237 // Initially make every other state table row look like the start state row 1238 // (except for the stop state, which remains all 0) 1239 for (int row=2; row<numCharClasses+2; ++row) { 1240 System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length); 1241 } 1242 1243 // Run through the safe pairs, set the next state to zero when pair has been seen. 1244 // Zero being the stop state, meaning we found a safe point. 1245 for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) { 1246 int c1 = safePairs.charAt(pairIdx); 1247 int c2 = safePairs.charAt(pairIdx + 1); 1248 1249 short[] rowState = fSafeTable.get(c2 + 2); 1250 rowState[c1] = 0; 1251 } 1252 1253 // Remove duplicate or redundant rows from the table. 1254 RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0); 1255 while (findDuplicateSafeState(states)) { 1256 // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); 1257 removeSafeState(states); 1258 } 1259 } 1260 1261 1262 /** 1263 * Calculate the size of the runtime form of this safe state table. 1264 */ 1265 int getSafeTableSize() { 1266 if (fSafeTable == null) { 1267 return 0; 1268 } 1269 int size = RBBIDataWrapper.RBBIStateTable.fHeaderSize; // The header, with no rows to the table. 1270 int numRows = fSafeTable.size(); 1271 int numCols = fSafeTable.get(0).length; 1272 boolean use8Bits = numRows <= MAX_STATE_FOR_8BITS_TABLE; 1273 1274 int rowSize = (use8Bits ? 1 : 2 ) * (RBBIDataWrapper.NEXTSTATES + numCols); 1275 size += numRows * rowSize; 1276 // TODO: there are redundant round-up. Figure out best place, get rid of the rest. 1277 size = (size + 7) & ~7; // round up to a multiple of 8 bytes 1278 return size; 1279 } 1280 1281 1282 /** 1283 * Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table. 1284 * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, 1285 * in common/rbbidata.h 1286 */ 1287 RBBIDataWrapper.RBBIStateTable exportSafeTable() { 1288 RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); 1289 table.fNumStates = fSafeTable.size(); 1290 boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE; 1291 int numCharCategories = fSafeTable.get(0).length; 1292 1293 // Size of table size in shorts. 1294 int rowLen = RBBIDataWrapper.NEXTSTATES + numCharCategories; 1295 // TODO: tableSize is basically numStates * numCharCategories, 1296 // except for alignment padding. Clean up here, and in main exportTable(). 1297 int tableSize = (getSafeTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize); // fTable length in bytes. 1298 if (use8Bits) { 1299 table.fFlags |= RBBIDataWrapper.RBBI_8BITS_ROWS; 1300 table.fTable = new char[tableSize]; 1301 table.fRowLen = rowLen; // Row length in bytes. 1302 } else { 1303 tableSize /= 2; // fTable length in shorts. 1304 table.fTable = new char[tableSize]; 1305 table.fRowLen = rowLen * 2; // Row length in bytes. 1306 } 1307 1308 for (int state=0; state<table.fNumStates; state++) { 1309 short[] rowArray = fSafeTable.get(state); 1310 int row = state * rowLen; 1311 1312 for (int col=0; col<numCharCategories; col++) { 1313 if (use8Bits) { 1314 Assert.assrt (rowArray[col] <= MAX_STATE_FOR_8BITS_TABLE); 1315 } 1316 table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)rowArray[col]; 1317 } 1318 } 1319 return table; 1320 } 1321 1322 1323 //----------------------------------------------------------------------------- 1324 // 1325 // printSet Debug function. Print the contents of a set of Nodes 1326 // 1327 //----------------------------------------------------------------------------- 1328 1329 void printSet(Collection<RBBINode> s) { 1330 for (RBBINode n : s) { 1331 RBBINode.printInt(n.fSerialNum, 8); 1332 } 1333 System.out.println(); 1334 } 1335 1336 1337 1338 //----------------------------------------------------------------------------- 1339 // 1340 // printStates Debug Function. Dump the fully constructed state transition table. 1341 // 1342 //----------------------------------------------------------------------------- 1343 1344 void printStates() { 1345 int c; // input "character" 1346 int n; // state number 1347 1348 System.out.print("state | i n p u t s y m b o l s \n"); 1349 System.out.print(" | Acc LA Tag"); 1350 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1351 RBBINode.printInt(c, 4); 1352 } 1353 System.out.print("\n"); 1354 System.out.print(" |---------------"); 1355 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1356 System.out.print("----"); 1357 } 1358 System.out.print("\n"); 1359 1360 for (n=0; n<fDStates.size(); n++) { 1361 RBBIStateDescriptor sd = fDStates.get(n); 1362 RBBINode.printInt(n, 5); 1363 System.out.print(" | "); 1364 1365 RBBINode.printInt(sd.fAccepting, 3); 1366 RBBINode.printInt(sd.fLookAhead, 4); 1367 RBBINode.printInt(sd.fTagsIdx, 6); 1368 System.out.print(" "); 1369 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1370 RBBINode.printInt(sd.fDtran[c], 4); 1371 } 1372 System.out.print("\n"); 1373 } 1374 System.out.print("\n\n"); 1375 } 1376 1377 1378 /** 1379 * Debug Function. Dump the fully constructed safe reverse table. 1380 */ 1381 void printReverseTable() { 1382 int c; // input "character" 1383 1384 System.out.printf(" Safe Reverse Table \n"); 1385 if (fSafeTable == null) { 1386 System.out.printf(" --- nullptr ---\n"); 1387 return; 1388 } 1389 int numCharCategories = fSafeTable.get(0).length; 1390 System.out.printf("state | i n p u t s y m b o l s \n"); 1391 System.out.printf(" | Acc LA Tag"); 1392 for (c=0; c< numCharCategories; c++) { 1393 System.out.printf(" %2d", c); 1394 } 1395 System.out.printf("\n"); 1396 System.out.printf(" |---------------"); 1397 for (c=0; c<numCharCategories; c++) { 1398 System.out.printf("---"); 1399 } 1400 System.out.printf("\n"); 1401 1402 for (int n=0; n<fSafeTable.size(); n++) { 1403 short rowArray[] = fSafeTable.get(n); 1404 System.out.printf(" %3d | " , n); 1405 System.out.printf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags 1406 for (c=0; c<numCharCategories; c++) { 1407 System.out.printf(" %2d", rowArray[c]); 1408 } 1409 System.out.printf("\n"); 1410 } 1411 System.out.printf("\n\n"); 1412 } 1413 1414 1415 1416 1417 1418 //----------------------------------------------------------------------------- 1419 // 1420 // printRuleStatusTable Debug Function. Dump the common rule status table 1421 // 1422 //----------------------------------------------------------------------------- 1423 1424 void printRuleStatusTable() { 1425 int thisRecord = 0; 1426 int nextRecord = 0; 1427 int i; 1428 List<Integer> tbl = fRB.fRuleStatusVals; 1429 1430 System.out.print("index | tags \n"); 1431 System.out.print("-------------------\n"); 1432 1433 while (nextRecord < tbl.size()) { 1434 thisRecord = nextRecord; 1435 nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1; 1436 RBBINode.printInt(thisRecord, 7); 1437 for (i=thisRecord+1; i<nextRecord; i++) { 1438 int val = tbl.get(i).intValue(); 1439 RBBINode.printInt(val, 7); 1440 } 1441 System.out.print("\n"); 1442 } 1443 System.out.print("\n\n"); 1444 } 1445 1446 1447 1448 } 1449