1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ********************************************************************** 6 * Copyright (c) 2002-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ********************************************************************** 9 */ 10 11 package ohos.global.icu.text; 12 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collection; 16 import java.util.HashSet; 17 import java.util.List; 18 import java.util.Set; 19 import java.util.SortedSet; 20 import java.util.TreeSet; 21 22 import ohos.global.icu.impl.Assert; 23 import ohos.global.icu.impl.RBBIDataWrapper; 24 import ohos.global.icu.lang.UCharacter; 25 import ohos.global.icu.lang.UProperty; 26 import ohos.global.icu.text.RBBIRuleBuilder.IntPair; 27 28 /** 29 * This class is part of the RBBI rule compiler. 30 * It builds the state transition table used by the RBBI runtime 31 * from the expression syntax tree generated by the rule scanner. 32 * 33 * This class is part of the RBBI implementation only. 34 * There is no user-visible public API here. 35 */ 36 class RBBITableBuilder { 37 38 // 39 // RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors, 40 // one for each state. 41 static class RBBIStateDescriptor { 42 boolean fMarked; 43 int fAccepting; 44 int fLookAhead; 45 SortedSet<Integer> fTagVals; 46 int fTagsIdx; 47 Set<RBBINode> fPositions; // Set of parse tree positions associated 48 // with this state. Unordered (it's a set). 49 // UVector contents are RBBINode * 50 51 int[] fDtran; // Transitions out of this state. 52 // indexed by input character 53 // contents is int index of dest state 54 // in RBBITableBuilder.fDStates 55 RBBIStateDescriptor(int maxInputSymbol)56 RBBIStateDescriptor(int maxInputSymbol) { 57 fTagVals = new TreeSet<>(); 58 fPositions = new HashSet<>(); 59 fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized. 60 // It is indexed by input symbols, and will 61 // hold the next state number for each 62 // symbol. 63 } 64 } 65 66 67 private RBBIRuleBuilder fRB; 68 69 /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */ 70 private int fRootIx; 71 72 /** D states (Aho's terminology). Index is state number. */ 73 private List<RBBIStateDescriptor> fDStates; 74 75 /** Synthesized safe table, a List of row arrays. */ 76 private List<short[]> fSafeTable; 77 78 /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */ 79 int[] fLookAheadRuleMap; 80 81 //----------------------------------------------------------------------------- 82 // 83 // Constructor for RBBITableBuilder. 84 // 85 // rootNode is an index into the array of root nodes that is held by 86 // the overall RBBIRuleBuilder. 87 //----------------------------------------------------------------------------- RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx)88 RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) { 89 fRootIx = rootNodeIx; 90 fRB = rb; 91 fDStates = new ArrayList<>(); 92 } 93 94 95 96 97 //----------------------------------------------------------------------------- 98 // 99 // RBBITableBuilder::buildForwardTable - This is the main function for building 100 // the DFA state transition table from the RBBI rules parse tree. 101 // 102 //----------------------------------------------------------------------------- buildForwardTable()103 void buildForwardTable() { 104 // If there were no rules, just return. This situation can easily arise 105 // for the reverse rules. 106 if (fRB.fTreeRoots[fRootIx]==null) { 107 return; 108 } 109 110 // 111 // Walk through the tree, replacing any references to $variables with a copy of the 112 // parse tree for the substition expression. 113 // 114 fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(); 115 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) { 116 System.out.println("Parse tree after flattening variable references."); 117 fRB.fTreeRoots[fRootIx].printTree(true); 118 } 119 120 // 121 // If the rules contained any references to {bof} 122 // add a {bof} <cat> <former root of tree> to the 123 // tree. Means that all matches must start out with the 124 // {bof} fake character. 125 // 126 if (fRB.fSetBuilder.sawBOF()) { 127 RBBINode bofTop = new RBBINode(RBBINode.opCat); 128 RBBINode bofLeaf = new RBBINode(RBBINode.leafChar); 129 bofTop.fLeftChild = bofLeaf; 130 bofTop.fRightChild = fRB.fTreeRoots[fRootIx]; 131 bofLeaf.fParent = bofTop; 132 bofLeaf.fVal = 2; // Reserved value for {bof}. 133 fRB.fTreeRoots[fRootIx] = bofTop; 134 } 135 136 // 137 // Add a unique right-end marker to the expression. 138 // Appears as a cat-node, left child being the original tree, 139 // right child being the end marker. 140 // 141 RBBINode cn = new RBBINode(RBBINode.opCat); 142 cn.fLeftChild = fRB.fTreeRoots[fRootIx]; 143 fRB.fTreeRoots[fRootIx].fParent = cn; 144 RBBINode endMarkerNode = cn.fRightChild = new RBBINode(RBBINode.endMark); 145 cn.fRightChild.fParent = cn; 146 fRB.fTreeRoots[fRootIx] = cn; 147 148 // 149 // Replace all references to UnicodeSets with the tree for the equivalent 150 // expression. 151 // 152 fRB.fTreeRoots[fRootIx].flattenSets(); 153 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) { 154 System.out.println("Parse tree after flattening Unicode Set references."); 155 fRB.fTreeRoots[fRootIx].printTree(true); 156 } 157 158 159 // 160 // calculate the functions nullable, firstpos, lastpos and followpos on 161 // nodes in the parse tree. 162 // See the alogrithm description in Aho. 163 // Understanding how this works by looking at the code alone will be 164 // nearly impossible. 165 // 166 calcNullable(fRB.fTreeRoots[fRootIx]); 167 calcFirstPos(fRB.fTreeRoots[fRootIx]); 168 calcLastPos(fRB.fTreeRoots[fRootIx]); 169 calcFollowPos(fRB.fTreeRoots[fRootIx]); 170 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) { 171 System.out.print("\n"); 172 printPosSets(fRB.fTreeRoots[fRootIx]); 173 } 174 175 // 176 // For "chained" rules, modify the followPos sets 177 // 178 if (fRB.fChainRules) { 179 calcChainedFollowPos(fRB.fTreeRoots[fRootIx], endMarkerNode); 180 } 181 182 // 183 // BOF (start of input) test fixup. 184 // 185 if (fRB.fSetBuilder.sawBOF()) { 186 bofFixup(); 187 } 188 189 // 190 // Build the DFA state transition tables. 191 // 192 buildStateTable(); 193 mapLookAheadRules(); 194 flagAcceptingStates(); 195 flagLookAheadStates(); 196 flagTaggedStates(); 197 198 // 199 // Update the global table of rule status {tag} values 200 // The rule builder has a global vector of status values that are common 201 // for all tables. Merge the ones from this table into the global set. 202 // 203 mergeRuleStatusVals(); 204 } 205 206 207 208 //----------------------------------------------------------------------------- 209 // 210 // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 211 // 212 //----------------------------------------------------------------------------- calcNullable(RBBINode n)213 void calcNullable(RBBINode n) { 214 if (n == null) { 215 return; 216 } 217 if (n.fType == RBBINode.setRef || 218 n.fType == RBBINode.endMark ) { 219 // These are non-empty leaf node types. 220 n.fNullable = false; 221 return; 222 } 223 224 if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) { 225 // Lookahead marker node. It's a leaf, so no recursion on children. 226 // It's nullable because it does not match any literal text from the input stream. 227 n.fNullable = true; 228 return; 229 } 230 231 232 // The node is not a leaf. 233 // Calculate nullable on its children. 234 calcNullable(n.fLeftChild); 235 calcNullable(n.fRightChild); 236 237 // Apply functions from table 3.40 in Aho 238 if (n.fType == RBBINode.opOr) { 239 n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable; 240 } 241 else if (n.fType == RBBINode.opCat) { 242 n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable; 243 } 244 else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) { 245 n.fNullable = true; 246 } 247 else { 248 n.fNullable = false; 249 } 250 } 251 252 253 254 255 //----------------------------------------------------------------------------- 256 // 257 // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 258 // 259 //----------------------------------------------------------------------------- calcFirstPos(RBBINode n)260 void calcFirstPos(RBBINode n) { 261 if (n == null) { 262 return; 263 } 264 if (n.fType == RBBINode.leafChar || 265 n.fType == RBBINode.endMark || 266 n.fType == RBBINode.lookAhead || 267 n.fType == RBBINode.tag) { 268 // These are non-empty leaf node types. 269 n.fFirstPosSet.add(n); 270 return; 271 } 272 273 // The node is not a leaf. 274 // Calculate firstPos on its children. 275 calcFirstPos(n.fLeftChild); 276 calcFirstPos(n.fRightChild); 277 278 // Apply functions from table 3.40 in Aho 279 if (n.fType == RBBINode.opOr) { 280 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 281 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 282 } 283 else if (n.fType == RBBINode.opCat) { 284 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 285 if (n.fLeftChild.fNullable) { 286 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 287 } 288 } 289 else if (n.fType == RBBINode.opStar || 290 n.fType == RBBINode.opQuestion || 291 n.fType == RBBINode.opPlus) { 292 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 293 } 294 } 295 296 297 298 //----------------------------------------------------------------------------- 299 // 300 // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 301 // 302 //----------------------------------------------------------------------------- calcLastPos(RBBINode n)303 void calcLastPos(RBBINode n) { 304 if (n == null) { 305 return; 306 } 307 if (n.fType == RBBINode.leafChar || 308 n.fType == RBBINode.endMark || 309 n.fType == RBBINode.lookAhead || 310 n.fType == RBBINode.tag) { 311 // These are non-empty leaf node types. 312 n.fLastPosSet.add(n); 313 return; 314 } 315 316 // The node is not a leaf. 317 // Calculate lastPos on its children. 318 calcLastPos(n.fLeftChild); 319 calcLastPos(n.fRightChild); 320 321 // Apply functions from table 3.40 in Aho 322 if (n.fType == RBBINode.opOr) { 323 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 324 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 325 } 326 else if (n.fType == RBBINode.opCat) { 327 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 328 if (n.fRightChild.fNullable) { 329 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 330 } 331 } 332 else if (n.fType == RBBINode.opStar || 333 n.fType == RBBINode.opQuestion || 334 n.fType == RBBINode.opPlus) { 335 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 336 } 337 } 338 339 340 341 //----------------------------------------------------------------------------- 342 // 343 // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 344 // 345 //----------------------------------------------------------------------------- calcFollowPos(RBBINode n)346 void calcFollowPos(RBBINode n) { 347 if (n == null || 348 n.fType == RBBINode.leafChar || 349 n.fType == RBBINode.endMark) { 350 return; 351 } 352 353 calcFollowPos(n.fLeftChild); 354 calcFollowPos(n.fRightChild); 355 356 // Aho rule #1 357 if (n.fType == RBBINode.opCat) { 358 for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) { 359 i.fFollowPos.addAll(n.fRightChild.fFirstPosSet); 360 } 361 } 362 363 // Aho rule #2 364 if (n.fType == RBBINode.opStar || 365 n.fType == RBBINode.opPlus) { 366 for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) { 367 i.fFollowPos.addAll(n.fFirstPosSet); 368 } 369 } 370 } 371 372 //----------------------------------------------------------------------------- 373 // 374 // addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged 375 // as roots of a rule to a destination vector. 376 // 377 //----------------------------------------------------------------------------- addRuleRootNodes(List<RBBINode> dest, RBBINode node)378 void addRuleRootNodes(List<RBBINode> dest, RBBINode node) { 379 if (node == null) { 380 return; 381 } 382 if (node.fRuleRoot) { 383 dest.add(node); 384 // Note: rules cannot nest. If we found a rule start node, 385 // no child node can also be a start node. 386 return; 387 } 388 addRuleRootNodes(dest, node.fLeftChild); 389 addRuleRootNodes(dest, node.fRightChild); 390 } 391 392 //----------------------------------------------------------------------------- 393 // 394 // calcChainedFollowPos. Modify the previously calculated followPos sets 395 // to implement rule chaining. NOT described by Aho 396 // 397 //----------------------------------------------------------------------------- calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode)398 void calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode) { 399 400 List<RBBINode> leafNodes = new ArrayList<>(); 401 402 // get a list all leaf nodes 403 tree.findNodes(leafNodes, RBBINode.leafChar); 404 405 // Collect all leaf nodes that can start matches for rules 406 // with inbound chaining enabled, which is the union of the 407 // firstPosition sets from each of the rule root nodes. 408 409 List<RBBINode> ruleRootNodes = new ArrayList<>(); 410 addRuleRootNodes(ruleRootNodes, tree); 411 412 Set<RBBINode> matchStartNodes = new HashSet<>(); 413 for (RBBINode node: ruleRootNodes) { 414 if (node.fChainIn) { 415 matchStartNodes.addAll(node.fFirstPosSet); 416 } 417 } 418 419 // Iterate over all leaf nodes, 420 // 421 for (RBBINode endNode : leafNodes) { 422 423 // Identify leaf nodes that correspond to overall rule match positions. 424 // These include the endMarkNode in their followPos sets. 425 // 426 // Note: do not consider other end marker nodes, those that are added to 427 // look-ahead rules. These can't chain; a match immediately stops 428 // further matching. This leaves exactly one end marker node, the one 429 // at the end of the complete tree. 430 431 if (!endNode.fFollowPos.contains(endMarkNode)) { 432 continue; 433 } 434 435 // We've got a node that can end a match. 436 437 // !!LBCMNoChain implementation: If this node's val correspond to 438 // the Line Break $CM char class, don't chain from it. 439 // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used 440 // by any of the standard ICU rules. 441 if (fRB.fLBCMNoChain) { 442 int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal); 443 if (c != -1) { 444 // c == -1 occurs with sets containing only the {eof} marker string. 445 int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK); 446 if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) { 447 continue; 448 } 449 } 450 } 451 452 453 // Now iterate over the nodes that can start a match, looking for ones 454 // with the same char class as our ending node. 455 for (RBBINode startNode : matchStartNodes) { 456 if (startNode.fType != RBBINode.leafChar) { 457 continue; 458 } 459 460 if (endNode.fVal == startNode.fVal) { 461 // The end val (character class) of one possible match is the 462 // same as the start of another. 463 464 // Add all nodes from the followPos of the start node to the 465 // followPos set of the end node, which will have the effect of 466 // letting matches transition from a match state at endNode 467 // to the second char of a match starting with startNode. 468 endNode.fFollowPos.addAll(startNode.fFollowPos); 469 } 470 } 471 } 472 } 473 474 475 //----------------------------------------------------------------------------- 476 // 477 // bofFixup. Fixup for state tables that include {bof} beginning of input testing. 478 // Do an swizzle similar to chaining, modifying the followPos set of 479 // the bofNode to include the followPos nodes from other {bot} nodes 480 // scattered through the tree. 481 // 482 // This function has much in common with calcChainedFollowPos(). 483 // 484 //----------------------------------------------------------------------------- bofFixup()485 void bofFixup() { 486 // 487 // The parse tree looks like this ... 488 // fTree root --. <cat> 489 // / \ 490 // <cat> <#end node> 491 // / \ 492 // <bofNode> rest 493 // of tree 494 // 495 // We will be adding things to the followPos set of the <bofNode> 496 // 497 RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild; 498 Assert.assrt(bofNode.fType == RBBINode.leafChar); 499 Assert.assrt(bofNode.fVal == 2); 500 501 // Get all nodes that can be the start a match of the user-written rules 502 // (excluding the fake bofNode) 503 // We want the nodes that can start a match in the 504 // part labeled "rest of tree" 505 // 506 Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet; 507 for (RBBINode startNode : matchStartNodes) { 508 if (startNode.fType != RBBINode.leafChar) { 509 continue; 510 } 511 512 if (startNode.fVal == bofNode.fVal) { 513 // We found a leaf node corresponding to a {bof} that was 514 // explicitly written into a rule. 515 // Add everything from the followPos set of this node to the 516 // followPos set of the fake bofNode at the start of the tree. 517 // 518 bofNode.fFollowPos.addAll(startNode.fFollowPos); 519 } 520 } 521 } 522 523 //----------------------------------------------------------------------------- 524 // 525 // buildStateTable() Determine the set of runtime DFA states and the 526 // transition tables for these states, by the algorithm 527 // of fig. 3.44 in Aho. 528 // 529 // Most of the comments are quotes of Aho's psuedo-code. 530 // 531 //----------------------------------------------------------------------------- buildStateTable()532 void buildStateTable() { 533 // 534 // Add a dummy state 0 - the stop state. Not from Aho. 535 int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1; 536 RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol); 537 fDStates.add(failState); 538 539 // initially, the only unmarked state in Dstates is firstpos(root), 540 // where toot is the root of the syntax tree for (r)#; 541 RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol); 542 initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet); 543 fDStates.add(initialState); 544 545 // while there is an unmarked state T in Dstates do begin 546 for (;;) { 547 RBBIStateDescriptor T = null; 548 int tx; 549 for (tx=1; tx<fDStates.size(); tx++) { 550 RBBIStateDescriptor temp = fDStates.get(tx); 551 if (temp.fMarked == false) { 552 T = temp; 553 break; 554 } 555 } 556 if (T == null) { 557 break; 558 } 559 560 // mark T; 561 T.fMarked = true; 562 563 // for each input symbol a do begin 564 int a; 565 for (a = 1; a<=lastInputSymbol; a++) { 566 // let U be the set of positions that are in followpos(p) 567 // for some position p in T 568 // such that the symbol at position p is a; 569 Set<RBBINode> U = null; 570 for (RBBINode p : T.fPositions) { 571 if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) { 572 if (U == null) { 573 U = new HashSet<>(); 574 } 575 U.addAll(p.fFollowPos); 576 } 577 } 578 579 // if U is not empty and not in DStates then 580 int ux = 0; 581 boolean UinDstates = false; 582 if (U != null) { 583 Assert.assrt(U.size() > 0); 584 int ix; 585 for (ix=0; ix<fDStates.size(); ix++) { 586 RBBIStateDescriptor temp2; 587 temp2 = fDStates.get(ix); 588 if (U.equals(temp2.fPositions)) { 589 U = temp2.fPositions; 590 ux = ix; 591 UinDstates = true; 592 break; 593 } 594 } 595 596 // Add U as an unmarked state to Dstates 597 if (!UinDstates) 598 { 599 RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol); 600 newState.fPositions = U; 601 fDStates.add(newState); 602 ux = fDStates.size()-1; 603 } 604 605 // Dtran[T, a] := U; 606 T.fDtran[a] = ux; 607 } 608 } 609 } 610 } 611 612 /** 613 * mapLookAheadRules 614 * 615 */ mapLookAheadRules()616 void mapLookAheadRules() { 617 fLookAheadRuleMap = new int[fRB.fScanner.numRules() + 1]; 618 int laSlotsInUse = 0; 619 620 for (RBBIStateDescriptor sd: fDStates) { 621 int laSlotForState = 0; 622 623 // Establish the look-ahead slot for this state, if the state covers 624 // any look-ahead nodes - corresponding to the '/' in look-ahead rules. 625 626 // If any of the look-ahead nodes already have a slot assigned, use it, 627 // otherwise assign a new one. 628 629 boolean sawLookAheadNode = false; 630 for (RBBINode node: sd.fPositions) { 631 if (node.fType != RBBINode.lookAhead) { 632 continue; 633 } 634 sawLookAheadNode = true; 635 int ruleNum = node.fVal; // Set when rule was originally parsed. 636 assert(ruleNum < fLookAheadRuleMap.length); 637 assert(ruleNum > 0); 638 int laSlot = fLookAheadRuleMap[ruleNum]; 639 if (laSlot != 0) { 640 if (laSlotForState == 0) { 641 laSlotForState = laSlot; 642 } else { 643 // TODO: figure out if this can fail, change to setting an error code if so. 644 assert(laSlot == laSlotForState); 645 } 646 } 647 } 648 if (!sawLookAheadNode) { 649 continue; 650 } 651 652 if (laSlotForState == 0) { 653 laSlotForState = ++laSlotsInUse; 654 } 655 656 // For each look ahead node covered by this state, 657 // set the mapping from the node's rule number to the look ahead slot. 658 // There can be multiple nodes/rule numbers going to the same la slot. 659 660 for (RBBINode node: sd.fPositions) { 661 if (node.fType != RBBINode.lookAhead) { 662 continue; 663 } 664 int ruleNum = node.fVal; // Set when rule was originally parsed. 665 int existingVal = fLookAheadRuleMap[ruleNum]; 666 assert(existingVal == 0 || existingVal == laSlotForState); 667 fLookAheadRuleMap[ruleNum] = laSlotForState; 668 } 669 } 670 671 } 672 673 //----------------------------------------------------------------------------- 674 // 675 // flagAcceptingStates Identify accepting states. 676 // First get a list of all of the end marker nodes. 677 // Then, for each state s, 678 // if s contains one of the end marker nodes in its list of tree positions then 679 // s is an accepting state. 680 // 681 //----------------------------------------------------------------------------- flagAcceptingStates()682 void flagAcceptingStates() { 683 List<RBBINode> endMarkerNodes = new ArrayList<>(); 684 RBBINode endMarker; 685 int i; 686 int n; 687 688 fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark); 689 690 for (i=0; i<endMarkerNodes.size(); i++) { 691 endMarker = endMarkerNodes.get(i); 692 for (n=0; n<fDStates.size(); n++) { 693 RBBIStateDescriptor sd = fDStates.get(n); 694 //if (sd.fPositions.indexOf(endMarker) >= 0) { 695 if (sd.fPositions.contains(endMarker)) { 696 // Any non-zero value for fAccepting means this is an accepting node. 697 // The value is what will be returned to the user as the break status. 698 // If no other value was specified, force it to -1. 699 700 if (sd.fAccepting==0) { 701 // State hasn't been marked as accepting yet. Do it now. 702 sd.fAccepting = fLookAheadRuleMap[endMarker.fVal]; 703 if (sd.fAccepting == 0) { 704 sd.fAccepting = -1; 705 } 706 } 707 if (sd.fAccepting==-1 && endMarker.fVal != 0) { 708 // Both lookahead and non-lookahead accepting for this state. 709 // Favor the look-ahead, because a look-ahead match needs to 710 // immediately stop the run-time engine. First match, not longest. 711 sd.fAccepting = fLookAheadRuleMap[endMarker.fVal]; 712 } 713 // implicit else: 714 // if sd.fAccepting already had a value other than 0 or -1, leave it be. 715 } 716 } 717 } 718 } 719 720 721 //----------------------------------------------------------------------------- 722 // 723 // flagLookAheadStates Very similar to flagAcceptingStates, above. 724 // 725 //----------------------------------------------------------------------------- flagLookAheadStates()726 void flagLookAheadStates() { 727 List<RBBINode> lookAheadNodes = new ArrayList<>(); 728 RBBINode lookAheadNode; 729 int i; 730 int n; 731 732 fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead); 733 for (i=0; i<lookAheadNodes.size(); i++) { 734 lookAheadNode = lookAheadNodes.get(i); 735 for (n=0; n<fDStates.size(); n++) { 736 RBBIStateDescriptor sd = fDStates.get(n); 737 if (sd.fPositions.contains(lookAheadNode)) { 738 int lookaheadSlot = fLookAheadRuleMap[lookAheadNode.fVal]; 739 assert(sd.fLookAhead == 0 || sd.fLookAhead == lookaheadSlot); 740 sd.fLookAhead = lookaheadSlot; 741 } 742 } 743 } 744 } 745 746 747 748 749 //----------------------------------------------------------------------------- 750 // 751 // flagTaggedStates 752 // 753 //----------------------------------------------------------------------------- flagTaggedStates()754 void flagTaggedStates() { 755 List<RBBINode> tagNodes = new ArrayList<>(); 756 RBBINode tagNode; 757 int i; 758 int n; 759 760 fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag); 761 for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) 762 tagNode = tagNodes.get(i); 763 764 for (n=0; n<fDStates.size(); n++) { // For each state s (row in the state table) 765 RBBIStateDescriptor sd = fDStates.get(n); 766 if (sd.fPositions.contains(tagNode)) { // if s include the tag node t 767 sd.fTagVals.add(Integer.valueOf(tagNode.fVal)); 768 } 769 } 770 } 771 } 772 773 774 775 //----------------------------------------------------------------------------- 776 // 777 // mergeRuleStatusVals 778 // 779 // Allocate positions in the global array of rule status {tag} values 780 // 781 // The RBBI runtime uses an array of {sets of status values} that can 782 // be returned for boundaries. Each accepting state that has non-zero 783 // status includes an index into this array. The format of the array 784 // is 785 // Num of status values in group 1 786 // status val 787 // status val 788 // ... 789 // Num of status vals in group 2 790 // status val 791 // status val 792 // ... 793 // etc. 794 // 795 // 796 //----------------------------------------------------------------------------- 797 mergeRuleStatusVals()798 void mergeRuleStatusVals() { 799 // 800 // The basic outline of what happens here is this... 801 // 802 // for each state in this state table 803 // if the status tag list for this state is in the global statuses list 804 // record where and 805 // continue with the next state 806 // else 807 // add the tag list for this state to the global list. 808 // 809 int n; 810 811 // Pre-load a single tag of {0} into the table. 812 // We will need this as a default, for rule sets with no explicit tagging, 813 // or with explicit tagging of {0}. 814 if (fRB.fRuleStatusVals.size() == 0) { 815 fRB.fRuleStatusVals.add(Integer.valueOf(1)); // Num of statuses in group 816 fRB.fRuleStatusVals.add(Integer.valueOf(0)); // and our single status of zero 817 818 SortedSet<Integer> s0 = new TreeSet<>(); // mapping for rules with no explicit tagging 819 fRB.fStatusSets.put(s0, Integer.valueOf(0)); // (key is an empty set). 820 821 SortedSet<Integer> s1 = new TreeSet<>(); // mapping for rules with explicit tagging of {0} 822 s1.add(Integer.valueOf(0)); 823 fRB.fStatusSets.put(s1, Integer.valueOf(0)); 824 } 825 826 // For each state, check whether the state's status tag values are 827 // already entered into the status values array, and add them if not. 828 for (n=0; n<fDStates.size(); n++) { 829 RBBIStateDescriptor sd = fDStates.get(n); 830 Set<Integer> statusVals = sd.fTagVals; 831 Integer arrayIndexI = fRB.fStatusSets.get(statusVals); 832 if (arrayIndexI == null) { 833 // This is the first encounter of this set of status values. 834 // Add them to the statusSets map, This map associates 835 // the set of status values with an index in the runtime status 836 // values array. 837 arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size()); 838 fRB.fStatusSets.put(statusVals, arrayIndexI); 839 840 // Add the new set of status values to the vector of values that 841 // will eventually become the array used by the runtime engine. 842 fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size())); 843 fRB.fRuleStatusVals.addAll(statusVals); 844 } 845 846 // Save the runtime array index back into the state descriptor. 847 sd.fTagsIdx = arrayIndexI.intValue(); 848 } 849 } 850 851 852 853 854 855 856 857 //----------------------------------------------------------------------------- 858 // 859 // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos 860 // for each node in the tree. 861 // 862 //----------------------------------------------------------------------------- 863 printPosSets(RBBINode n)864 void printPosSets(RBBINode n) { 865 if (n==null) { 866 return; 867 } 868 RBBINode.printNode(n); 869 System.out.print(" Nullable: " + n.fNullable); 870 871 System.out.print(" firstpos: "); 872 printSet(n.fFirstPosSet); 873 874 System.out.print(" lastpos: "); 875 printSet(n.fLastPosSet); 876 877 System.out.print(" followpos: "); 878 printSet(n.fFollowPos); 879 880 printPosSets(n.fLeftChild); 881 printPosSets(n.fRightChild); 882 } 883 884 885 886 /** 887 * Find duplicate (redundant) character classes. Begin looking with categories.first. 888 * Duplicates, if found are returned in the categories parameter. 889 * This is an iterator-like function, used to identify character classes 890 * (state table columns) that can be eliminated. 891 * @param categories in/out parameter, specifies where to start looking for duplicates, 892 * and returns the first pair of duplicates found, if any. 893 * @return true if duplicate char classes were found, false otherwise. 894 * @hide draft / provisional / internal are hidden on OHOS 895 */ findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories)896 boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) { 897 int numStates = fDStates.size(); 898 int numCols = fRB.fSetBuilder.getNumCharCategories(); 899 900 int table_base = 0; 901 int table_dupl = 0; 902 for (; categories.first < numCols-1; ++categories.first) { 903 for (categories.second=categories.first+1; categories.second < numCols; ++categories.second) { 904 for (int state=0; state<numStates; state++) { 905 RBBIStateDescriptor sd = fDStates.get(state); 906 table_base = sd.fDtran[categories.first]; 907 table_dupl = sd.fDtran[categories.second]; 908 if (table_base != table_dupl) { 909 break; 910 } 911 } 912 if (table_base == table_dupl) { 913 return true; 914 } 915 } 916 } 917 return false; 918 } 919 920 /** 921 * Remove a column from the state table. Used when two character categories 922 * have been found equivalent, and merged together, to eliminate the unneeded table column. 923 */ removeColumn(int column)924 void removeColumn(int column) { 925 int numStates = fDStates.size(); 926 for (int state=0; state<numStates; state++) { 927 RBBIStateDescriptor sd = fDStates.get(state); 928 assert(column < sd.fDtran.length); 929 int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1); 930 System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column); 931 sd.fDtran = newArray; 932 } 933 } 934 935 936 /** 937 * Find duplicate (redundant) states, beginning at the specified pair, 938 * within this state table. This is an iterator-like function, used to 939 * identify states (state table rows) that can be eliminated. 940 * @param states in/out parameter, specifies where to start looking for duplicates, 941 * and returns the first pair of duplicates found, if any. 942 * @return true if duplicate states were found, false otherwise. 943 * @hide draft / provisional / internal are hidden on OHOS 944 */ 945 boolean findDuplicateState(RBBIRuleBuilder.IntPair states) { 946 int numStates = fDStates.size(); 947 int numCols = fRB.fSetBuilder.getNumCharCategories(); 948 949 for (; states.first<numStates-1; ++states.first) { 950 RBBIStateDescriptor firstSD = fDStates.get(states.first); 951 for (states.second=states.first+1; states.second<numStates; ++states.second) { 952 RBBIStateDescriptor duplSD = fDStates.get(states.second); 953 if (firstSD.fAccepting != duplSD.fAccepting || 954 firstSD.fLookAhead != duplSD.fLookAhead || 955 firstSD.fTagsIdx != duplSD.fTagsIdx) { 956 continue; 957 } 958 boolean rowsMatch = true; 959 for (int col=0; col < numCols; ++col) { 960 int firstVal = firstSD.fDtran[col]; 961 int duplVal = duplSD.fDtran[col]; 962 if (!((firstVal == duplVal) || 963 ((firstVal == states.first || firstVal == states.second) && 964 (duplVal == states.first || duplVal == states.second)))) { 965 rowsMatch = false; 966 break; 967 } 968 } 969 if (rowsMatch) { 970 return true; 971 } 972 } 973 } 974 return false; 975 } 976 977 /** 978 * Find the next duplicate state in the safe reverse table. An iterator function. 979 * @param states in/out parameter, specifies where to start looking for duplicates, 980 * and returns the first pair of duplicates found, if any. 981 * @return true if duplicate states were found, false otherwise. 982 * @hide draft / provisional / internal are hidden on OHOS 983 */ 984 boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) { 985 int numStates = fSafeTable.size(); 986 987 for (; states.first<numStates-1; ++states.first) { 988 short[] firstRow = fSafeTable.get(states.first); 989 for (states.second=states.first+1; states.second<numStates; ++states.second) { 990 short[] duplRow = fSafeTable.get(states.second); 991 boolean rowsMatch = true; 992 int numCols = firstRow.length; 993 for (int col=0; col < numCols; ++col) { 994 int firstVal = firstRow[col]; 995 int duplVal = duplRow[col]; 996 if (!((firstVal == duplVal) || 997 ((firstVal == states.first || firstVal == states.second) && 998 (duplVal == states.first || duplVal == states.second)))) { 999 rowsMatch = false; 1000 break; 1001 } 1002 } 1003 if (rowsMatch) { 1004 return true; 1005 } 1006 } 1007 } 1008 return false; 1009 } 1010 1011 /** 1012 * Remove a duplicate state (row) from the state table. All references to the deleted (second) state 1013 * are redirected to first state. 1014 * @param duplStates The duplicate pair of states. 1015 * @hide draft / provisional / internal are hidden on OHOS 1016 */ 1017 void removeState(IntPair duplStates) { 1018 final int keepState = duplStates.first; 1019 final int duplState = duplStates.second; 1020 assert(keepState < duplState); 1021 assert(duplState < fDStates.size()); 1022 1023 fDStates.remove(duplState); 1024 1025 int numStates = fDStates.size(); 1026 int numCols = fRB.fSetBuilder.getNumCharCategories(); 1027 for (int state=0; state<numStates; ++state) { 1028 RBBIStateDescriptor sd = fDStates.get(state); 1029 for (int col=0; col<numCols; col++) { 1030 int existingVal = sd.fDtran[col]; 1031 int newVal = existingVal; 1032 if (existingVal == duplState) { 1033 newVal = keepState; 1034 } else if (existingVal > duplState) { 1035 newVal = existingVal - 1; 1036 } 1037 sd.fDtran[col] = newVal; 1038 } 1039 } 1040 } 1041 1042 /** 1043 * Remove a duplicate state from the safe table. 1044 * @param duplStates The duplicate pair of states. The first is kept, the second is removed. 1045 * All references to the second in the state table are retargeted 1046 * to the first. 1047 * @hide draft / provisional / internal are hidden on OHOS 1048 */ 1049 void removeSafeState(IntPair duplStates) { 1050 final int keepState = duplStates.first; 1051 final int duplState = duplStates.second; 1052 assert(keepState < duplState); 1053 assert(duplState < fSafeTable.size()); 1054 1055 fSafeTable.remove(duplState); 1056 int numStates = fSafeTable.size(); 1057 for (int state=0; state<numStates; ++state) { 1058 short[] row = fSafeTable.get(state); 1059 for (int col=0; col<row.length; col++) { 1060 int existingVal = row[col]; 1061 int newVal = existingVal; 1062 if (existingVal == duplState) { 1063 newVal = keepState; 1064 } else if (existingVal > duplState) { 1065 newVal = existingVal - 1; 1066 } 1067 row[col] = (short)newVal; 1068 } 1069 } 1070 } 1071 1072 1073 /** 1074 * Check for, and remove duplicate states (table rows). 1075 * @return the number of states removed. 1076 * @hide draft / provisional / internal are hidden on OHOS 1077 */ 1078 int removeDuplicateStates() { 1079 IntPair dupls = new IntPair(3, 0); 1080 int numStatesRemoved = 0; 1081 1082 while (findDuplicateState(dupls)) { 1083 // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); 1084 removeState(dupls); 1085 ++numStatesRemoved; 1086 } 1087 return numStatesRemoved; 1088 } 1089 1090 1091 /** 1092 * Calculate the size in bytes of the serialized form of this state transition table, 1093 * which is identical to the ICU4C runtime form. 1094 * Refer to common/rbbidata.h from ICU4C for the declarations of the structures 1095 * being matched by this calculation. 1096 */ 1097 int getTableSize() { 1098 if (fRB.fTreeRoots[fRootIx] == null) { 1099 return 0; 1100 } 1101 int size = 16; // The header of 4 ints, with no rows to the table. 1102 int numRows = fDStates.size(); 1103 int numCols = fRB.fSetBuilder.getNumCharCategories(); 1104 int rowSize = 8 + 2*numCols; 1105 size += numRows * rowSize; 1106 size = (size + 7) & ~7; // round up to a multiple of 8 bytes 1107 return size; 1108 } 1109 1110 1111 1112 /** 1113 * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table. 1114 * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, 1115 * in common/rbbidata.h 1116 */ 1117 RBBIDataWrapper.RBBIStateTable exportTable() { 1118 int state; 1119 int col; 1120 1121 RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); 1122 if (fRB.fTreeRoots[fRootIx] == null) { 1123 return table; 1124 } 1125 1126 Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff && 1127 fDStates.size() < 0x7fff); 1128 table.fNumStates = fDStates.size(); 1129 1130 // Size of table size in shorts. 1131 // the "4" is the size of struct RBBIStateTableRow, the row header part only. 1132 int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts. 1133 int tableSize = (getTableSize() - 16) / 2; // fTable length in shorts. 1134 table.fTable = new short[tableSize]; 1135 table.fRowLen = rowLen * 2; // Row length in bytes. 1136 1137 if (fRB.fLookAheadHardBreak) { 1138 table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; 1139 } 1140 if (fRB.fSetBuilder.sawBOF()) { 1141 table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; 1142 } 1143 1144 int numCharCategories = fRB.fSetBuilder.getNumCharCategories(); 1145 for (state=0; state<table.fNumStates; state++) { 1146 RBBIStateDescriptor sd = fDStates.get(state); 1147 int row = state*rowLen; 1148 Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767); 1149 Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767); 1150 table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting; 1151 table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead; 1152 table.fTable[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx; 1153 for (col=0; col<numCharCategories; col++) { 1154 table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col]; 1155 } 1156 } 1157 return table; 1158 } 1159 1160 /** 1161 * Synthesize a safe state table from the main state table. 1162 */ 1163 void buildSafeReverseTable() { 1164 // Safe Reverse table construction is described in more detail in the corresponding 1165 // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because 1166 // it is too likely to get out of sync. 1167 1168 // Each safe pair is stored as two chars in the safePair stringBuilder. 1169 StringBuilder safePairs = new StringBuilder(); 1170 1171 int numCharClasses = fRB.fSetBuilder.getNumCharCategories(); 1172 int numStates = fDStates.size(); 1173 1174 for (int c1=0; c1<numCharClasses; ++c1) { 1175 for (int c2=0; c2 < numCharClasses; ++c2) { 1176 int wantedEndState = -1; 1177 int endState = 0; 1178 for (int startState = 1; startState < numStates; ++startState) { 1179 RBBIStateDescriptor startStateD = fDStates.get(startState); 1180 int s2 = startStateD.fDtran[c1]; 1181 RBBIStateDescriptor s2StateD = fDStates.get(s2); 1182 endState = s2StateD.fDtran[c2]; 1183 if (wantedEndState < 0) { 1184 wantedEndState = endState; 1185 } else { 1186 if (wantedEndState != endState) { 1187 break; 1188 } 1189 } 1190 } 1191 if (wantedEndState == endState) { 1192 safePairs.append((char)c1); 1193 safePairs.append((char)c2); 1194 // System.out.printf("(%d, %d) ", c1, c2); 1195 } 1196 } 1197 // System.out.printf("\n"); 1198 } 1199 1200 // Populate the initial safe table. 1201 // The table as a whole is a List<short[]> 1202 // Row 0 is the stop state. 1203 // Row 1 is the start sate. 1204 // Row 2 and beyond are other states, initially one per char class, but 1205 // after initial construction, many of the states will be combined, compacting the table.) 1206 // The String holds the nextState data only. The four leading fields of a row, fAccepting, 1207 // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building. 1208 1209 assert(fSafeTable == null); 1210 fSafeTable = new ArrayList<>(); 1211 for (int row=0; row<numCharClasses + 2; ++row) { 1212 fSafeTable.add(new short[numCharClasses]); 1213 } 1214 1215 // From the start state, each input char class transitions to the state for that input. 1216 short[] startState = fSafeTable.get(1); 1217 for (int charClass=0; charClass < numCharClasses; ++charClass) { 1218 // Note: +2 to skip the start & stop state rows. 1219 startState[charClass] = (short)(charClass+2); 1220 } 1221 1222 // Initially make every other state table row look like the start state row 1223 // (except for the stop state, which remains all 0) 1224 for (int row=2; row<numCharClasses+2; ++row) { 1225 System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length); 1226 } 1227 1228 // Run through the safe pairs, set the next state to zero when pair has been seen. 1229 // Zero being the stop state, meaning we found a safe point. 1230 for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) { 1231 int c1 = safePairs.charAt(pairIdx); 1232 int c2 = safePairs.charAt(pairIdx + 1); 1233 1234 short[] rowState = fSafeTable.get(c2 + 2); 1235 rowState[c1] = 0; 1236 } 1237 1238 // Remove duplicate or redundant rows from the table. 1239 RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0); 1240 while (findDuplicateSafeState(states)) { 1241 // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); 1242 removeSafeState(states); 1243 } 1244 } 1245 1246 1247 /** 1248 * Calculate the size of the runtime form of this safe state table. 1249 */ 1250 int getSafeTableSize() { 1251 if (fSafeTable == null) { 1252 return 0; 1253 } 1254 int size = 16; // The header of 4 ints, with no rows to the table. 1255 int numRows = fSafeTable.size(); 1256 int numCols = fSafeTable.get(0).length; 1257 int rowSize = 8 + 2*numCols; 1258 size += numRows * rowSize; 1259 // TODO: there are redundant round-up. Figure out best place, get rid of the rest. 1260 size = (size + 7) & ~7; // round up to a multiple of 8 bytes 1261 return size; 1262 } 1263 1264 1265 /** 1266 * Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table. 1267 * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, 1268 * in common/rbbidata.h 1269 */ 1270 RBBIDataWrapper.RBBIStateTable exportSafeTable() { 1271 RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); 1272 table.fNumStates = fSafeTable.size(); 1273 int numCharCategories = fSafeTable.get(0).length; 1274 1275 // Size of table size in shorts. 1276 // the "4" is the size of struct RBBIStateTableRow, the row header part only. 1277 int rowLen = 4 + numCharCategories; 1278 // TODO: tableSize is basically numStates * numCharCategories, 1279 // except for alignment padding. Clean up here, and in main exportTable(). 1280 int tableSize = (getSafeTableSize() - 16) / 2; // fTable length in shorts. 1281 table.fTable = new short[tableSize]; 1282 table.fRowLen = rowLen * 2; // Row length in bytes. 1283 1284 for (int state=0; state<table.fNumStates; state++) { 1285 short[] rowArray = fSafeTable.get(state); 1286 int row = state * rowLen; 1287 1288 for (int col=0; col<numCharCategories; col++) { 1289 table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = rowArray[col]; 1290 } 1291 } 1292 return table; 1293 } 1294 1295 1296 //----------------------------------------------------------------------------- 1297 // 1298 // printSet Debug function. Print the contents of a set of Nodes 1299 // 1300 //----------------------------------------------------------------------------- 1301 1302 void printSet(Collection<RBBINode> s) { 1303 for (RBBINode n : s) { 1304 RBBINode.printInt(n.fSerialNum, 8); 1305 } 1306 System.out.println(); 1307 } 1308 1309 1310 1311 //----------------------------------------------------------------------------- 1312 // 1313 // printStates Debug Function. Dump the fully constructed state transition table. 1314 // 1315 //----------------------------------------------------------------------------- 1316 1317 void printStates() { 1318 int c; // input "character" 1319 int n; // state number 1320 1321 System.out.print("state | i n p u t s y m b o l s \n"); 1322 System.out.print(" | Acc LA Tag"); 1323 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1324 RBBINode.printInt(c, 3); 1325 } 1326 System.out.print("\n"); 1327 System.out.print(" |---------------"); 1328 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1329 System.out.print("---"); 1330 } 1331 System.out.print("\n"); 1332 1333 for (n=0; n<fDStates.size(); n++) { 1334 RBBIStateDescriptor sd = fDStates.get(n); 1335 RBBINode.printInt(n, 5); 1336 System.out.print(" | "); 1337 1338 RBBINode.printInt(sd.fAccepting, 3); 1339 RBBINode.printInt(sd.fLookAhead, 4); 1340 RBBINode.printInt(sd.fTagsIdx, 6); 1341 System.out.print(" "); 1342 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1343 RBBINode.printInt(sd.fDtran[c], 3); 1344 } 1345 System.out.print("\n"); 1346 } 1347 System.out.print("\n\n"); 1348 } 1349 1350 1351 /** 1352 * Debug Function. Dump the fully constructed safe reverse table. 1353 */ 1354 void printReverseTable() { 1355 int c; // input "character" 1356 1357 System.out.printf(" Safe Reverse Table \n"); 1358 if (fSafeTable == null) { 1359 System.out.printf(" --- nullptr ---\n"); 1360 return; 1361 } 1362 int numCharCategories = fSafeTable.get(0).length; 1363 System.out.printf("state | i n p u t s y m b o l s \n"); 1364 System.out.printf(" | Acc LA Tag"); 1365 for (c=0; c< numCharCategories; c++) { 1366 System.out.printf(" %2d", c); 1367 } 1368 System.out.printf("\n"); 1369 System.out.printf(" |---------------"); 1370 for (c=0; c<numCharCategories; c++) { 1371 System.out.printf("---"); 1372 } 1373 System.out.printf("\n"); 1374 1375 for (int n=0; n<fSafeTable.size(); n++) { 1376 short rowArray[] = fSafeTable.get(n); 1377 System.out.printf(" %3d | " , n); 1378 System.out.printf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags 1379 for (c=0; c<numCharCategories; c++) { 1380 System.out.printf(" %2d", rowArray[c]); 1381 } 1382 System.out.printf("\n"); 1383 } 1384 System.out.printf("\n\n"); 1385 } 1386 1387 1388 1389 1390 1391 //----------------------------------------------------------------------------- 1392 // 1393 // printRuleStatusTable Debug Function. Dump the common rule status table 1394 // 1395 //----------------------------------------------------------------------------- 1396 1397 void printRuleStatusTable() { 1398 int thisRecord = 0; 1399 int nextRecord = 0; 1400 int i; 1401 List<Integer> tbl = fRB.fRuleStatusVals; 1402 1403 System.out.print("index | tags \n"); 1404 System.out.print("-------------------\n"); 1405 1406 while (nextRecord < tbl.size()) { 1407 thisRecord = nextRecord; 1408 nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1; 1409 RBBINode.printInt(thisRecord, 7); 1410 for (i=thisRecord+1; i<nextRecord; i++) { 1411 int val = tbl.get(i).intValue(); 1412 RBBINode.printInt(val, 7); 1413 } 1414 System.out.print("\n"); 1415 } 1416 System.out.print("\n\n"); 1417 } 1418 1419 1420 1421 } 1422