• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 **********************************************************************
6 *   Copyright (c) 2002-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 **********************************************************************
9 */
10 
11 package ohos.global.icu.text;
12 
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.HashSet;
17 import java.util.List;
18 import java.util.Set;
19 import java.util.SortedSet;
20 import java.util.TreeSet;
21 
22 import ohos.global.icu.impl.Assert;
23 import ohos.global.icu.impl.RBBIDataWrapper;
24 import ohos.global.icu.lang.UCharacter;
25 import ohos.global.icu.lang.UProperty;
26 import ohos.global.icu.text.RBBIRuleBuilder.IntPair;
27 
28 /**
29  *  This class is part of the RBBI rule compiler.
30  *  It builds the state transition table used by the RBBI runtime
31  *  from the expression syntax tree generated by the rule scanner.
32  *
33  *  This class is part of the RBBI implementation only.
34  *  There is no user-visible public API here.
35  */
36 class RBBITableBuilder {
37 
38     //
39     //  RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
40     //                        one for each state.
41     static class RBBIStateDescriptor {
42         boolean      fMarked;
43         int          fAccepting;
44         int          fLookAhead;
45         SortedSet<Integer> fTagVals;
46         int          fTagsIdx;
47         Set<RBBINode> fPositions;                 // Set of parse tree positions associated
48                                                   //   with this state.  Unordered (it's a set).
49                                                   //   UVector contents are RBBINode *
50 
51         int[]        fDtran;                      // Transitions out of this state.
52                                                    //   indexed by input character
53                                                    //   contents is int index of dest state
54                                                    //   in RBBITableBuilder.fDStates
55 
RBBIStateDescriptor(int maxInputSymbol)56         RBBIStateDescriptor(int maxInputSymbol) {
57             fTagVals = new TreeSet<>();
58             fPositions = new HashSet<>();
59             fDtran = new int[maxInputSymbol+1];    // fDtran needs to be pre-sized.
60                                                     //   It is indexed by input symbols, and will
61                                                     //   hold  the next state number for each
62                                                     //   symbol.
63         }
64     }
65 
66 
67     private  RBBIRuleBuilder  fRB;
68 
69     /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */
70     private  int  fRootIx;
71 
72     /** D states (Aho's terminology). Index is state number. */
73     private  List<RBBIStateDescriptor> fDStates;
74 
75     /** Synthesized safe table, a List of row arrays.  */
76     private List<short[]>    fSafeTable;
77 
78     /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
79     int[] fLookAheadRuleMap;
80 
81     //-----------------------------------------------------------------------------
82     //
83     //  Constructor    for RBBITableBuilder.
84     //
85     //                 rootNode is an index into the array of root nodes that is held by
86     //                          the overall RBBIRuleBuilder.
87     //-----------------------------------------------------------------------------
RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx)88     RBBITableBuilder(RBBIRuleBuilder rb,  int rootNodeIx)  {
89            fRootIx     = rootNodeIx;
90            fRB         = rb;
91            fDStates    = new ArrayList<>();
92         }
93 
94 
95 
96 
97        //-----------------------------------------------------------------------------
98        //
99        //   RBBITableBuilder::buildForwardTable  -  This is the main function for building
100        //                          the DFA state transition table from the RBBI rules parse tree.
101        //
102        //-----------------------------------------------------------------------------
buildForwardTable()103        void  buildForwardTable() {
104            // If there were no rules, just return.  This situation can easily arise
105            //   for the reverse rules.
106            if (fRB.fTreeRoots[fRootIx]==null) {
107                return;
108            }
109 
110            //
111            // Walk through the tree, replacing any references to $variables with a copy of the
112            //   parse tree for the substition expression.
113            //
114            fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
115            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
116                System.out.println("Parse tree after flattening variable references.");
117                fRB.fTreeRoots[fRootIx].printTree(true);
118            }
119 
120            //
121            // If the rules contained any references to {bof}
122            //   add a {bof} <cat> <former root of tree> to the
123            //   tree.  Means that all matches must start out with the
124            //   {bof} fake character.
125            //
126            if (fRB.fSetBuilder.sawBOF()) {
127                RBBINode bofTop    = new RBBINode(RBBINode.opCat);
128                RBBINode bofLeaf   = new RBBINode(RBBINode.leafChar);
129                bofTop.fLeftChild  = bofLeaf;
130                bofTop.fRightChild = fRB.fTreeRoots[fRootIx];
131                bofLeaf.fParent    = bofTop;
132                bofLeaf.fVal       = 2;      // Reserved value for {bof}.
133                fRB.fTreeRoots[fRootIx] = bofTop;
134            }
135 
136            //
137            // Add a unique right-end marker to the expression.
138            //   Appears as a cat-node, left child being the original tree,
139            //   right child being the end marker.
140            //
141            RBBINode cn = new RBBINode(RBBINode.opCat);
142            cn.fLeftChild = fRB.fTreeRoots[fRootIx];
143            fRB.fTreeRoots[fRootIx].fParent = cn;
144            RBBINode endMarkerNode = cn.fRightChild = new RBBINode(RBBINode.endMark);
145            cn.fRightChild.fParent = cn;
146            fRB.fTreeRoots[fRootIx] = cn;
147 
148            //
149            //  Replace all references to UnicodeSets with the tree for the equivalent
150            //      expression.
151            //
152            fRB.fTreeRoots[fRootIx].flattenSets();
153            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) {
154                System.out.println("Parse tree after flattening Unicode Set references.");
155                fRB.fTreeRoots[fRootIx].printTree(true);
156            }
157 
158 
159            //
160            // calculate the functions nullable, firstpos, lastpos and followpos on
161            // nodes in the parse tree.
162            //    See the alogrithm description in Aho.
163            //    Understanding how this works by looking at the code alone will be
164            //       nearly impossible.
165            //
166            calcNullable(fRB.fTreeRoots[fRootIx]);
167            calcFirstPos(fRB.fTreeRoots[fRootIx]);
168            calcLastPos(fRB.fTreeRoots[fRootIx]);
169            calcFollowPos(fRB.fTreeRoots[fRootIx]);
170            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) {
171                System.out.print("\n");
172                printPosSets(fRB.fTreeRoots[fRootIx]);
173            }
174 
175            //
176            //  For "chained" rules, modify the followPos sets
177            //
178            if (fRB.fChainRules) {
179                calcChainedFollowPos(fRB.fTreeRoots[fRootIx], endMarkerNode);
180            }
181 
182            //
183            //  BOF (start of input) test fixup.
184            //
185            if (fRB.fSetBuilder.sawBOF()) {
186                bofFixup();
187            }
188 
189            //
190            // Build the DFA state transition tables.
191            //
192            buildStateTable();
193            mapLookAheadRules();
194            flagAcceptingStates();
195            flagLookAheadStates();
196            flagTaggedStates();
197 
198            //
199            // Update the global table of rule status {tag} values
200            // The rule builder has a global vector of status values that are common
201            //    for all tables.  Merge the ones from this table into the global set.
202            //
203            mergeRuleStatusVals();
204        }
205 
206 
207 
208        //-----------------------------------------------------------------------------
209        //
210        //   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
211        //
212        //-----------------------------------------------------------------------------
calcNullable(RBBINode n)213        void calcNullable(RBBINode n) {
214            if (n == null) {
215                return;
216            }
217            if (n.fType == RBBINode.setRef ||
218                n.fType == RBBINode.endMark ) {
219                // These are non-empty leaf node types.
220                n.fNullable = false;
221                return;
222            }
223 
224            if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) {
225                // Lookahead marker node.  It's a leaf, so no recursion on children.
226                // It's nullable because it does not match any literal text from the input stream.
227                n.fNullable = true;
228                return;
229            }
230 
231 
232            // The node is not a leaf.
233            //  Calculate nullable on its children.
234            calcNullable(n.fLeftChild);
235            calcNullable(n.fRightChild);
236 
237            // Apply functions from table 3.40 in Aho
238            if (n.fType == RBBINode.opOr) {
239                n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable;
240            }
241            else if (n.fType == RBBINode.opCat) {
242                n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable;
243            }
244            else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) {
245                n.fNullable = true;
246            }
247            else {
248                n.fNullable = false;
249            }
250        }
251 
252 
253 
254 
255        //-----------------------------------------------------------------------------
256        //
257        //   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
258        //
259        //-----------------------------------------------------------------------------
calcFirstPos(RBBINode n)260        void calcFirstPos(RBBINode n) {
261            if (n == null) {
262                return;
263            }
264            if (n.fType == RBBINode.leafChar  ||
265                n.fType == RBBINode.endMark   ||
266                n.fType == RBBINode.lookAhead ||
267                n.fType == RBBINode.tag) {
268                // These are non-empty leaf node types.
269                n.fFirstPosSet.add(n);
270                return;
271            }
272 
273            // The node is not a leaf.
274            //  Calculate firstPos on its children.
275            calcFirstPos(n.fLeftChild);
276            calcFirstPos(n.fRightChild);
277 
278            // Apply functions from table 3.40 in Aho
279            if (n.fType == RBBINode.opOr) {
280                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
281                n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
282            }
283            else if (n.fType == RBBINode.opCat) {
284                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
285                if (n.fLeftChild.fNullable) {
286                    n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
287                }
288            }
289            else if (n.fType == RBBINode.opStar ||
290                     n.fType == RBBINode.opQuestion ||
291                     n.fType == RBBINode.opPlus) {
292                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
293            }
294        }
295 
296 
297 
298        //-----------------------------------------------------------------------------
299        //
300        //   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
301        //
302        //-----------------------------------------------------------------------------
calcLastPos(RBBINode n)303        void calcLastPos(RBBINode n) {
304            if (n == null) {
305                return;
306            }
307            if (n.fType == RBBINode.leafChar  ||
308                n.fType == RBBINode.endMark   ||
309                n.fType == RBBINode.lookAhead ||
310                n.fType == RBBINode.tag) {
311                // These are non-empty leaf node types.
312                n.fLastPosSet.add(n);
313                return;
314            }
315 
316            // The node is not a leaf.
317            //  Calculate lastPos on its children.
318            calcLastPos(n.fLeftChild);
319            calcLastPos(n.fRightChild);
320 
321            // Apply functions from table 3.40 in Aho
322            if (n.fType == RBBINode.opOr) {
323                n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
324                n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
325            }
326            else if (n.fType == RBBINode.opCat) {
327                n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
328                if (n.fRightChild.fNullable) {
329                    n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
330                }
331            }
332            else if (n.fType == RBBINode.opStar     ||
333                     n.fType == RBBINode.opQuestion ||
334                     n.fType == RBBINode.opPlus) {
335                n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
336            }
337        }
338 
339 
340 
341        //-----------------------------------------------------------------------------
342        //
343        //   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
344        //
345        //-----------------------------------------------------------------------------
calcFollowPos(RBBINode n)346        void calcFollowPos(RBBINode n) {
347            if (n == null ||
348                n.fType == RBBINode.leafChar ||
349                n.fType == RBBINode.endMark) {
350                return;
351            }
352 
353            calcFollowPos(n.fLeftChild);
354            calcFollowPos(n.fRightChild);
355 
356            // Aho rule #1
357            if (n.fType == RBBINode.opCat) {
358                for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) {
359                    i.fFollowPos.addAll(n.fRightChild.fFirstPosSet);
360                }
361            }
362 
363            // Aho rule #2
364            if (n.fType == RBBINode.opStar ||
365                n.fType == RBBINode.opPlus) {
366                for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) {
367                    i.fFollowPos.addAll(n.fFirstPosSet);
368                }
369            }
370        }
371 
372        //-----------------------------------------------------------------------------
373        //
374        //           addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
375        //                               as roots of a rule to a destination vector.
376        //
377        //-----------------------------------------------------------------------------
addRuleRootNodes(List<RBBINode> dest, RBBINode node)378        void addRuleRootNodes(List<RBBINode> dest, RBBINode node) {
379            if (node == null) {
380                return;
381            }
382            if (node.fRuleRoot) {
383                dest.add(node);
384                // Note: rules cannot nest. If we found a rule start node,
385                //       no child node can also be a start node.
386                return;
387            }
388            addRuleRootNodes(dest, node.fLeftChild);
389            addRuleRootNodes(dest, node.fRightChild);
390        }
391 
392        //-----------------------------------------------------------------------------
393        //
394        //   calcChainedFollowPos.    Modify the previously calculated followPos sets
395        //                            to implement rule chaining.  NOT described by Aho
396        //
397        //-----------------------------------------------------------------------------
calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode)398        void calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode) {
399 
400            List<RBBINode> leafNodes      = new ArrayList<>();
401 
402            // get a list all leaf nodes
403            tree.findNodes(leafNodes, RBBINode.leafChar);
404 
405            // Collect all leaf nodes that can start matches for rules
406            // with inbound chaining enabled, which is the union of the
407            // firstPosition sets from each of the rule root nodes.
408 
409            List<RBBINode> ruleRootNodes = new ArrayList<>();
410            addRuleRootNodes(ruleRootNodes, tree);
411 
412            Set<RBBINode> matchStartNodes = new HashSet<>();
413            for (RBBINode node: ruleRootNodes) {
414                if (node.fChainIn) {
415                    matchStartNodes.addAll(node.fFirstPosSet);
416                }
417            }
418 
419            // Iterate over all leaf nodes,
420            //
421            for (RBBINode endNode : leafNodes) {
422 
423                // Identify leaf nodes that correspond to overall rule match positions.
424                //   These include the endMarkNode in their followPos sets.
425                //
426                // Note: do not consider other end marker nodes, those that are added to
427                //       look-ahead rules. These can't chain; a match immediately stops
428                //       further matching. This leaves exactly one end marker node, the one
429                //       at the end of the complete tree.
430 
431                if (!endNode.fFollowPos.contains(endMarkNode)) {
432                    continue;
433                }
434 
435                // We've got a node that can end a match.
436 
437                // !!LBCMNoChain implementation:  If this node's val correspond to
438                // the Line Break $CM char class, don't chain from it.
439                // TODO:  Remove this. !!LBCMNoChain is deprecated, and is not used
440                //             by any of the standard ICU rules.
441                if (fRB.fLBCMNoChain) {
442                    int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
443                    if (c != -1) {
444                        // c == -1 occurs with sets containing only the {eof} marker string.
445                        int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
446                        if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
447                            continue;
448                        }
449                    }
450                }
451 
452 
453                // Now iterate over the nodes that can start a match, looking for ones
454                //   with the same char class as our ending node.
455                for (RBBINode startNode : matchStartNodes) {
456                    if (startNode.fType != RBBINode.leafChar) {
457                        continue;
458                    }
459 
460                    if (endNode.fVal == startNode.fVal) {
461                        // The end val (character class) of one possible match is the
462                        //   same as the start of another.
463 
464                        // Add all nodes from the followPos of the start node to the
465                        //  followPos set of the end node, which will have the effect of
466                        //  letting matches transition from a match state at endNode
467                        //  to the second char of a match starting with startNode.
468                        endNode.fFollowPos.addAll(startNode.fFollowPos);
469                    }
470                }
471            }
472        }
473 
474 
475        //-----------------------------------------------------------------------------
476        //
477        //   bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
478        //                Do an swizzle similar to chaining, modifying the followPos set of
479        //                the bofNode to include the followPos nodes from other {bot} nodes
480        //                scattered through the tree.
481        //
482        //                This function has much in common with calcChainedFollowPos().
483        //
484        //-----------------------------------------------------------------------------
bofFixup()485        void bofFixup() {
486            //
487            //   The parse tree looks like this ...
488            //         fTree root  --.       <cat>
489            //                               /     \
490            //                            <cat>   <#end node>
491            //                           /     \
492            //                     <bofNode>   rest
493            //                               of tree
494            //
495            //    We will be adding things to the followPos set of the <bofNode>
496            //
497            RBBINode  bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild;
498            Assert.assrt(bofNode.fType == RBBINode.leafChar);
499            Assert.assrt(bofNode.fVal == 2);
500 
501            // Get all nodes that can be the start a match of the user-written rules
502            //  (excluding the fake bofNode)
503            //  We want the nodes that can start a match in the
504            //     part labeled "rest of tree"
505            //
506            Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
507            for (RBBINode startNode : matchStartNodes) {
508                if (startNode.fType != RBBINode.leafChar) {
509                    continue;
510                }
511 
512                if (startNode.fVal == bofNode.fVal) {
513                    //  We found a leaf node corresponding to a {bof} that was
514                    //    explicitly written into a rule.
515                    //  Add everything from the followPos set of this node to the
516                    //    followPos set of the fake bofNode at the start of the tree.
517                    //
518                    bofNode.fFollowPos.addAll(startNode.fFollowPos);
519                }
520            }
521        }
522 
523        //-----------------------------------------------------------------------------
524        //
525        //   buildStateTable()    Determine the set of runtime DFA states and the
526        //                        transition tables for these states, by the algorithm
527        //                        of fig. 3.44 in Aho.
528        //
529        //                        Most of the comments are quotes of Aho's psuedo-code.
530        //
531        //-----------------------------------------------------------------------------
buildStateTable()532        void buildStateTable() {
533            //
534            // Add a dummy state 0 - the stop state.  Not from Aho.
535            int      lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
536            RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
537            fDStates.add(failState);
538 
539            // initially, the only unmarked state in Dstates is firstpos(root),
540            //       where toot is the root of the syntax tree for (r)#;
541            RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
542            initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
543            fDStates.add(initialState);
544 
545            // while there is an unmarked state T in Dstates do begin
546            for (;;) {
547                RBBIStateDescriptor T = null;
548                int              tx;
549                for (tx=1; tx<fDStates.size(); tx++) {
550                    RBBIStateDescriptor temp  = fDStates.get(tx);
551                    if (temp.fMarked == false) {
552                        T = temp;
553                        break;
554                    }
555                }
556                if (T == null) {
557                    break;
558                }
559 
560                // mark T;
561                T.fMarked = true;
562 
563                // for each input symbol a do begin
564                int  a;
565                for (a = 1; a<=lastInputSymbol; a++) {
566                    // let U be the set of positions that are in followpos(p)
567                    //    for some position p in T
568                    //    such that the symbol at position p is a;
569                    Set<RBBINode> U = null;
570                    for (RBBINode p : T.fPositions) {
571                        if ((p.fType == RBBINode.leafChar) &&  (p.fVal == a)) {
572                            if (U == null) {
573                                U = new HashSet<>();
574                            }
575                            U.addAll(p.fFollowPos);
576                        }
577                    }
578 
579                    // if U is not empty and not in DStates then
580                    int  ux = 0;
581                    boolean    UinDstates = false;
582                    if (U != null) {
583                        Assert.assrt(U.size() > 0);
584                        int  ix;
585                        for (ix=0; ix<fDStates.size(); ix++) {
586                            RBBIStateDescriptor temp2;
587                            temp2 = fDStates.get(ix);
588                            if (U.equals(temp2.fPositions)) {
589                                U  = temp2.fPositions;
590                                ux = ix;
591                                UinDstates = true;
592                                break;
593                            }
594                        }
595 
596                        // Add U as an unmarked state to Dstates
597                        if (!UinDstates)
598                        {
599                            RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol);
600                            newState.fPositions = U;
601                            fDStates.add(newState);
602                            ux = fDStates.size()-1;
603                        }
604 
605                        // Dtran[T, a] := U;
606                        T.fDtran[a] = ux;
607                    }
608                }
609            }
610        }
611 
612       /**
613        * mapLookAheadRules
614        *
615        */
mapLookAheadRules()616       void mapLookAheadRules() {
617           fLookAheadRuleMap =  new int[fRB.fScanner.numRules() + 1];
618           int laSlotsInUse = 0;
619 
620           for (RBBIStateDescriptor sd: fDStates) {
621               int laSlotForState = 0;
622 
623               // Establish the look-ahead slot for this state, if the state covers
624               // any look-ahead nodes - corresponding to the '/' in look-ahead rules.
625 
626               // If any of the look-ahead nodes already have a slot assigned, use it,
627               // otherwise assign a new one.
628 
629               boolean sawLookAheadNode = false;
630               for (RBBINode node: sd.fPositions) {
631                   if (node.fType != RBBINode.lookAhead) {
632                       continue;
633                   }
634                   sawLookAheadNode = true;
635                   int ruleNum = node.fVal;     // Set when rule was originally parsed.
636                   assert(ruleNum < fLookAheadRuleMap.length);
637                   assert(ruleNum > 0);
638                   int laSlot = fLookAheadRuleMap[ruleNum];
639                   if (laSlot != 0) {
640                       if (laSlotForState == 0) {
641                           laSlotForState = laSlot;
642                       } else {
643                           // TODO: figure out if this can fail, change to setting an error code if so.
644                           assert(laSlot == laSlotForState);
645                       }
646                   }
647               }
648               if (!sawLookAheadNode) {
649                   continue;
650               }
651 
652               if (laSlotForState == 0) {
653                   laSlotForState = ++laSlotsInUse;
654               }
655 
656               // For each look ahead node covered by this state,
657               // set the mapping from the node's rule number to the look ahead slot.
658               // There can be multiple nodes/rule numbers going to the same la slot.
659 
660               for (RBBINode node: sd.fPositions) {
661                   if (node.fType != RBBINode.lookAhead) {
662                       continue;
663                   }
664                   int ruleNum = node.fVal;     // Set when rule was originally parsed.
665                   int existingVal = fLookAheadRuleMap[ruleNum];
666                   assert(existingVal == 0 || existingVal == laSlotForState);
667                   fLookAheadRuleMap[ruleNum] = laSlotForState;
668               }
669           }
670 
671       }
672 
673        //-----------------------------------------------------------------------------
674        //
675        //   flagAcceptingStates    Identify accepting states.
676        //                          First get a list of all of the end marker nodes.
677        //                          Then, for each state s,
678        //                              if s contains one of the end marker nodes in its list of tree positions then
679        //                                  s is an accepting state.
680        //
681        //-----------------------------------------------------------------------------
flagAcceptingStates()682        void     flagAcceptingStates() {
683            List<RBBINode> endMarkerNodes = new ArrayList<>();
684            RBBINode    endMarker;
685            int     i;
686            int     n;
687 
688            fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);
689 
690            for (i=0; i<endMarkerNodes.size(); i++) {
691                endMarker = endMarkerNodes.get(i);
692                for (n=0; n<fDStates.size(); n++) {
693                    RBBIStateDescriptor sd = fDStates.get(n);
694                    //if (sd.fPositions.indexOf(endMarker) >= 0) {
695                    if (sd.fPositions.contains(endMarker)) {
696                        // Any non-zero value for fAccepting means this is an accepting node.
697                        // The value is what will be returned to the user as the break status.
698                        // If no other value was specified, force it to -1.
699 
700                        if (sd.fAccepting==0) {
701                            // State hasn't been marked as accepting yet.  Do it now.
702                            sd.fAccepting = fLookAheadRuleMap[endMarker.fVal];
703                            if (sd.fAccepting == 0) {
704                                sd.fAccepting = -1;
705                            }
706                        }
707                        if (sd.fAccepting==-1 && endMarker.fVal != 0) {
708                            // Both lookahead and non-lookahead accepting for this state.
709                            // Favor the look-ahead, because a look-ahead match needs to
710                            // immediately stop the run-time engine. First match, not longest.
711                            sd.fAccepting = fLookAheadRuleMap[endMarker.fVal];
712                        }
713                        // implicit else:
714                        // if sd.fAccepting already had a value other than 0 or -1, leave it be.
715                    }
716                }
717            }
718        }
719 
720 
721        //-----------------------------------------------------------------------------
722        //
723        //    flagLookAheadStates   Very similar to flagAcceptingStates, above.
724        //
725        //-----------------------------------------------------------------------------
flagLookAheadStates()726        void     flagLookAheadStates() {
727            List<RBBINode> lookAheadNodes = new ArrayList<>();
728            RBBINode    lookAheadNode;
729            int     i;
730            int     n;
731 
732            fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
733            for (i=0; i<lookAheadNodes.size(); i++) {
734                lookAheadNode = lookAheadNodes.get(i);
735                for (n=0; n<fDStates.size(); n++) {
736                    RBBIStateDescriptor sd = fDStates.get(n);
737                    if (sd.fPositions.contains(lookAheadNode)) {
738                        int lookaheadSlot = fLookAheadRuleMap[lookAheadNode.fVal];
739                        assert(sd.fLookAhead == 0 || sd.fLookAhead == lookaheadSlot);
740                        sd.fLookAhead = lookaheadSlot;
741                    }
742                }
743            }
744        }
745 
746 
747 
748 
749        //-----------------------------------------------------------------------------
750        //
751        //    flagTaggedStates
752        //
753        //-----------------------------------------------------------------------------
flagTaggedStates()754        void     flagTaggedStates() {
755            List<RBBINode> tagNodes = new ArrayList<>();
756            RBBINode    tagNode;
757            int     i;
758            int     n;
759 
760            fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
761            for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em)
762                tagNode = tagNodes.get(i);
763 
764                for (n=0; n<fDStates.size(); n++) {              //    For each state  s (row in the state table)
765                    RBBIStateDescriptor sd = fDStates.get(n);
766                    if (sd.fPositions.contains(tagNode)) {       //       if  s include the tag node t
767                        sd.fTagVals.add(Integer.valueOf(tagNode.fVal));
768                    }
769                }
770            }
771        }
772 
773 
774 
775        //-----------------------------------------------------------------------------
776        //
777        //  mergeRuleStatusVals
778        //
779        //      Allocate positions in the  global array of rule status {tag} values
780        //
781        //      The RBBI runtime uses an array of {sets of status values} that can
782        //      be returned for boundaries.  Each accepting state that has non-zero
783        //      status includes an index into this array.  The format of the array
784        //      is
785        //           Num of status values in group 1
786        //              status val
787        //              status val
788        //              ...
789        //           Num of status vals in group 2
790        //              status val
791        //              status val
792        //              ...
793        //           etc.
794        //
795        //
796        //-----------------------------------------------------------------------------
797 
mergeRuleStatusVals()798        void  mergeRuleStatusVals() {
799            //
800            //  The basic outline of what happens here is this...
801            //
802            //    for each state in this state table
803            //       if the status tag list for this state is in the global statuses list
804            //           record where and
805            //           continue with the next state
806            //       else
807            //           add the tag list for this state to the global list.
808            //
809            int n;
810 
811            // Pre-load a single tag of {0} into the table.
812            //   We will need this as a default, for rule sets with no explicit tagging,
813            //   or with explicit tagging of {0}.
814            if (fRB.fRuleStatusVals.size() == 0) {
815                fRB.fRuleStatusVals.add(Integer.valueOf(1));    // Num of statuses in group
816                fRB.fRuleStatusVals.add(Integer.valueOf(0));    //   and our single status of zero
817 
818                SortedSet<Integer> s0 = new TreeSet<>();        // mapping for rules with no explicit tagging
819                fRB.fStatusSets.put(s0, Integer.valueOf(0));    //   (key is an empty set).
820 
821                SortedSet<Integer> s1 = new TreeSet<>();        // mapping for rules with explicit tagging of {0}
822                s1.add(Integer.valueOf(0));
823                fRB.fStatusSets.put(s1, Integer.valueOf(0));
824            }
825 
826            //    For each state, check whether the state's status tag values are
827            //       already entered into the status values array, and add them if not.
828            for (n=0; n<fDStates.size(); n++) {
829                RBBIStateDescriptor sd = fDStates.get(n);
830                Set<Integer> statusVals = sd.fTagVals;
831                Integer arrayIndexI = fRB.fStatusSets.get(statusVals);
832                if (arrayIndexI == null) {
833                    // This is the first encounter of this set of status values.
834                    //   Add them to the statusSets map, This map associates
835                    //   the set of status values with an index in the runtime status
836                    //   values array.
837                    arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size());
838                    fRB.fStatusSets.put(statusVals, arrayIndexI);
839 
840                    // Add the new set of status values to the vector of values that
841                    //   will eventually become the array used by the runtime engine.
842                    fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size()));
843                    fRB.fRuleStatusVals.addAll(statusVals);
844                }
845 
846                // Save the runtime array index back into the state descriptor.
847                sd.fTagsIdx = arrayIndexI.intValue();
848            }
849        }
850 
851 
852 
853 
854 
855 
856 
857        //-----------------------------------------------------------------------------
858        //
859        //  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
860        //                 for each node in the tree.
861        //
862        //-----------------------------------------------------------------------------
863 
printPosSets(RBBINode n)864        void printPosSets(RBBINode n) {
865            if (n==null) {
866                return;
867            }
868            RBBINode.printNode(n);
869            System.out.print("         Nullable:  " + n.fNullable);
870 
871            System.out.print("         firstpos:  ");
872            printSet(n.fFirstPosSet);
873 
874            System.out.print("         lastpos:   ");
875            printSet(n.fLastPosSet);
876 
877            System.out.print("         followpos: ");
878            printSet(n.fFollowPos);
879 
880            printPosSets(n.fLeftChild);
881            printPosSets(n.fRightChild);
882        }
883 
884 
885 
886        /**
887         *  Find duplicate (redundant) character classes. Begin looking with categories.first.
888         *  Duplicates, if found are returned in the categories parameter.
889         *  This is an iterator-like function, used to identify character classes
890         *  (state table columns) that can be eliminated.
891         *  @param categories in/out parameter, specifies where to start looking for duplicates,
892         *                and returns the first pair of duplicates found, if any.
893         *  @return true if duplicate char classes were found, false otherwise.
894         *  @hide draft / provisional / internal are hidden on OHOS
895         */
findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories)896        boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) {
897            int numStates = fDStates.size();
898            int numCols = fRB.fSetBuilder.getNumCharCategories();
899 
900            int table_base = 0;
901            int table_dupl = 0;
902            for (; categories.first < numCols-1; ++categories.first) {
903                for (categories.second=categories.first+1; categories.second < numCols; ++categories.second) {
904                    for (int state=0; state<numStates; state++) {
905                        RBBIStateDescriptor sd = fDStates.get(state);
906                        table_base = sd.fDtran[categories.first];
907                        table_dupl = sd.fDtran[categories.second];
908                        if (table_base != table_dupl) {
909                            break;
910                        }
911                    }
912                    if (table_base == table_dupl) {
913                        return true;
914                    }
915                }
916            }
917            return false;
918        }
919 
920        /**
921         * Remove a column from the state table. Used when two character categories
922         * have been found equivalent, and merged together, to eliminate the unneeded table column.
923         */
removeColumn(int column)924        void removeColumn(int column) {
925            int numStates = fDStates.size();
926            for (int state=0; state<numStates; state++) {
927                RBBIStateDescriptor sd = fDStates.get(state);
928                assert(column < sd.fDtran.length);
929                int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1);
930                System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column);
931                sd.fDtran = newArray;
932            }
933        }
934 
935 
936        /**
937         *  Find duplicate (redundant) states, beginning at the specified pair,
938         *  within this state table. This is an iterator-like function, used to
939         *  identify states (state table rows) that can be eliminated.
940         *  @param states in/out parameter, specifies where to start looking for duplicates,
941         *                and returns the first pair of duplicates found, if any.
942         *  @return true if duplicate states were found, false otherwise.
943         *  @hide draft / provisional / internal are hidden on OHOS
944         */
945        boolean findDuplicateState(RBBIRuleBuilder.IntPair states) {
946            int numStates = fDStates.size();
947            int numCols = fRB.fSetBuilder.getNumCharCategories();
948 
949            for (; states.first<numStates-1; ++states.first) {
950                RBBIStateDescriptor firstSD = fDStates.get(states.first);
951                for (states.second=states.first+1; states.second<numStates; ++states.second) {
952                    RBBIStateDescriptor duplSD = fDStates.get(states.second);
953                    if (firstSD.fAccepting != duplSD.fAccepting ||
954                            firstSD.fLookAhead != duplSD.fLookAhead ||
955                            firstSD.fTagsIdx   != duplSD.fTagsIdx) {
956                        continue;
957                    }
958                    boolean rowsMatch = true;
959                    for (int col=0; col < numCols; ++col) {
960                        int firstVal = firstSD.fDtran[col];
961                        int duplVal = duplSD.fDtran[col];
962                        if (!((firstVal == duplVal) ||
963                                ((firstVal == states.first || firstVal == states.second) &&
964                                        (duplVal  == states.first || duplVal  == states.second)))) {
965                            rowsMatch = false;
966                            break;
967                        }
968                    }
969                    if (rowsMatch) {
970                        return true;
971                    }
972                }
973            }
974            return false;
975        }
976 
977        /**
978         *  Find the next duplicate state in the safe reverse table. An iterator function.
979         *  @param states in/out parameter, specifies where to start looking for duplicates,
980         *                and returns the first pair of duplicates found, if any.
981         *  @return true if duplicate states were found, false otherwise.
982         *  @hide draft / provisional / internal are hidden on OHOS
983         */
984        boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) {
985            int numStates = fSafeTable.size();
986 
987            for (; states.first<numStates-1; ++states.first) {
988                short[] firstRow = fSafeTable.get(states.first);
989                for (states.second=states.first+1; states.second<numStates; ++states.second) {
990                    short[] duplRow = fSafeTable.get(states.second);
991                    boolean rowsMatch = true;
992                    int numCols = firstRow.length;
993                    for (int col=0; col < numCols; ++col) {
994                        int firstVal = firstRow[col];
995                        int duplVal = duplRow[col];
996                        if (!((firstVal == duplVal) ||
997                                ((firstVal == states.first || firstVal == states.second) &&
998                                        (duplVal  == states.first || duplVal  == states.second)))) {
999                            rowsMatch = false;
1000                            break;
1001                        }
1002                    }
1003                    if (rowsMatch) {
1004                        return true;
1005                    }
1006                }
1007            }
1008            return false;
1009        }
1010 
1011        /**
1012         * Remove a duplicate state (row) from the state table. All references to the deleted (second) state
1013         * are redirected to first state.
1014         * @param duplStates The duplicate pair of states.
1015         * @hide draft / provisional / internal are hidden on OHOS
1016         */
1017        void removeState(IntPair duplStates) {
1018            final int keepState = duplStates.first;
1019            final int duplState = duplStates.second;
1020            assert(keepState < duplState);
1021            assert(duplState < fDStates.size());
1022 
1023            fDStates.remove(duplState);
1024 
1025            int numStates = fDStates.size();
1026            int numCols = fRB.fSetBuilder.getNumCharCategories();
1027            for (int state=0; state<numStates; ++state) {
1028                RBBIStateDescriptor sd = fDStates.get(state);
1029                for (int col=0; col<numCols; col++) {
1030                    int existingVal = sd.fDtran[col];
1031                    int newVal = existingVal;
1032                    if (existingVal == duplState) {
1033                        newVal = keepState;
1034                    } else if (existingVal > duplState) {
1035                        newVal = existingVal - 1;
1036                    }
1037                    sd.fDtran[col] = newVal;
1038                }
1039            }
1040        }
1041 
1042        /**
1043         * Remove a duplicate state from the safe table.
1044         * @param duplStates The duplicate pair of states.  The first is kept, the second is removed.
1045         *                   All references to the second in the state table are retargeted
1046         *                   to the first.
1047         * @hide draft / provisional / internal are hidden on OHOS
1048         */
1049        void removeSafeState(IntPair duplStates) {
1050            final int keepState = duplStates.first;
1051            final int duplState = duplStates.second;
1052            assert(keepState < duplState);
1053            assert(duplState < fSafeTable.size());
1054 
1055            fSafeTable.remove(duplState);
1056            int numStates = fSafeTable.size();
1057            for (int state=0; state<numStates; ++state) {
1058                short[] row = fSafeTable.get(state);
1059                for (int col=0; col<row.length; col++) {
1060                    int existingVal = row[col];
1061                    int newVal = existingVal;
1062                    if (existingVal == duplState) {
1063                        newVal = keepState;
1064                    } else if (existingVal > duplState) {
1065                        newVal = existingVal - 1;
1066                    }
1067                    row[col] = (short)newVal;
1068                }
1069            }
1070        }
1071 
1072 
1073        /**
1074         *  Check for, and remove duplicate states (table rows).
1075         *  @return the number of states removed.
1076         *  @hide draft / provisional / internal are hidden on OHOS
1077         */
1078        int removeDuplicateStates() {
1079            IntPair dupls = new IntPair(3, 0);
1080            int numStatesRemoved = 0;
1081 
1082            while (findDuplicateState(dupls)) {
1083                // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
1084                removeState(dupls);
1085                ++numStatesRemoved;
1086            }
1087            return numStatesRemoved;
1088        }
1089 
1090 
1091        /**
1092         *  Calculate the size in bytes of the serialized form of this state transition table,
1093         *  which is identical to the ICU4C runtime form.
1094         *  Refer to common/rbbidata.h from ICU4C for the declarations of the structures
1095         *  being matched by this calculation.
1096         */
1097        int  getTableSize()  {
1098            if (fRB.fTreeRoots[fRootIx] == null) {
1099                return 0;
1100            }
1101            int size    = 16;    // The header of 4 ints, with no rows to the table.
1102            int numRows = fDStates.size();
1103            int numCols = fRB.fSetBuilder.getNumCharCategories();
1104            int rowSize = 8 + 2*numCols;
1105            size   += numRows * rowSize;
1106            size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
1107            return size;
1108        }
1109 
1110 
1111 
1112        /**
1113         * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table.
1114         * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
1115         * in common/rbbidata.h
1116         */
1117        RBBIDataWrapper.RBBIStateTable exportTable() {
1118            int                state;
1119            int                col;
1120 
1121            RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
1122            if (fRB.fTreeRoots[fRootIx] == null) {
1123                return table;
1124            }
1125 
1126            Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
1127                fDStates.size() < 0x7fff);
1128            table.fNumStates = fDStates.size();
1129 
1130            // Size of table size in shorts.
1131            //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
1132            int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();   // Row Length in shorts.
1133            int tableSize = (getTableSize() - 16) / 2;       // fTable length in shorts.
1134            table.fTable = new short[tableSize];
1135            table.fRowLen = rowLen * 2;                      // Row length in bytes.
1136 
1137            if (fRB.fLookAheadHardBreak) {
1138                table.fFlags  |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
1139            }
1140            if (fRB.fSetBuilder.sawBOF()) {
1141                table.fFlags  |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
1142            }
1143 
1144            int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
1145            for (state=0; state<table.fNumStates; state++) {
1146                RBBIStateDescriptor sd = fDStates.get(state);
1147                int row = state*rowLen;
1148                Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
1149                Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
1150                table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
1151                table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
1152                table.fTable[row + RBBIDataWrapper.TAGIDX]    = (short)sd.fTagsIdx;
1153                for (col=0; col<numCharCategories; col++) {
1154                    table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
1155                }
1156            }
1157            return table;
1158        }
1159 
1160        /**
1161         *   Synthesize a safe state table from the main state table.
1162         */
1163        void buildSafeReverseTable() {
1164            // Safe Reverse table construction is described in more detail in the corresponding
1165            // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because
1166            // it is too likely to get out of sync.
1167 
1168            // Each safe pair is stored as two chars in the safePair stringBuilder.
1169            StringBuilder safePairs = new StringBuilder();
1170 
1171            int numCharClasses = fRB.fSetBuilder.getNumCharCategories();
1172            int numStates = fDStates.size();
1173 
1174            for (int c1=0; c1<numCharClasses; ++c1) {
1175                for (int c2=0; c2 < numCharClasses; ++c2) {
1176                    int wantedEndState = -1;
1177                    int endState = 0;
1178                    for (int startState = 1; startState < numStates; ++startState) {
1179                        RBBIStateDescriptor startStateD = fDStates.get(startState);
1180                        int s2 = startStateD.fDtran[c1];
1181                        RBBIStateDescriptor s2StateD = fDStates.get(s2);
1182                        endState = s2StateD.fDtran[c2];
1183                        if (wantedEndState < 0) {
1184                            wantedEndState = endState;
1185                        } else {
1186                            if (wantedEndState != endState) {
1187                                break;
1188                            }
1189                        }
1190                    }
1191                    if (wantedEndState == endState) {
1192                        safePairs.append((char)c1);
1193                        safePairs.append((char)c2);
1194                        // System.out.printf("(%d, %d) ", c1, c2);
1195                    }
1196                }
1197                // System.out.printf("\n");
1198            }
1199 
1200            // Populate the initial safe table.
1201            // The table as a whole is a List<short[]>
1202            // Row 0 is the stop state.
1203            // Row 1 is the start sate.
1204            // Row 2 and beyond are other states, initially one per char class, but
1205            //   after initial construction, many of the states will be combined, compacting the table.)
1206            // The String holds the nextState data only. The four leading fields of a row, fAccepting,
1207            // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
1208 
1209            assert(fSafeTable == null);
1210            fSafeTable = new ArrayList<>();
1211            for (int row=0; row<numCharClasses + 2; ++row) {
1212                fSafeTable.add(new short[numCharClasses]);
1213            }
1214 
1215            // From the start state, each input char class transitions to the state for that input.
1216            short[] startState = fSafeTable.get(1);
1217            for (int charClass=0; charClass < numCharClasses; ++charClass) {
1218                // Note: +2 to skip the start & stop state rows.
1219                startState[charClass] = (short)(charClass+2);
1220            }
1221 
1222            // Initially make every other state table row look like the start state row
1223            //    (except for the stop state, which remains all 0)
1224            for (int row=2; row<numCharClasses+2; ++row) {
1225                System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length);
1226            }
1227 
1228            // Run through the safe pairs, set the next state to zero when pair has been seen.
1229            // Zero being the stop state, meaning we found a safe point.
1230            for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
1231                int c1 = safePairs.charAt(pairIdx);
1232                int c2 = safePairs.charAt(pairIdx + 1);
1233 
1234                short[] rowState = fSafeTable.get(c2 + 2);
1235                rowState[c1] = 0;
1236            }
1237 
1238            // Remove duplicate or redundant rows from the table.
1239            RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0);
1240            while (findDuplicateSafeState(states)) {
1241                // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
1242                removeSafeState(states);
1243            }
1244        }
1245 
1246 
1247        /**
1248         *  Calculate the size of the runtime form of this safe state table.
1249         */
1250        int getSafeTableSize() {
1251            if (fSafeTable == null) {
1252                return 0;
1253            }
1254            int size    = 16;    // The header of 4 ints, with no rows to the table.
1255            int numRows = fSafeTable.size();
1256            int numCols = fSafeTable.get(0).length;
1257            int rowSize = 8 + 2*numCols;
1258            size += numRows * rowSize;
1259            // TODO: there are redundant round-up. Figure out best place, get rid of the rest.
1260            size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
1261            return size;
1262        }
1263 
1264 
1265        /**
1266         *  Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table.
1267         *  RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
1268         *  in common/rbbidata.h
1269         */
1270        RBBIDataWrapper.RBBIStateTable exportSafeTable() {
1271            RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
1272            table.fNumStates = fSafeTable.size();
1273            int numCharCategories = fSafeTable.get(0).length;
1274 
1275            // Size of table size in shorts.
1276            //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
1277            int rowLen = 4 + numCharCategories;
1278            // TODO: tableSize is basically numStates * numCharCategories,
1279            //       except for alignment padding. Clean up here, and in main exportTable().
1280            int tableSize = (getSafeTableSize() - 16) / 2;   // fTable length in shorts.
1281            table.fTable = new short[tableSize];
1282            table.fRowLen = rowLen * 2;                      // Row length in bytes.
1283 
1284            for (int state=0; state<table.fNumStates; state++) {
1285                short[] rowArray = fSafeTable.get(state);
1286                int row = state * rowLen;
1287 
1288                for (int col=0; col<numCharCategories; col++) {
1289                    table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = rowArray[col];
1290                }
1291            }
1292            return table;
1293        }
1294 
1295 
1296        //-----------------------------------------------------------------------------
1297        //
1298        //   printSet    Debug function.   Print the contents of a set of Nodes
1299        //
1300        //-----------------------------------------------------------------------------
1301 
1302        void printSet(Collection<RBBINode> s) {
1303            for (RBBINode n : s) {
1304                RBBINode.printInt(n.fSerialNum, 8);
1305            }
1306            System.out.println();
1307        }
1308 
1309 
1310 
1311        //-----------------------------------------------------------------------------
1312        //
1313        //   printStates    Debug Function.  Dump the fully constructed state transition table.
1314        //
1315        //-----------------------------------------------------------------------------
1316 
1317        void printStates() {
1318            int     c;    // input "character"
1319            int     n;    // state number
1320 
1321            System.out.print("state |           i n p u t     s y m b o l s \n");
1322            System.out.print("      | Acc  LA    Tag");
1323            for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1324                RBBINode.printInt(c, 3);
1325            }
1326            System.out.print("\n");
1327            System.out.print("      |---------------");
1328            for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1329                System.out.print("---");
1330            }
1331            System.out.print("\n");
1332 
1333            for (n=0; n<fDStates.size(); n++) {
1334                RBBIStateDescriptor sd = fDStates.get(n);
1335                RBBINode.printInt(n, 5);
1336                System.out.print(" | ");
1337 
1338                RBBINode.printInt(sd.fAccepting, 3);
1339                RBBINode.printInt(sd.fLookAhead, 4);
1340                RBBINode.printInt(sd.fTagsIdx, 6);
1341                System.out.print(" ");
1342                for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1343                    RBBINode.printInt(sd.fDtran[c], 3);
1344                }
1345                System.out.print("\n");
1346            }
1347            System.out.print("\n\n");
1348        }
1349 
1350 
1351        /**
1352         * Debug Function.  Dump the fully constructed safe reverse table.
1353         */
1354        void printReverseTable() {
1355            int     c;    // input "character"
1356 
1357            System.out.printf("    Safe Reverse Table \n");
1358            if (fSafeTable == null) {
1359                System.out.printf("   --- nullptr ---\n");
1360                return;
1361            }
1362            int numCharCategories = fSafeTable.get(0).length;
1363            System.out.printf("state |           i n p u t     s y m b o l s \n");
1364            System.out.printf("      | Acc  LA    Tag");
1365            for (c=0; c< numCharCategories;  c++) {
1366                System.out.printf(" %2d", c);
1367            }
1368            System.out.printf("\n");
1369            System.out.printf("      |---------------");
1370            for (c=0; c<numCharCategories; c++) {
1371                System.out.printf("---");
1372            }
1373            System.out.printf("\n");
1374 
1375            for (int n=0; n<fSafeTable.size(); n++) {
1376                short rowArray[]  = fSafeTable.get(n);
1377                System.out.printf("  %3d | " , n);
1378                System.out.printf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
1379                for (c=0; c<numCharCategories; c++) {
1380                    System.out.printf(" %2d", rowArray[c]);
1381                }
1382                System.out.printf("\n");
1383            }
1384            System.out.printf("\n\n");
1385        }
1386 
1387 
1388 
1389 
1390 
1391        //-----------------------------------------------------------------------------
1392        //
1393        //   printRuleStatusTable    Debug Function.  Dump the common rule status table
1394        //
1395        //-----------------------------------------------------------------------------
1396 
1397        void printRuleStatusTable() {
1398            int  thisRecord = 0;
1399            int  nextRecord = 0;
1400            int      i;
1401            List<Integer> tbl = fRB.fRuleStatusVals;
1402 
1403            System.out.print("index |  tags \n");
1404            System.out.print("-------------------\n");
1405 
1406            while (nextRecord < tbl.size()) {
1407                thisRecord = nextRecord;
1408                nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1;
1409                RBBINode.printInt(thisRecord, 7);
1410                for (i=thisRecord+1; i<nextRecord; i++) {
1411                    int val = tbl.get(i).intValue();
1412                    RBBINode.printInt(val, 7);
1413                }
1414                System.out.print("\n");
1415            }
1416            System.out.print("\n\n");
1417        }
1418 
1419 
1420 
1421 }
1422