1 /*
2 ***************************************************************************
3 * Copyright (C) 2002-2006 International Business Machines Corporation *
4 * and others. All rights reserved. *
5 ***************************************************************************
6 */
7
8 //
9 // File: rbbinode.cpp
10 //
11 // Implementation of class RBBINode, which represents a node in the
12 // tree generated when parsing the Rules Based Break Iterator rules.
13 //
14 // This "Class" is actually closer to a struct.
15 // Code using it is expected to directly access fields much of the time.
16 //
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_BREAK_ITERATION
21
22 #include "unicode/unistr.h"
23 #include "unicode/uniset.h"
24 #include "unicode/uchar.h"
25 #include "unicode/parsepos.h"
26 #include "uvector.h"
27
28 #include "rbbirb.h"
29 #include "rbbinode.h"
30
31 #include "uassert.h"
32
33
34 U_NAMESPACE_BEGIN
35
36 #ifdef RBBI_DEBUG
37 static int gLastSerial = 0;
38 #endif
39
40
41 //-------------------------------------------------------------------------
42 //
43 // Constructor. Just set the fields to reasonable default values.
44 //
45 //-------------------------------------------------------------------------
RBBINode(NodeType t)46 RBBINode::RBBINode(NodeType t) : UMemory() {
47 #ifdef RBBI_DEBUG
48 fSerialNum = ++gLastSerial;
49 #endif
50 fType = t;
51 fParent = NULL;
52 fLeftChild = NULL;
53 fRightChild = NULL;
54 fInputSet = NULL;
55 fFirstPos = 0;
56 fLastPos = 0;
57 fNullable = FALSE;
58 fLookAheadEnd = FALSE;
59 fVal = 0;
60 fPrecedence = precZero;
61
62 UErrorCode status = U_ZERO_ERROR;
63 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
64 fLastPosSet = new UVector(status);
65 fFollowPos = new UVector(status);
66 if (t==opCat) {fPrecedence = precOpCat;}
67 else if (t==opOr) {fPrecedence = precOpOr;}
68 else if (t==opStart) {fPrecedence = precStart;}
69 else if (t==opLParen) {fPrecedence = precLParen;}
70
71 }
72
73
RBBINode(const RBBINode & other)74 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
75 #ifdef RBBI_DEBUG
76 fSerialNum = ++gLastSerial;
77 #endif
78 fType = other.fType;
79 fParent = NULL;
80 fLeftChild = NULL;
81 fRightChild = NULL;
82 fInputSet = other.fInputSet;
83 fPrecedence = other.fPrecedence;
84 fText = other.fText;
85 fFirstPos = other.fFirstPos;
86 fLastPos = other.fLastPos;
87 fNullable = other.fNullable;
88 fVal = other.fVal;
89 UErrorCode status = U_ZERO_ERROR;
90 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
91 fLastPosSet = new UVector(status);
92 fFollowPos = new UVector(status);
93 }
94
95
96 //-------------------------------------------------------------------------
97 //
98 // Destructor. Deletes both this node AND any child nodes,
99 // except in the case of variable reference nodes. For
100 // these, the l. child points back to the definition, which
101 // is common for all references to the variable, meaning
102 // it can't be deleted here.
103 //
104 //-------------------------------------------------------------------------
~RBBINode()105 RBBINode::~RBBINode() {
106 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
107 delete fInputSet;
108 fInputSet = NULL;
109
110 switch (this->fType) {
111 case varRef:
112 case setRef:
113 // for these node types, multiple instances point to the same "children"
114 // Storage ownership of children handled elsewhere. Don't delete here.
115 break;
116
117 default:
118 delete fLeftChild;
119 fLeftChild = NULL;
120 delete fRightChild;
121 fRightChild = NULL;
122 }
123
124
125 delete fFirstPosSet;
126 delete fLastPosSet;
127 delete fFollowPos;
128
129 }
130
131
132 //-------------------------------------------------------------------------
133 //
134 // cloneTree Make a copy of the subtree rooted at this node.
135 // Discard any variable references encountered along the way,
136 // and replace with copies of the variable's definitions.
137 // Used to replicate the expression underneath variable
138 // references in preparation for generating the DFA tables.
139 //
140 //-------------------------------------------------------------------------
cloneTree()141 RBBINode *RBBINode::cloneTree() {
142 RBBINode *n;
143
144 if (fType == RBBINode::varRef) {
145 // If the current node is a variable reference, skip over it
146 // and clone the definition of the variable instead.
147 n = fLeftChild->cloneTree();
148 } else if (fType == RBBINode::uset) {
149 n = this;
150 } else {
151 n = new RBBINode(*this);
152 if (fLeftChild != NULL) {
153 n->fLeftChild = fLeftChild->cloneTree();
154 n->fLeftChild->fParent = n;
155 }
156 if (fRightChild != NULL) {
157 n->fRightChild = fRightChild->cloneTree();
158 n->fRightChild->fParent = n;
159 }
160 }
161 return n;
162 }
163
164
165
166 //-------------------------------------------------------------------------
167 //
168 // flattenVariables Walk a parse tree, replacing any variable
169 // references with a copy of the variable's definition.
170 // Aside from variables, the tree is not changed.
171 //
172 // Return the root of the tree. If the root was not a variable
173 // reference, it remains unchanged - the root we started with
174 // is the root we return. If, however, the root was a variable
175 // reference, the root of the newly cloned replacement tree will
176 // be returned, and the original tree deleted.
177 //
178 // This function works by recursively walking the tree
179 // without doing anything until a variable reference is
180 // found, then calling cloneTree() at that point. Any
181 // nested references are handled by cloneTree(), not here.
182 //
183 //-------------------------------------------------------------------------
flattenVariables()184 RBBINode *RBBINode::flattenVariables() {
185 if (fType == varRef) {
186 RBBINode *retNode = fLeftChild->cloneTree();
187 delete this;
188 return retNode;
189 }
190
191 if (fLeftChild != NULL) {
192 fLeftChild = fLeftChild->flattenVariables();
193 fLeftChild->fParent = this;
194 }
195 if (fRightChild != NULL) {
196 fRightChild = fRightChild->flattenVariables();
197 fRightChild->fParent = this;
198 }
199 return this;
200 }
201
202
203 //-------------------------------------------------------------------------
204 //
205 // flattenSets Walk the parse tree, replacing any nodes of type setRef
206 // with a copy of the expression tree for the set. A set's
207 // equivalent expression tree is precomputed and saved as
208 // the left child of the uset node.
209 //
210 //-------------------------------------------------------------------------
flattenSets()211 void RBBINode::flattenSets() {
212 U_ASSERT(fType != setRef);
213
214 if (fLeftChild != NULL) {
215 if (fLeftChild->fType==setRef) {
216 RBBINode *setRefNode = fLeftChild;
217 RBBINode *usetNode = setRefNode->fLeftChild;
218 RBBINode *replTree = usetNode->fLeftChild;
219 fLeftChild = replTree->cloneTree();
220 fLeftChild->fParent = this;
221 delete setRefNode;
222 } else {
223 fLeftChild->flattenSets();
224 }
225 }
226
227 if (fRightChild != NULL) {
228 if (fRightChild->fType==setRef) {
229 RBBINode *setRefNode = fRightChild;
230 RBBINode *usetNode = setRefNode->fLeftChild;
231 RBBINode *replTree = usetNode->fLeftChild;
232 fRightChild = replTree->cloneTree();
233 fRightChild->fParent = this;
234 delete setRefNode;
235 } else {
236 fRightChild->flattenSets();
237 }
238 }
239 }
240
241
242
243 //-------------------------------------------------------------------------
244 //
245 // findNodes() Locate all the nodes of the specified type, starting
246 // at the specified root.
247 //
248 //-------------------------------------------------------------------------
findNodes(UVector * dest,RBBINode::NodeType kind,UErrorCode & status)249 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
250 /* test for buffer overflows */
251 if (U_FAILURE(status)) {
252 return;
253 }
254 if (fType == kind) {
255 dest->addElement(this, status);
256 }
257 if (fLeftChild != NULL) {
258 fLeftChild->findNodes(dest, kind, status);
259 }
260 if (fRightChild != NULL) {
261 fRightChild->findNodes(dest, kind, status);
262 }
263 }
264
265
266 //-------------------------------------------------------------------------
267 //
268 // print. Print out a single node, for debugging.
269 //
270 //-------------------------------------------------------------------------
271 #ifdef RBBI_DEBUG
printNode()272 void RBBINode::printNode() {
273 static const char * const nodeTypeNames[] = {
274 "setRef",
275 "uset",
276 "varRef",
277 "leafChar",
278 "lookAhead",
279 "tag",
280 "endMark",
281 "opStart",
282 "opCat",
283 "opOr",
284 "opStar",
285 "opPlus",
286 "opQuestion",
287 "opBreak",
288 "opReverse",
289 "opLParen"
290 };
291
292 if (this==NULL) {
293 RBBIDebugPrintf("%10p", (void *)this);
294 } else {
295 RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ",
296 (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild,
297 fSerialNum, fFirstPos, fVal);
298 if (fType == varRef) {
299 RBBI_DEBUG_printUnicodeString(fText);
300 }
301 }
302 RBBIDebugPrintf("\n");
303 }
304 #endif
305
306
307 #ifdef RBBI_DEBUG
RBBI_DEBUG_printUnicodeString(const UnicodeString & s,int minWidth)308 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
309 {
310 int i;
311 for (i=0; i<s.length(); i++) {
312 RBBIDebugPrintf("%c", s.charAt(i));
313 // putc(s.charAt(i), stdout);
314 }
315 for (i=s.length(); i<minWidth; i++) {
316 RBBIDebugPrintf(" ");
317 }
318 }
319 #endif
320
321
322 //-------------------------------------------------------------------------
323 //
324 // print. Print out the tree of nodes rooted at "this"
325 //
326 //-------------------------------------------------------------------------
327 #ifdef RBBI_DEBUG
printTree(UBool printHeading)328 void RBBINode::printTree(UBool printHeading) {
329 if (printHeading) {
330 RBBIDebugPrintf( "-------------------------------------------------------------------\n"
331 " Address type Parent LeftChild RightChild serial position value\n"
332 );
333 }
334 this->printNode();
335 if (this != NULL) {
336 // Only dump the definition under a variable reference if asked to.
337 // Unconditinally dump children of all other node types.
338 if (fType != varRef) {
339 if (fLeftChild != NULL) {
340 fLeftChild->printTree(FALSE);
341 }
342
343 if (fRightChild != NULL) {
344 fRightChild->printTree(FALSE);
345 }
346 }
347 }
348 }
349 #endif
350
351
352
353 U_NAMESPACE_END
354
355 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
356