1 /*
2 ***************************************************************************
3 * Copyright (C) 2002-2008 International Business Machines Corporation *
4 * and others. All rights reserved. *
5 ***************************************************************************
6 */
7
8 //
9 // File: rbbinode.cpp
10 //
11 // Implementation of class RBBINode, which represents a node in the
12 // tree generated when parsing the Rules Based Break Iterator rules.
13 //
14 // This "Class" is actually closer to a struct.
15 // Code using it is expected to directly access fields much of the time.
16 //
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_BREAK_ITERATION
21
22 #include "unicode/unistr.h"
23 #include "unicode/uniset.h"
24 #include "unicode/uchar.h"
25 #include "unicode/parsepos.h"
26 #include "uvector.h"
27
28 #include "rbbirb.h"
29 #include "rbbinode.h"
30
31 #include "uassert.h"
32
33
34 U_NAMESPACE_BEGIN
35
36 #ifdef RBBI_DEBUG
37 static int gLastSerial = 0;
38 #endif
39
40
41 //-------------------------------------------------------------------------
42 //
43 // Constructor. Just set the fields to reasonable default values.
44 //
45 //-------------------------------------------------------------------------
RBBINode(NodeType t)46 RBBINode::RBBINode(NodeType t) : UMemory() {
47 #ifdef RBBI_DEBUG
48 fSerialNum = ++gLastSerial;
49 #endif
50 fType = t;
51 fParent = NULL;
52 fLeftChild = NULL;
53 fRightChild = NULL;
54 fInputSet = NULL;
55 fFirstPos = 0;
56 fLastPos = 0;
57 fNullable = FALSE;
58 fLookAheadEnd = FALSE;
59 fVal = 0;
60 fPrecedence = precZero;
61
62 UErrorCode status = U_ZERO_ERROR;
63 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
64 fLastPosSet = new UVector(status);
65 fFollowPos = new UVector(status);
66 if (t==opCat) {fPrecedence = precOpCat;}
67 else if (t==opOr) {fPrecedence = precOpOr;}
68 else if (t==opStart) {fPrecedence = precStart;}
69 else if (t==opLParen) {fPrecedence = precLParen;}
70
71 }
72
73
RBBINode(const RBBINode & other)74 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
75 #ifdef RBBI_DEBUG
76 fSerialNum = ++gLastSerial;
77 #endif
78 fType = other.fType;
79 fParent = NULL;
80 fLeftChild = NULL;
81 fRightChild = NULL;
82 fInputSet = other.fInputSet;
83 fPrecedence = other.fPrecedence;
84 fText = other.fText;
85 fFirstPos = other.fFirstPos;
86 fLastPos = other.fLastPos;
87 fNullable = other.fNullable;
88 fVal = other.fVal;
89 UErrorCode status = U_ZERO_ERROR;
90 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
91 fLastPosSet = new UVector(status);
92 fFollowPos = new UVector(status);
93 }
94
95
96 //-------------------------------------------------------------------------
97 //
98 // Destructor. Deletes both this node AND any child nodes,
99 // except in the case of variable reference nodes. For
100 // these, the l. child points back to the definition, which
101 // is common for all references to the variable, meaning
102 // it can't be deleted here.
103 //
104 //-------------------------------------------------------------------------
~RBBINode()105 RBBINode::~RBBINode() {
106 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
107 delete fInputSet;
108 fInputSet = NULL;
109
110 switch (this->fType) {
111 case varRef:
112 case setRef:
113 // for these node types, multiple instances point to the same "children"
114 // Storage ownership of children handled elsewhere. Don't delete here.
115 break;
116
117 default:
118 delete fLeftChild;
119 fLeftChild = NULL;
120 delete fRightChild;
121 fRightChild = NULL;
122 }
123
124
125 delete fFirstPosSet;
126 delete fLastPosSet;
127 delete fFollowPos;
128
129 }
130
131
132 //-------------------------------------------------------------------------
133 //
134 // cloneTree Make a copy of the subtree rooted at this node.
135 // Discard any variable references encountered along the way,
136 // and replace with copies of the variable's definitions.
137 // Used to replicate the expression underneath variable
138 // references in preparation for generating the DFA tables.
139 //
140 //-------------------------------------------------------------------------
cloneTree()141 RBBINode *RBBINode::cloneTree() {
142 RBBINode *n;
143
144 if (fType == RBBINode::varRef) {
145 // If the current node is a variable reference, skip over it
146 // and clone the definition of the variable instead.
147 n = fLeftChild->cloneTree();
148 } else if (fType == RBBINode::uset) {
149 n = this;
150 } else {
151 n = new RBBINode(*this);
152 // Check for null pointer.
153 if (n != NULL) {
154 if (fLeftChild != NULL) {
155 n->fLeftChild = fLeftChild->cloneTree();
156 n->fLeftChild->fParent = n;
157 }
158 if (fRightChild != NULL) {
159 n->fRightChild = fRightChild->cloneTree();
160 n->fRightChild->fParent = n;
161 }
162 }
163 }
164 return n;
165 }
166
167
168
169 //-------------------------------------------------------------------------
170 //
171 // flattenVariables Walk a parse tree, replacing any variable
172 // references with a copy of the variable's definition.
173 // Aside from variables, the tree is not changed.
174 //
175 // Return the root of the tree. If the root was not a variable
176 // reference, it remains unchanged - the root we started with
177 // is the root we return. If, however, the root was a variable
178 // reference, the root of the newly cloned replacement tree will
179 // be returned, and the original tree deleted.
180 //
181 // This function works by recursively walking the tree
182 // without doing anything until a variable reference is
183 // found, then calling cloneTree() at that point. Any
184 // nested references are handled by cloneTree(), not here.
185 //
186 //-------------------------------------------------------------------------
flattenVariables()187 RBBINode *RBBINode::flattenVariables() {
188 if (fType == varRef) {
189 RBBINode *retNode = fLeftChild->cloneTree();
190 delete this;
191 return retNode;
192 }
193
194 if (fLeftChild != NULL) {
195 fLeftChild = fLeftChild->flattenVariables();
196 fLeftChild->fParent = this;
197 }
198 if (fRightChild != NULL) {
199 fRightChild = fRightChild->flattenVariables();
200 fRightChild->fParent = this;
201 }
202 return this;
203 }
204
205
206 //-------------------------------------------------------------------------
207 //
208 // flattenSets Walk the parse tree, replacing any nodes of type setRef
209 // with a copy of the expression tree for the set. A set's
210 // equivalent expression tree is precomputed and saved as
211 // the left child of the uset node.
212 //
213 //-------------------------------------------------------------------------
flattenSets()214 void RBBINode::flattenSets() {
215 U_ASSERT(fType != setRef);
216
217 if (fLeftChild != NULL) {
218 if (fLeftChild->fType==setRef) {
219 RBBINode *setRefNode = fLeftChild;
220 RBBINode *usetNode = setRefNode->fLeftChild;
221 RBBINode *replTree = usetNode->fLeftChild;
222 fLeftChild = replTree->cloneTree();
223 fLeftChild->fParent = this;
224 delete setRefNode;
225 } else {
226 fLeftChild->flattenSets();
227 }
228 }
229
230 if (fRightChild != NULL) {
231 if (fRightChild->fType==setRef) {
232 RBBINode *setRefNode = fRightChild;
233 RBBINode *usetNode = setRefNode->fLeftChild;
234 RBBINode *replTree = usetNode->fLeftChild;
235 fRightChild = replTree->cloneTree();
236 fRightChild->fParent = this;
237 delete setRefNode;
238 } else {
239 fRightChild->flattenSets();
240 }
241 }
242 }
243
244
245
246 //-------------------------------------------------------------------------
247 //
248 // findNodes() Locate all the nodes of the specified type, starting
249 // at the specified root.
250 //
251 //-------------------------------------------------------------------------
findNodes(UVector * dest,RBBINode::NodeType kind,UErrorCode & status)252 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
253 /* test for buffer overflows */
254 if (U_FAILURE(status)) {
255 return;
256 }
257 if (fType == kind) {
258 dest->addElement(this, status);
259 }
260 if (fLeftChild != NULL) {
261 fLeftChild->findNodes(dest, kind, status);
262 }
263 if (fRightChild != NULL) {
264 fRightChild->findNodes(dest, kind, status);
265 }
266 }
267
268
269 //-------------------------------------------------------------------------
270 //
271 // print. Print out a single node, for debugging.
272 //
273 //-------------------------------------------------------------------------
274 #ifdef RBBI_DEBUG
printNode()275 void RBBINode::printNode() {
276 static const char * const nodeTypeNames[] = {
277 "setRef",
278 "uset",
279 "varRef",
280 "leafChar",
281 "lookAhead",
282 "tag",
283 "endMark",
284 "opStart",
285 "opCat",
286 "opOr",
287 "opStar",
288 "opPlus",
289 "opQuestion",
290 "opBreak",
291 "opReverse",
292 "opLParen"
293 };
294
295 if (this==NULL) {
296 RBBIDebugPrintf("%10p", (void *)this);
297 } else {
298 RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ",
299 (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild,
300 fSerialNum, fFirstPos, fVal);
301 if (fType == varRef) {
302 RBBI_DEBUG_printUnicodeString(fText);
303 }
304 }
305 RBBIDebugPrintf("\n");
306 }
307 #endif
308
309
310 #ifdef RBBI_DEBUG
RBBI_DEBUG_printUnicodeString(const UnicodeString & s,int minWidth)311 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
312 {
313 int i;
314 for (i=0; i<s.length(); i++) {
315 RBBIDebugPrintf("%c", s.charAt(i));
316 // putc(s.charAt(i), stdout);
317 }
318 for (i=s.length(); i<minWidth; i++) {
319 RBBIDebugPrintf(" ");
320 }
321 }
322 #endif
323
324
325 //-------------------------------------------------------------------------
326 //
327 // print. Print out the tree of nodes rooted at "this"
328 //
329 //-------------------------------------------------------------------------
330 #ifdef RBBI_DEBUG
printTree(UBool printHeading)331 void RBBINode::printTree(UBool printHeading) {
332 if (printHeading) {
333 RBBIDebugPrintf( "-------------------------------------------------------------------\n"
334 " Address type Parent LeftChild RightChild serial position value\n"
335 );
336 }
337 this->printNode();
338 if (this != NULL) {
339 // Only dump the definition under a variable reference if asked to.
340 // Unconditinally dump children of all other node types.
341 if (fType != varRef) {
342 if (fLeftChild != NULL) {
343 fLeftChild->printTree(FALSE);
344 }
345
346 if (fRightChild != NULL) {
347 fRightChild->printTree(FALSE);
348 }
349 }
350 }
351 }
352 #endif
353
354
355
356 U_NAMESPACE_END
357
358 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
359