1 //
2 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
3 //
4 /*
5 ***************************************************************************
6 * Copyright (C) 2002-2006 International Business Machines Corporation *
7 * and others. All rights reserved. *
8 ***************************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_BREAK_ITERATION
14
15 #include "unicode/unistr.h"
16 #include "unicode/uniset.h"
17 #include "unicode/uchar.h"
18 #include "unicode/parsepos.h"
19
20 #include "umutex.h"
21
22 #include "rbbirb.h"
23 #include "rbbinode.h"
24
25
26 //
27 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
28 // when the hash table is deleted.
29 //
30 U_CDECL_BEGIN
RBBISymbolTableEntry_deleter(void * p)31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
32 U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *px = (U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *)p;
33 delete px;
34 }
35 U_CDECL_END
36
37
38
39 U_NAMESPACE_BEGIN
40
RBBISymbolTable(RBBIRuleScanner * rs,const UnicodeString & rules,UErrorCode & status)41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
42 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
43 {
44 fHashTable = NULL;
45 fCachedSetLookup = NULL;
46
47 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
48 // uhash_open checks status
49 if (U_FAILURE(status)) {
50 return;
51 }
52 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
53 }
54
55
56
~RBBISymbolTable()57 RBBISymbolTable::~RBBISymbolTable()
58 {
59 uhash_close(fHashTable);
60 }
61
62
63 //
64 // RBBISymbolTable::lookup This function from the abstract symbol table inteface
65 // looks up a variable name and returns a UnicodeString
66 // containing the substitution text.
67 //
68 // The variable name does NOT include the leading $.
69 //
lookup(const UnicodeString & s) const70 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
71 {
72 RBBISymbolTableEntry *el;
73 RBBINode *varRefNode;
74 RBBINode *exprNode;
75 RBBINode *usetNode;
76 const UnicodeString *retString;
77 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
78
79 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
80 if (el == NULL) {
81 return NULL;
82 }
83
84 varRefNode = el->val;
85 exprNode = varRefNode->fLeftChild; // Root node of expression for variable
86 if (exprNode->fType == RBBINode::setRef) {
87 // The $variable refers to a single UnicodeSet
88 // return the ffffString, which will subsequently be interpreted as a
89 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
90 usetNode = exprNode->fLeftChild;
91 This->fCachedSetLookup = usetNode->fInputSet;
92 retString = &ffffString;
93 }
94 else
95 {
96 // The variable refers to something other than just a set.
97 // return the original source string for the expression
98 retString = &exprNode->fText;
99 This->fCachedSetLookup = NULL;
100 }
101 return retString;
102 }
103
104
105
106 //
107 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
108 // interface maps a single stand-in character to a
109 // pointer to a Unicode Set. The Unicode Set code uses this
110 // mechanism to get all references to the same $variable
111 // name to refer to a single common Unicode Set instance.
112 //
113 // This implementation cheats a little, and does not maintain a map of stand-in chars
114 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
115 // constructor will always call this function right after calling lookup(),
116 // and we just need to remember what set to return between these two calls.
lookupMatcher(UChar32 ch) const117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
118 {
119 UnicodeSet *retVal = NULL;
120 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
121 if (ch == 0xffff) {
122 retVal = fCachedSetLookup;
123 This->fCachedSetLookup = 0;
124 }
125 return retVal;
126 }
127
128 //
129 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
130 // looks for a $variable name in the source text.
131 // It does not look it up, only scans for it.
132 // It is used by the UnicodeSet parser.
133 //
134 // This implementation is lifted pretty much verbatim
135 // from the rules based transliterator implementation.
136 // I didn't see an obvious way of sharing it.
137 //
parseReference(const UnicodeString & text,ParsePosition & pos,int32_t limit) const138 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
139 ParsePosition& pos, int32_t limit) const
140 {
141 int32_t start = pos.getIndex();
142 int32_t i = start;
143 UnicodeString result;
144 while (i < limit) {
145 UChar c = text.charAt(i);
146 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
147 break;
148 }
149 ++i;
150 }
151 if (i == start) { // No valid name chars
152 return result; // Indicate failure with empty string
153 }
154 pos.setIndex(i);
155 text.extractBetween(start, i, result);
156 return result;
157 }
158
159
160
161 //
162 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
163 // corresponding RBBI Node. If there is no entry
164 // in the table for this name, return NULL.
165 //
lookupNode(const UnicodeString & key) const166 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
167
168 RBBINode *retNode = NULL;
169 RBBISymbolTableEntry *el;
170
171 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
172 if (el != NULL) {
173 retNode = el->val;
174 }
175 return retNode;
176 }
177
178
179 //
180 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
181 // Indicate an error if the name already exists -
182 // this will only occur in the case of duplicate
183 // variable assignments.
184 //
addEntry(const UnicodeString & key,RBBINode * val,UErrorCode & err)185 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
186 RBBISymbolTableEntry *e;
187 /* test for buffer overflows */
188 if (U_FAILURE(err)) {
189 return;
190 }
191 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
192 if (e != NULL) {
193 err = U_BRK_VARIABLE_REDFINITION;
194 return;
195 }
196
197 e = new RBBISymbolTableEntry;
198 if (e == NULL) {
199 err = U_MEMORY_ALLOCATION_ERROR;
200 return;
201 }
202 e->key = key;
203 e->val = val;
204 uhash_put( fHashTable, &e->key, e, &err);
205 }
206
207
RBBISymbolTableEntry()208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
209
~RBBISymbolTableEntry()210 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
211 // The "val" of a symbol table entry is a variable reference node.
212 // The l. child of the val is the rhs expression from the assignment.
213 // Unlike other node types, children of variable reference nodes are not
214 // automatically recursively deleted. We do it manually here.
215 delete val->fLeftChild;
216 val->fLeftChild = NULL;
217
218 delete val;
219
220 // Note: the key UnicodeString is destructed by virtue of being in the object by value.
221 }
222
223
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//
227 #ifdef RBBI_DEBUG
rbbiSymtablePrint() const228 void RBBISymbolTable::rbbiSymtablePrint() const {
229 RBBIDebugPrintf("Variable Definitions\n"
230 "Name Node Val String Val\n"
231 "----------------------------------------------------------------------\n");
232
233 int32_t pos = -1;
234 const UHashElement *e = NULL;
235 for (;;) {
236 e = uhash_nextElement(fHashTable, &pos);
237 if (e == NULL ) {
238 break;
239 }
240 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
241
242 RBBI_DEBUG_printUnicodeString(s->key, 15);
243 RBBIDebugPrintf(" %8p ", (void *)s->val);
244 RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
245 RBBIDebugPrintf("\n");
246 }
247
248 RBBIDebugPrintf("\nParsed Variable Definitions\n");
249 pos = -1;
250 for (;;) {
251 e = uhash_nextElement(fHashTable, &pos);
252 if (e == NULL ) {
253 break;
254 }
255 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
256 RBBI_DEBUG_printUnicodeString(s->key);
257 s->val->fLeftChild->printTree(TRUE);
258 RBBIDebugPrintf("\n");
259 }
260 }
261 #endif
262
263
264
265
266
267 U_NAMESPACE_END
268
269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
270