1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
5 //
6 /*
7 ***************************************************************************
8 * Copyright (C) 2002-2014 International Business Machines Corporation
9 * and others. All rights reserved.
10 ***************************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_BREAK_ITERATION
16
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/parsepos.h"
#include "unicode/utf16.h"
21
22 #include "cstr.h"
23 #include "rbbinode.h"
24 #include "rbbirb.h"
25 #include "umutex.h"
26
27
28 //
29 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
30 // when the hash table is deleted.
31 //
32 U_CDECL_BEGIN
RBBISymbolTableEntry_deleter(void * p)33 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
34 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
35 delete px;
36 }
37 U_CDECL_END
38
39
40
41 U_NAMESPACE_BEGIN
42
RBBISymbolTable(RBBIRuleScanner * rs,const UnicodeString & rules,UErrorCode & status)43 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
44 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
45 {
46 fHashTable = NULL;
47 fCachedSetLookup = NULL;
48
49 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
50 // uhash_open checks status
51 if (U_FAILURE(status)) {
52 return;
53 }
54 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
55 }
56
57
58
~RBBISymbolTable()59 RBBISymbolTable::~RBBISymbolTable()
60 {
61 uhash_close(fHashTable);
62 }
63
64
65 //
66 // RBBISymbolTable::lookup This function from the abstract symbol table interface
67 // looks up a variable name and returns a UnicodeString
68 // containing the substitution text.
69 //
70 // The variable name does NOT include the leading $.
71 //
lookup(const UnicodeString & s) const72 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
73 {
74 RBBISymbolTableEntry *el;
75 RBBINode *varRefNode;
76 RBBINode *exprNode;
77 RBBINode *usetNode;
78 const UnicodeString *retString;
79 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
80
81 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
82 if (el == NULL) {
83 return NULL;
84 }
85
86 varRefNode = el->val;
87 exprNode = varRefNode->fLeftChild; // Root node of expression for variable
88 if (exprNode->fType == RBBINode::setRef) {
89 // The $variable refers to a single UnicodeSet
90 // return the ffffString, which will subsequently be interpreted as a
91 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
92 usetNode = exprNode->fLeftChild;
93 This->fCachedSetLookup = usetNode->fInputSet;
94 retString = &ffffString;
95 }
96 else
97 {
98 // The variable refers to something other than just a set.
99 // return the original source string for the expression
100 retString = &exprNode->fText;
101 This->fCachedSetLookup = NULL;
102 }
103 return retString;
104 }
105
106
107
108 //
109 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
110 // interface maps a single stand-in character to a
111 // pointer to a Unicode Set. The Unicode Set code uses this
112 // mechanism to get all references to the same $variable
113 // name to refer to a single common Unicode Set instance.
114 //
115 // This implementation cheats a little, and does not maintain a map of stand-in chars
116 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
117 // constructor will always call this function right after calling lookup(),
118 // and we just need to remember what set to return between these two calls.
lookupMatcher(UChar32 ch) const119 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
120 {
121 UnicodeSet *retVal = NULL;
122 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
123 if (ch == 0xffff) {
124 retVal = fCachedSetLookup;
125 This->fCachedSetLookup = 0;
126 }
127 return retVal;
128 }
129
130 //
131 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
132 // looks for a $variable name in the source text.
133 // It does not look it up, only scans for it.
134 // It is used by the UnicodeSet parser.
135 //
136 // This implementation is lifted pretty much verbatim
137 // from the rules based transliterator implementation.
138 // I didn't see an obvious way of sharing it.
139 //
parseReference(const UnicodeString & text,ParsePosition & pos,int32_t limit) const140 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
141 ParsePosition& pos, int32_t limit) const
142 {
143 int32_t start = pos.getIndex();
144 int32_t i = start;
145 UnicodeString result;
146 while (i < limit) {
147 UChar c = text.charAt(i);
148 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
149 break;
150 }
151 ++i;
152 }
153 if (i == start) { // No valid name chars
154 return result; // Indicate failure with empty string
155 }
156 pos.setIndex(i);
157 text.extractBetween(start, i, result);
158 return result;
159 }
160
161
162
163 //
164 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
165 // corresponding RBBI Node. If there is no entry
166 // in the table for this name, return NULL.
167 //
lookupNode(const UnicodeString & key) const168 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
169
170 RBBINode *retNode = NULL;
171 RBBISymbolTableEntry *el;
172
173 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
174 if (el != NULL) {
175 retNode = el->val;
176 }
177 return retNode;
178 }
179
180
181 //
182 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
183 // Indicate an error if the name already exists -
184 // this will only occur in the case of duplicate
185 // variable assignments.
186 //
addEntry(const UnicodeString & key,RBBINode * val,UErrorCode & err)187 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
188 RBBISymbolTableEntry *e;
189 /* test for buffer overflows */
190 if (U_FAILURE(err)) {
191 return;
192 }
193 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
194 if (e != NULL) {
195 err = U_BRK_VARIABLE_REDFINITION;
196 return;
197 }
198
199 e = new RBBISymbolTableEntry;
200 if (e == NULL) {
201 err = U_MEMORY_ALLOCATION_ERROR;
202 return;
203 }
204 e->key = key;
205 e->val = val;
206 uhash_put( fHashTable, &e->key, e, &err);
207 }
208
209
RBBISymbolTableEntry()210 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
211
~RBBISymbolTableEntry()212 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
213 // The "val" of a symbol table entry is a variable reference node.
214 // The l. child of the val is the rhs expression from the assignment.
215 // Unlike other node types, children of variable reference nodes are not
216 // automatically recursively deleted. We do it manually here.
217 delete val->fLeftChild;
218 val->fLeftChild = NULL;
219
220 delete val;
221
222 // Note: the key UnicodeString is destructed by virtue of being in the object by value.
223 }
224
225
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//                                        Only compiled when RBBI_DEBUG is defined.
//
//      First prints one summary line per variable (name, node pointer, node serial number,
//      source text of the assigned expression), then walks the table a second time and
//      prints the node tree of each variable.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable::rbbiSymtablePrint() const {
    RBBIDebugPrintf("Variable Definitions Symbol Table\n"
           "Name Node serial String Val\n"
           "-------------------------------------------------------------------\n");

    const UHashElement *element = NULL;

    // Pass 1: summary table.
    int32_t pos = UHASH_FIRST;
    while ((element = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *entry = static_cast<RBBISymbolTableEntry *>(element->value.pointer);

        RBBIDebugPrintf("%-19s %8p %7d ", CStr(entry->key)(), (void *)entry->val, entry->val->fSerialNum);
        RBBIDebugPrintf(" %s\n", CStr(entry->val->fLeftChild->fText)());
    }

    // Pass 2: restart the iteration and dump the parsed tree of each definition.
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
    pos = UHASH_FIRST;
    while ((element = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *entry = static_cast<RBBISymbolTableEntry *>(element->value.pointer);

        RBBIDebugPrintf("%s\n", CStr(entry->key)());
        RBBINode::printTree(entry->val, TRUE);
        RBBINode::printTree(entry->val->fLeftChild, FALSE);
        RBBIDebugPrintf("\n");
    }
}
#endif
263
264
265
266
267
268 U_NAMESPACE_END
269
270 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
271