• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 //  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
5 //
6 /*
7 ***************************************************************************
8 *   Copyright (C) 2002-2014 International Business Machines Corporation
9 *   and others. All rights reserved.
10 ***************************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_BREAK_ITERATION
16 
17 #include "unicode/unistr.h"
18 #include "unicode/uniset.h"
19 #include "unicode/uchar.h"
20 #include "unicode/parsepos.h"
21 
22 #include "cstr.h"
23 #include "rbbinode.h"
24 #include "rbbirb.h"
25 #include "umutex.h"
26 
27 
28 //
29 //  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
30 //                                  when the hash table is deleted.
31 //
32 U_CDECL_BEGIN
RBBISymbolTableEntry_deleter(void * p)33 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
34     icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
35     delete px;
36 }
37 U_CDECL_END
38 
39 
40 
41 U_NAMESPACE_BEGIN
42 
RBBISymbolTable(RBBIRuleScanner * rs,const UnicodeString & rules,UErrorCode & status)43 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
44     :fRules(rules), fRuleScanner(rs), ffffString(char16_t(0xffff))
45 {
46     fHashTable       = nullptr;
47     fCachedSetLookup = nullptr;
48 
49     fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, &status);
50     // uhash_open checks status
51     if (U_FAILURE(status)) {
52         return;
53     }
54     uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
55 }
56 
57 
58 
~RBBISymbolTable()59 RBBISymbolTable::~RBBISymbolTable()
60 {
61     uhash_close(fHashTable);
62 }
63 
64 
65 //
66 //  RBBISymbolTable::lookup       This function from the abstract symbol table interface
67 //                                looks up a variable name and returns a UnicodeString
68 //                                containing the substitution text.
69 //
70 //                                The variable name does NOT include the leading $.
71 //
const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
{
    // This function is const in the symbol table interface, yet it has to
    // record which set a following lookupMatcher() call should return.
    RBBISymbolTable *mutableThis = const_cast<RBBISymbolTable *>(this);

    RBBISymbolTableEntry *entry =
        static_cast<RBBISymbolTableEntry *>(uhash_get(fHashTable, &s));
    if (entry == nullptr) {
        // No variable with this name has been defined.
        return nullptr;
    }

    // The left child of the variable reference node is the root of the
    // expression that was assigned to the variable.
    RBBINode *exprNode = entry->val->fLeftChild;

    if (exprNode->fType != RBBINode::setRef) {
        // The variable refers to something other than just a set:
        //   hand back the original source text of the expression.
        mutableThis->fCachedSetLookup = nullptr;
        return &exprNode->fText;
    }

    // The $variable refers to a single UnicodeSet.  Remember the set and
    //   return ffffString, which RBBISymbolTable::lookupMatcher() will
    //   subsequently interpret as a stand-in character for that set.
    mutableThis->fCachedSetLookup = exprNode->fLeftChild->fInputSet;
    return &ffffString;
}
105 
106 
107 
108 //
109 //  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
110 //                                   interface maps a single stand-in character to a
111 //                                   pointer to a Unicode Set.   The Unicode Set code uses this
112 //                                   mechanism to get all references to the same $variable
113 //                                   name to refer to a single common Unicode Set instance.
114 //
115 //    This implementation cheats a little, and does not maintain a map of stand-in chars
116 //    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
117 //    constructor will always call this function right after calling lookup(),
118 //    and we just need to remember what set to return between these two calls.
lookupMatcher(UChar32 ch) const119 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
120 {
121     UnicodeSet *retVal = nullptr;
122     RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const
123     if (ch == 0xffff) {
124         retVal = fCachedSetLookup;
125         This->fCachedSetLookup = 0;
126     }
127     return retVal;
128 }
129 
130 //
131 // RBBISymbolTable::parseReference   This function from the abstract symbol table interface
132 //                                   looks for a $variable name in the source text.
133 //                                   It does not look it up, only scans for it.
134 //                                   It is used by the UnicodeSet parser.
135 //
136 //                                   This implementation is lifted pretty much verbatim
137 //                                   from the rules based transliterator implementation.
138 //                                   I didn't see an obvious way of sharing it.
139 //
UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
                                                ParsePosition& pos, int32_t limit) const
{
    const int32_t start = pos.getIndex();

    // Advance 'end' over the longest run of name characters beginning at
    // 'start'.  The first character must satisfy both u_isIDStart() and
    // u_isIDPart(); every later character only needs u_isIDPart().
    int32_t end = start;
    for (; end < limit; ++end) {
        const char16_t c = text.charAt(end);
        const bool isNameChar = (end != start || u_isIDStart(c)) && u_isIDPart(c);
        if (!isNameChar) {
            break;
        }
    }

    UnicodeString name;
    if (end == start) {
        // No valid name characters: signal failure with an empty string and
        // leave the parse position untouched.
        return name;
    }

    // Success: step the parse position past the name and return the name.
    pos.setIndex(end);
    text.extractBetween(start, end, name);
    return name;
}
160 
161 
162 
163 //
164 // RBBISymbolTable::lookupNode      Given a key (a variable name), return the
165 //                                  corresponding RBBI Node.  If there is no entry
166 //                                  in the table for this name, return nullptr.
167 //
lookupNode(const UnicodeString & key) const168 RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
169 
170     RBBINode             *retNode = nullptr;
171     RBBISymbolTableEntry *el;
172 
173     el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
174     if (el != nullptr) {
175         retNode = el->val;
176     }
177     return retNode;
178 }
179 
180 
181 //
182 //    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
183 //                                  Indicate an error if the name already exists -
184 //                                    this will only occur in the case of duplicate
185 //                                    variable assignments.
186 //
addEntry(const UnicodeString & key,RBBINode * val,UErrorCode & err)187 void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
188     RBBISymbolTableEntry *e;
189     /* test for buffer overflows */
190     if (U_FAILURE(err)) {
191         return;
192     }
193     e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
194     if (e != nullptr) {
195         err = U_BRK_VARIABLE_REDFINITION;
196         return;
197     }
198 
199     e = new RBBISymbolTableEntry;
200     if (e == nullptr) {
201         err = U_MEMORY_ALLOCATION_ERROR;
202         return;
203     }
204     e->key = key;
205     e->val = val;
206     uhash_put( fHashTable, &e->key, e, &err);
207 }
208 
209 
RBBISymbolTableEntry()210 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(nullptr) {}
211 
~RBBISymbolTableEntry()212 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
213     // The "val" of a symbol table entry is a variable reference node.
214     // The l. child of the val is the rhs expression from the assignment.
215     // Unlike other node types, children of variable reference nodes are not
216     //    automatically recursively deleted.  We do it manually here.
217     delete val->fLeftChild;
218     val->fLeftChild = nullptr;
219 
220     delete  val;
221 
222     // Note: the key UnicodeString is destructed by virtue of being in the object by value.
223 }
224 
225 
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//
229 #ifdef RBBI_DEBUG
rbbiSymtablePrint() const230 void RBBISymbolTable::rbbiSymtablePrint() const {
231     RBBIDebugPrintf("Variable Definitions Symbol Table\n"
232            "Name                  Node         serial  String Val\n"
233            "-------------------------------------------------------------------\n");
234 
235     int32_t pos = UHASH_FIRST;
236     const UHashElement  *e   = nullptr;
237     for (;;) {
238         e = uhash_nextElement(fHashTable,  &pos);
239         if (e == nullptr ) {
240             break;
241         }
242         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
243 
244         RBBIDebugPrintf("%-19s   %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum);
245         RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)());
246     }
247 
248     RBBIDebugPrintf("\nParsed Variable Definitions\n");
249     pos = -1;
250     for (;;) {
251         e = uhash_nextElement(fHashTable,  &pos);
252         if (e == nullptr ) {
253             break;
254         }
255         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
256         RBBIDebugPrintf("%s\n", CStr(s->key)());
257         RBBINode::printTree(s->val, true);
258         RBBINode::printTree(s->val->fLeftChild, false);
259         RBBIDebugPrintf("\n");
260     }
261 }
262 #endif
263 
264 
265 
266 
267 
268 U_NAMESPACE_END
269 
270 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
271