• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (c) 2001-2010, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   08/10/2001  aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_TRANSLITERATION
14 
15 #include "unicode/translit.h"
16 #include "unicode/resbund.h"
17 #include "unicode/uniset.h"
18 #include "unicode/uscript.h"
19 #include "rbt.h"
20 #include "cpdtrans.h"
21 #include "nultrans.h"
22 #include "transreg.h"
23 #include "rbt_data.h"
24 #include "rbt_pars.h"
25 #include "tridpars.h"
26 #include "charstr.h"
27 #include "uassert.h"
28 #include "locutil.h"
29 
30 // Enable the following symbol to add debugging code that tracks the
31 // allocation, deletion, and use of Entry objects.  BoundsChecker has
32 // reported dangling pointer errors with these objects, but I have
33 // been unable to confirm them.  I suspect BoundsChecker is getting
34 // confused with pointers going into and coming out of a UHashtable,
35 // despite the hinting code that is designed to help it.
36 // #define DEBUG_MEM
37 #ifdef DEBUG_MEM
38 #include <stdio.h>
39 #endif
40 
41 // UChar constants
42 static const UChar LOCALE_SEP  = 95; // '_'
43 //static const UChar ID_SEP      = 0x002D; /*-*/
44 //static const UChar VARIANT_SEP = 0x002F; // '/'
45 
46 // String constants
47 static const UChar NO_VARIANT[] = { 0 }; // empty string
48 static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
49 
50 /**
51  * Resource bundle key for the RuleBasedTransliterator rule.
52  */
53 //static const char RB_RULE[] = "Rule";
54 
55 U_NAMESPACE_BEGIN
56 
57 //------------------------------------------------------------------
58 // Alias
59 //------------------------------------------------------------------
60 
TransliteratorAlias(const UnicodeString & theAliasID,const UnicodeSet * cpdFilter)61 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
62                                          const UnicodeSet* cpdFilter) :
63     ID(),
64     aliasesOrRules(theAliasID),
65     transes(0),
66     compoundFilter(cpdFilter),
67     direction(UTRANS_FORWARD),
68     type(TransliteratorAlias::SIMPLE) {
69 }
70 
TransliteratorAlias(const UnicodeString & theID,const UnicodeString & idBlocks,UVector * adoptedTransliterators,const UnicodeSet * cpdFilter)71 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
72                                          const UnicodeString& idBlocks,
73                                          UVector* adoptedTransliterators,
74                                          const UnicodeSet* cpdFilter) :
75     ID(theID),
76     aliasesOrRules(idBlocks),
77     transes(adoptedTransliterators),
78     compoundFilter(cpdFilter),
79     direction(UTRANS_FORWARD),
80     type(TransliteratorAlias::COMPOUND) {
81 }
82 
TransliteratorAlias(const UnicodeString & theID,const UnicodeString & rules,UTransDirection dir)83 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
84                                          const UnicodeString& rules,
85                                          UTransDirection dir) :
86     ID(theID),
87     aliasesOrRules(rules),
88     transes(0),
89     compoundFilter(0),
90     direction(dir),
91     type(TransliteratorAlias::RULES) {
92 }
93 
~TransliteratorAlias()94 TransliteratorAlias::~TransliteratorAlias() {
95     delete transes;
96 }
97 
98 
create(UParseError & pe,UErrorCode & ec)99 Transliterator* TransliteratorAlias::create(UParseError& pe,
100                                             UErrorCode& ec) {
101     if (U_FAILURE(ec)) {
102         return 0;
103     }
104     Transliterator *t = NULL;
105     switch (type) {
106     case SIMPLE:
107         t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
108         if(U_FAILURE(ec)){
109             return 0;
110         }
111         if (compoundFilter != 0)
112             t->adoptFilter((UnicodeSet*)compoundFilter->clone());
113         break;
114     case COMPOUND:
115         {
116             // the total number of transliterators in the compound is the total number of anonymous transliterators
117             // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
118             // block and that each pair anonymous transliterators has an ID block between them.  Then we go back
119             // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
120             // marks the position where an anonymous transliterator goes) and adjust accordingly
121             int32_t anonymousRBTs = transes->size();
122             int32_t transCount = anonymousRBTs * 2 + 1;
123             if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
124                 --transCount;
125             if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
126                 --transCount;
127             UnicodeString noIDBlock((UChar)(0xffff));
128             noIDBlock += ((UChar)(0xffff));
129             int32_t pos = aliasesOrRules.indexOf(noIDBlock);
130             while (pos >= 0) {
131                 --transCount;
132                 pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
133             }
134 
135             UVector transliterators(ec);
136             UnicodeString idBlock;
137             int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
138             while (blockSeparatorPos >= 0) {
139                 aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
140                 aliasesOrRules.remove(0, blockSeparatorPos + 1);
141                 if (!idBlock.isEmpty())
142                     transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
143                 if (!transes->isEmpty())
144                     transliterators.addElement(transes->orphanElementAt(0), ec);
145                 blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
146             }
147             if (!aliasesOrRules.isEmpty())
148                 transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
149             while (!transes->isEmpty())
150                 transliterators.addElement(transes->orphanElementAt(0), ec);
151 
152             if (U_SUCCESS(ec)) {
153                 t = new CompoundTransliterator(ID, transliterators,
154                     (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
155                     anonymousRBTs, pe, ec);
156                 if (t == 0) {
157                     ec = U_MEMORY_ALLOCATION_ERROR;
158                     return 0;
159                 }
160             } else {
161                 for (int32_t i = 0; i < transliterators.size(); i++)
162                     delete (Transliterator*)(transliterators.elementAt(i));
163             }
164         }
165         break;
166     case RULES:
167         U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
168         break;
169     }
170     return t;
171 }
172 
isRuleBased() const173 UBool TransliteratorAlias::isRuleBased() const {
174     return type == RULES;
175 }
176 
parse(TransliteratorParser & parser,UParseError & pe,UErrorCode & ec) const177 void TransliteratorAlias::parse(TransliteratorParser& parser,
178                                 UParseError& pe, UErrorCode& ec) const {
179     U_ASSERT(type == RULES);
180     if (U_FAILURE(ec)) {
181         return;
182     }
183 
184     parser.parse(aliasesOrRules, direction, pe, ec);
185 }
186 
187 //----------------------------------------------------------------------
188 // class TransliteratorSpec
189 //----------------------------------------------------------------------
190 
191 /**
192  * A TransliteratorSpec is a string specifying either a source or a target.  In more
193  * general terms, it may also specify a variant, but we only use the
194  * Spec class for sources and targets.
195  *
196  * A Spec may be a locale or a script.  If it is a locale, it has a
197  * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
198  * ssss is the script mapping of xx_YY_ZZZ.  The Spec API methods
199  * hasFallback(), next(), and reset() iterate over this fallback
200  * sequence.
201  *
202  * The Spec class canonicalizes itself, so the locale is put into
203  * canonical form, or the script is transformed from an abbreviation
204  * to a full name.
205  */
206 class TransliteratorSpec : public UMemory {
207  public:
208     TransliteratorSpec(const UnicodeString& spec);
209     ~TransliteratorSpec();
210 
211     const UnicodeString& get() const;
212     UBool hasFallback() const;
213     const UnicodeString& next();
214     void reset();
215 
216     UBool isLocale() const;
217     ResourceBundle& getBundle() const;
218 
operator const UnicodeString&() const219     operator const UnicodeString&() const { return get(); }
getTop() const220     const UnicodeString& getTop() const { return top; }
221 
222  private:
223     void setupNext();
224 
225     UnicodeString top;
226     UnicodeString spec;
227     UnicodeString nextSpec;
228     UnicodeString scriptName;
229     UBool isSpecLocale; // TRUE if spec is a locale
230     UBool isNextLocale; // TRUE if nextSpec is a locale
231     ResourceBundle* res;
232 
233     TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
234     TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
235 };
236 
TransliteratorSpec(const UnicodeString & theSpec)237 TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
238 : top(theSpec),
239   res(0)
240 {
241     UErrorCode status = U_ZERO_ERROR;
242     Locale topLoc("");
243     LocaleUtility::initLocaleFromName(theSpec, topLoc);
244     if (!topLoc.isBogus()) {
245         res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status);
246         /* test for NULL */
247         if (res == 0) {
248             return;
249         }
250         if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
251             delete res;
252             res = 0;
253         }
254     }
255 
256     // Canonicalize script name -or- do locale->script mapping
257     status = U_ZERO_ERROR;
258     static const int32_t capacity = 10;
259     UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
260     int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(),
261                                   script, capacity, &status);
262     if (num > 0 && script[0] != USCRIPT_INVALID_CODE) {
263         scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV);
264     }
265 
266     // Canonicalize top
267     if (res != 0) {
268         // Canonicalize locale name
269         UnicodeString locStr;
270         LocaleUtility::initNameFromLocale(topLoc, locStr);
271         if (!locStr.isBogus()) {
272             top = locStr;
273         }
274     } else if (scriptName.length() != 0) {
275         // We are a script; use canonical name
276         top = scriptName;
277     }
278 
279     // assert(spec != top);
280     reset();
281 }
282 
~TransliteratorSpec()283 TransliteratorSpec::~TransliteratorSpec() {
284     delete res;
285 }
286 
hasFallback() const287 UBool TransliteratorSpec::hasFallback() const {
288     return nextSpec.length() != 0;
289 }
290 
reset()291 void TransliteratorSpec::reset() {
292     if (spec != top) {
293         spec = top;
294         isSpecLocale = (res != 0);
295         setupNext();
296     }
297 }
298 
setupNext()299 void TransliteratorSpec::setupNext() {
300     isNextLocale = FALSE;
301     if (isSpecLocale) {
302         nextSpec = spec;
303         int32_t i = nextSpec.lastIndexOf(LOCALE_SEP);
304         // If i == 0 then we have _FOO, so we fall through
305         // to the scriptName.
306         if (i > 0) {
307             nextSpec.truncate(i);
308             isNextLocale = TRUE;
309         } else {
310             nextSpec = scriptName; // scriptName may be empty
311         }
312     } else {
313         // spec is a script, so we are at the end
314         nextSpec.truncate(0);
315     }
316 }
317 
318 // Protocol:
319 // for(const UnicodeString& s(spec.get());
320 //     spec.hasFallback(); s(spec.next())) { ...
321 
next()322 const UnicodeString& TransliteratorSpec::next() {
323     spec = nextSpec;
324     isSpecLocale = isNextLocale;
325     setupNext();
326     return spec;
327 }
328 
get() const329 const UnicodeString& TransliteratorSpec::get() const {
330     return spec;
331 }
332 
isLocale() const333 UBool TransliteratorSpec::isLocale() const {
334     return isSpecLocale;
335 }
336 
getBundle() const337 ResourceBundle& TransliteratorSpec::getBundle() const {
338     return *res;
339 }
340 
341 //----------------------------------------------------------------------
342 
#ifdef DEBUG_MEM

// Vector of Entry pointers currently in use
static UVector* DEBUG_entries = NULL;

// Lazily create the tracking vector.  If the allocation fails,
// DEBUG_entries stays NULL and the helpers below degrade to no-ops.
static void DEBUG_setup() {
    if (DEBUG_entries == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
        DEBUG_entries = new UVector(ec);
    }
}

// Caller must call DEBUG_setup first.  Return index of given Entry,
// if it is in use (not deleted yet), or -1 if not found.
static int DEBUG_findEntry(TransliteratorEntry* e) {
    if (DEBUG_entries == NULL) {
        return -1;
    }
    for (int i=0; i<DEBUG_entries->size(); ++i) {
        if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
            return i;
        }
    }
    return -1;
}

// Track object creation
static void DEBUG_newEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    if (DEBUG_entries == NULL) {
        return; // tracking unavailable
    }
    if (DEBUG_findEntry(e) >= 0) {
        // This should really never happen unless the heap is broken
        // %p with a void* argument is the portable way to print a pointer.
        printf("ERROR DEBUG_newEntry duplicate new pointer %p\n", (void*) e);
        return;
    }
    UErrorCode ec = U_ZERO_ERROR;
    DEBUG_entries->addElement(e, ec);
}

// Track object deletion
static void DEBUG_delEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    if (DEBUG_entries == NULL) {
        return; // tracking unavailable
    }
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_delEntry possible double deletion %p\n", (void*) e);
        return;
    }
    DEBUG_entries->removeElementAt(i);
}

// Track object usage
static void DEBUG_useEntry(TransliteratorEntry* e) {
    if (e == NULL) return;
    DEBUG_setup();
    if (DEBUG_entries == NULL) {
        return; // tracking unavailable
    }
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_useEntry possible dangling pointer %p\n", (void*) e);
    }
}

#else
// If we're not debugging then make these macros into NOPs
#define DEBUG_newEntry(x)
#define DEBUG_delEntry(x)
#define DEBUG_useEntry(x)
#endif
405 
406 //----------------------------------------------------------------------
407 // class Entry
408 //----------------------------------------------------------------------
409 
410 /**
411  * The Entry object stores objects of different types and
412  * singleton objects as placeholders for rule-based transliterators to
413  * be built as needed.  Instances of this struct can be placeholders,
414  * can represent prototype transliterators to be cloned, or can
415  * represent TransliteratorData objects.  We don't support storing
416  * classes in the registry because we don't have the rtti infrastructure
417  * for it.  We could easily add this if there is a need for it in the
418  * future.
419  */
420 class TransliteratorEntry : public UMemory {
421 public:
422     enum Type {
423         RULES_FORWARD,
424         RULES_REVERSE,
425         LOCALE_RULES,
426         PROTOTYPE,
427         RBT_DATA,
428         COMPOUND_RBT,
429         ALIAS,
430         FACTORY,
431         NONE // Only used for uninitialized entries
432     } entryType;
433     // NOTE: stringArg cannot go inside the union because
434     // it has a copy constructor
435     UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT
436     int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES
437     UnicodeSet* compoundFilter; // For COMPOUND_RBT
438     union {
439         Transliterator* prototype; // For PROTOTYPE
440         TransliterationRuleData* data; // For RBT_DATA
441         UVector* dataVector;    // For COMPOUND_RBT
442         struct {
443             Transliterator::Factory function;
444             Transliterator::Token   context;
445         } factory; // For FACTORY
446     } u;
447     TransliteratorEntry();
448     ~TransliteratorEntry();
449     void adoptPrototype(Transliterator* adopted);
450     void setFactory(Transliterator::Factory factory,
451                     Transliterator::Token context);
452 
453 private:
454 
455     TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
456     TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
457 };
458 
TransliteratorEntry()459 TransliteratorEntry::TransliteratorEntry() {
460     u.prototype = 0;
461     compoundFilter = NULL;
462     entryType = NONE;
463     DEBUG_newEntry(this);
464 }
465 
~TransliteratorEntry()466 TransliteratorEntry::~TransliteratorEntry() {
467     DEBUG_delEntry(this);
468     if (entryType == PROTOTYPE) {
469         delete u.prototype;
470     } else if (entryType == RBT_DATA) {
471         // The data object is shared between instances of RBT.  The
472         // entry object owns it.  It should only be deleted when the
473         // transliterator component is being cleaned up.  Doing so
474         // invalidates any RBTs that the user has instantiated.
475         delete u.data;
476     } else if (entryType == COMPOUND_RBT) {
477         while (u.dataVector != NULL && !u.dataVector->isEmpty())
478             delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
479         delete u.dataVector;
480     }
481     delete compoundFilter;
482 }
483 
adoptPrototype(Transliterator * adopted)484 void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
485     if (entryType == PROTOTYPE) {
486         delete u.prototype;
487     }
488     entryType = PROTOTYPE;
489     u.prototype = adopted;
490 }
491 
setFactory(Transliterator::Factory factory,Transliterator::Token context)492 void TransliteratorEntry::setFactory(Transliterator::Factory factory,
493                        Transliterator::Token context) {
494     if (entryType == PROTOTYPE) {
495         delete u.prototype;
496     }
497     entryType = FACTORY;
498     u.factory.function = factory;
499     u.factory.context = context;
500 }
501 
502 // UObjectDeleter for Hashtable::setValueDeleter
503 U_CDECL_BEGIN
504 static void U_CALLCONV
deleteEntry(void * obj)505 deleteEntry(void* obj) {
506     delete (TransliteratorEntry*) obj;
507 }
508 U_CDECL_END
509 
510 //----------------------------------------------------------------------
511 // class TransliteratorRegistry: Basic public API
512 //----------------------------------------------------------------------
513 
TransliteratorRegistry(UErrorCode & status)514 TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
515     registry(TRUE, status),
516     specDAG(TRUE, status),
517     availableIDs(status)
518 {
519     registry.setValueDeleter(deleteEntry);
520     availableIDs.setDeleter(uhash_deleteUnicodeString);
521     availableIDs.setComparer(uhash_compareCaselessUnicodeString);
522     specDAG.setValueDeleter(uhash_deleteHashtable);
523 }
524 
~TransliteratorRegistry()525 TransliteratorRegistry::~TransliteratorRegistry() {
526     // Through the magic of C++, everything cleans itself up
527 }
528 
get(const UnicodeString & ID,TransliteratorAlias * & aliasReturn,UErrorCode & status)529 Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
530                                             TransliteratorAlias*& aliasReturn,
531                                             UErrorCode& status) {
532     U_ASSERT(aliasReturn == NULL);
533     TransliteratorEntry *entry = find(ID);
534     return (entry == 0) ? 0
535         : instantiateEntry(ID, entry, aliasReturn, status);
536 }
537 
reget(const UnicodeString & ID,TransliteratorParser & parser,TransliteratorAlias * & aliasReturn,UErrorCode & status)538 Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
539                                               TransliteratorParser& parser,
540                                               TransliteratorAlias*& aliasReturn,
541                                               UErrorCode& status) {
542     U_ASSERT(aliasReturn == NULL);
543     TransliteratorEntry *entry = find(ID);
544 
545     if (entry == 0) {
546         // We get to this point if there are two threads, one of which
547         // is instantiating an ID, and another of which is removing
548         // the same ID from the registry, and the timing is just right.
549         return 0;
550     }
551 
552     // The usage model for the caller is that they will first call
553     // reg->get() inside the mutex, they'll get back an alias, they call
554     // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
555     // outside the mutex, then reg->reget() inside the mutex again.  A real
556     // mess, but it gets things working for ICU 3.0. [alan].
557 
558     // Note: It's possible that in between the caller calling
559     // alias->parse() and reg->reget(), that another thread will have
560     // called reg->reget(), and the entry will already have been fixed up.
561     // We have to detect this so we don't stomp over existing entry
562     // data members and potentially leak memory (u.data and compoundFilter).
563 
564     if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
565         entry->entryType == TransliteratorEntry::RULES_REVERSE ||
566         entry->entryType == TransliteratorEntry::LOCALE_RULES) {
567 
568         if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
569             entry->u.data = 0;
570             entry->entryType = TransliteratorEntry::ALIAS;
571             entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
572         }
573         else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
574             entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
575             entry->entryType = TransliteratorEntry::RBT_DATA;
576         }
577         else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
578             entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
579             entry->compoundFilter = parser.orphanCompoundFilter();
580             entry->entryType = TransliteratorEntry::ALIAS;
581         }
582         else {
583             entry->entryType = TransliteratorEntry::COMPOUND_RBT;
584             entry->compoundFilter = parser.orphanCompoundFilter();
585             entry->u.dataVector = new UVector(status);
586             entry->stringArg.remove();
587 
588             int32_t limit = parser.idBlockVector.size();
589             if (parser.dataVector.size() > limit)
590                 limit = parser.dataVector.size();
591 
592             for (int32_t i = 0; i < limit; i++) {
593                 if (i < parser.idBlockVector.size()) {
594                     UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
595                     if (!idBlock->isEmpty())
596                         entry->stringArg += *idBlock;
597                 }
598                 if (!parser.dataVector.isEmpty()) {
599                     TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
600                     entry->u.dataVector->addElement(data, status);
601                     entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
602                 }
603             }
604         }
605     }
606 
607     Transliterator *t =
608         instantiateEntry(ID, entry, aliasReturn, status);
609     return t;
610 }
611 
put(Transliterator * adoptedProto,UBool visible,UErrorCode & ec)612 void TransliteratorRegistry::put(Transliterator* adoptedProto,
613                                  UBool visible,
614                                  UErrorCode& ec)
615 {
616     TransliteratorEntry *entry = new TransliteratorEntry();
617     if (entry == NULL) {
618         ec = U_MEMORY_ALLOCATION_ERROR;
619         return;
620     }
621     entry->adoptPrototype(adoptedProto);
622     registerEntry(adoptedProto->getID(), entry, visible);
623 }
624 
put(const UnicodeString & ID,Transliterator::Factory factory,Transliterator::Token context,UBool visible,UErrorCode & ec)625 void TransliteratorRegistry::put(const UnicodeString& ID,
626                                  Transliterator::Factory factory,
627                                  Transliterator::Token context,
628                                  UBool visible,
629                                  UErrorCode& ec) {
630     TransliteratorEntry *entry = new TransliteratorEntry();
631     if (entry == NULL) {
632         ec = U_MEMORY_ALLOCATION_ERROR;
633         return;
634     }
635     entry->setFactory(factory, context);
636     registerEntry(ID, entry, visible);
637 }
638 
put(const UnicodeString & ID,const UnicodeString & resourceName,UTransDirection dir,UBool readonlyResourceAlias,UBool visible,UErrorCode & ec)639 void TransliteratorRegistry::put(const UnicodeString& ID,
640                                  const UnicodeString& resourceName,
641                                  UTransDirection dir,
642                                  UBool readonlyResourceAlias,
643                                  UBool visible,
644                                  UErrorCode& ec) {
645     TransliteratorEntry *entry = new TransliteratorEntry();
646     if (entry == NULL) {
647         ec = U_MEMORY_ALLOCATION_ERROR;
648         return;
649     }
650     entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD
651         : TransliteratorEntry::RULES_REVERSE;
652     if (readonlyResourceAlias) {
653         entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1);
654     }
655     else {
656         entry->stringArg = resourceName;
657     }
658     registerEntry(ID, entry, visible);
659 }
660 
put(const UnicodeString & ID,const UnicodeString & alias,UBool readonlyAliasAlias,UBool visible,UErrorCode &)661 void TransliteratorRegistry::put(const UnicodeString& ID,
662                                  const UnicodeString& alias,
663                                  UBool readonlyAliasAlias,
664                                  UBool visible,
665                                  UErrorCode& /*ec*/) {
666     TransliteratorEntry *entry = new TransliteratorEntry();
667     // Null pointer check
668     if (entry != NULL) {
669         entry->entryType = TransliteratorEntry::ALIAS;
670         if (readonlyAliasAlias) {
671             entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);
672         }
673         else {
674             entry->stringArg = alias;
675         }
676         registerEntry(ID, entry, visible);
677     }
678 }
679 
remove(const UnicodeString & ID)680 void TransliteratorRegistry::remove(const UnicodeString& ID) {
681     UnicodeString source, target, variant;
682     UBool sawSource;
683     TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
684     // Only need to do this if ID.indexOf('-') < 0
685     UnicodeString id;
686     TransliteratorIDParser::STVtoID(source, target, variant, id);
687     registry.remove(id);
688     removeSTV(source, target, variant);
689     availableIDs.removeElement((void*) &id);
690 }
691 
692 //----------------------------------------------------------------------
693 // class TransliteratorRegistry: Public ID and spec management
694 //----------------------------------------------------------------------
695 
696 /**
697  * == OBSOLETE - remove in ICU 3.4 ==
698  * Return the number of IDs currently registered with the system.
699  * To retrieve the actual IDs, call getAvailableID(i) with
700  * i from 0 to countAvailableIDs() - 1.
701  */
countAvailableIDs(void) const702 int32_t TransliteratorRegistry::countAvailableIDs(void) const {
703     return availableIDs.size();
704 }
705 
706 /**
707  * == OBSOLETE - remove in ICU 3.4 ==
708  * Return the index-th available ID.  index must be between 0
709  * and countAvailableIDs() - 1, inclusive.  If index is out of
710  * range, the result of getAvailableID(0) is returned.
711  */
getAvailableID(int32_t index) const712 const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
713     if (index < 0 || index >= availableIDs.size()) {
714         index = 0;
715     }
716     return *(const UnicodeString*) availableIDs[index];
717 }
718 
getAvailableIDs() const719 StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
720     return new Enumeration(*this);
721 }
722 
countAvailableSources(void) const723 int32_t TransliteratorRegistry::countAvailableSources(void) const {
724     return specDAG.count();
725 }
726 
getAvailableSource(int32_t index,UnicodeString & result) const727 UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
728                                                           UnicodeString& result) const {
729     int32_t pos = -1;
730     const UHashElement *e = 0;
731     while (index-- >= 0) {
732         e = specDAG.nextElement(pos);
733         if (e == 0) {
734             break;
735         }
736     }
737     if (e == 0) {
738         result.truncate(0);
739     } else {
740         result = *(UnicodeString*) e->key.pointer;
741     }
742     return result;
743 }
744 
countAvailableTargets(const UnicodeString & source) const745 int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const {
746     Hashtable *targets = (Hashtable*) specDAG.get(source);
747     return (targets == 0) ? 0 : targets->count();
748 }
749 
getAvailableTarget(int32_t index,const UnicodeString & source,UnicodeString & result) const750 UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index,
751                                                           const UnicodeString& source,
752                                                           UnicodeString& result) const {
753     Hashtable *targets = (Hashtable*) specDAG.get(source);
754     if (targets == 0) {
755         result.truncate(0); // invalid source
756         return result;
757     }
758     int32_t pos = -1;
759     const UHashElement *e = 0;
760     while (index-- >= 0) {
761         e = targets->nextElement(pos);
762         if (e == 0) {
763             break;
764         }
765     }
766     if (e == 0) {
767         result.truncate(0); // invalid index
768     } else {
769         result = *(UnicodeString*) e->key.pointer;
770     }
771     return result;
772 }
773 
countAvailableVariants(const UnicodeString & source,const UnicodeString & target) const774 int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source,
775                                                        const UnicodeString& target) const {
776     Hashtable *targets = (Hashtable*) specDAG.get(source);
777     if (targets == 0) {
778         return 0;
779     }
780     UVector *variants = (UVector*) targets->get(target);
781     // variants may be 0 if the source/target are invalid
782     return (variants == 0) ? 0 : variants->size();
783 }
784 
getAvailableVariant(int32_t index,const UnicodeString & source,const UnicodeString & target,UnicodeString & result) const785 UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
786                                                            const UnicodeString& source,
787                                                            const UnicodeString& target,
788                                                            UnicodeString& result) const {
789     Hashtable *targets = (Hashtable*) specDAG.get(source);
790     if (targets == 0) {
791         result.truncate(0); // invalid source
792         return result;
793     }
794     UVector *variants = (UVector*) targets->get(target);
795     if (variants == 0) {
796         result.truncate(0); // invalid target
797         return result;
798     }
799     UnicodeString *v = (UnicodeString*) variants->elementAt(index);
800     if (v == 0) {
801         result.truncate(0); // invalid index
802     } else {
803         result = *v;
804     }
805     return result;
806 }
807 
808 //----------------------------------------------------------------------
809 // class TransliteratorRegistry::Enumeration
810 //----------------------------------------------------------------------
811 
/**
 * Create an enumeration over the given registry, starting at index 0
 * of its available-ID list.  Only a reference to the registry is kept.
 */
TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg)
    : index(0),
      reg(_reg)
{
}
815 
~Enumeration()816 TransliteratorRegistry::Enumeration::~Enumeration() {
817 }
818 
count(UErrorCode &) const819 int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const {
820     return reg.availableIDs.size();
821 }
822 
snext(UErrorCode & status)823 const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
824     // This is sloppy but safe -- if we get out of sync with the underlying
825     // registry, we will still return legal strings, but they might not
826     // correspond to the snapshot at construction time.  So there could be
827     // duplicate IDs or omitted IDs if insertions or deletions occur in one
828     // thread while another is iterating.  To be more rigorous, add a timestamp,
829     // which is incremented with any modification, and validate this iterator
830     // against the timestamp at construction time.  This probably isn't worth
831     // doing as long as there is some possibility of removing this code in favor
832     // of some new code based on Doug's service framework.
833     if (U_FAILURE(status)) {
834         return NULL;
835     }
836     int32_t n = reg.availableIDs.size();
837     if (index > n) {
838         status = U_ENUM_OUT_OF_SYNC_ERROR;
839     }
840     // index == n is okay -- this means we've reached the end
841     if (index < n) {
842         // Copy the string! This avoids lifetime problems.
843         unistr = *(const UnicodeString*)reg.availableIDs[index++];
844         return &unistr;
845     } else {
846         return NULL;
847     }
848 }
849 
reset(UErrorCode &)850 void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) {
851     index = 0;
852 }
853 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)854 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
855 
856 //----------------------------------------------------------------------
857 // class TransliteratorRegistry: internal
858 //----------------------------------------------------------------------
859 
860 /**
861  * Convenience method.  Calls 6-arg registerEntry().
862  */
863 void TransliteratorRegistry::registerEntry(const UnicodeString& source,
864                                            const UnicodeString& target,
865                                            const UnicodeString& variant,
866                                            TransliteratorEntry* adopted,
867                                            UBool visible) {
868     UnicodeString ID;
869     UnicodeString s(source);
870     if (s.length() == 0) {
871         s = ANY;
872     }
873     TransliteratorIDParser::STVtoID(source, target, variant, ID);
874     registerEntry(ID, s, target, variant, adopted, visible);
875 }
876 
877 /**
878  * Convenience method.  Calls 6-arg registerEntry().
879  */
registerEntry(const UnicodeString & ID,TransliteratorEntry * adopted,UBool visible)880 void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
881                                            TransliteratorEntry* adopted,
882                                            UBool visible) {
883     UnicodeString source, target, variant;
884     UBool sawSource;
885     TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
886     // Only need to do this if ID.indexOf('-') < 0
887     UnicodeString id;
888     TransliteratorIDParser::STVtoID(source, target, variant, id);
889     registerEntry(id, source, target, variant, adopted, visible);
890 }
891 
892 /**
893  * Register an entry object (adopted) with the given ID, source,
894  * target, and variant strings.
895  */
registerEntry(const UnicodeString & ID,const UnicodeString & source,const UnicodeString & target,const UnicodeString & variant,TransliteratorEntry * adopted,UBool visible)896 void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
897                                            const UnicodeString& source,
898                                            const UnicodeString& target,
899                                            const UnicodeString& variant,
900                                            TransliteratorEntry* adopted,
901                                            UBool visible) {
902     UErrorCode status = U_ZERO_ERROR;
903     registry.put(ID, adopted, status);
904     if (visible) {
905         registerSTV(source, target, variant);
906         if (!availableIDs.contains((void*) &ID)) {
907             UnicodeString *newID = (UnicodeString *)ID.clone();
908             // Check to make sure newID was created.
909             if (newID != NULL) {
910 	            // NUL-terminate the ID string
911 	            newID->getTerminatedBuffer();
912 	            availableIDs.addElement(newID, status);
913             }
914         }
915     } else {
916         removeSTV(source, target, variant);
917         availableIDs.removeElement((void*) &ID);
918     }
919 }
920 
921 /**
922  * Register a source-target/variant in the specDAG.  Variant may be
923  * empty, but source and target must not be.  If variant is empty then
924  * the special variant NO_VARIANT is stored in slot zero of the
925  * UVector of variants.
926  */
registerSTV(const UnicodeString & source,const UnicodeString & target,const UnicodeString & variant)927 void TransliteratorRegistry::registerSTV(const UnicodeString& source,
928                                          const UnicodeString& target,
929                                          const UnicodeString& variant) {
930     // assert(source.length() > 0);
931     // assert(target.length() > 0);
932     UErrorCode status = U_ZERO_ERROR;
933     Hashtable *targets = (Hashtable*) specDAG.get(source);
934     if (targets == 0) {
935         targets = new Hashtable(TRUE, status);
936         if (U_FAILURE(status) || targets == 0) {
937             return;
938         }
939         targets->setValueDeleter(uhash_deleteUObject);
940         specDAG.put(source, targets, status);
941     }
942     UVector *variants = (UVector*) targets->get(target);
943     if (variants == 0) {
944         variants = new UVector(uhash_deleteUnicodeString,
945                                uhash_compareCaselessUnicodeString, status);
946         if (variants == 0) {
947             return;
948         }
949         targets->put(target, variants, status);
950     }
951     // assert(NO_VARIANT == "");
952     // We add the variant string.  If it is the special "no variant"
953     // string, that is, the empty string, we add it at position zero.
954     if (!variants->contains((void*) &variant)) {
955     	UnicodeString *tempus; // Used for null pointer check.
956         if (variant.length() > 0) {
957         	tempus = new UnicodeString(variant);
958         	if (tempus != NULL) {
959         		variants->addElement(tempus, status);
960         	}
961         } else {
962         	tempus = new UnicodeString(NO_VARIANT) ;
963         	if (tempus != NULL) {
964         		variants->insertElementAt(tempus, 0, status);
965         	}
966         }
967     }
968 }
969 
970 /**
971  * Remove a source-target/variant from the specDAG.
972  */
removeSTV(const UnicodeString & source,const UnicodeString & target,const UnicodeString & variant)973 void TransliteratorRegistry::removeSTV(const UnicodeString& source,
974                                        const UnicodeString& target,
975                                        const UnicodeString& variant) {
976     // assert(source.length() > 0);
977     // assert(target.length() > 0);
978 //    UErrorCode status = U_ZERO_ERROR;
979     Hashtable *targets = (Hashtable*) specDAG.get(source);
980     if (targets == 0) {
981         return; // should never happen for valid s-t/v
982     }
983     UVector *variants = (UVector*) targets->get(target);
984     if (variants == 0) {
985         return; // should never happen for valid s-t/v
986     }
987     variants->removeElement((void*) &variant);
988     if (variants->size() == 0) {
989         targets->remove(target); // should delete variants
990         if (targets->count() == 0) {
991             specDAG.remove(source); // should delete targets
992         }
993     }
994 }
995 
996 /**
997  * Attempt to find a source-target/variant in the dynamic registry
998  * store.  Return 0 on failure.
999  *
1000  * Caller does NOT own returned object.
1001  */
findInDynamicStore(const TransliteratorSpec & src,const TransliteratorSpec & trg,const UnicodeString & variant) const1002 TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
1003                                                   const TransliteratorSpec& trg,
1004                                                   const UnicodeString& variant) const {
1005     UnicodeString ID;
1006     TransliteratorIDParser::STVtoID(src, trg, variant, ID);
1007     TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID);
1008     DEBUG_useEntry(e);
1009     return e;
1010 }
1011 
1012 /**
1013  * Attempt to find a source-target/variant in the static locale
1014  * resource store.  Do not perform fallback.  Return 0 on failure.
1015  *
1016  * On success, create a new entry object, register it in the dynamic
1017  * store, and return a pointer to it, but do not make it public --
1018  * just because someone requested something, we do not expand the
1019  * available ID list (or spec DAG).
1020  *
1021  * Caller does NOT own returned object.
1022  */
findInStaticStore(const TransliteratorSpec & src,const TransliteratorSpec & trg,const UnicodeString & variant)1023 TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
1024                                                  const TransliteratorSpec& trg,
1025                                                  const UnicodeString& variant) {
1026     TransliteratorEntry* entry = 0;
1027     if (src.isLocale()) {
1028         entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
1029     } else if (trg.isLocale()) {
1030         entry = findInBundle(trg, src, variant, UTRANS_REVERSE);
1031     }
1032 
1033     // If we found an entry, store it in the Hashtable for next
1034     // time.
1035     if (entry != 0) {
1036         registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE);
1037     }
1038 
1039     return entry;
1040 }
1041 
1042 // As of 2.0, resource bundle keys cannot contain '_'
1043 static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo"
1044 
1045 static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom"
1046 
1047 static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate"
1048 
1049 /**
1050  * Attempt to find an entry in a single resource bundle.  This is
1051  * a one-sided lookup.  findInStaticStore() performs up to two such
1052  * lookups, one for the source, and one for the target.
1053  *
1054  * Do not perform fallback.  Return 0 on failure.
1055  *
1056  * On success, create a new Entry object, populate it, and return it.
1057  * The caller owns the returned object.
1058  */
findInBundle(const TransliteratorSpec & specToOpen,const TransliteratorSpec & specToFind,const UnicodeString & variant,UTransDirection direction)1059 TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
1060                                             const TransliteratorSpec& specToFind,
1061                                             const UnicodeString& variant,
1062                                             UTransDirection direction)
1063 {
1064     UnicodeString utag;
1065     UnicodeString resStr;
1066     int32_t pass;
1067 
1068     for (pass=0; pass<2; ++pass) {
1069         utag.truncate(0);
1070         // First try either TransliteratorTo_xxx or
1071         // TransliterateFrom_xxx, then try the bidirectional
1072         // Transliterate_xxx.  This precedence order is arbitrary
1073         // but must be consistent and documented.
1074         if (pass == 0) {
1075             utag.append(direction == UTRANS_FORWARD ?
1076                         TRANSLITERATE_TO : TRANSLITERATE_FROM);
1077         } else {
1078             utag.append(TRANSLITERATE);
1079         }
1080         UnicodeString s(specToFind.get());
1081         utag.append(s.toUpper(""));
1082         UErrorCode status = U_ZERO_ERROR;
1083         ResourceBundle subres(specToOpen.getBundle().get(
1084             CharString().appendInvariantChars(utag, status).data(), status));
1085         if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
1086             continue;
1087         }
1088 
1089         s.truncate(0);
1090         if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
1091             continue;
1092         }
1093 
1094         if (variant.length() != 0) {
1095             status = U_ZERO_ERROR;
1096             resStr = subres.getStringEx(
1097                 CharString().appendInvariantChars(variant, status).data(), status);
1098             if (U_SUCCESS(status)) {
1099                 // Exit loop successfully
1100                 break;
1101             }
1102         } else {
1103             // Variant is empty, which means match the first variant listed.
1104             status = U_ZERO_ERROR;
1105             resStr = subres.getStringEx(1, status);
1106             if (U_SUCCESS(status)) {
1107                 // Exit loop successfully
1108                 break;
1109             }
1110         }
1111     }
1112 
1113     if (pass==2) {
1114         // Failed
1115         return NULL;
1116     }
1117 
1118     // We have succeeded in loading a string from the locale
1119     // resources.  Create a new registry entry to hold it and return it.
1120     TransliteratorEntry *entry = new TransliteratorEntry();
1121     if (entry != 0) {
1122         // The direction is always forward for the
1123         // TransliterateTo_xxx and TransliterateFrom_xxx
1124         // items; those are unidirectional forward rules.
1125         // For the bidirectional Transliterate_xxx items,
1126         // the direction is the value passed in to this
1127         // function.
1128         int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
1129         entry->entryType = TransliteratorEntry::LOCALE_RULES;
1130         entry->stringArg = resStr;
1131         entry->intArg = dir;
1132     }
1133 
1134     return entry;
1135 }
1136 
1137 /**
1138  * Convenience method.  Calls 3-arg find().
1139  */
find(const UnicodeString & ID)1140 TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
1141     UnicodeString source, target, variant;
1142     UBool sawSource;
1143     TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
1144     return find(source, target, variant);
1145 }
1146 
1147 /**
1148  * Top-level find method.  Attempt to find a source-target/variant in
1149  * either the dynamic or the static (locale resource) store.  Perform
1150  * fallback.
1151  *
1152  * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
1153  *
1154  *   ss_SS_SSS-tt_TT_TTT/v -- in hashtable
1155  *   ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
1156  *
1157  *     repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
1158  *
1159  *     ss_SS_SSS-t/ *
1160  *     ss_SS-t/ *
1161  *     ss-t/ *
1162  *     sscript-t/ *
1163  *
1164  * Here * matches the first variant listed.
1165  *
1166  * Caller does NOT own returned object.  Return 0 on failure.
1167  */
find(UnicodeString & source,UnicodeString & target,UnicodeString & variant)1168 TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
1169                                     UnicodeString& target,
1170                                     UnicodeString& variant) {
1171 
1172     TransliteratorSpec src(source);
1173     TransliteratorSpec trg(target);
1174     TransliteratorEntry* entry;
1175 
1176     // Seek exact match in hashtable.  Temporary fix for ICU 4.6.
1177     // TODO: The general logic for finding a matching transliterator needs to be reviewed.
1178     // ICU ticket #8089
1179     UnicodeString ID;
1180     TransliteratorIDParser::STVtoID(source, target, variant, ID);
1181     entry = (TransliteratorEntry*) registry.get(ID);
1182     if (entry != 0) {
1183         // std::string ss;
1184         // std::cout << ID.toUTF8String(ss) << std::endl;
1185         return entry;
1186     }
1187 
1188     if (variant.length() != 0) {
1189 
1190         // Seek exact match in hashtable
1191         entry = findInDynamicStore(src, trg, variant);
1192         if (entry != 0) {
1193             return entry;
1194         }
1195 
1196         // Seek exact match in locale resources
1197         entry = findInStaticStore(src, trg, variant);
1198         if (entry != 0) {
1199             return entry;
1200         }
1201     }
1202 
1203     for (;;) {
1204         src.reset();
1205         for (;;) {
1206             // Seek match in hashtable
1207             entry = findInDynamicStore(src, trg, NO_VARIANT);
1208             if (entry != 0) {
1209                 return entry;
1210             }
1211 
1212             // Seek match in locale resources
1213             entry = findInStaticStore(src, trg, NO_VARIANT);
1214             if (entry != 0) {
1215                 return entry;
1216             }
1217             if (!src.hasFallback()) {
1218                 break;
1219             }
1220             src.next();
1221         }
1222         if (!trg.hasFallback()) {
1223             break;
1224         }
1225         trg.next();
1226     }
1227 
1228     return 0;
1229 }
1230 
1231 /**
1232  * Given an Entry object, instantiate it.  Caller owns result.  Return
1233  * 0 on failure.
1234  *
1235  * Return a non-empty aliasReturn value if the ID points to an alias.
1236  * We cannot instantiate it ourselves because the alias may contain
1237  * filters or compounds, which we do not understand.  Caller should
1238  * make aliasReturn empty before calling.
1239  *
1240  * The entry object is assumed to reside in the dynamic store.  It may be
1241  * modified.
1242  */
instantiateEntry(const UnicodeString & ID,TransliteratorEntry * entry,TransliteratorAlias * & aliasReturn,UErrorCode & status)1243 Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
1244                                                          TransliteratorEntry *entry,
1245                                                          TransliteratorAlias* &aliasReturn,
1246                                                          UErrorCode& status) {
1247     Transliterator *t = 0;
1248     U_ASSERT(aliasReturn == 0);
1249 
1250     switch (entry->entryType) {
1251     case TransliteratorEntry::RBT_DATA:
1252         t = new RuleBasedTransliterator(ID, entry->u.data);
1253         if (t == 0) {
1254             status = U_MEMORY_ALLOCATION_ERROR;
1255         }
1256         return t;
1257     case TransliteratorEntry::PROTOTYPE:
1258         t = entry->u.prototype->clone();
1259         if (t == 0) {
1260             status = U_MEMORY_ALLOCATION_ERROR;
1261         }
1262         return t;
1263     case TransliteratorEntry::ALIAS:
1264         aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
1265         if (aliasReturn == 0) {
1266             status = U_MEMORY_ALLOCATION_ERROR;
1267         }
1268         return 0;
1269     case TransliteratorEntry::FACTORY:
1270         t = entry->u.factory.function(ID, entry->u.factory.context);
1271         if (t == 0) {
1272             status = U_MEMORY_ALLOCATION_ERROR;
1273         }
1274         return t;
1275     case TransliteratorEntry::COMPOUND_RBT:
1276         {
1277             UVector* rbts = new UVector(entry->u.dataVector->size(), status);
1278             // Check for null pointer
1279             if (rbts == NULL) {
1280             	status = U_MEMORY_ALLOCATION_ERROR;
1281             	return NULL;
1282             }
1283             int32_t passNumber = 1;
1284             for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
1285                 Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++),
1286                     (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
1287                 if (t == 0)
1288                     status = U_MEMORY_ALLOCATION_ERROR;
1289                 else
1290                     rbts->addElement(t, status);
1291             }
1292             if (U_FAILURE(status)) {
1293                 delete rbts;
1294                 return 0;
1295             }
1296             aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
1297         }
1298         if (aliasReturn == 0) {
1299             status = U_MEMORY_ALLOCATION_ERROR;
1300         }
1301         return 0;
1302     case TransliteratorEntry::LOCALE_RULES:
1303         aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
1304                                               (UTransDirection) entry->intArg);
1305         if (aliasReturn == 0) {
1306             status = U_MEMORY_ALLOCATION_ERROR;
1307         }
1308         return 0;
1309     case TransliteratorEntry::RULES_FORWARD:
1310     case TransliteratorEntry::RULES_REVERSE:
1311         // Process the rule data into a TransliteratorRuleData object,
1312         // and possibly also into an ::id header and/or footer.  Then
1313         // we modify the registry with the parsed data and retry.
1314         {
1315             TransliteratorParser parser(status);
1316 
1317             // We use the file name, taken from another resource bundle
1318             // 2-d array at static init time, as a locale language.  We're
1319             // just using the locale mechanism to map through to a file
1320             // name; this in no way represents an actual locale.
1321             //CharString ch(entry->stringArg);
1322             //UResourceBundle *bundle = ures_openDirect(0, ch, &status);
1323             UnicodeString rules = entry->stringArg;
1324             //ures_close(bundle);
1325 
1326             //if (U_FAILURE(status)) {
1327                 // We have a failure of some kind.  Remove the ID from the
1328                 // registry so we don't keep trying.  NOTE: This will throw off
1329                 // anyone who is, at the moment, trying to iterate over the
1330                 // available IDs.  That's acceptable since we should never
1331                 // really get here except under installation, configuration,
1332                 // or unrecoverable run time memory failures.
1333             //    remove(ID);
1334             //} else {
1335 
1336                 // If the status indicates a failure, then we don't have any
1337                 // rules -- there is probably an installation error.  The list
1338                 // in the root locale should correspond to all the installed
1339                 // transliterators; if it lists something that's not
1340                 // installed, we'll get an error from ResourceBundle.
1341                 aliasReturn = new TransliteratorAlias(ID, rules,
1342                     ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
1343                      UTRANS_REVERSE : UTRANS_FORWARD));
1344                 if (aliasReturn == 0) {
1345                     status = U_MEMORY_ALLOCATION_ERROR;
1346                 }
1347             //}
1348         }
1349         return 0;
1350     default:
1351         U_ASSERT(FALSE); // can't get here
1352         return 0;
1353     }
1354 }
1355 U_NAMESPACE_END
1356 
1357 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1358 
1359 //eof
1360