• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (c) 2001-2006, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   08/10/2001  aliu        Creation.
8 **********************************************************************
9 */
10 #ifndef _TRANSREG_H
11 #define _TRANSREG_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "unicode/uobject.h"
18 #include "unicode/translit.h"
19 #include "hash.h"
20 #include "uvector.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 class Entry; // value type stored in TransliteratorRegistry::registry (see below)
25 class Spec; // taken by the findInDynamicStore/findInStaticStore/findInBundle helpers below
26 class UnicodeString; // string type used for IDs, rules and specifiers throughout this header
27 
28 //------------------------------------------------------------------
29 // TransliteratorAlias
30 //------------------------------------------------------------------
31 
32 /**
33  * A TransliteratorAlias object is returned by get() if the given ID
34  * actually translates into something else.  The caller then invokes
35  * the create() method on the alias to create the actual
36  * transliterator, and deletes the alias.
37  *
38  * Why all the shenanigans?  To prevent circular calls between
39  * the registry code and the transliterator code that would deadlock.
40  */
41 class TransliteratorAlias : public UMemory {
42  public:
43     /**
44      * Construct a simple alias (type == SIMPLE)
45      * @param aliasID the given id.  NOTE(review): compoundFilter is undocumented here; presumably kept as a non-owned alias -- confirm.
46      */
47     TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
48 
49     /**
50      * Construct a compound RBT alias (type == COMPOUND)
51      */
52     TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
53                         UVector* adoptedTransliterators,
54                         const UnicodeSet* compoundFilter);
55 
56     /**
57      * Construct a rules alias (type == RULES)
58      */
59     TransliteratorAlias(const UnicodeString& theID,
60                         const UnicodeString& rules,
61                         UTransDirection dir);
62 
63     ~TransliteratorAlias();
64 
65     /**
66      * The whole point of create() is that the caller must invoke
67      * it when the registry mutex is NOT held, to prevent deadlock.
68      * It may only be called once.
69      *
70      * Note: Only call create() if isRuleBased() returns FALSE.
71      *
72      * This method must be called *outside* of the TransliteratorRegistry
73      * mutex.
74      */
75     Transliterator* create(UParseError&, UErrorCode&);
76 
77     /**
78      * Return TRUE if this alias is rule-based.  If so, the caller
79      * must call parse() on it, then call TransliteratorRegistry::reget().
80      */
81     UBool isRuleBased() const;
82 
83     /**
84      * If isRuleBased() returns TRUE, then the caller must call this
85      * method, followed by TransliteratorRegistry::reget().  The latter
86      * method must be called inside the TransliteratorRegistry mutex.
87      *
88      * Note: Only call parse() if isRuleBased() returns TRUE.
89      *
90      * This method must be called *outside* of the TransliteratorRegistry
91      * mutex, because it can instantiate Transliterators embedded in
92      * the rules via the "&Latin-Arabic()" syntax.
93      */
94     void parse(TransliteratorParser& parser,
95                UParseError& pe, UErrorCode& ec) const;
96 
97  private:
98     // We actually come in three flavors (recorded in the type member).  NOTE(review): names below were aligned with the member declarations; confirm against the implementation.
99     // 1. Simple alias
100     //    Here aliasesOrRules is the alias string.  Everything else is
101     //    null, zero, empty.  NOTE(review): the SIMPLE constructor also takes a compoundFilter -- confirm.
102     // 2. CompoundRBT
103     //    Here ID is the ID, aliasesOrRules is the idBlocks string, and
104     //    transes holds the contained transliterators passed to the
105     //    COMPOUND constructor (owned).  compoundFilter is the
106     //    compound filter, and it is _not_ owned.
107     // 3. Rules
108     //    Here ID is the ID, aliasesOrRules is the rules string.
109     //    direction is the UTransDirection.
110     UnicodeString ID;
111     UnicodeString aliasesOrRules;
112     UVector* transes; // owned
113     const UnicodeSet* compoundFilter; // alias
114     UTransDirection direction;
115     enum { SIMPLE, COMPOUND, RULES } type;
116 
117     TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
118     TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
119 };
120 
121 
122 /**
123  * A registry of system transliterators.  This is the data structure
124  * that implements the mapping between transliterator IDs and the data
125  * or function pointers used to create the corresponding
126  * transliterators.  There is one instance of the registry that is
127  * created statically.
128  *
129  * The registry consists of a dynamic component -- a hashtable -- and
130  * a static component -- locale resource bundles.  The dynamic store
131  * is semantically overlaid on the static store, so the static mapping
132  * can be dynamically overridden.
133  *
134  * This is an internal class that is only used by Transliterator.
135  * Transliterator maintains one static instance of this class and
136  * delegates all registry-related operations to it.
137  *
138  * @author Alan Liu
139  */
140 class TransliteratorRegistry : public UMemory {
141 
142  public:
143 
144     /**
145      * Constructor
146      * @param status Output param set to success/failure code.
147      */
148     TransliteratorRegistry(UErrorCode& status);
149 
150     /**
151      * Nonvirtual destructor -- this class is not subclassable.
152      */
153     ~TransliteratorRegistry();
154 
155     //------------------------------------------------------------------
156     // Basic public API
157     //------------------------------------------------------------------
158 
159     /**
160      * Given a simple ID (forward direction, no inline filter, not
161      * compound) attempt to instantiate it from the registry.  Return
162      * 0 on failure.
163      *
164      * Return a non-NULL aliasReturn value if the ID points to an alias.
165      * We cannot instantiate it ourselves because the alias may contain
166      * filters or compounds, which we do not understand.  Caller should
167      * make aliasReturn NULL before calling.
168      * @param ID          the given ID
169      * @param aliasReturn output param to receive TransliteratorAlias;
170      *                    should be NULL on entry
171      * @param status      Output param set to success/failure code.
172      * @return            a transliterator instantiated from the registry,
173      *                    or 0 on failure.
174      */
175     Transliterator* get(const UnicodeString& ID,
176                         TransliteratorAlias*& aliasReturn,
177                         UErrorCode& status);
178 
179     /**
180      * The caller must call this after calling get(), if [a] calling get()
181      * returns an alias, and [b] the alias is rule based.  In that
182      * situation the caller must call alias->parse() to do the parsing
183      * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
184      * instantiating the transliterator.
185      *
186      * Note: Another alias might be returned by this method.
187      *
188      * This method (like all public methods of this class) must be called
189      * from within the TransliteratorRegistry mutex.
190      *
191      * @param aliasReturn output param to receive TransliteratorAlias;
192      *                    should be NULL on entry
193      */
194     Transliterator* reget(const UnicodeString& ID,
195                           TransliteratorParser& parser,
196                           TransliteratorAlias*& aliasReturn,
197                           UErrorCode& status);
198 
199     /**
200      * Register a prototype (adopted).  This adds an entry to the
201      * dynamic store, or replaces an existing entry.  Any entry in the
202      * underlying static locale resource store is masked.
203      */
204     void put(Transliterator* adoptedProto,
205              UBool visible);
206 
207     /**
208      * Register an ID and a factory function pointer.  This adds an
209      * entry to the dynamic store, or replaces an existing entry.  Any
210      * entry in the underlying static locale resource store is masked.
211      */
212     void put(const UnicodeString& ID,
213              Transliterator::Factory factory,
214              Transliterator::Token context,
215              UBool visible);
216 
217     /**
218      * Register an ID and a resource name.  This adds an entry to the
219      * dynamic store, or replaces an existing entry.  Any entry in the
220      * underlying static locale resource store is masked.
221      */
222     void put(const UnicodeString& ID,
223              const UnicodeString& resourceName,
224              UTransDirection dir,
225              UBool readonlyResourceAlias,
226              UBool visible);
227 
228     /**
229      * Register an ID and an alias ID.  This adds an entry to the
230      * dynamic store, or replaces an existing entry.  Any entry in the
231      * underlying static locale resource store is masked.
232      */
233     void put(const UnicodeString& ID,
234              const UnicodeString& alias,
235              UBool readonlyAliasAlias,
236              UBool visible);
237 
238     /**
239      * Unregister an ID.  This removes an entry from the dynamic store
240      * if there is one.  The static locale resource store is
241      * unaffected.
242      * @param ID    the given ID.
243      */
244     void remove(const UnicodeString& ID);
245 
246     //------------------------------------------------------------------
247     // Public ID and spec management
248     //------------------------------------------------------------------
249 
250     /**
251      * Return a StringEnumeration over the IDs currently registered
252      * with the system.
253      * @internal
254      */
255     StringEnumeration* getAvailableIDs() const;
256 
257     /**
258      * == OBSOLETE - remove in ICU 3.4 ==
259      * Return the number of IDs currently registered with the system.
260      * To retrieve the actual IDs, call getAvailableID(i) with
261      * i from 0 to countAvailableIDs() - 1.
262      * @return the number of IDs currently registered with the system.
263      * @internal
264      */
265     int32_t countAvailableIDs(void) const;
266 
267     /**
268      * == OBSOLETE - remove in ICU 3.4 ==
269      * Return the index-th available ID.  index must be between 0
270      * and countAvailableIDs() - 1, inclusive.  If index is out of
271      * range, the result of getAvailableID(0) is returned.
272      * @param index the given index.
273      * @return the index-th available ID.  index must be between 0
274      *         and countAvailableIDs() - 1, inclusive.  If index is out of
275      *         range, the result of getAvailableID(0) is returned.
276      * @internal
277      */
278     const UnicodeString& getAvailableID(int32_t index) const;
279 
280     /**
281      * Return the number of registered source specifiers.
282      * @return the number of registered source specifiers.
283      */
284     int32_t countAvailableSources(void) const;
285 
286     /**
287      * Return a registered source specifier.
288      * @param index which specifier to return, from 0 to n-1, where
289      * n = countAvailableSources()
290      * @param result fill-in parameter to receive the source specifier.
291      * If index is out of range, result will be empty.
292      * @return reference to result
293      */
294     UnicodeString& getAvailableSource(int32_t index,
295                                       UnicodeString& result) const;
296 
297     /**
298      * Return the number of registered target specifiers for a given
299      * source specifier.
300      * @param source the given source specifier.
301      * @return the number of registered target specifiers for a given
302      *         source specifier.
303      */
304     int32_t countAvailableTargets(const UnicodeString& source) const;
305 
306     /**
307      * Return a registered target specifier for a given source.
308      * @param index which specifier to return, from 0 to n-1, where
309      * n = countAvailableTargets(source)
310      * @param source the source specifier
311      * @param result fill-in parameter to receive the target specifier.
312      * If source is invalid or if index is out of range, result will
313      * be empty.
314      * @return reference to result
315      */
316     UnicodeString& getAvailableTarget(int32_t index,
317                                       const UnicodeString& source,
318                                       UnicodeString& result) const;
319 
320     /**
321      * Return the number of registered variant specifiers for a given
322      * source-target pair.  There is always at least one variant: If
323      * just source-target is registered, then the single variant
324      * NO_VARIANT is returned.  If source-target/variant is registered
325      * then that variant is returned.
326      * @param source the source specifiers
327      * @param target the target specifiers
328      * @return the number of registered variant specifiers for a given
329      *         source-target pair.
330      */
331     int32_t countAvailableVariants(const UnicodeString& source,
332                                    const UnicodeString& target) const;
333 
334     /**
335      * Return a registered variant specifier for a given source-target
336      * pair.  If NO_VARIANT is one of the variants, then it will be
337      * at index 0.
338      * @param index which specifier to return, from 0 to n-1, where
339      * n = countAvailableVariants(source, target)
340      * @param source the source specifier
341      * @param target the target specifier
342      * @param result fill-in parameter to receive the variant
343      * specifier.  If source is invalid or if target is invalid or if
344      * index is out of range, result will be empty.
345      * @return reference to result
346      */
347     UnicodeString& getAvailableVariant(int32_t index,
348                                        const UnicodeString& source,
349                                        const UnicodeString& target,
350                                        UnicodeString& result) const;
351 
352  private:
353 
354     //----------------------------------------------------------------
355     // Private implementation
356     //----------------------------------------------------------------
357 
358     Entry* find(const UnicodeString& ID);
359 
360     Entry* find(UnicodeString& source,
361                 UnicodeString& target,
362                 UnicodeString& variant);
363 
364     Entry* findInDynamicStore(const Spec& src,
365                               const Spec& trg,
366                               const UnicodeString& variant) const;
367 
368     Entry* findInStaticStore(const Spec& src,
369                              const Spec& trg,
370                              const UnicodeString& variant);
371 
372     static Entry* findInBundle(const Spec& specToOpen,
373                                const Spec& specToFind,
374                                const UnicodeString& variant,
375                                UTransDirection direction);
376 
377     void registerEntry(const UnicodeString& source,
378                        const UnicodeString& target,
379                        const UnicodeString& variant,
380                        Entry* adopted,
381                        UBool visible);
382 
383     void registerEntry(const UnicodeString& ID,
384                        Entry* adopted,
385                        UBool visible);
386 
387     void registerEntry(const UnicodeString& ID,
388                        const UnicodeString& source,
389                        const UnicodeString& target,
390                        const UnicodeString& variant,
391                        Entry* adopted,
392                        UBool visible);
393 
394     void registerSTV(const UnicodeString& source,
395                      const UnicodeString& target,
396                      const UnicodeString& variant);
397 
398     void removeSTV(const UnicodeString& source,
399                    const UnicodeString& target,
400                    const UnicodeString& variant);
401 
402     Transliterator* instantiateEntry(const UnicodeString& ID,
403                                      Entry *entry,
404                                      TransliteratorAlias*& aliasReturn,
405                                      UErrorCode& status);
406 
407     /**
408      * A StringEnumeration over the registered IDs in this object.
409      */
410     class Enumeration : public StringEnumeration {
411     public:
412         Enumeration(const TransliteratorRegistry& reg);
413         virtual ~Enumeration();
414         virtual int32_t count(UErrorCode& status) const;
415         virtual const UnicodeString* snext(UErrorCode& status);
416         virtual void reset(UErrorCode& status);
417         static UClassID U_EXPORT2 getStaticClassID();
418         virtual UClassID getDynamicClassID() const;
419     private:
420         int32_t index;
421         const TransliteratorRegistry& reg;
422     };
423     friend class Enumeration;
424 
425  private:
426 
427     /**
428      * Dynamic registry mapping full IDs to Entry objects.  This
429      * contains both public and internal entities.  The visibility is
430      * controlled by whether an entry is listed in availableIDs and
431      * specDAG or not.
432      */
433     Hashtable registry;
434 
435     /**
436      * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
437      * target => (UVector: variant)) The UVector of variants is never
438      * empty.  For a source-target with no variant, the special
439      * variant NO_VARIANT (the empty string) is stored in slot zero of
440      * the UVector.
441      */
442     Hashtable specDAG;
443 
444     /**
445      * Vector of public full IDs.
446      */
447     UVector availableIDs;
448 
449     TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
450     TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
451 };
452 
453 U_NAMESPACE_END
454 
455 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
456 
457 #endif
458 //eof
459