1 /* 2 ********************************************************************** 3 * Copyright (c) 2001-2006, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Date Name Description 7 * 08/10/2001 aliu Creation. 8 ********************************************************************** 9 */ 10 #ifndef _TRANSREG_H 11 #define _TRANSREG_H 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_TRANSLITERATION 16 17 #include "unicode/uobject.h" 18 #include "unicode/translit.h" 19 #include "hash.h" 20 #include "uvector.h" 21 22 U_NAMESPACE_BEGIN 23 24 class Entry; 25 class Spec; 26 class UnicodeString; 27 28 //------------------------------------------------------------------ 29 // TransliteratorAlias 30 //------------------------------------------------------------------ 31 32 /** 33 * A TransliteratorAlias object is returned by get() if the given ID 34 * actually translates into something else. The caller then invokes 35 * the create() method on the alias to create the actual 36 * transliterator, and deletes the alias. 37 * 38 * Why all the shenanigans? To prevent circular calls between 39 * the registry code and the transliterator code that deadlocks. 40 */ 41 class TransliteratorAlias : public UMemory { 42 public: 43 /** 44 * Construct a simple alias (type == SIMPLE) 45 * @param aliasID the given id. 46 */ 47 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); 48 49 /** 50 * Construct a compound RBT alias (type == COMPOUND) 51 */ 52 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, 53 UVector* adoptedTransliterators, 54 const UnicodeSet* compoundFilter); 55 56 /** 57 * Construct a rules alias (type = RULES) 58 */ 59 TransliteratorAlias(const UnicodeString& theID, 60 const UnicodeString& rules, 61 UTransDirection dir); 62 63 ~TransliteratorAlias(); 64 65 /** 66 * The whole point of create() is that the caller must invoke 67 * it when the registry mutex is NOT held, to prevent deadlock. 68 * It may only be called once. 69 * 70 * Note: Only call create() if isRuleBased() returns FALSE. 71 * 72 * This method must be called *outside* of the TransliteratorRegistry 73 * mutex. 74 */ 75 Transliterator* create(UParseError&, UErrorCode&); 76 77 /** 78 * Return TRUE if this alias is rule-based. If so, the caller 79 * must call parse() on it, then call TransliteratorRegistry::reget(). 80 */ 81 UBool isRuleBased() const; 82 83 /** 84 * If isRuleBased() returns TRUE, then the caller must call this 85 * method, followed by TransliteratorRegistry::reget(). The latter 86 * method must be called inside the TransliteratorRegistry mutex. 87 * 88 * Note: Only call parse() if isRuleBased() returns TRUE. 89 * 90 * This method must be called *outside* of the TransliteratorRegistry 91 * mutex, because it can instantiate Transliterators embedded in 92 * the rules via the "&Latin-Arabic()" syntax. 93 */ 94 void parse(TransliteratorParser& parser, 95 UParseError& pe, UErrorCode& ec) const; 96 97 private: 98 // We actually come in three flavors: 99 // 1. Simple alias 100 // Here aliasID is the alias string. Everything else is 101 // null, zero, empty. 102 // 2. CompoundRBT 103 // Here ID is the ID, aliasID is the idBlock, trans is the 104 // contained RBT, and idSplitPoint is the offet in aliasID 105 // where the contained RBT goes. compoundFilter is the 106 // compound filter, and it is _not_ owned. 107 // 3. Rules 108 // Here ID is the ID, aliasID is the rules string. 109 // idSplitPoint is the UTransDirection. 110 UnicodeString ID; 111 UnicodeString aliasesOrRules; 112 UVector* transes; // owned 113 const UnicodeSet* compoundFilter; // alias 114 UTransDirection direction; 115 enum { SIMPLE, COMPOUND, RULES } type; 116 117 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class 118 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class 119 }; 120 121 122 /** 123 * A registry of system transliterators. This is the data structure 124 * that implements the mapping between transliterator IDs and the data 125 * or function pointers used to create the corresponding 126 * transliterators. There is one instance of the registry that is 127 * created statically. 128 * 129 * The registry consists of a dynamic component -- a hashtable -- and 130 * a static component -- locale resource bundles. The dynamic store 131 * is semantically overlaid on the static store, so the static mapping 132 * can be dynamically overridden. 133 * 134 * This is an internal class that is only used by Transliterator. 135 * Transliterator maintains one static instance of this class and 136 * delegates all registry-related operations to it. 137 * 138 * @author Alan Liu 139 */ 140 class TransliteratorRegistry : public UMemory { 141 142 public: 143 144 /** 145 * Contructor 146 * @param status Output param set to success/failure code. 147 */ 148 TransliteratorRegistry(UErrorCode& status); 149 150 /** 151 * Nonvirtual destructor -- this class is not subclassable. 152 */ 153 ~TransliteratorRegistry(); 154 155 //------------------------------------------------------------------ 156 // Basic public API 157 //------------------------------------------------------------------ 158 159 /** 160 * Given a simple ID (forward direction, no inline filter, not 161 * compound) attempt to instantiate it from the registry. Return 162 * 0 on failure. 163 * 164 * Return a non-NULL aliasReturn value if the ID points to an alias. 165 * We cannot instantiate it ourselves because the alias may contain 166 * filters or compounds, which we do not understand. Caller should 167 * make aliasReturn NULL before calling. 168 * @param ID the given ID 169 * @param aliasReturn output param to receive TransliteratorAlias; 170 * should be NULL on entry 171 * @param parseError Struct to recieve information on position 172 * of error if an error is encountered 173 * @param status Output param set to success/failure code. 174 */ 175 Transliterator* get(const UnicodeString& ID, 176 TransliteratorAlias*& aliasReturn, 177 UErrorCode& status); 178 179 /** 180 * The caller must call this after calling get(), if [a] calling get() 181 * returns an alias, and [b] the alias is rule based. In that 182 * situation the caller must call alias->parse() to do the parsing 183 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry 184 * instantiating the transliterator. 185 * 186 * Note: Another alias might be returned by this method. 187 * 188 * This method (like all public methods of this class) must be called 189 * from within the TransliteratorRegistry mutex. 190 * 191 * @param aliasReturn output param to receive TransliteratorAlias; 192 * should be NULL on entry 193 */ 194 Transliterator* reget(const UnicodeString& ID, 195 TransliteratorParser& parser, 196 TransliteratorAlias*& aliasReturn, 197 UErrorCode& status); 198 199 /** 200 * Register a prototype (adopted). This adds an entry to the 201 * dynamic store, or replaces an existing entry. Any entry in the 202 * underlying static locale resource store is masked. 203 */ 204 void put(Transliterator* adoptedProto, 205 UBool visible); 206 207 /** 208 * Register an ID and a factory function pointer. This adds an 209 * entry to the dynamic store, or replaces an existing entry. Any 210 * entry in the underlying static locale resource store is masked. 211 */ 212 void put(const UnicodeString& ID, 213 Transliterator::Factory factory, 214 Transliterator::Token context, 215 UBool visible); 216 217 /** 218 * Register an ID and a resource name. This adds an entry to the 219 * dynamic store, or replaces an existing entry. Any entry in the 220 * underlying static locale resource store is masked. 221 */ 222 void put(const UnicodeString& ID, 223 const UnicodeString& resourceName, 224 UTransDirection dir, 225 UBool readonlyResourceAlias, 226 UBool visible); 227 228 /** 229 * Register an ID and an alias ID. This adds an entry to the 230 * dynamic store, or replaces an existing entry. Any entry in the 231 * underlying static locale resource store is masked. 232 */ 233 void put(const UnicodeString& ID, 234 const UnicodeString& alias, 235 UBool readonlyAliasAlias, 236 UBool visible); 237 238 /** 239 * Unregister an ID. This removes an entry from the dynamic store 240 * if there is one. The static locale resource store is 241 * unaffected. 242 * @param ID the given ID. 243 */ 244 void remove(const UnicodeString& ID); 245 246 //------------------------------------------------------------------ 247 // Public ID and spec management 248 //------------------------------------------------------------------ 249 250 /** 251 * Return a StringEnumeration over the IDs currently registered 252 * with the system. 253 * @internal 254 */ 255 StringEnumeration* getAvailableIDs() const; 256 257 /** 258 * == OBSOLETE - remove in ICU 3.4 == 259 * Return the number of IDs currently registered with the system. 260 * To retrieve the actual IDs, call getAvailableID(i) with 261 * i from 0 to countAvailableIDs() - 1. 262 * @return the number of IDs currently registered with the system. 263 * @internal 264 */ 265 int32_t countAvailableIDs(void) const; 266 267 /** 268 * == OBSOLETE - remove in ICU 3.4 == 269 * Return the index-th available ID. index must be between 0 270 * and countAvailableIDs() - 1, inclusive. If index is out of 271 * range, the result of getAvailableID(0) is returned. 272 * @param index the given index. 273 * @return the index-th available ID. index must be between 0 274 * and countAvailableIDs() - 1, inclusive. If index is out of 275 * range, the result of getAvailableID(0) is returned. 276 * @internal 277 */ 278 const UnicodeString& getAvailableID(int32_t index) const; 279 280 /** 281 * Return the number of registered source specifiers. 282 * @return the number of registered source specifiers. 283 */ 284 int32_t countAvailableSources(void) const; 285 286 /** 287 * Return a registered source specifier. 288 * @param index which specifier to return, from 0 to n-1, where 289 * n = countAvailableSources() 290 * @param result fill-in paramter to receive the source specifier. 291 * If index is out of range, result will be empty. 292 * @return reference to result 293 */ 294 UnicodeString& getAvailableSource(int32_t index, 295 UnicodeString& result) const; 296 297 /** 298 * Return the number of registered target specifiers for a given 299 * source specifier. 300 * @param source the given source specifier. 301 * @return the number of registered target specifiers for a given 302 * source specifier. 303 */ 304 int32_t countAvailableTargets(const UnicodeString& source) const; 305 306 /** 307 * Return a registered target specifier for a given source. 308 * @param index which specifier to return, from 0 to n-1, where 309 * n = countAvailableTargets(source) 310 * @param source the source specifier 311 * @param result fill-in paramter to receive the target specifier. 312 * If source is invalid or if index is out of range, result will 313 * be empty. 314 * @return reference to result 315 */ 316 UnicodeString& getAvailableTarget(int32_t index, 317 const UnicodeString& source, 318 UnicodeString& result) const; 319 320 /** 321 * Return the number of registered variant specifiers for a given 322 * source-target pair. There is always at least one variant: If 323 * just source-target is registered, then the single variant 324 * NO_VARIANT is returned. If source-target/variant is registered 325 * then that variant is returned. 326 * @param source the source specifiers 327 * @param target the target specifiers 328 * @return the number of registered variant specifiers for a given 329 * source-target pair. 330 */ 331 int32_t countAvailableVariants(const UnicodeString& source, 332 const UnicodeString& target) const; 333 334 /** 335 * Return a registered variant specifier for a given source-target 336 * pair. If NO_VARIANT is one of the variants, then it will be 337 * at index 0. 338 * @param index which specifier to return, from 0 to n-1, where 339 * n = countAvailableVariants(source, target) 340 * @param source the source specifier 341 * @param target the target specifier 342 * @param result fill-in paramter to receive the variant 343 * specifier. If source is invalid or if target is invalid or if 344 * index is out of range, result will be empty. 345 * @return reference to result 346 */ 347 UnicodeString& getAvailableVariant(int32_t index, 348 const UnicodeString& source, 349 const UnicodeString& target, 350 UnicodeString& result) const; 351 352 private: 353 354 //---------------------------------------------------------------- 355 // Private implementation 356 //---------------------------------------------------------------- 357 358 Entry* find(const UnicodeString& ID); 359 360 Entry* find(UnicodeString& source, 361 UnicodeString& target, 362 UnicodeString& variant); 363 364 Entry* findInDynamicStore(const Spec& src, 365 const Spec& trg, 366 const UnicodeString& variant) const; 367 368 Entry* findInStaticStore(const Spec& src, 369 const Spec& trg, 370 const UnicodeString& variant); 371 372 static Entry* findInBundle(const Spec& specToOpen, 373 const Spec& specToFind, 374 const UnicodeString& variant, 375 UTransDirection direction); 376 377 void registerEntry(const UnicodeString& source, 378 const UnicodeString& target, 379 const UnicodeString& variant, 380 Entry* adopted, 381 UBool visible); 382 383 void registerEntry(const UnicodeString& ID, 384 Entry* adopted, 385 UBool visible); 386 387 void registerEntry(const UnicodeString& ID, 388 const UnicodeString& source, 389 const UnicodeString& target, 390 const UnicodeString& variant, 391 Entry* adopted, 392 UBool visible); 393 394 void registerSTV(const UnicodeString& source, 395 const UnicodeString& target, 396 const UnicodeString& variant); 397 398 void removeSTV(const UnicodeString& source, 399 const UnicodeString& target, 400 const UnicodeString& variant); 401 402 Transliterator* instantiateEntry(const UnicodeString& ID, 403 Entry *entry, 404 TransliteratorAlias*& aliasReturn, 405 UErrorCode& status); 406 407 /** 408 * A StringEnumeration over the registered IDs in this object. 409 */ 410 class Enumeration : public StringEnumeration { 411 public: 412 Enumeration(const TransliteratorRegistry& reg); 413 virtual ~Enumeration(); 414 virtual int32_t count(UErrorCode& status) const; 415 virtual const UnicodeString* snext(UErrorCode& status); 416 virtual void reset(UErrorCode& status); 417 static UClassID U_EXPORT2 getStaticClassID(); 418 virtual UClassID getDynamicClassID() const; 419 private: 420 int32_t index; 421 const TransliteratorRegistry& reg; 422 }; 423 friend class Enumeration; 424 425 private: 426 427 /** 428 * Dynamic registry mapping full IDs to Entry objects. This 429 * contains both public and internal entities. The visibility is 430 * controlled by whether an entry is listed in availableIDs and 431 * specDAG or not. 432 */ 433 Hashtable registry; 434 435 /** 436 * DAG of visible IDs by spec. Hashtable: source => (Hashtable: 437 * target => (UVector: variant)) The UVector of variants is never 438 * empty. For a source-target with no variant, the special 439 * variant NO_VARIANT (the empty string) is stored in slot zero of 440 * the UVector. 441 */ 442 Hashtable specDAG; 443 444 /** 445 * Vector of public full IDs. 446 */ 447 UVector availableIDs; 448 449 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class 450 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class 451 }; 452 453 U_NAMESPACE_END 454 455 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 456 457 #endif 458 //eof 459