/*
**********************************************************************
*   Copyright (c) 2001-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   08/10/2001  aliu        Creation.
**********************************************************************
*/
#ifndef _TRANSREG_H
#define _TRANSREG_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/translit.h"
#include "hash.h"
#include "uvector.h"

U_NAMESPACE_BEGIN

class TransliteratorEntry;
class TransliteratorSpec;
class UnicodeString;

//------------------------------------------------------------------
// TransliteratorAlias
//------------------------------------------------------------------

/**
 * A TransliteratorAlias object is returned by get() if the given ID
 * actually translates into something else.  The caller then invokes
 * the create() method on the alias to create the actual
 * transliterator, and deletes the alias.
 *
 * Why all the shenanigans?  To prevent circular calls between
 * the registry code and the transliterator code that deadlocks.
 *
 * Typical use, as described by the method comments below: if
 * isRuleBased() returns TRUE, call parse() and then
 * TransliteratorRegistry::reget(); otherwise call create().
 */
class TransliteratorAlias : public UMemory {
 public:
    /**
     * Construct a simple alias (type == SIMPLE)
     * @param aliasID the given id.
     * @param compoundFilter the compound filter.  NOTE(review): presumably
     * stored in the compoundFilter member, which is commented as an alias
     * (not owned) -- confirm against the constructor definition.
     */
    TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);

    /**
     * Construct a compound RBT alias (type == COMPOUND)
     * @param adoptedTransliterators NOTE(review): the name and the "owned"
     * comment on the transes member suggest ownership passes to this
     * object -- confirm against the constructor definition.
     */
    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
                        UVector* adoptedTransliterators,
                        const UnicodeSet* compoundFilter);

    /**
     * Construct a rules alias (type == RULES)
     */
    TransliteratorAlias(const UnicodeString& theID,
                        const UnicodeString& rules,
                        UTransDirection dir);

    ~TransliteratorAlias();

    /**
     * The whole point of create() is that the caller must invoke
     * it when the registry mutex is NOT held, to prevent deadlock.
     * It may only be called once.
     *
     * Note: Only call create() if isRuleBased() returns FALSE.
     *
     * This method must be called *outside* of the TransliteratorRegistry
     * mutex.
     */
    Transliterator* create(UParseError&, UErrorCode&);

    /**
     * Return TRUE if this alias is rule-based.  If so, the caller
     * must call parse() on it, then call TransliteratorRegistry::reget().
     */
    UBool isRuleBased() const;

    /**
     * If isRuleBased() returns TRUE, then the caller must call this
     * method, followed by TransliteratorRegistry::reget().  The latter
     * method must be called inside the TransliteratorRegistry mutex.
     *
     * Note: Only call parse() if isRuleBased() returns TRUE.
     *
     * This method must be called *outside* of the TransliteratorRegistry
     * mutex, because it can instantiate Transliterators embedded in
     * the rules via the "&Latin-Arabic()" syntax.
     */
    void parse(TransliteratorParser& parser,
               UParseError& pe, UErrorCode& ec) const;

 private:
    // We actually come in three flavors:
    // 1. Simple alias
    //    Here aliasID is the alias string.  Everything else is
    //    null, zero, empty.
    // 2. CompoundRBT
    //    Here ID is the ID, aliasID is the idBlock, trans is the
    //    contained RBT, and idSplitPoint is the offset in aliasID
    //    where the contained RBT goes.  compoundFilter is the
    //    compound filter, and it is _not_ owned.
    // 3. Rules
    //    Here ID is the ID, aliasID is the rules string.
    //    idSplitPoint is the UTransDirection.
    //
    // NOTE(review): the names aliasID, trans and idSplitPoint above do
    // not match any member declared below.  They presumably refer to
    // aliasesOrRules, transes and direction respectively -- confirm
    // against the constructor definitions before relying on this.
    UnicodeString ID;
    UnicodeString aliasesOrRules;
    UVector* transes; // owned
    const UnicodeSet* compoundFilter; // alias
    UTransDirection direction;
    enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this is

    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
    TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
};


/**
 * A registry of system transliterators.  This is the data structure
 * that implements the mapping between transliterator IDs and the data
 * or function pointers used to create the corresponding
 * transliterators.  There is one instance of the registry that is
 * created statically.
 *
 * The registry consists of a dynamic component -- a hashtable -- and
 * a static component -- locale resource bundles.  The dynamic store
 * is semantically overlaid on the static store, so the static mapping
 * can be dynamically overridden.
 *
 * This is an internal class that is only used by Transliterator.
 * Transliterator maintains one static instance of this class and
 * delegates all registry-related operations to it.
 *
 * @author Alan Liu
 */
class TransliteratorRegistry : public UMemory {

 public:

    /**
     * Constructor
     * @param status Output param set to success/failure code.
     */
    TransliteratorRegistry(UErrorCode& status);

    /**
     * Nonvirtual destructor -- this class is not subclassable.
     */
    ~TransliteratorRegistry();

    //------------------------------------------------------------------
    // Basic public API
    //------------------------------------------------------------------

    /**
     * Given a simple ID (forward direction, no inline filter, not
     * compound) attempt to instantiate it from the registry.  Return
     * 0 on failure.
     *
     * Return a non-NULL aliasReturn value if the ID points to an alias.
     * We cannot instantiate it ourselves because the alias may contain
     * filters or compounds, which we do not understand.  Caller should
     * make aliasReturn NULL before calling.
     * @param ID          the given ID
     * @param aliasReturn output param to receive TransliteratorAlias;
     *                    should be NULL on entry
     * @param status      Output param set to success/failure code.
     * @return the instantiated transliterator, or 0 on failure.
     */
    Transliterator* get(const UnicodeString& ID,
                        TransliteratorAlias*& aliasReturn,
                        UErrorCode& status);

    /**
     * The caller must call this after calling get(), if [a] calling get()
     * returns an alias, and [b] the alias is rule based.  In that
     * situation the caller must call alias->parse() to do the parsing
     * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
     * instantiating the transliterator.
     *
     * Note: Another alias might be returned by this method.
     *
     * This method (like all public methods of this class) must be called
     * from within the TransliteratorRegistry mutex.
     *
     * @param ID          NOTE(review): presumably the same ID that was
     *                    passed to get() -- confirm against callers
     * @param parser      NOTE(review): presumably the parser that was
     *                    passed to alias->parse() -- confirm against callers
     * @param aliasReturn output param to receive TransliteratorAlias;
     *                    should be NULL on entry
     * @param status      Output param set to success/failure code.
     */
    Transliterator* reget(const UnicodeString& ID,
                          TransliteratorParser& parser,
                          TransliteratorAlias*& aliasReturn,
                          UErrorCode& status);

    /**
     * Register a prototype (adopted).  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     */
    void put(Transliterator* adoptedProto,
             UBool visible,
             UErrorCode& ec);

    /**
     * Register an ID and a factory function pointer.  This adds an
     * entry to the dynamic store, or replaces an existing entry.  Any
     * entry in the underlying static locale resource store is masked.
     */
    void put(const UnicodeString& ID,
             Transliterator::Factory factory,
             Transliterator::Token context,
             UBool visible,
             UErrorCode& ec);

    /**
     * Register an ID and a resource name.  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     */
    void put(const UnicodeString& ID,
             const UnicodeString& resourceName,
             UTransDirection dir,
             UBool readonlyResourceAlias,
             UBool visible,
             UErrorCode& ec);

    /**
     * Register an ID and an alias ID.  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
     * underlying static locale resource store is masked.
     */
    void put(const UnicodeString& ID,
             const UnicodeString& alias,
             UBool readonlyAliasAlias,
             UBool visible,
             UErrorCode& ec);

    /**
     * Unregister an ID.  This removes an entry from the dynamic store
     * if there is one.  The static locale resource store is
     * unaffected.
     * @param ID    the given ID.
     */
    void remove(const UnicodeString& ID);

    //------------------------------------------------------------------
    // Public ID and spec management
    //------------------------------------------------------------------

    /**
     * Return a StringEnumeration over the IDs currently registered
     * with the system.
     * @internal
     */
    StringEnumeration* getAvailableIDs() const;

    /**
     * == OBSOLETE - remove in ICU 3.4 ==
     * Return the number of IDs currently registered with the system.
     * To retrieve the actual IDs, call getAvailableID(i) with
     * i from 0 to countAvailableIDs() - 1.
     * @return the number of IDs currently registered with the system.
     * @internal
     */
    int32_t countAvailableIDs(void) const;

    /**
     * == OBSOLETE - remove in ICU 3.4 ==
     * Return the index-th available ID.  index must be between 0
     * and countAvailableIDs() - 1, inclusive.  If index is out of
     * range, the result of getAvailableID(0) is returned.
     * @param index the given index.
     * @return the index-th available ID.  index must be between 0
     * and countAvailableIDs() - 1, inclusive.  If index is out of
     * range, the result of getAvailableID(0) is returned.
     * @internal
     */
    const UnicodeString& getAvailableID(int32_t index) const;

    /**
     * Return the number of registered source specifiers.
     * @return the number of registered source specifiers.
     */
    int32_t countAvailableSources(void) const;

    /**
     * Return a registered source specifier.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableSources()
     * @param result fill-in parameter to receive the source specifier.
     * If index is out of range, result will be empty.
     * @return reference to result
     */
    UnicodeString& getAvailableSource(int32_t index,
                                      UnicodeString& result) const;

    /**
     * Return the number of registered target specifiers for a given
     * source specifier.
     * @param source the given source specifier.
     * @return the number of registered target specifiers for a given
     * source specifier.
     */
    int32_t countAvailableTargets(const UnicodeString& source) const;

    /**
     * Return a registered target specifier for a given source.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableTargets(source)
     * @param source the source specifier
     * @param result fill-in parameter to receive the target specifier.
     * If source is invalid or if index is out of range, result will
     * be empty.
     * @return reference to result
     */
    UnicodeString& getAvailableTarget(int32_t index,
                                      const UnicodeString& source,
                                      UnicodeString& result) const;

    /**
     * Return the number of registered variant specifiers for a given
     * source-target pair.  There is always at least one variant: If
     * just source-target is registered, then the single variant
     * NO_VARIANT is returned.  If source-target/variant is registered
     * then that variant is returned.
     * @param source the source specifier
     * @param target the target specifier
     * @return the number of registered variant specifiers for a given
     * source-target pair.
     */
    int32_t countAvailableVariants(const UnicodeString& source,
                                   const UnicodeString& target) const;

    /**
     * Return a registered variant specifier for a given source-target
     * pair.  If NO_VARIANT is one of the variants, then it will be
     * at index 0.
     * @param index which specifier to return, from 0 to n-1, where
     * n = countAvailableVariants(source, target)
     * @param source the source specifier
     * @param target the target specifier
     * @param result fill-in parameter to receive the variant
     * specifier.  If source is invalid or if target is invalid or if
     * index is out of range, result will be empty.
     * @return reference to result
     */
    UnicodeString& getAvailableVariant(int32_t index,
                                       const UnicodeString& source,
                                       const UnicodeString& target,
                                       UnicodeString& result) const;

 private:

    //----------------------------------------------------------------
    // Private implementation
    //
    // Only declarations appear in this header; the behavior of these
    // helpers is defined in the implementation file.
    // NOTE(review): judging by their names, find*() look up entries
    // (dynamic store, static store, resource bundle), registerEntry()/
    // registerSTV()/removeSTV() maintain the stores, and
    // instantiateEntry() builds a Transliterator from an entry --
    // confirm against the definitions.
    //----------------------------------------------------------------

    TransliteratorEntry* find(const UnicodeString& ID);

    // Note: unlike the overload above, source/target/variant are taken
    // by non-const reference here.
    TransliteratorEntry* find(UnicodeString& source,
                              UnicodeString& target,
                              UnicodeString& variant);

    TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
                                            const TransliteratorSpec& trg,
                                            const UnicodeString& variant) const;

    TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
                                           const TransliteratorSpec& trg,
                                           const UnicodeString& variant);

    static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
                                             const TransliteratorSpec& specToFind,
                                             const UnicodeString& variant,
                                             UTransDirection direction);

    void registerEntry(const UnicodeString& source,
                       const UnicodeString& target,
                       const UnicodeString& variant,
                       TransliteratorEntry* adopted,
                       UBool visible);

    void registerEntry(const UnicodeString& ID,
                       TransliteratorEntry* adopted,
                       UBool visible);

    void registerEntry(const UnicodeString& ID,
                       const UnicodeString& source,
                       const UnicodeString& target,
                       const UnicodeString& variant,
                       TransliteratorEntry* adopted,
                       UBool visible);

    void registerSTV(const UnicodeString& source,
                     const UnicodeString& target,
                     const UnicodeString& variant);

    void removeSTV(const UnicodeString& source,
                   const UnicodeString& target,
                   const UnicodeString& variant);

    Transliterator* instantiateEntry(const UnicodeString& ID,
                                     TransliteratorEntry *entry,
                                     TransliteratorAlias*& aliasReturn,
                                     UErrorCode& status);

    /**
     * A StringEnumeration over the registered IDs in this object.
     */
    class Enumeration : public StringEnumeration {
    public:
        Enumeration(const TransliteratorRegistry& reg);
        virtual ~Enumeration();
        virtual int32_t count(UErrorCode& status) const;
        virtual const UnicodeString* snext(UErrorCode& status);
        virtual void reset(UErrorCode& status);
        static UClassID U_EXPORT2 getStaticClassID();
        virtual UClassID getDynamicClassID() const;
    private:
        // NOTE(review): presumably the current position in the
        // registry's list of available IDs -- confirm.
        int32_t index;
        // Held by reference, so the registry must outlive this object.
        const TransliteratorRegistry& reg;
    };
    friend class Enumeration;

 private:

    /**
     * Dynamic registry mapping full IDs to Entry objects.  This
     * contains both public and internal entities.  The visibility is
     * controlled by whether an entry is listed in availableIDs and
     * specDAG or not.
     */
    Hashtable registry;

    /**
     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
     * target => (UVector: variant)) The UVector of variants is never
     * empty.  For a source-target with no variant, the special
     * variant NO_VARIANT (the empty string) is stored in slot zero of
     * the UVector.
     */
    Hashtable specDAG;

    /**
     * Vector of public full IDs.
     */
    UVector availableIDs;

    TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
    TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
};

U_NAMESPACE_END

// Declared here, defined elsewhere.
// NOTE(review): presumably the library cleanup hook for the
// transliterator service -- confirm against the definition.
U_CFUNC UBool utrans_transliterator_cleanup(void);

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif
//eof