1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2001-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 08/10/2001 aliu Creation. 10 ********************************************************************** 11 */ 12 #ifndef _TRANSREG_H 13 #define _TRANSREG_H 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_TRANSLITERATION 18 19 #include "unicode/uobject.h" 20 #include "unicode/translit.h" 21 #include "hash.h" 22 #include "uvector.h" 23 24 U_NAMESPACE_BEGIN 25 26 class TransliteratorEntry; 27 class TransliteratorSpec; 28 class UnicodeString; 29 30 //------------------------------------------------------------------ 31 // TransliteratorAlias 32 //------------------------------------------------------------------ 33 34 /** 35 * A TransliteratorAlias object is returned by get() if the given ID 36 * actually translates into something else. The caller then invokes 37 * the create() method on the alias to create the actual 38 * transliterator, and deletes the alias. 39 * 40 * Why all the shenanigans? To prevent circular calls between 41 * the registry code and the transliterator code that deadlocks. 42 */ 43 class TransliteratorAlias : public UMemory { 44 public: 45 /** 46 * Construct a simple alias (type == SIMPLE) 47 * @param aliasID the given id. 48 */ 49 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); 50 51 /** 52 * Construct a compound RBT alias (type == COMPOUND) 53 */ 54 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, 55 UVector* adoptedTransliterators, 56 const UnicodeSet* compoundFilter); 57 58 /** 59 * Construct a rules alias (type = RULES) 60 */ 61 TransliteratorAlias(const UnicodeString& theID, 62 const UnicodeString& rules, 63 UTransDirection dir); 64 65 ~TransliteratorAlias(); 66 67 /** 68 * The whole point of create() is that the caller must invoke 69 * it when the registry mutex is NOT held, to prevent deadlock. 70 * It may only be called once. 71 * 72 * Note: Only call create() if isRuleBased() returns FALSE. 73 * 74 * This method must be called *outside* of the TransliteratorRegistry 75 * mutex. 76 */ 77 Transliterator* create(UParseError&, UErrorCode&); 78 79 /** 80 * Return TRUE if this alias is rule-based. If so, the caller 81 * must call parse() on it, then call TransliteratorRegistry::reget(). 82 */ 83 UBool isRuleBased() const; 84 85 /** 86 * If isRuleBased() returns TRUE, then the caller must call this 87 * method, followed by TransliteratorRegistry::reget(). The latter 88 * method must be called inside the TransliteratorRegistry mutex. 89 * 90 * Note: Only call parse() if isRuleBased() returns TRUE. 91 * 92 * This method must be called *outside* of the TransliteratorRegistry 93 * mutex, because it can instantiate Transliterators embedded in 94 * the rules via the "&Latin-Arabic()" syntax. 95 */ 96 void parse(TransliteratorParser& parser, 97 UParseError& pe, UErrorCode& ec) const; 98 99 private: 100 // We actually come in three flavors: 101 // 1. Simple alias 102 // Here aliasID is the alias string. Everything else is 103 // null, zero, empty. 104 // 2. CompoundRBT 105 // Here ID is the ID, aliasID is the idBlock, trans is the 106 // contained RBT, and idSplitPoint is the offet in aliasID 107 // where the contained RBT goes. compoundFilter is the 108 // compound filter, and it is _not_ owned. 109 // 3. Rules 110 // Here ID is the ID, aliasID is the rules string. 111 // idSplitPoint is the UTransDirection. 112 UnicodeString ID; 113 UnicodeString aliasesOrRules; 114 UVector* transes; // owned 115 const UnicodeSet* compoundFilter; // alias 116 UTransDirection direction; 117 enum { SIMPLE, COMPOUND, RULES } type; 118 119 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class 120 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class 121 }; 122 123 124 /** 125 * A registry of system transliterators. This is the data structure 126 * that implements the mapping between transliterator IDs and the data 127 * or function pointers used to create the corresponding 128 * transliterators. There is one instance of the registry that is 129 * created statically. 130 * 131 * The registry consists of a dynamic component -- a hashtable -- and 132 * a static component -- locale resource bundles. The dynamic store 133 * is semantically overlaid on the static store, so the static mapping 134 * can be dynamically overridden. 135 * 136 * This is an internal class that is only used by Transliterator. 137 * Transliterator maintains one static instance of this class and 138 * delegates all registry-related operations to it. 139 * 140 * @author Alan Liu 141 */ 142 class TransliteratorRegistry : public UMemory { 143 144 public: 145 146 /** 147 * Contructor 148 * @param status Output param set to success/failure code. 149 */ 150 TransliteratorRegistry(UErrorCode& status); 151 152 /** 153 * Nonvirtual destructor -- this class is not subclassable. 154 */ 155 ~TransliteratorRegistry(); 156 157 //------------------------------------------------------------------ 158 // Basic public API 159 //------------------------------------------------------------------ 160 161 /** 162 * Given a simple ID (forward direction, no inline filter, not 163 * compound) attempt to instantiate it from the registry. Return 164 * 0 on failure. 165 * 166 * Return a non-NULL aliasReturn value if the ID points to an alias. 167 * We cannot instantiate it ourselves because the alias may contain 168 * filters or compounds, which we do not understand. Caller should 169 * make aliasReturn NULL before calling. 170 * @param ID the given ID 171 * @param aliasReturn output param to receive TransliteratorAlias; 172 * should be NULL on entry 173 * @param parseError Struct to recieve information on position 174 * of error if an error is encountered 175 * @param status Output param set to success/failure code. 176 */ 177 Transliterator* get(const UnicodeString& ID, 178 TransliteratorAlias*& aliasReturn, 179 UErrorCode& status); 180 181 /** 182 * The caller must call this after calling get(), if [a] calling get() 183 * returns an alias, and [b] the alias is rule based. In that 184 * situation the caller must call alias->parse() to do the parsing 185 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry 186 * instantiating the transliterator. 187 * 188 * Note: Another alias might be returned by this method. 189 * 190 * This method (like all public methods of this class) must be called 191 * from within the TransliteratorRegistry mutex. 192 * 193 * @param aliasReturn output param to receive TransliteratorAlias; 194 * should be NULL on entry 195 */ 196 Transliterator* reget(const UnicodeString& ID, 197 TransliteratorParser& parser, 198 TransliteratorAlias*& aliasReturn, 199 UErrorCode& status); 200 201 /** 202 * Register a prototype (adopted). This adds an entry to the 203 * dynamic store, or replaces an existing entry. Any entry in the 204 * underlying static locale resource store is masked. 205 */ 206 void put(Transliterator* adoptedProto, 207 UBool visible, 208 UErrorCode& ec); 209 210 /** 211 * Register an ID and a factory function pointer. This adds an 212 * entry to the dynamic store, or replaces an existing entry. Any 213 * entry in the underlying static locale resource store is masked. 214 */ 215 void put(const UnicodeString& ID, 216 Transliterator::Factory factory, 217 Transliterator::Token context, 218 UBool visible, 219 UErrorCode& ec); 220 221 /** 222 * Register an ID and a resource name. This adds an entry to the 223 * dynamic store, or replaces an existing entry. Any entry in the 224 * underlying static locale resource store is masked. 225 */ 226 void put(const UnicodeString& ID, 227 const UnicodeString& resourceName, 228 UTransDirection dir, 229 UBool readonlyResourceAlias, 230 UBool visible, 231 UErrorCode& ec); 232 233 /** 234 * Register an ID and an alias ID. This adds an entry to the 235 * dynamic store, or replaces an existing entry. Any entry in the 236 * underlying static locale resource store is masked. 237 */ 238 void put(const UnicodeString& ID, 239 const UnicodeString& alias, 240 UBool readonlyAliasAlias, 241 UBool visible, 242 UErrorCode& ec); 243 244 /** 245 * Unregister an ID. This removes an entry from the dynamic store 246 * if there is one. The static locale resource store is 247 * unaffected. 248 * @param ID the given ID. 249 */ 250 void remove(const UnicodeString& ID); 251 252 //------------------------------------------------------------------ 253 // Public ID and spec management 254 //------------------------------------------------------------------ 255 256 /** 257 * Return a StringEnumeration over the IDs currently registered 258 * with the system. 259 * @internal 260 */ 261 StringEnumeration* getAvailableIDs() const; 262 263 /** 264 * == OBSOLETE - remove in ICU 3.4 == 265 * Return the number of IDs currently registered with the system. 266 * To retrieve the actual IDs, call getAvailableID(i) with 267 * i from 0 to countAvailableIDs() - 1. 268 * @return the number of IDs currently registered with the system. 269 * @internal 270 */ 271 int32_t countAvailableIDs(void) const; 272 273 /** 274 * == OBSOLETE - remove in ICU 3.4 == 275 * Return the index-th available ID. index must be between 0 276 * and countAvailableIDs() - 1, inclusive. If index is out of 277 * range, the result of getAvailableID(0) is returned. 278 * @param index the given index. 279 * @return the index-th available ID. index must be between 0 280 * and countAvailableIDs() - 1, inclusive. If index is out of 281 * range, the result of getAvailableID(0) is returned. 282 * @internal 283 */ 284 const UnicodeString& getAvailableID(int32_t index) const; 285 286 /** 287 * Return the number of registered source specifiers. 288 * @return the number of registered source specifiers. 289 */ 290 int32_t countAvailableSources(void) const; 291 292 /** 293 * Return a registered source specifier. 294 * @param index which specifier to return, from 0 to n-1, where 295 * n = countAvailableSources() 296 * @param result fill-in paramter to receive the source specifier. 297 * If index is out of range, result will be empty. 298 * @return reference to result 299 */ 300 UnicodeString& getAvailableSource(int32_t index, 301 UnicodeString& result) const; 302 303 /** 304 * Return the number of registered target specifiers for a given 305 * source specifier. 306 * @param source the given source specifier. 307 * @return the number of registered target specifiers for a given 308 * source specifier. 309 */ 310 int32_t countAvailableTargets(const UnicodeString& source) const; 311 312 /** 313 * Return a registered target specifier for a given source. 314 * @param index which specifier to return, from 0 to n-1, where 315 * n = countAvailableTargets(source) 316 * @param source the source specifier 317 * @param result fill-in paramter to receive the target specifier. 318 * If source is invalid or if index is out of range, result will 319 * be empty. 320 * @return reference to result 321 */ 322 UnicodeString& getAvailableTarget(int32_t index, 323 const UnicodeString& source, 324 UnicodeString& result) const; 325 326 /** 327 * Return the number of registered variant specifiers for a given 328 * source-target pair. There is always at least one variant: If 329 * just source-target is registered, then the single variant 330 * NO_VARIANT is returned. If source-target/variant is registered 331 * then that variant is returned. 332 * @param source the source specifiers 333 * @param target the target specifiers 334 * @return the number of registered variant specifiers for a given 335 * source-target pair. 336 */ 337 int32_t countAvailableVariants(const UnicodeString& source, 338 const UnicodeString& target) const; 339 340 /** 341 * Return a registered variant specifier for a given source-target 342 * pair. If NO_VARIANT is one of the variants, then it will be 343 * at index 0. 344 * @param index which specifier to return, from 0 to n-1, where 345 * n = countAvailableVariants(source, target) 346 * @param source the source specifier 347 * @param target the target specifier 348 * @param result fill-in paramter to receive the variant 349 * specifier. If source is invalid or if target is invalid or if 350 * index is out of range, result will be empty. 351 * @return reference to result 352 */ 353 UnicodeString& getAvailableVariant(int32_t index, 354 const UnicodeString& source, 355 const UnicodeString& target, 356 UnicodeString& result) const; 357 358 private: 359 360 //---------------------------------------------------------------- 361 // Private implementation 362 //---------------------------------------------------------------- 363 364 TransliteratorEntry* find(const UnicodeString& ID); 365 366 TransliteratorEntry* find(UnicodeString& source, 367 UnicodeString& target, 368 UnicodeString& variant); 369 370 TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, 371 const TransliteratorSpec& trg, 372 const UnicodeString& variant) const; 373 374 TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, 375 const TransliteratorSpec& trg, 376 const UnicodeString& variant); 377 378 static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, 379 const TransliteratorSpec& specToFind, 380 const UnicodeString& variant, 381 UTransDirection direction); 382 383 void registerEntry(const UnicodeString& source, 384 const UnicodeString& target, 385 const UnicodeString& variant, 386 TransliteratorEntry* adopted, 387 UBool visible); 388 389 void registerEntry(const UnicodeString& ID, 390 TransliteratorEntry* adopted, 391 UBool visible); 392 393 void registerEntry(const UnicodeString& ID, 394 const UnicodeString& source, 395 const UnicodeString& target, 396 const UnicodeString& variant, 397 TransliteratorEntry* adopted, 398 UBool visible); 399 400 void registerSTV(const UnicodeString& source, 401 const UnicodeString& target, 402 const UnicodeString& variant); 403 404 void removeSTV(const UnicodeString& source, 405 const UnicodeString& target, 406 const UnicodeString& variant); 407 408 Transliterator* instantiateEntry(const UnicodeString& ID, 409 TransliteratorEntry *entry, 410 TransliteratorAlias*& aliasReturn, 411 UErrorCode& status); 412 413 /** 414 * A StringEnumeration over the registered IDs in this object. 415 */ 416 class Enumeration : public StringEnumeration { 417 public: 418 Enumeration(const TransliteratorRegistry& reg); 419 virtual ~Enumeration(); 420 virtual int32_t count(UErrorCode& status) const; 421 virtual const UnicodeString* snext(UErrorCode& status); 422 virtual void reset(UErrorCode& status); 423 static UClassID U_EXPORT2 getStaticClassID(); 424 virtual UClassID getDynamicClassID() const; 425 private: 426 int32_t index; 427 const TransliteratorRegistry& reg; 428 }; 429 friend class Enumeration; 430 431 private: 432 433 /** 434 * Dynamic registry mapping full IDs to Entry objects. This 435 * contains both public and internal entities. The visibility is 436 * controlled by whether an entry is listed in availableIDs and 437 * specDAG or not. 438 */ 439 Hashtable registry; 440 441 /** 442 * DAG of visible IDs by spec. Hashtable: source => (Hashtable: 443 * target => (UVector: variant)) The UVector of variants is never 444 * empty. For a source-target with no variant, the special 445 * variant NO_VARIANT (the empty string) is stored in slot zero of 446 * the UVector. 447 */ 448 Hashtable specDAG; 449 450 /** 451 * Vector of public full IDs. 452 */ 453 UVector availableIDs; 454 455 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class 456 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class 457 }; 458 459 U_NAMESPACE_END 460 461 U_CFUNC UBool utrans_transliterator_cleanup(void); 462 463 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 464 465 #endif 466 //eof 467