1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2005, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * Date Name Description 7 * 06/21/00 aliu Creation. 8 ******************************************************************************* 9 */ 10 11 #ifndef UTRANS_H 12 #define UTRANS_H 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_TRANSLITERATION 17 18 #include "unicode/urep.h" 19 #include "unicode/parseerr.h" 20 #include "unicode/uenum.h" 21 22 /******************************************************************** 23 * General Notes 24 ******************************************************************** 25 */ 26 /** 27 * \file 28 * \brief C API: Transliterator 29 * 30 * <h2> Transliteration </h2> 31 * The data structures and functions described in this header provide 32 * transliteration services. Transliteration services are implemented 33 * as C++ classes. The comments and documentation in this header 34 * assume the reader is familiar with the C++ headers translit.h and 35 * associated documentation. 36 * 37 * A significant but incomplete subset of the C++ transliteration 38 * services are available to C code through this header. In order to 39 * access more complex transliteration services, refer to the C++ 40 * headers and documentation. 41 * 42 * There are two sets of functions for working with transliterator IDs: 43 * 44 * An old, deprecated set uses char * IDs, which works for true and pure 45 * identifiers that these APIs were designed for, 46 * for example "Cyrillic-Latin". 47 * It does not work when the ID contains filters ("[:Script=Cyrl:]") 48 * or even a complete set of rules because then the ID string contains more 49 * than just "invariant" characters (see utypes.h). 50 * 51 * A new set of functions replaces the old ones and uses UChar * IDs, 52 * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.) 53 */ 54 55 /******************************************************************** 56 * Data Structures 57 ********************************************************************/ 58 59 /** 60 * An opaque transliterator for use in C. Open with utrans_openxxx() 61 * and close with utrans_close() when done. Equivalent to the C++ class 62 * Transliterator and its subclasses. 63 * @see Transliterator 64 * @stable ICU 2.0 65 */ 66 typedef void* UTransliterator; 67 68 /** 69 * Direction constant indicating the direction in a transliterator, 70 * e.g., the forward or reverse rules of a RuleBasedTransliterator. 71 * Specified when a transliterator is opened. An "A-B" transliterator 72 * transliterates A to B when operating in the forward direction, and 73 * B to A when operating in the reverse direction. 74 * @stable ICU 2.0 75 */ 76 typedef enum UTransDirection { 77 78 /** 79 * UTRANS_FORWARD means from <source> to <target> for a 80 * transliterator with ID <source>-<target>. For a transliterator 81 * opened using a rule, it means forward direction rules, e.g., 82 * "A > B". 83 */ 84 UTRANS_FORWARD, 85 86 /** 87 * UTRANS_REVERSE means from <target> to <source> for a 88 * transliterator with ID <source>-<target>. For a transliterator 89 * opened using a rule, it means reverse direction rules, e.g., 90 * "A < B". 91 */ 92 UTRANS_REVERSE 93 94 } UTransDirection; 95 96 /** 97 * Position structure for utrans_transIncremental() incremental 98 * transliteration. This structure defines two substrings of the text 99 * being transliterated. The first region, [contextStart, 100 * contextLimit), defines what characters the transliterator will read 101 * as context. The second region, [start, limit), defines what 102 * characters will actually be transliterated. The second region 103 * should be a subset of the first. 104 * 105 * <p>After a transliteration operation, some of the indices in this 106 * structure will be modified. See the field descriptions for 107 * details. 108 * 109 * <p>contextStart <= start <= limit <= contextLimit 110 * 111 * <p>Note: All index values in this structure must be at code point 112 * boundaries. That is, none of them may occur between two code units 113 * of a surrogate pair. If any index does split a surrogate pair, 114 * results are unspecified. 115 * 116 * @stable ICU 2.0 117 */ 118 typedef struct UTransPosition { 119 120 /** 121 * Beginning index, inclusive, of the context to be considered for 122 * a transliteration operation. The transliterator will ignore 123 * anything before this index. INPUT/OUTPUT parameter: This parameter 124 * is updated by a transliteration operation to reflect the maximum 125 * amount of antecontext needed by a transliterator. 126 * @stable ICU 2.4 127 */ 128 int32_t contextStart; 129 130 /** 131 * Ending index, exclusive, of the context to be considered for a 132 * transliteration operation. The transliterator will ignore 133 * anything at or after this index. INPUT/OUTPUT parameter: This 134 * parameter is updated to reflect changes in the length of the 135 * text, but points to the same logical position in the text. 136 * @stable ICU 2.4 137 */ 138 int32_t contextLimit; 139 140 /** 141 * Beginning index, inclusive, of the text to be transliteratd. 142 * INPUT/OUTPUT parameter: This parameter is advanced past 143 * characters that have already been transliterated by a 144 * transliteration operation. 145 * @stable ICU 2.4 146 */ 147 int32_t start; 148 149 /** 150 * Ending index, exclusive, of the text to be transliteratd. 151 * INPUT/OUTPUT parameter: This parameter is updated to reflect 152 * changes in the length of the text, but points to the same 153 * logical position in the text. 154 * @stable ICU 2.4 155 */ 156 int32_t limit; 157 158 } UTransPosition; 159 160 /******************************************************************** 161 * General API 162 ********************************************************************/ 163 164 /** 165 * Open a custom transliterator, given a custom rules string 166 * OR 167 * a system transliterator, given its ID. 168 * Any non-NULL result from this function should later be closed with 169 * utrans_close(). 170 * 171 * @param id a valid transliterator ID 172 * @param idLength the length of the ID string, or -1 if NUL-terminated 173 * @param dir the desired direction 174 * @param rules the transliterator rules. See the C++ header rbt.h for 175 * rules syntax. If NULL then a system transliterator matching 176 * the ID is returned. 177 * @param rulesLength the length of the rules, or -1 if the rules 178 * are NUL-terminated. 179 * @param parseError a pointer to a UParseError struct to receive the details 180 * of any parsing errors. This parameter may be NULL if no 181 * parsing error details are desired. 182 * @param pErrorCode a pointer to the UErrorCode 183 * @return a transliterator pointer that may be passed to other 184 * utrans_xxx() functions, or NULL if the open call fails. 185 * @stable ICU 2.8 186 */ 187 U_STABLE UTransliterator* U_EXPORT2 188 utrans_openU(const UChar *id, 189 int32_t idLength, 190 UTransDirection dir, 191 const UChar *rules, 192 int32_t rulesLength, 193 UParseError *parseError, 194 UErrorCode *pErrorCode); 195 196 /** 197 * Open an inverse of an existing transliterator. For this to work, 198 * the inverse must be registered with the system. For example, if 199 * the Transliterator "A-B" is opened, and then its inverse is opened, 200 * the result is the Transliterator "B-A", if such a transliterator is 201 * registered with the system. Otherwise the result is NULL and a 202 * failing UErrorCode is set. Any non-NULL result from this function 203 * should later be closed with utrans_close(). 204 * 205 * @param trans the transliterator to open the inverse of. 206 * @param status a pointer to the UErrorCode 207 * @return a pointer to a newly-opened transliterator that is the 208 * inverse of trans, or NULL if the open call fails. 209 * @stable ICU 2.0 210 */ 211 U_STABLE UTransliterator* U_EXPORT2 212 utrans_openInverse(const UTransliterator* trans, 213 UErrorCode* status); 214 215 /** 216 * Create a copy of a transliterator. Any non-NULL result from this 217 * function should later be closed with utrans_close(). 218 * 219 * @param trans the transliterator to be copied. 220 * @param status a pointer to the UErrorCode 221 * @return a transliterator pointer that may be passed to other 222 * utrans_xxx() functions, or NULL if the clone call fails. 223 * @stable ICU 2.0 224 */ 225 U_STABLE UTransliterator* U_EXPORT2 226 utrans_clone(const UTransliterator* trans, 227 UErrorCode* status); 228 229 /** 230 * Close a transliterator. Any non-NULL pointer returned by 231 * utrans_openXxx() or utrans_clone() should eventually be closed. 232 * @param trans the transliterator to be closed. 233 * @stable ICU 2.0 234 */ 235 U_STABLE void U_EXPORT2 236 utrans_close(UTransliterator* trans); 237 238 /** 239 * Return the programmatic identifier for this transliterator. 240 * If this identifier is passed to utrans_openU(), it will open 241 * a transliterator equivalent to this one, if the ID has been 242 * registered. 243 * 244 * @param trans the transliterator to return the ID of. 245 * @param resultLength pointer to an output variable receiving the length 246 * of the ID string; can be NULL 247 * @return the NUL-terminated ID string. This pointer remains 248 * valid until utrans_close() is called on this transliterator. 249 * 250 * @stable ICU 2.8 251 */ 252 U_STABLE const UChar * U_EXPORT2 253 utrans_getUnicodeID(const UTransliterator *trans, 254 int32_t *resultLength); 255 256 /** 257 * Register an open transliterator with the system. When 258 * utrans_open() is called with an ID string that is equal to that 259 * returned by utrans_getID(adoptedTrans,...), then 260 * utrans_clone(adoptedTrans,...) is returned. 261 * 262 * <p>NOTE: After this call the system owns the adoptedTrans and will 263 * close it. The user must not call utrans_close() on adoptedTrans. 264 * 265 * @param adoptedTrans a transliterator, typically the result of 266 * utrans_openRules(), to be registered with the system. 267 * @param status a pointer to the UErrorCode 268 * @stable ICU 2.0 269 */ 270 U_STABLE void U_EXPORT2 271 utrans_register(UTransliterator* adoptedTrans, 272 UErrorCode* status); 273 274 /** 275 * Unregister a transliterator from the system. After this call the 276 * system will no longer recognize the given ID when passed to 277 * utrans_open(). If the ID is invalid then nothing is done. 278 * 279 * @param id an ID to unregister 280 * @param idLength the length of id, or -1 if id is zero-terminated 281 * @stable ICU 2.8 282 */ 283 U_STABLE void U_EXPORT2 284 utrans_unregisterID(const UChar* id, int32_t idLength); 285 286 /** 287 * Set the filter used by a transliterator. A filter can be used to 288 * make the transliterator pass certain characters through untouched. 289 * The filter is expressed using a UnicodeSet pattern. If the 290 * filterPattern is NULL or the empty string, then the transliterator 291 * will be reset to use no filter. 292 * 293 * @param trans the transliterator 294 * @param filterPattern a pattern string, in the form accepted by 295 * UnicodeSet, specifying which characters to apply the 296 * transliteration to. May be NULL or the empty string to indicate no 297 * filter. 298 * @param filterPatternLen the length of filterPattern, or -1 if 299 * filterPattern is zero-terminated 300 * @param status a pointer to the UErrorCode 301 * @see UnicodeSet 302 * @stable ICU 2.0 303 */ 304 U_STABLE void U_EXPORT2 305 utrans_setFilter(UTransliterator* trans, 306 const UChar* filterPattern, 307 int32_t filterPatternLen, 308 UErrorCode* status); 309 310 /** 311 * Return the number of system transliterators. 312 * It is recommended to use utrans_openIDs() instead. 313 * 314 * @return the number of system transliterators. 315 * @stable ICU 2.0 316 */ 317 U_STABLE int32_t U_EXPORT2 318 utrans_countAvailableIDs(void); 319 320 /** 321 * Return a UEnumeration for the available transliterators. 322 * 323 * @param pErrorCode Pointer to the UErrorCode in/out parameter. 324 * @return UEnumeration for the available transliterators. 325 * Close with uenum_close(). 326 * 327 * @stable ICU 2.8 328 */ 329 U_STABLE UEnumeration * U_EXPORT2 330 utrans_openIDs(UErrorCode *pErrorCode); 331 332 /******************************************************************** 333 * Transliteration API 334 ********************************************************************/ 335 336 /** 337 * Transliterate a segment of a UReplaceable string. The string is 338 * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks 339 * function pointer struct repFunc. Functions in the repFunc struct 340 * will be called in order to modify the rep string. 341 * 342 * @param trans the transliterator 343 * @param rep a pointer to the string. This will be passed to the 344 * repFunc functions. 345 * @param repFunc a set of function pointers that will be used to 346 * modify the string pointed to by rep. 347 * @param start the beginning index, inclusive; <code>0 <= start <= 348 * limit</code>. 349 * @param limit pointer to the ending index, exclusive; <code>start <= 350 * limit <= repFunc->length(rep)</code>. Upon return, *limit will 351 * contain the new limit index. The text previously occupying 352 * <code>[start, limit)</code> has been transliterated, possibly to a 353 * string of a different length, at <code>[start, 354 * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em> 355 * is the return value. 356 * @param status a pointer to the UErrorCode 357 * @stable ICU 2.0 358 */ 359 U_STABLE void U_EXPORT2 360 utrans_trans(const UTransliterator* trans, 361 UReplaceable* rep, 362 UReplaceableCallbacks* repFunc, 363 int32_t start, 364 int32_t* limit, 365 UErrorCode* status); 366 367 /** 368 * Transliterate the portion of the UReplaceable text buffer that can 369 * be transliterated unambiguosly. This method is typically called 370 * after new text has been inserted, e.g. as a result of a keyboard 371 * event. The transliterator will try to transliterate characters of 372 * <code>rep</code> between <code>index.cursor</code> and 373 * <code>index.limit</code>. Characters before 374 * <code>index.cursor</code> will not be changed. 375 * 376 * <p>Upon return, values in <code>index</code> will be updated. 377 * <code>index.start</code> will be advanced to the first 378 * character that future calls to this method will read. 379 * <code>index.cursor</code> and <code>index.limit</code> will 380 * be adjusted to delimit the range of text that future calls to 381 * this method may change. 382 * 383 * <p>Typical usage of this method begins with an initial call 384 * with <code>index.start</code> and <code>index.limit</code> 385 * set to indicate the portion of <code>text</code> to be 386 * transliterated, and <code>index.cursor == index.start</code>. 387 * Thereafter, <code>index</code> can be used without 388 * modification in future calls, provided that all changes to 389 * <code>text</code> are made via this method. 390 * 391 * <p>This method assumes that future calls may be made that will 392 * insert new text into the buffer. As a result, it only performs 393 * unambiguous transliterations. After the last call to this method, 394 * there may be untransliterated text that is waiting for more input 395 * to resolve an ambiguity. In order to perform these pending 396 * transliterations, clients should call utrans_trans() with a start 397 * of index.start and a limit of index.end after the last call to this 398 * method has been made. 399 * 400 * @param trans the transliterator 401 * @param rep a pointer to the string. This will be passed to the 402 * repFunc functions. 403 * @param repFunc a set of function pointers that will be used to 404 * modify the string pointed to by rep. 405 * @param pos a struct containing the start and limit indices of the 406 * text to be read and the text to be transliterated 407 * @param status a pointer to the UErrorCode 408 * @stable ICU 2.0 409 */ 410 U_STABLE void U_EXPORT2 411 utrans_transIncremental(const UTransliterator* trans, 412 UReplaceable* rep, 413 UReplaceableCallbacks* repFunc, 414 UTransPosition* pos, 415 UErrorCode* status); 416 417 /** 418 * Transliterate a segment of a UChar* string. The string is passed 419 * in in a UChar* buffer. The string is modified in place. If the 420 * result is longer than textCapacity, it is truncated. The actual 421 * length of the result is returned in *textLength, if textLength is 422 * non-NULL. *textLength may be greater than textCapacity, but only 423 * textCapacity UChars will be written to *text, including the zero 424 * terminator. 425 * 426 * @param trans the transliterator 427 * @param text a pointer to a buffer containing the text to be 428 * transliterated on input and the result text on output. 429 * @param textLength a pointer to the length of the string in text. 430 * If the length is -1 then the string is assumed to be 431 * zero-terminated. Upon return, the new length is stored in 432 * *textLength. If textLength is NULL then the string is assumed to 433 * be zero-terminated. 434 * @param textCapacity a pointer to the length of the text buffer. 435 * Upon return, 436 * @param start the beginning index, inclusive; <code>0 <= start <= 437 * limit</code>. 438 * @param limit pointer to the ending index, exclusive; <code>start <= 439 * limit <= repFunc->length(rep)</code>. Upon return, *limit will 440 * contain the new limit index. The text previously occupying 441 * <code>[start, limit)</code> has been transliterated, possibly to a 442 * string of a different length, at <code>[start, 443 * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em> 444 * is the return value. 445 * @param status a pointer to the UErrorCode 446 * @stable ICU 2.0 447 */ 448 U_STABLE void U_EXPORT2 449 utrans_transUChars(const UTransliterator* trans, 450 UChar* text, 451 int32_t* textLength, 452 int32_t textCapacity, 453 int32_t start, 454 int32_t* limit, 455 UErrorCode* status); 456 457 /** 458 * Transliterate the portion of the UChar* text buffer that can be 459 * transliterated unambiguosly. See utrans_transIncremental(). The 460 * string is passed in in a UChar* buffer. The string is modified in 461 * place. If the result is longer than textCapacity, it is truncated. 462 * The actual length of the result is returned in *textLength, if 463 * textLength is non-NULL. *textLength may be greater than 464 * textCapacity, but only textCapacity UChars will be written to 465 * *text, including the zero terminator. See utrans_transIncremental() 466 * for usage details. 467 * 468 * @param trans the transliterator 469 * @param text a pointer to a buffer containing the text to be 470 * transliterated on input and the result text on output. 471 * @param textLength a pointer to the length of the string in text. 472 * If the length is -1 then the string is assumed to be 473 * zero-terminated. Upon return, the new length is stored in 474 * *textLength. If textLength is NULL then the string is assumed to 475 * be zero-terminated. 476 * @param textCapacity the length of the text buffer 477 * @param pos a struct containing the start and limit indices of the 478 * text to be read and the text to be transliterated 479 * @param status a pointer to the UErrorCode 480 * @see utrans_transIncremental 481 * @stable ICU 2.0 482 */ 483 U_STABLE void U_EXPORT2 484 utrans_transIncrementalUChars(const UTransliterator* trans, 485 UChar* text, 486 int32_t* textLength, 487 int32_t textCapacity, 488 UTransPosition* pos, 489 UErrorCode* status); 490 491 /* deprecated API ----------------------------------------------------------- */ 492 493 /* see utrans.h documentation for why these functions are deprecated */ 494 495 /** 496 * Deprecated, use utrans_openU() instead. 497 * Open a custom transliterator, given a custom rules string 498 * OR 499 * a system transliterator, given its ID. 500 * Any non-NULL result from this function should later be closed with 501 * utrans_close(). 502 * 503 * @param id a valid ID, as returned by utrans_getAvailableID() 504 * @param dir the desired direction 505 * @param rules the transliterator rules. See the C++ header rbt.h 506 * for rules syntax. If NULL then a system transliterator matching 507 * the ID is returned. 508 * @param rulesLength the length of the rules, or -1 if the rules 509 * are zero-terminated. 510 * @param parseError a pointer to a UParseError struct to receive the 511 * details of any parsing errors. This parameter may be NULL if no 512 * parsing error details are desired. 513 * @param status a pointer to the UErrorCode 514 * @return a transliterator pointer that may be passed to other 515 * utrans_xxx() functions, or NULL if the open call fails. 516 * @deprecated ICU 2.8 Use utrans_openU() instead, see utrans.h 517 */ 518 U_DEPRECATED UTransliterator* U_EXPORT2 519 utrans_open(const char* id, 520 UTransDirection dir, 521 const UChar* rules, /* may be Null */ 522 int32_t rulesLength, /* -1 if null-terminated */ 523 UParseError* parseError, /* may be Null */ 524 UErrorCode* status); 525 526 /** 527 * Deprecated, use utrans_getUnicodeID() instead. 528 * Return the programmatic identifier for this transliterator. 529 * If this identifier is passed to utrans_open(), it will open 530 * a transliterator equivalent to this one, if the ID has been 531 * registered. 532 * @param trans the transliterator to return the ID of. 533 * @param buf the buffer in which to receive the ID. This may be 534 * NULL, in which case no characters are copied. 535 * @param bufCapacity the capacity of the buffer. Ignored if buf is 536 * NULL. 537 * @return the actual length of the ID, not including 538 * zero-termination. This may be greater than bufCapacity. 539 * @deprecated ICU 2.8 Use utrans_getUnicodeID() instead, see utrans.h 540 */ 541 U_DEPRECATED int32_t U_EXPORT2 542 utrans_getID(const UTransliterator* trans, 543 char* buf, 544 int32_t bufCapacity); 545 546 /** 547 * Deprecated, use utrans_unregisterID() instead. 548 * Unregister a transliterator from the system. After this call the 549 * system will no longer recognize the given ID when passed to 550 * utrans_open(). If the id is invalid then nothing is done. 551 * 552 * @param id a zero-terminated ID 553 * @deprecated ICU 2.8 Use utrans_unregisterID() instead, see utrans.h 554 */ 555 U_DEPRECATED void U_EXPORT2 556 utrans_unregister(const char* id); 557 558 /** 559 * Deprecated, use utrans_openIDs() instead. 560 * Return the ID of the index-th system transliterator. The result 561 * is placed in the given buffer. If the given buffer is too small, 562 * the initial substring is copied to buf. The result in buf is 563 * always zero-terminated. 564 * 565 * @param index the number of the transliterator to return. Must 566 * satisfy 0 <= index < utrans_countAvailableIDs(). If index is out 567 * of range then it is treated as if it were 0. 568 * @param buf the buffer in which to receive the ID. This may be 569 * NULL, in which case no characters are copied. 570 * @param bufCapacity the capacity of the buffer. Ignored if buf is 571 * NULL. 572 * @return the actual length of the index-th ID, not including 573 * zero-termination. This may be greater than bufCapacity. 574 * @deprecated ICU 2.8 Use utrans_openIDs() instead, see utrans.h 575 */ 576 U_DEPRECATED int32_t U_EXPORT2 577 utrans_getAvailableID(int32_t index, 578 char* buf, 579 int32_t bufCapacity); 580 581 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 582 583 #endif 584