1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2004-2012, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: utext.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2004oct06 16 * created by: Markus W. Scherer 17 */ 18 19 #ifndef __UTEXT_H__ 20 #define __UTEXT_H__ 21 22 /** 23 * \file 24 * \brief C API: Abstract Unicode Text API 25 * 26 * The Text Access API provides a means to allow text that is stored in alternative 27 * formats to work with ICU services. ICU normally operates on text that is 28 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type 29 * UnicodeString for C++ APIs. 30 * 31 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous 32 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. 33 * 34 * There are three general classes of usage for UText: 35 * 36 * Application Level Use. This is the simplest usage - applications would 37 * use one of the utext_open() functions on their input text, and pass 38 * the resulting UText to the desired ICU service. 39 * 40 * Second is usage in ICU Services, such as break iteration, that will need to 41 * operate on input presented to them as a UText. These implementations 42 * will need to use the iteration and related UText functions to gain 43 * access to the actual text. 44 * 45 * The third class of UText users are "text providers." These are the 46 * UText implementations for the various text storage formats. 
An application 47 * or system with a unique text storage format can implement a set of 48 * UText provider functions for that format, which will then allow 49 * ICU services to operate on that format. 50 * 51 * 52 * <em>Iterating over text</em> 53 * 54 * Here is sample code for a forward iteration over the contents of a UText 55 * 56 * \code 57 * UChar32 c; 58 * UText *ut = whatever(); 59 * 60 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { 61 * // do whatever with the codepoint c here. 62 * } 63 * \endcode 64 * 65 * And here is similar code to iterate in the reverse direction, from the end 66 * of the text towards the beginning. 67 * 68 * \code 69 * UChar32 c; 70 * UText *ut = whatever(); 71 * int64_t textLength = utext_nativeLength(ut); 72 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { 73 * // do whatever with the codepoint c here. 74 * } 75 * \endcode 76 * 77 * <em>Characters and Indexing</em> 78 * 79 * Indexing into text by UText functions is nearly always in terms of the native 80 * indexing of the underlying text storage. The storage format could be UTF-8 81 * or UTF-32, for example. When coding to the UText access API, no assumptions 82 * can be made regarding the size of characters, or how far an index 83 * may move when iterating between characters. 84 * 85 * All indices supplied to UText functions are pinned to the length of the 86 * text. An out-of-bounds index is not considered to be an error, but is 87 * adjusted to be in the range 0 <= index <= length of input text. 88 * 89 * 90 * When an index position is returned from a UText function, it will be 91 * a native index to the underlying text. In the case of multi-unit characters, 92 * it will always refer to the first position of the character, 93 * never to the interior. This is essentially the same thing as saying that 94 * a returned index will always point to a boundary between characters.
95 * 96 * When a native index is supplied to a UText function, all indices that 97 * refer to any part of a multi-unit character representation are considered 98 * to be equivalent. In the case of multi-unit characters, an incoming index 99 * will be logically normalized to refer to the start of the character. 100 * 101 * It is possible to test whether a native index is on a code point boundary 102 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). 103 * If the index is returned unchanged, it was on a code point boundary. If 104 * an adjusted index is returned, the original index referred to the 105 * interior of a character. 106 * 107 * <em>Conventions for calling UText functions</em> 108 * 109 * Most UText access functions have as their first parameter a (UText *) pointer, 110 * which specifies the UText to be used. Unless otherwise noted, the 111 * pointer must refer to a valid, open UText. Attempting to 112 * use a closed UText or passing a NULL pointer is a programming error and 113 * will produce undefined results or NULL pointer exceptions. 114 * 115 * The utext_open family of functions can either open an existing (closed) 116 * UText, or heap allocate a new UText. Here is sample code for creating 117 * a stack-allocated UText. 118 * 119 * \code 120 * char *s = whatever(); // A utf-8 string 121 * UErrorCode status = U_ZERO_ERROR; 122 * UText ut = UTEXT_INITIALIZER; 123 * utext_openUTF8(&ut, s, -1, &status); 124 * if (U_FAILURE(status)) { 125 * // error handling 126 * } else { 127 * // work with the UText 128 * } 129 * \endcode 130 * 131 * Any existing UText passed to an open function _must_ have been initialized, 132 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated 133 * by an open function. Passing NULL will cause the open function to 134 * heap-allocate and fully initialize a new UText.
135 * 136 */ 137 138 139 140 #include "unicode/utypes.h" 141 #include "unicode/uchar.h" 142 #if U_SHOW_CPLUSPLUS_API 143 #include "unicode/localpointer.h" 144 #include "unicode/rep.h" 145 #include "unicode/unistr.h" 146 #include "unicode/chariter.h" 147 #endif 148 149 150 U_CDECL_BEGIN 151 152 struct UText; 153 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ 154 155 156 /*************************************************************************************** 157 * 158 * C Functions for creating UText wrappers around various kinds of text strings. 159 * 160 ****************************************************************************************/ 161 162 163 /** 164 * Close function for UText instances. 165 * Cleans up, releases any resources being held by an open UText. 166 * <p> 167 * If the UText was originally allocated by one of the utext_open functions, 168 * the storage associated with the utext will also be freed. 169 * If the UText storage originated with the application, as it would with 170 * a local or static instance, the storage will not be deleted. 171 * 172 * An open UText can be reset to refer to a new string by using one of the utext_open() 173 * functions without first closing the UText. 174 * 175 * @param ut The UText to be closed. 176 * @return NULL if the UText struct was deleted by the close. If the UText struct 177 * was originally provided by the caller to the open function, it is 178 * returned by this function, and may be safely used again in 179 * a subsequent utext_open. 180 * 181 * @stable ICU 3.4 182 */ 183 U_CAPI UText * U_EXPORT2 184 utext_close(UText *ut); 185 186 /** 187 * Open a read-only UText implementation for UTF-8 strings. 188 * 189 * \htmlonly 190 * Any invalid UTF-8 in the input will be handled in this way: 191 * a sequence of bytes that has the form of a truncated, but otherwise valid, 192 * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
193 * Any other illegal bytes will each be replaced by a \uFFFD. 194 * \endhtmlonly 195 * 196 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 197 * If non-NULL, must refer to an initialized UText struct, which will then 198 * be reset to reference the specified UTF-8 string. 199 * @param s A UTF-8 string. Must not be NULL. 200 * @param length The length of the UTF-8 string in bytes, or -1 if the string is 201 * zero terminated. 202 * @param status Errors are returned here. 203 * @return A pointer to the UText. If a pre-allocated UText was provided, it 204 * will always be used and returned. 205 * @stable ICU 3.4 206 */ 207 U_CAPI UText * U_EXPORT2 208 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); 209 210 211 /** 212 * Open a read-only UText for a UChar * string. 213 * 214 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 215 * If non-NULL, must refer to an initialized UText struct, which will then 216 * be reset to reference the specified UChar string. 217 * @param s A UChar (UTF-16) string 218 * @param length The number of UChars in the input string, or -1 if the string is 219 * zero terminated. 220 * @param status Errors are returned here. 221 * @return A pointer to the UText. If a pre-allocated UText was provided, it 222 * will always be used and returned. 223 * @stable ICU 3.4 224 */ 225 U_CAPI UText * U_EXPORT2 226 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); 227 228 229 #if U_SHOW_CPLUSPLUS_API 230 /** 231 * Open a writable UText for a non-const UnicodeString. 232 * 233 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 234 * If non-NULL, must refer to an initialized UText struct, which will then 235 * be reset to reference the specified input string. 236 * @param s A UnicodeString. 237 * @param status Errors are returned here. 238 * @return Pointer to the UText.
If a UText was supplied as input, this 239 * will always be used and returned. 240 * @stable ICU 3.4 241 */ 242 U_CAPI UText * U_EXPORT2 243 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); 244 245 246 /** 247 * Open a UText for a const UnicodeString. The resulting UText will not be writable. 248 * 249 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 250 * If non-NULL, must refer to an initialized UText struct, which will then 251 * be reset to reference the specified input string. 252 * @param s A const UnicodeString to be wrapped. 253 * @param status Errors are returned here. 254 * @return Pointer to the UText. If a UText was supplied as input, this 255 * will always be used and returned. 256 * @stable ICU 3.4 257 */ 258 U_CAPI UText * U_EXPORT2 259 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); 260 261 262 /** 263 * Open a writable UText implementation for an ICU Replaceable object. 264 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 265 * If non-NULL, must refer to an already existing UText, which will then 266 * be reset to reference the specified replaceable text. 267 * @param rep A Replaceable text object. 268 * @param status Errors are returned here. 269 * @return Pointer to the UText. If a UText was supplied as input, this 270 * will always be used and returned. 271 * @see Replaceable 272 * @stable ICU 3.4 273 */ 274 U_CAPI UText * U_EXPORT2 275 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); 276 277 /** 278 * Open a UText implementation over an ICU CharacterIterator. 279 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 280 * If non-NULL, must refer to an already existing UText, which will then 281 * be reset to reference the specified character iterator. 282 * @param ci A Character Iterator. 283 * @param status Errors are returned here. 284 * @return Pointer to the UText.
If a UText was supplied as input, this 285 * will always be used and returned. 286 * @see CharacterIterator 287 * @stable ICU 3.4 288 */ 289 U_CAPI UText * U_EXPORT2 290 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); 291 292 #endif 293 294 295 /** 296 * Clone a UText. This is much like opening a UText where the source text is itself 297 * another UText. 298 * 299 * A deep clone will copy both the UText data structures and the underlying text. 300 * The original and cloned UText will operate completely independently; modifications 301 * made to the text in one will not affect the other. Text providers are not 302 * required to support deep clones. The user of clone() must check the status return 303 * and be prepared to handle failures. 304 * 305 * The standard UText implementations for UTF8, UChar *, UnicodeString and 306 * Replaceable all support deep cloning. 307 * 308 * The UText returned from a deep clone will be writable, assuming that the text 309 * provider is able to support writing, even if the source UText had been made 310 * non-writable by means of utext_freeze(). 311 * 312 * A shallow clone replicates only the UText data structures; it does not make 313 * a copy of the underlying text. Shallow clones can be used as an efficient way to 314 * have multiple iterators active in a single text string that is not being 315 * modified. 316 * 317 * A shallow clone operation will not fail, barring truly exceptional conditions such 318 * as memory allocation failures. 319 * 320 * Shallow UText clones should be avoided if the UText functions that modify the 321 * text are expected to be used, either on the original or the cloned UText. 322 * Any such modifications can cause unpredictable behavior. Read Only 323 * shallow clones provide some protection against errors of this type by 324 * disabling text modification via the cloned UText.
325 * 326 * A shallow clone made with the readOnly parameter == false will preserve the 327 * utext_isWritable() state of the source object. Note, however, that 328 * write operations must be avoided while more than one UText exists that refers 329 * to the same underlying text. 330 * 331 * A UText and its clone may be safely concurrently accessed by separate threads. 332 * This is true for read access only with shallow clones, and for both read and 333 * write access with deep clones. 334 * It is the responsibility of the Text Provider to ensure that this thread safety 335 * constraint is met. 336 * 337 * @param dest A UText struct to be filled in with the result of the clone operation, 338 * or NULL if the clone function should heap-allocate a new UText struct. 339 * If non-NULL, must refer to an already existing UText, which will then 340 * be reset to become the clone. 341 * @param src The UText to be cloned. 342 * @param deep true to request a deep clone, false for a shallow clone. 343 * @param readOnly true to request that the cloned UText have read only access to the 344 * underlying text. 345 346 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 347 * will be returned if the text provider is unable to clone the 348 * original text. 349 * @return The newly created clone, or NULL if the clone operation failed. 350 * @stable ICU 3.4 351 */ 352 U_CAPI UText * U_EXPORT2 353 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); 354 355 356 /** 357 * Compare two UText objects for equality. 358 * UTexts are equal if they are iterating over the same text, and 359 * have the same iteration position within the text. 360 * If either or both of the parameters are NULL, the comparison is false. 361 * 362 * @param a The first of the two UTexts to compare. 363 * @param b The other UText to be compared. 364 * @return true if the two UTexts are equal.
365 * @stable ICU 3.6 366 */ 367 U_CAPI UBool U_EXPORT2 368 utext_equals(const UText *a, const UText *b); 369 370 371 /***************************************************************************** 372 * 373 * Functions to work with the text represented by a UText wrapper 374 * 375 *****************************************************************************/ 376 377 /** 378 * Get the length of the text. Depending on the characteristics 379 * of the underlying text representation, this may be expensive. 380 * @see utext_isLengthExpensive() 381 * 382 * 383 * @param ut the text to be accessed. 384 * @return the length of the text, expressed in native units. 385 * 386 * @stable ICU 3.4 387 */ 388 U_CAPI int64_t U_EXPORT2 389 utext_nativeLength(UText *ut); 390 391 /** 392 * Return true if calculating the length of the text could be expensive. 393 * Finding the length of NUL terminated strings is considered to be expensive. 394 * 395 * Note that the value of this function may change 396 * as the result of other operations on a UText. 397 * Once the length of a string has been discovered, it will no longer 398 * be expensive to report it. 399 * 400 * @param ut the text to be accessed. 401 * @return true if determining the length of the text could be time consuming. 402 * @stable ICU 3.4 403 */ 404 U_CAPI UBool U_EXPORT2 405 utext_isLengthExpensive(const UText *ut); 406 407 /** 408 * Returns the code point at the requested index, 409 * or U_SENTINEL (-1) if it is out of bounds. 410 * 411 * If the specified index points to the interior of a multi-unit 412 * character - one of the trail bytes of a UTF-8 sequence, for example - 413 * the complete code point will be returned. 414 * 415 * The iteration position will be set to the start of the returned code point.
416 * 417 * This function is roughly equivalent to the sequence 418 * utext_setNativeIndex(index); 419 * utext_current32(); 420 * (There is a subtle difference if the index is out of bounds by being less than zero - 421 * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() 422 * will return the char at zero. utext_char32At(negative index), on the other hand, will 423 * return the U_SENTINEL value of -1.) 424 * 425 * @param ut the text to be accessed 426 * @param nativeIndex the native index of the character to be accessed. If the index points 427 * to other than the first unit of a multi-unit character, it will be adjusted 428 * to the start of the character. 429 * @return the code point at the specified index. 430 * @stable ICU 3.4 431 */ 432 U_CAPI UChar32 U_EXPORT2 433 utext_char32At(UText *ut, int64_t nativeIndex); 434 435 436 /** 437 * 438 * Get the code point at the current iteration position, 439 * or U_SENTINEL (-1) if the iteration has reached the end of 440 * the input text. 441 * 442 * @param ut the text to be accessed. 443 * @return the Unicode code point at the current iterator position. 444 * @stable ICU 3.4 445 */ 446 U_CAPI UChar32 U_EXPORT2 447 utext_current32(UText *ut); 448 449 450 /** 451 * Get the code point at the current iteration position of the UText, and 452 * advance the position to the first index following the character. 453 * 454 * If the position is at the end of the text (the index following 455 * the last character, which is also the length of the text), 456 * return U_SENTINEL (-1) and do not advance the index. 457 * 458 * This is a post-increment operation. 459 * 460 * An inline macro version of this function, UTEXT_NEXT32(), 461 * is available for performance critical use. 462 * 463 * @param ut the text to be accessed. 464 * @return the Unicode code point at the iteration position.
465 * @see UTEXT_NEXT32 466 * @stable ICU 3.4 467 */ 468 U_CAPI UChar32 U_EXPORT2 469 utext_next32(UText *ut); 470 471 472 /** 473 * Move the iterator position to the character (code point) whose 474 * index precedes the current position, and return that character. 475 * This is a pre-decrement operation. 476 * 477 * If the initial position is at the start of the text (index of 0) 478 * return U_SENTINEL (-1), and leave the position unchanged. 479 * 480 * An inline macro version of this function, UTEXT_PREVIOUS32(), 481 * is available for performance critical use. 482 * 483 * @param ut the text to be accessed. 484 * @return the previous UChar32 code point, or U_SENTINEL (-1) 485 * if the iteration has reached the start of the text. 486 * @see UTEXT_PREVIOUS32 487 * @stable ICU 3.4 488 */ 489 U_CAPI UChar32 U_EXPORT2 490 utext_previous32(UText *ut); 491 492 493 /** 494 * Set the iteration index and return the code point at that index. 495 * Leave the iteration index at the start of the following code point. 496 * 497 * This function is the most efficient and convenient way to 498 * begin a forward iteration. The results are identical to those 499 * from the sequence 500 * \code 501 * utext_setNativeIndex(); 502 * utext_next32(); 503 * \endcode 504 * 505 * @param ut the text to be accessed. 506 * @param nativeIndex Iteration index, in the native units of the text provider. 507 * @return Code point which starts at or before index, 508 * or U_SENTINEL (-1) if it is out of bounds. 509 * @stable ICU 3.4 510 */ 511 U_CAPI UChar32 U_EXPORT2 512 utext_next32From(UText *ut, int64_t nativeIndex); 513 514 515 516 /** 517 * Set the iteration index, and return the code point preceding the 518 * one specified by the initial index. Leave the iteration position 519 * at the start of the returned code point. 520 * 521 * This function is the most efficient and convenient way to 522 * begin a backwards iteration. 523 * 524 * @param ut the text to be accessed.
525 * @param nativeIndex Iteration index in the native units of the text provider. 526 * @return Code point preceding the one at the initial index, 527 * or U_SENTINEL (-1) if it is out of bounds. 528 * 529 * @stable ICU 3.4 530 */ 531 U_CAPI UChar32 U_EXPORT2 532 utext_previous32From(UText *ut, int64_t nativeIndex); 533 534 /** 535 * Get the current iterator position, which can range from 0 to 536 * the length of the text. 537 * The position is a native index into the input text, in whatever format it 538 * may have (possibly UTF-8 for example), and may not always be the same as 539 * the corresponding UChar (UTF-16) index. 540 * The returned position will always be aligned to a code point boundary. 541 * 542 * @param ut the text to be accessed. 543 * @return the current index position, in the native units of the text provider. 544 * @stable ICU 3.4 545 */ 546 U_CAPI int64_t U_EXPORT2 547 utext_getNativeIndex(const UText *ut); 548 549 /** 550 * Set the current iteration position to the nearest code point 551 * boundary at or preceding the specified index. 552 * The index is in the native units of the original input text. 553 * If the index is out of range, it will be pinned to be within 554 * the range of the input text. 555 * <p> 556 * It will usually be more efficient to begin an iteration 557 * using the functions utext_next32From() or utext_previous32From() 558 * rather than utext_setNativeIndex(). 559 * <p> 560 * Moving the index position to an adjacent character is best done 561 * with utext_next32(), utext_previous32() or utext_moveIndex32(). 562 * Attempting to do direct arithmetic on the index position is 563 * complicated by the fact that the size (in native units) of a 564 * character depends on the underlying representation of the character 565 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not 566 * easily knowable. 567 * 568 * @param ut the text to be accessed. 569 * @param nativeIndex the native unit index of the new iteration position.
570 * @stable ICU 3.4 571 */ 572 U_CAPI void U_EXPORT2 573 utext_setNativeIndex(UText *ut, int64_t nativeIndex); 574 575 /** 576 * Move the iterator position by delta code points. The number of code points 577 * is a signed number; a negative delta will move the iterator backwards, 578 * towards the start of the text. 579 * <p> 580 * The index is moved by <code>delta</code> code points 581 * forward or backward, but no further backward than to 0 and 582 * no further forward than to utext_nativeLength(). 583 * The resulting index value will be in between 0 and length, inclusive. 584 * 585 * @param ut the text to be accessed. 586 * @param delta the signed number of code points to move the iteration position. 587 * @return true if the position could be moved the requested number of positions while 588 * staying within the range [0 - text length]. 589 * @stable ICU 3.4 590 */ 591 U_CAPI UBool U_EXPORT2 592 utext_moveIndex32(UText *ut, int32_t delta); 593 594 /** 595 * Get the native index of the character preceding the current position. 596 * If the iteration position is already at the start of the text, zero 597 * is returned. 598 * The value returned is the same as that obtained from the following sequence, 599 * but without the side effect of changing the iteration position. 600 * 601 * \code 602 * UText *ut = whatever; 603 * ... 604 * utext_previous32(ut); 605 * utext_getNativeIndex(ut); 606 * \endcode 607 * 608 * This function is most useful during forwards iteration, where it will get the 609 * native index of the character most recently returned from utext_next32(). 610 * 611 * @param ut the text to be accessed 612 * @return the native index of the character preceding the current index position, 613 * or zero if the current position is at the start of the text. 614 * @stable ICU 3.6 615 */ 616 U_CAPI int64_t U_EXPORT2 617 utext_getPreviousNativeIndex(UText *ut); 618 619 620 /** 621 * 622 * Extract text from a UText into a UChar buffer.
The range of text to be extracted 623 * is specified in the native indices of the UText provider. These may not necessarily 624 * be UTF-16 indices. 625 * <p> 626 * The size (number of 16 bit UChars) of the data to be extracted is returned. The 627 * full number of UChars is returned, even when the extracted text is truncated 628 * because the specified buffer size is too small. 629 * <p> 630 * The extracted string will (if you are a user) / must (if you are a text provider) 631 * be NUL-terminated if there is sufficient space in the destination buffer. This 632 * terminating NUL is not included in the returned length. 633 * <p> 634 * The iteration index is left at the position following the last extracted character. 635 * 636 * @param ut the UText from which to extract data. 637 * @param nativeStart the native index of the first character to extract. 638 * If the specified index is out of range, 639 * it will be pinned to be within 0 <= index <= textLength. 640 * @param nativeLimit the native string index of the position following the last 641 * character to extract. If the specified index is out of range, 642 * it will be pinned to be within 0 <= index <= textLength. 643 * nativeLimit must be >= nativeStart. 644 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed 645 * @param destCapacity The size, in UChars, of the destination buffer. May be zero 646 * for precomputing the required size. 647 * @param status receives any error status. 648 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 649 * buffer was too small. Returns number of UChars for preflighting. 650 * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
651 * 652 * @stable ICU 3.4 653 */ 654 U_CAPI int32_t U_EXPORT2 655 utext_extract(UText *ut, 656 int64_t nativeStart, int64_t nativeLimit, 657 UChar *dest, int32_t destCapacity, 658 UErrorCode *status); 659 660 661 662 /************************************************************************************ 663 * 664 * #define inline versions of selected performance-critical text access functions 665 * Caution: do not use auto increment++ or decrement-- expressions 666 * as parameters to these macros. 667 * 668 * For most use, where there is no extreme performance constraint, the 669 * normal, non-inline functions are a better choice. The resulting code 670 * will be smaller, and, if the need ever arises, easier to debug. 671 * 672 * These are implemented as #defines rather than real functions 673 * because there is no fully portable way to do inline functions in plain C. 674 * 675 ************************************************************************************/ 676 677 #ifndef U_HIDE_INTERNAL_API 678 /** 679 * inline version of utext_current32(), for performance-critical situations. 680 * 681 * Get the code point at the current iteration position of the UText. 682 * Returns U_SENTINEL (-1) if the position is at the end of the 683 * text. The chunk unit at chunkOffset is returned directly when chunkOffset < chunkLength and that unit is below 0xd800; every other case is delegated to utext_current32(). The argument ut is evaluated more than once. 684 * 685 * @internal ICU 4.4 technology preview 686 */ 687 #define UTEXT_CURRENT32(ut) \ 688 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 689 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) 690 #endif /* U_HIDE_INTERNAL_API */ 691 692 /** 693 * inline version of utext_next32(), for performance-critical situations. 694 * 695 * Get the code point at the current iteration position of the UText, and 696 * advance the position to the first index following the character. 697 * This is a post-increment operation. 698 * Returns U_SENTINEL (-1) if the position is at the end of the 699 * text. The chunk unit at chunkOffset is returned directly, and chunkOffset incremented, when chunkOffset < chunkLength and that unit is below 0xd800; every other case is delegated to utext_next32(). The argument ut is evaluated more than once.
700 * 701 * @stable ICU 3.4 702 */ 703 #define UTEXT_NEXT32(ut) \ 704 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 705 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) 706 707 /** 708 * inline version of utext_previous32(), for performance-critical situations. 709 * 710 * Move the iterator position to the character (code point) whose 711 * index precedes the current position, and return that character. 712 * This is a pre-decrement operation. 713 * Returns U_SENTINEL (-1) if the position is at the start of the text. When chunkOffset > 0 and the chunk unit preceding it is below 0xd800, chunkOffset is decremented and that unit is returned directly; every other case is delegated to utext_previous32(). The argument ut is evaluated more than once. 714 * 715 * @stable ICU 3.4 716 */ 717 #define UTEXT_PREVIOUS32(ut) \ 718 ((ut)->chunkOffset > 0 && \ 719 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ 720 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) 721 722 /** 723 * inline version of utext_getNativeIndex(), for performance-critical situations. 724 * 725 * Get the current iterator position, which can range from 0 to 726 * the length of the text. 727 * The position is a native index into the input text, in whatever format it 728 * may have (possibly UTF-8 for example), and may not always be the same as 729 * the corresponding UChar (UTF-16) index. 730 * The returned position will always be aligned to a code point boundary. When chunkOffset <= nativeIndexingLimit the result is chunkNativeStart + chunkOffset; otherwise pFuncs->mapOffsetToNative() is called. The argument ut is evaluated more than once. 731 * 732 * @stable ICU 3.6 733 */ 734 #define UTEXT_GETNATIVEINDEX(ut) \ 735 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ 736 (ut)->chunkNativeStart+(ut)->chunkOffset : \ 737 (ut)->pFuncs->mapOffsetToNative(ut)) 738 739 /** 740 * inline version of utext_setNativeIndex(), for performance-critical situations. 741 * 742 * Set the current iteration position to the nearest code point 743 * boundary at or preceding the specified index. 744 * The index is in the native units of the original input text. 745 * If the index is out of range, it will be pinned to be within 746 * the range of the input text. When ix minus chunkNativeStart lies in the range [0, nativeIndexingLimit) and the chunk unit at that offset is below 0xdc00, chunkOffset is set directly; every other case is delegated to utext_setNativeIndex(). The arguments ut and ix may each be evaluated more than once.
747 * 748 * @stable ICU 3.8 749 */ 750 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ 751 int64_t __offset = (ix) - (ut)->chunkNativeStart; \ 752 if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ 753 (ut)->chunkOffset=(int32_t)__offset; \ 754 } else { \ 755 utext_setNativeIndex((ut), (ix)); \ 756 } \ 757 } UPRV_BLOCK_MACRO_END 758 759 760 761 /************************************************************************************ 762 * 763 * Functions related to writing or modifying the text. 764 * These will work only with modifiable UTexts. Attempting to 765 * modify a read-only UText will return an error status. 766 * 767 ************************************************************************************/ 768 769 770 /** 771 * Return true if the text can be written (modified) with utext_replace() or 772 * utext_copy(). For the text to be writable, the text provider must 773 * be of a type that supports writing and the UText must not be frozen. 774 * 775 * Attempting to modify text when utext_isWritable() is false will fail - 776 * the text will not be modified, and an error will be returned from the function 777 * that attempted the modification. 778 * 779 * @param ut the UText to be tested. 780 * @return true if the text is modifiable. 781 * 782 * @see utext_freeze() 783 * @see utext_replace() 784 * @see utext_copy() 785 * @stable ICU 3.4 786 * 787 */ 788 U_CAPI UBool U_EXPORT2 789 utext_isWritable(const UText *ut); 790 791 792 /** 793 * Test whether there is meta data associated with the text. 794 * @see Replaceable::hasMetaData() 795 * 796 * @param ut The UText to be tested 797 * @return true if the underlying text includes meta data. 798 * @stable ICU 3.4 799 */ 800 U_CAPI UBool U_EXPORT2 801 utext_hasMetaData(const UText *ut); 802 803 804 /** 805 * Replace a range of the original text with a replacement text.
*
 * Leaves the current iteration position at the position following the
 * newly inserted replacement text.
 *
 * This function is only available on UText types that support writing,
 * that is, ones where utext_isWritable() returns true.
 *
 * When using this function, there should be only a single UText opened onto the
 * underlying native text string.  Behavior after a replace operation
 * on a UText is undefined for any other additional UTexts that refer to the
 * modified string.
 *
 * @param ut               the UText representing the text to be operated on.
 * @param nativeStart      the native index of the start of the region to be replaced
 * @param nativeLimit      the native index of the character following the region to be replaced.
 * @param replacementText  pointer to the replacement text (UChar, i.e. UTF-16, code units)
 * @param replacementLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
 * @param status           receives any error status.  Possible errors include
 *                         U_NO_WRITE_PERMISSION
 *
 * @return The signed number of (native) storage units by which
 *         the length of the text expanded or contracted.
 *
 * @stable ICU 3.4
 */
U_CAPI int32_t U_EXPORT2
utext_replace(UText *ut,
             int64_t nativeStart, int64_t nativeLimit,
             const UChar *replacementText, int32_t replacementLength,
             UErrorCode *status);



/**
 *
 * Copy or move a substring from one position to another within the text,
 * while retaining any metadata associated with the text.
 * This function is used to duplicate or reorder substrings.
 * The destination index must not overlap the source range.
 *
 * The text to be copied or moved is inserted at destIndex;
 * it does not replace or overwrite any existing text.
 *
 * The iteration position is left following the newly inserted text
 * at the destination position.
*
 * This function is only available on UText types that support writing,
 * that is, ones where utext_isWritable() returns true.
 *
 * When using this function, there should be only a single UText opened onto the
 * underlying native text string.  Behavior after a copy operation
 * on a UText is undefined for any other additional UTexts that refer to the
 * modified string.
 *
 * @param ut           The UText representing the text to be operated on.
 * @param nativeStart  The native index of the start of the region to be copied or moved
 * @param nativeLimit  The native index of the character position following the region
 *                     to be copied.
 * @param destIndex    The native destination index to which the source substring is
 *                     copied or moved.
 * @param move         If true, then the substring is moved, not copied/duplicated.
 * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
 *
 * @stable ICU 3.4
 */
U_CAPI void U_EXPORT2
utext_copy(UText *ut,
          int64_t nativeStart, int64_t nativeLimit,
          int64_t destIndex,
          UBool move,
          UErrorCode *status);


/**
 *  <p>
 *  Freeze a UText.  This prevents any modification to the underlying text itself
 *  by means of functions operating on this UText.
 *  </p>
 *  <p>
 *  Once frozen, a UText can not be unfrozen.  The intent is to ensure
 *  that the text underlying a frozen UText wrapper cannot be modified via that UText.
 *  </p>
 *  <p>
 *  Caution:  freezing a UText will disable changes made via the specific
 *   frozen UText wrapper only; it will not have any effect on the ability to
 *   directly modify the text by bypassing the UText.  Any such backdoor modifications
 *   are always an error while UText access is occurring because the underlying
 *   text can get out of sync with UText's buffering.
 *  </p>
 *
 *  @param ut  The UText to be frozen.
*  @see   utext_isWritable()
 *  @stable ICU 3.6
 */
U_CAPI void U_EXPORT2
utext_freeze(UText *ut);


/**
 * UText provider properties (bit field indexes).
 *
 * NOTE(review): the enumerators are consecutive small integers rather than
 * powers of two, so they are presumably bit positions to be applied as
 * (1 << value) to UText.providerProperties, not ready-made masks - confirm
 * against the implementation.
 *
 * @see UText
 * @stable ICU 3.4
 */
enum {
    /**
     * It is potentially time consuming for the provider to determine the length of the text.
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
    /**
     * Text chunks remain valid and usable until the text object is modified or
     * deleted, not just until the next time the access() function is called
     * (which is the default).
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_STABLE_CHUNKS = 2,
    /**
     * The provider supports modifying the text via the replace() and copy()
     * functions.
     * @see Replaceable
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_WRITABLE = 3,
    /**
     * There is meta data associated with the text.
     * @see Replaceable::hasMetaData()
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_HAS_META_DATA = 4,
    /**
     * Text provider owns the text storage.
     *  Generally occurs as the result of a deep clone of the UText.
     *  When closing the UText, the associated text must
     *  also be closed/deleted/freed/ whatever is appropriate.
     * @stable ICU 3.6
     */
    UTEXT_PROVIDER_OWNS_TEXT = 5
};

/**
 * Function type declaration for UText.clone().
 *
 *  clone a UText.  Much like opening a UText where the source text is itself
 *  another UText.
 *
 *  A deep clone will copy both the UText data structures and the underlying text.
 *  The original and cloned UText will operate completely independently; modifications
 *  made to the text in one will not affect the other.  Text providers are not
 *  required to support deep clones.  The user of clone() must check the status return
 *  and be prepared to handle failures.
*
 *  A shallow clone replicates only the UText data structures; it does not make
 *  a copy of the underlying text.  Shallow clones can be used as an efficient way to
 *  have multiple iterators active in a single text string that is not being
 *  modified.
 *
 *  A shallow clone operation must not fail except for truly exceptional conditions such
 *  as memory allocation failures.
 *
 *  A UText and its clone may be safely concurrently accessed by separate threads.
 *  This is true for both shallow and deep clones.
 *  It is the responsibility of the Text Provider to ensure that this thread safety
 *  constraint is met.
 *
 *
 *  @param dest   A UText struct to be filled in with the result of the clone operation,
 *                or NULL if the clone function should heap-allocate a new UText struct.
 *  @param src    The UText to be cloned.
 *  @param deep   true to request a deep clone, false for a shallow clone.
 *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
 *                should be returned if the text provider is unable to clone the
 *                original text.
 *  @return       The newly created clone, or NULL if the clone operation failed.
 *
 * @stable ICU 3.4
 */
typedef UText * U_CALLCONV
UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);


/**
 * Function type declaration for UText.nativeLength().
 *
 * @param ut the UText to get the length of.
 * @return the length, in the native units of the original text string.
 * @see UText
 * @stable ICU 3.4
 */
typedef int64_t U_CALLCONV
UTextNativeLength(UText *ut);

/**
 * Function type declaration for UText.access().  Get the description of the text chunk
 *  containing the text at a requested native index.  The UText's iteration
 *  position will be left at the requested index.
If the index is out
 *  of bounds, the iteration position will be left at the start or end
 *  of the string, as appropriate.
 *
 *  Chunks must begin and end on code point boundaries.  A single code point
 *  made up of multiple storage units must never span a chunk boundary.
 *
 *
 * @param ut          the UText being accessed.
 * @param nativeIndex Requested index of the text to be accessed.
 * @param forward     If true, then the returned chunk must contain text
 *                    starting from the index, so that start<=index<limit.
 *                    If false, then the returned chunk must contain text
 *                    before the index, so that start<index<=limit.
 * @return            True if the requested index could be accessed.  The chunk
 *                    will contain the requested text.
 *                    False if a chunk cannot be accessed
 *                    (the requested index is out of bounds).
 *
 * @see UText
 * @stable ICU 3.4
 */
typedef UBool U_CALLCONV
UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);

/**
 * Function type declaration for UText.extract().
 *
 * Extract text from a UText into a UChar buffer.  The range of text to be extracted
 * is specified in the native indices of the UText provider.  These may not necessarily
 * be UTF-16 indices.
 * <p>
 * The size (number of 16 bit UChars) in the data to be extracted is returned.  The
 * full amount is returned, even when the specified buffer size is smaller.
 * <p>
 * The extracted string will (if you are a user) / must (if you are a text provider)
 * be NUL-terminated if there is sufficient space in the destination buffer.
 *
 * @param  ut            the UText from which to extract data.
 * @param  nativeStart   the native index of the first character to extract.
 * @param  nativeLimit   the native string index of the position following the last
 *                       character to extract.
* @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
 * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
 *                       for precomputing the required size.
 * @param  status        receives any error status.
 *                       If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
 *                       preflighting.
 * @return Number of UChars in the data.  Does not include a trailing NUL.
 *
 * @stable ICU 3.4
 */
typedef int32_t U_CALLCONV
UTextExtract(UText *ut,
             int64_t nativeStart, int64_t nativeLimit,
             UChar *dest, int32_t destCapacity,
             UErrorCode *status);

/**
 * Function type declaration for UText.replace().
 *
 * Replace a range of the original text with a replacement text.
 *
 * Leaves the current iteration position at the position following the
 * newly inserted replacement text.
 *
 * This function need only be implemented on UText types that support writing.
 *
 * When using this function, there should be only a single UText opened onto the
 * underlying native text string.  The function is responsible for updating the
 * text chunk within the UText to reflect the updated iteration position,
 * taking into account any changes to the underlying string's structure caused
 * by the replace operation.
 *
 * (The parameter name replacmentLength below is spelled the same way as in
 * the function type declaration that follows.)
 *
 * @param ut               the UText representing the text to be operated on.
 * @param nativeStart      the index of the start of the region to be replaced
 * @param nativeLimit      the index of the character following the region to be replaced.
 * @param replacementText  pointer to the replacement text
 * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
 * @param status           receives any error status.
Possible errors include
 *                         U_NO_WRITE_PERMISSION
 *
 * @return The signed number of (native) storage units by which
 *         the length of the text expanded or contracted.
 *
 * @stable ICU 3.4
 */
typedef int32_t U_CALLCONV
UTextReplace(UText *ut,
             int64_t nativeStart, int64_t nativeLimit,
             const UChar *replacementText, int32_t replacmentLength,
             UErrorCode *status);

/**
 * Function type declaration for UText.copy().
 *
 * Copy or move a substring from one position to another within the text,
 * while retaining any metadata associated with the text.
 * This function is used to duplicate or reorder substrings.
 * The destination index must not overlap the source range.
 *
 * The text to be copied or moved is inserted at destIndex;
 * it does not replace or overwrite any existing text.
 *
 * This function need only be implemented for UText types that support writing.
 *
 * When using this function, there should be only a single UText opened onto the
 * underlying native text string.  The function is responsible for updating the
 * text chunk within the UText to reflect the updated iteration position,
 * taking into account any changes to the underlying string's structure caused
 * by the copy operation.
 *
 * @param ut           The UText representing the text to be operated on.
 * @param nativeStart  The index of the start of the region to be copied or moved
 * @param nativeLimit  The index of the character following the region to be copied or moved.
 * @param nativeDest   The destination index to which the source substring is copied or moved.
 * @param move         If true, then the substring is moved, not copied/duplicated.
 * @param status       receives any error status.
Possible errors include U_NO_WRITE_PERMISSION
 *
 * @stable ICU 3.4
 */
typedef void U_CALLCONV
UTextCopy(UText *ut,
          int64_t nativeStart, int64_t nativeLimit,
          int64_t nativeDest,
          UBool move,
          UErrorCode *status);

/**
 * Function type declaration for UText.mapOffsetToNative().
 * Map from the current UChar offset within the current text chunk to
 *  the corresponding native index in the original source text.
 *
 * This is required only for text providers that do not use native UTF-16 indexes.
 *
 * @param ut     the UText.
 * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
 *         The returned native index should always be to a code point boundary.
 *
 * @stable ICU 3.4
 */
typedef int64_t U_CALLCONV
UTextMapOffsetToNative(const UText *ut);

/**
 * Function type declaration for UText.mapIndexToUTF16().
 * Map from a native index to a UChar offset within a text chunk.
 * Behavior is undefined if the native index does not fall within the
 *   current chunk.
 *
 * This function is required only for text providers that do not use native UTF-16 indexes.
 *
 * @param ut          The UText containing the text chunk.
 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
 * @return            Chunk-relative UTF-16 offset corresponding to the specified native
 *                    index.
 *
 * @stable ICU 3.4
 */
typedef int32_t U_CALLCONV
UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);


/**
 * Function type declaration for UText.close().
 *
 * A Text Provider close function is only required for provider types that make
 *  allocations in their open function (or other functions) that must be
 *  cleaned when the UText is closed.
*
 * The allocation of the UText struct itself and any "extra" storage
 * associated with the UText is handled by the common UText implementation
 * and does not require provider specific cleanup in a close function.
 *
 * Most UText provider implementations do not need to implement this function.
 *
 * @param ut A UText object to be closed.
 *
 * @stable ICU 3.4
 */
typedef void U_CALLCONV
UTextClose(UText *ut);


/**
 *   (public)  Function dispatch table for UText.
 *             Conceptually very much like a C++ Virtual Function Table.
 *             This struct defines the organization of the table.
 *             Each text provider implementation must provide an
 *              actual table that is initialized with the appropriate functions
 *              for the type of text being handled.
 *   @stable ICU 3.6
 */
struct UTextFuncs {
    /**
     *   (public)  Function table size, sizeof(UTextFuncs)
     *             Intended for use should the table grow to accommodate added
     *             functions in the future, to allow tests for older format
     *             function tables that do not contain the extensions.
     *
     *             Fields are placed for optimal alignment on
     *             32/64/128-bit-pointer machines, by normally grouping together
     *             4 32-bit fields,
     *             4 pointers,
     *             2 64-bit fields
     *             in sequence.
     *   @stable ICU 3.6
     */
    int32_t       tableSize;

    /**
      *   (private)  Alignment padding.
      *              Do not use, reserved for use by the UText framework only.
      *   @internal
      */
    int32_t       reserved1, /** @internal */ reserved2, /** @internal */ reserved3;


    /**
     * (public) Function pointer for UTextClone
     *
     * @see UTextClone
     * @stable ICU 3.6
     */
    UTextClone *clone;

    /**
     * (public) Function pointer for UTextNativeLength.
     * May be expensive to compute!
     *
     * @see UTextNativeLength
     * @stable ICU 3.6
     */
    UTextNativeLength *nativeLength;

    /**
     * (public) Function pointer for UTextAccess.
     *
     * @see UTextAccess
     * @stable ICU 3.6
     */
    UTextAccess *access;

    /**
     * (public) Function pointer for UTextExtract.
     *
     * @see UTextExtract
     * @stable ICU 3.6
     */
    UTextExtract *extract;

    /**
     * (public) Function pointer for UTextReplace.
     *
     * @see UTextReplace
     * @stable ICU 3.6
     */
    UTextReplace *replace;

    /**
     * (public) Function pointer for UTextCopy.
     *
     * @see UTextCopy
     * @stable ICU 3.6
     */
    UTextCopy *copy;

    /**
     * (public) Function pointer for UTextMapOffsetToNative.
     *
     * @see UTextMapOffsetToNative
     * @stable ICU 3.6
     */
    UTextMapOffsetToNative *mapOffsetToNative;

    /**
     * (public) Function pointer for UTextMapNativeIndexToUTF16.
     *
     * @see UTextMapNativeIndexToUTF16
     * @stable ICU 3.6
     */
    UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;

    /**
     * (public) Function pointer for UTextClose.
     *
     * @see UTextClose
     * @stable ICU 3.6
     */
    UTextClose  *close;

    /**
      * (private)  Spare function pointer
      * @internal
      */
    UTextClose  *spare1;

    /**
      * (private)  Spare function pointer
      * @internal
      */
    UTextClose  *spare2;

    /**
      * (private)  Spare function pointer
      * @internal
      */
    UTextClose  *spare3;

};
/**
 * Function dispatch table for UText
 * @see UTextFuncs
 */
typedef struct UTextFuncs UTextFuncs;

/**
  *   UText struct.  Provides the interface between the generic UText access code
  *                  and the UText provider code that works on specific kinds of
  *                  text  (UTF-8, noncontiguous UTF-16, whatever.)
*
  *                  Applications that are using predefined types of text providers
  *                  to pass text data to ICU services will have no need to view the
  *                  internals of the UText structs that they open.
  *
  * @stable ICU 3.6
  */
struct UText {
    /**
     *     (private)  Magic.  Used to help detect when UText functions are handed
     *                        invalid or uninitialized UText structs.
     *                        utext_openXYZ() functions take an initialized,
     *                        but not necessarily open, UText struct as an
     *                        optional fill-in parameter.  This magic field
     *                        is used to check for that initialization.
     *                        Text provider close functions must NOT clear
     *                        the magic field because that would prevent
     *                        reuse of the UText struct.
     * @internal
     */
    uint32_t       magic;


    /**
     *     (private)  Flags for managing the allocation and freeing of
     *                memory associated with this UText.
     * @internal
     */
    int32_t        flags;


    /**
      *  Text provider properties.  This set of flags is maintained by the
      *                             text provider implementation.
      *  @stable ICU 3.4
      */
    int32_t         providerProperties;

    /**
     * (public) sizeOfStruct=sizeof(UText)
     * Allows possible backward compatible extension.
     *
     * @stable ICU 3.4
     */
    int32_t         sizeOfStruct;

    /* ------ 16 byte alignment boundary -----------  */


    /**
      *  (protected) Native index of the first character position following
      *              the current chunk.
      *  @stable ICU 3.6
      */
    int64_t         chunkNativeLimit;

    /**
     *   (protected)  Size in bytes of the extra space (pExtra).
     *  @stable ICU 3.4
     */
    int32_t        extraSize;

    /**
      *    (protected) The highest chunk offset where native indexing and
      *    chunk (UTF-16) indexing correspond.  For UTF-16 sources, value
      *    will be equal to chunkLength.
      *
      *    @stable ICU 3.6
      */
    int32_t         nativeIndexingLimit;

    /* ---- 16 byte alignment boundary------ */

    /**
     *  (protected) Native index of the first character in the text chunk.
     *  @stable ICU 3.6
     */
    int64_t         chunkNativeStart;

    /**
     *  (protected) Current iteration position within the text chunk (UTF-16 buffer).
     *  This is the index to the character that will be returned by utext_next32().
     *  @stable ICU 3.6
     */
    int32_t         chunkOffset;

    /**
     *  (protected) Length of the text chunk (UTF-16 buffer), in UChars.
     *  @stable ICU 3.6
     */
    int32_t         chunkLength;

    /* ---- 16  byte alignment boundary-- */


    /**
     *  (protected)  pointer to a chunk of text in UTF-16 format.
     *  May refer either to original storage of the source of the text, or
     *  if conversion was required, to a buffer owned by the UText.
     *  @stable ICU 3.6
     */
    const UChar    *chunkContents;

    /**
     * (public)     Pointer to Dispatch table for accessing functions for this UText.
     * @stable ICU 3.6
     */
    const UTextFuncs     *pFuncs;

    /**
     *  (protected)  Pointer to additional space requested by the
     *               text provider during the utext_open operation.
     * @stable ICU 3.4
     */
    void          *pExtra;

    /**
     * (protected)  Pointer to string or text-containing object or similar.
     * This is the source of the text that this UText is wrapping, in a format
     *  that is known to the text provider functions.
     * @stable ICU 3.4
     */
    const void   *context;

    /* --- 16 byte alignment boundary--- */

    /**
     * (protected)  Pointer fields available for use by the text provider.
     * Not used by UText common code.
     * @stable ICU 3.6
     */
    const void     *p;
    /**
     * (protected)  Pointer fields available for use by the text provider.
     * Not used by UText common code.
     * @stable ICU 3.6
     */
    const void     *q;
    /**
     * (protected)  Pointer fields available for use by the text provider.
     * Not used by UText common code.
     * @stable ICU 3.6
     */
    const void     *r;

    /**
      *  Private field reserved for future use by the UText framework
      *     itself.  This is not to be touched by the text providers.
      * @internal ICU 3.4
      */
    void           *privP;


    /* --- 16 byte alignment boundary--- */


    /**
      * (protected) Integer field reserved for use by the text provider.
      * Not used by the UText framework, or by the client (user) of the UText.
      * @stable ICU 3.4
      */
    int64_t         a;

    /**
      * (protected) Integer field reserved for use by the text provider.
      * Not used by the UText framework, or by the client (user) of the UText.
      * @stable ICU 3.4
      */
    int32_t         b;

    /**
      * (protected) Integer field reserved for use by the text provider.
      * Not used by the UText framework, or by the client (user) of the UText.
      * @stable ICU 3.4
      */
    int32_t         c;

    /*  ---- 16 byte alignment boundary---- */


    /**
      *  Private field reserved for future use by the UText framework
      *     itself.  This is not to be touched by the text providers.
      * @internal ICU 3.4
      */
    int64_t         privA;
    /**
      *  Private field reserved for future use by the UText framework
      *     itself.  This is not to be touched by the text providers.
      * @internal ICU 3.4
      */
    int32_t         privB;
    /**
      *  Private field reserved for future use by the UText framework
      *     itself.  This is not to be touched by the text providers.
      * @internal ICU 3.4
      */
    int32_t         privC;
};


/**
 *  Common function for use by Text Provider implementations to allocate and/or initialize
 *  a new UText struct.  To be called in the implementation of utext_open() functions.
*  If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
 *  If the supplied UText is already open, the provider's close function will be called
 *  so that the struct can be reused by the open that is in progress.
 *
 * @param ut   pointer to a UText struct to be re-used, or null if a new UText
 *             should be allocated.
 * @param extraSpace The amount of additional space to be allocated as part
 *             of this UText, for use by types of providers that require
 *             additional storage.
 * @param status Errors are returned here.
 * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
 * @stable ICU 3.4
 */
U_CAPI UText * U_EXPORT2
utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);

// do not use #ifndef U_HIDE_INTERNAL_API around the following!
/**
  * @internal
  *  Value used to help identify correctly initialized UText structs.
  *  UTEXT_INITIALIZER stores this value in the magic field of the UText.
  *  Note:  must be publicly visible so that UTEXT_INITIALIZER can access it.
  */
enum {
    UTEXT_MAGIC = 0x345ad82c
};

/**
 * initializer to be used with local (stack) instances of a UText
 *  struct.  UText structs must be initialized before passing
 *  them to one of the utext_open functions.
*
 * The initializer is positional: its entries must stay in the same order as
 * the fields of struct UText (each entry is annotated with the field it sets).
 *
 * @stable ICU 3.6
 */
#define UTEXT_INITIALIZER {                                        \
                  UTEXT_MAGIC,          /* magic                */ \
                  0,                    /* flags                */ \
                  0,                    /* providerProps        */ \
                  sizeof(UText),        /* sizeOfStruct         */ \
                  0,                    /* chunkNativeLimit     */ \
                  0,                    /* extraSize            */ \
                  0,                    /* nativeIndexingLimit  */ \
                  0,                    /* chunkNativeStart     */ \
                  0,                    /* chunkOffset          */ \
                  0,                    /* chunkLength          */ \
                  NULL,                 /* chunkContents        */ \
                  NULL,                 /* pFuncs               */ \
                  NULL,                 /* pExtra               */ \
                  NULL,                 /* context              */ \
                  NULL, NULL, NULL,     /* p, q, r              */ \
                  NULL,                 /* privP                */ \
                  0, 0, 0,              /* a, b, c              */ \
                  0, 0, 0               /* privA,B,C,           */ \
                  }


U_CDECL_END


#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUTextPointer
 * "Smart pointer" class, closes a UText via utext_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);

U_NAMESPACE_END

#endif


#endif