1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2007, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: ubidi.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999jul27 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche 15 */ 16 17 #ifndef UBIDI_H 18 #define UBIDI_H 19 20 #include "unicode/utypes.h" 21 #include "unicode/uchar.h" 22 23 /** 24 *\file 25 * \brief C API: Bidi algorithm 26 * 27 * <h2>Bidi algorithm for ICU</h2> 28 * 29 * This is an implementation of the Unicode Bidirectional algorithm. 30 * The algorithm is defined in the 31 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 32 * version 13, also described in The Unicode Standard, Version 4.0 .<p> 33 * 34 * Note: Libraries that perform a bidirectional algorithm and 35 * reorder strings accordingly are sometimes called "Storage Layout Engines". 36 * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such 37 * "Storage Layout Engines". 38 * 39 * <h3>General remarks about the API:</h3> 40 * 41 * In functions with an error code parameter, 42 * the <code>pErrorCode</code> pointer must be valid 43 * and the value that it points to must not indicate a failure before 44 * the function call. Otherwise, the function returns immediately. 45 * After the function call, the value indicates success or failure.<p> 46 * 47 * The "limit" of a sequence of characters is the position just after their 48 * last character, i.e., one more than that position.<p> 49 * 50 * Some of the API functions provide access to "runs". 51 * Such a "run" is defined as a sequence of characters 52 * that are at the same embedding level 53 * after performing the Bidi algorithm.<p> 54 * 55 * @author Markus W. 
Scherer 56 * @version 1.0 57 * 58 * 59 * <h4> Sample code for the ICU Bidi API </h4> 60 * 61 * <h5>Rendering a paragraph with the ICU Bidi API</h5> 62 * 63 * This is (hypothetical) sample code that illustrates 64 * how the ICU Bidi API could be used to render a paragraph of text. 65 * Rendering code depends highly on the graphics system, 66 * therefore this sample code must make a lot of assumptions, 67 * which may or may not match any existing graphics system's properties. 68 * 69 * <p>The basic assumptions are:</p> 70 * <ul> 71 * <li>Rendering is done from left to right on a horizontal line.</li> 72 * <li>A run of single-style, unidirectional text can be rendered at once.</li> 73 * <li>Such a run of text is passed to the graphics system with 74 * characters (code units) in logical order.</li> 75 * <li>The line-breaking algorithm is very complicated 76 * and Locale-dependent - 77 * and therefore its implementation omitted from this sample code.</li> 78 * </ul> 79 * 80 * <pre> 81 * \code 82 *#include "unicode/ubidi.h" 83 * 84 *typedef enum { 85 * styleNormal=0, styleSelected=1, 86 * styleBold=2, styleItalics=4, 87 * styleSuper=8, styleSub=16 88 *} Style; 89 * 90 *typedef struct { int32_t limit; Style style; } StyleRun; 91 * 92 *int getTextWidth(const UChar *text, int32_t start, int32_t limit, 93 * const StyleRun *styleRuns, int styleRunCount); 94 * 95 * // set *pLimit and *pStyleRunLimit for a line 96 * // from text[start] and from styleRuns[styleRunStart] 97 * // using ubidi_getLogicalRun(para, ...) 
98 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit, 99 * UBiDi *para, 100 * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit, 101 * int *pLineWidth); 102 * 103 * // render runs on a line sequentially, always from left to right 104 * 105 * // prepare rendering a new line 106 * void startLine(UBiDiDirection textDirection, int lineWidth); 107 * 108 * // render a run of text and advance to the right by the run width 109 * // the text[start..limit-1] is always in logical order 110 * void renderRun(const UChar *text, int32_t start, int32_t limit, 111 * UBiDiDirection textDirection, Style style); 112 * 113 * // We could compute a cross-product 114 * // from the style runs with the directional runs 115 * // and then reorder it. 116 * // Instead, here we iterate over each run type 117 * // and render the intersections - 118 * // with shortcuts in simple (and common) cases. 119 * // renderParagraph() is the main function. 120 * 121 * // render a directional run with 122 * // (possibly) multiple style runs intersecting with it 123 * void renderDirectionalRun(const UChar *text, 124 * int32_t start, int32_t limit, 125 * UBiDiDirection direction, 126 * const StyleRun *styleRuns, int styleRunCount) { 127 * int i; 128 * 129 * // iterate over style runs 130 * if(direction==UBIDI_LTR) { 131 * int styleLimit; 132 * 133 * for(i=0; i<styleRunCount; ++i) { 134 * styleLimit=styleRuns[i].limit; 135 * if(start<styleLimit) { 136 * if(styleLimit>limit) { styleLimit=limit; } 137 * renderRun(text, start, styleLimit, 138 * direction, styleRuns[i].style); 139 * if(styleLimit==limit) { break; } 140 * start=styleLimit; 141 * } 142 * } 143 * } else { 144 * int styleStart; 145 * 146 * for(i=styleRunCount-1; i>=0; --i) { 147 * if(i>0) { 148 * styleStart=styleRuns[i-1].limit; 149 * } else { 150 * styleStart=0; 151 * } 152 * if(limit>=styleStart) { 153 * if(styleStart<start) { styleStart=start; } 154 * renderRun(text, styleStart, limit, 155 * direction,
styleRuns[i].style); 156 * if(styleStart==start) { break; } 157 * limit=styleStart; 158 * } 159 * } 160 * } 161 * } 162 * 163 * // the line object represents text[start..limit-1] 164 * void renderLine(UBiDi *line, const UChar *text, 165 * int32_t start, int32_t limit, 166 * const StyleRun *styleRuns, int styleRunCount, UErrorCode *pErrorCode) { 167 * UBiDiDirection direction=ubidi_getDirection(line); 168 * if(direction!=UBIDI_MIXED) { 169 * // unidirectional 170 * if(styleRunCount<=1) { 171 * renderRun(text, start, limit, direction, styleRuns[0].style); 172 * } else { 173 * renderDirectionalRun(text, start, limit, 174 * direction, styleRuns, styleRunCount); 175 * } 176 * } else { 177 * // mixed-directional 178 * int32_t count, i, length; 179 * UBiDiLevel level; 180 * 181 * count=ubidi_countRuns(line, pErrorCode); 182 * if(U_SUCCESS(*pErrorCode)) { 183 * if(styleRunCount<=1) { 184 * Style style=styleRuns[0].style; 185 * 186 * // iterate over directional runs 187 * for(i=0; i<count; ++i) { 188 * direction=ubidi_getVisualRun(line, i, &start, &length); 189 * renderRun(text, start, start+length, direction, style); 190 * } 191 * } else { 192 * int32_t j; 193 * 194 * // iterate over both directional and style runs 195 * for(i=0; i<count; ++i) { 196 * direction=ubidi_getVisualRun(line, i, &start, &length); 197 * renderDirectionalRun(text, start, start+length, 198 * direction, styleRuns, styleRunCount); 199 * } 200 * } 201 * } 202 * } 203 * } 204 * 205 *void renderParagraph(const UChar *text, int32_t length, 206 * UBiDiDirection textDirection, 207 * const StyleRun *styleRuns, int styleRunCount, 208 * int lineWidth, 209 * UErrorCode *pErrorCode) { 210 * UBiDi *para; 211 * 212 * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) { 213 * return; 214 * } 215 * 216 * para=ubidi_openSized(length, 0, pErrorCode); 217 * if(para==NULL) { return; } 218 * 219 * ubidi_setPara(para, text, length, 220 * textDirection ?
UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, 221 * NULL, pErrorCode); 222 * if(U_SUCCESS(*pErrorCode)) { 223 * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para); 224 * StyleRun styleRun={ length, styleNormal }; 225 * int width; 226 * 227 * if(styleRuns==NULL || styleRunCount<=0) { 228 * styleRunCount=1; 229 * styleRuns=&styleRun; 230 * } 231 * 232 * // assume styleRuns[styleRunCount-1].limit>=length 233 * 234 * width=getTextWidth(text, 0, length, styleRuns, styleRunCount); 235 * if(width<=lineWidth) { 236 * // everything fits onto one line 237 * 238 * // prepare rendering a new line from either left or right 239 * startLine(paraLevel, width); 240 * 241 * renderLine(para, text, 0, length, 242 * styleRuns, styleRunCount, pErrorCode); 243 * } else { 244 * UBiDi *line; 245 * 246 * // we need to render several lines 247 * line=ubidi_openSized(length, 0, pErrorCode); 248 * if(line!=NULL) { 249 * int32_t start=0, limit; 250 * int styleRunStart=0, styleRunLimit; 251 * 252 * for(;;) { 253 * limit=length; 254 * styleRunLimit=styleRunCount; 255 * getLineBreak(text, start, &limit, para, 256 * styleRuns, styleRunStart, &styleRunLimit, 257 * &width); 258 * ubidi_setLine(para, start, limit, line, pErrorCode); 259 * if(U_SUCCESS(*pErrorCode)) { 260 * // prepare rendering a new line 261 * // from either left or right 262 * startLine(paraLevel, width); 263 * 264 * renderLine(line, text, start, limit, 265 * styleRuns+styleRunStart, 266 * styleRunLimit-styleRunStart, pErrorCode); 267 * } 268 * if(limit==length) { break; } 269 * start=limit; 270 * styleRunStart=styleRunLimit-1; 271 * if(start>=styleRuns[styleRunStart].limit) { 272 * ++styleRunStart; 273 * } 274 * } 275 * 276 * ubidi_close(line); 277 * } 278 * } 279 * } 280 * 281 * ubidi_close(para); 282 *} 283 *\endcode 284 * </pre> 285 */ 286 287 /*DOCXX_TAG*/ 288 /*@{*/ 289 290 /** 291 * UBiDiLevel is the type of the level values in this 292 * Bidi implementation.
293 * It holds an embedding level and indicates the visual direction 294 * by its bit 0 (even/odd value).<p> 295 * 296 * It can also hold non-level values for the 297 * <code>paraLevel</code> and <code>embeddingLevels</code> 298 * arguments of <code>ubidi_setPara()</code>; there: 299 * <ul> 300 * <li>bit 7 (<code>UBIDI_LEVEL_OVERRIDE</code>) of an <code>embeddingLevels[]</code> 301 * value indicates whether the using application is 302 * specifying the level of a character to <i>override</i> whatever the 303 * Bidi implementation would resolve it to.</li> 304 * <li><code>paraLevel</code> can be set to the 305 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code> 306 * and <code>UBIDI_DEFAULT_RTL</code>.</li> 307 * </ul> 308 * 309 * @see ubidi_setPara 310 * 311 * <p>The related constants are not real, valid level values. 312 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify 313 * a default for the paragraph level for 314 * when the <code>ubidi_setPara()</code> function 315 * shall determine it but there is no 316 * strong directional character in the input.<p> 317 * 318 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even 319 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd, 320 * just like with normal LTR and RTL level values - 321 * these special values are designed that way. Also, the implementation 322 * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 323 * 324 * @see UBIDI_DEFAULT_LTR 325 * @see UBIDI_DEFAULT_RTL 326 * @see UBIDI_LEVEL_OVERRIDE 327 * @see UBIDI_MAX_EXPLICIT_LEVEL 328 * @stable ICU 2.0 329 */ 330 typedef uint8_t UBiDiLevel; 331 332 /** Paragraph level setting.<p> 333 * 334 * Constant indicating that the base direction depends on the first strong 335 * directional character in the text according to the Unicode Bidirectional 336 * Algorithm.
If no strong directional character is present, 337 * then set the paragraph level to 0 (left-to-right).<p> 338 * 339 * If this value is used in conjunction with reordering modes 340 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or 341 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 342 * is assumed to be visual LTR, and the text after reordering is required 343 * to be the corresponding logical string with appropriate contextual 344 * direction. The direction of the result string will be RTL if either 345 * the rightmost or leftmost strong character of the source text is RTL 346 * or Arabic Letter; the direction will be LTR otherwise.<p> 347 * 348 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may 349 * be added at the beginning of the result string to ensure round trip 350 * (that the result string, when reordered back to visual, will produce 351 * the original source text). 352 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT 353 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL 354 * @stable ICU 2.0 355 */ 356 #define UBIDI_DEFAULT_LTR 0xfe 357 358 /** Paragraph level setting.<p> 359 * 360 * Constant indicating that the base direction depends on the first strong 361 * directional character in the text according to the Unicode Bidirectional 362 * Algorithm. If no strong directional character is present, 363 * then set the paragraph level to 1 (right-to-left).<p> 364 * 365 * If this value is used in conjunction with reordering modes 366 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or 367 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 368 * is assumed to be visual LTR, and the text after reordering is required 369 * to be the corresponding logical string with appropriate contextual 370 * direction.
The direction of the result string will be RTL if either 371 * the rightmost or leftmost strong character of the source text is RTL 372 * or Arabic Letter, or if the text contains no strong character; 373 * the direction will be LTR otherwise.<p> 374 * 375 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may 376 * be added at the beginning of the result string to ensure round trip 377 * (that the result string, when reordered back to visual, will produce 378 * the original source text). 379 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT 380 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL 381 * @stable ICU 2.0 382 */ 383 #define UBIDI_DEFAULT_RTL 0xff 384 385 /** 386 * Maximum explicit embedding level. 387 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>). 388 * @stable ICU 2.0 389 */ 390 #define UBIDI_MAX_EXPLICIT_LEVEL 61 391 392 /** Bit flag (bit 7) for level input in the <code>embeddingLevels[]</code> argument of <code>ubidi_setPara()</code>. 393 * Overrides directional properties. 394 * @stable ICU 2.0 395 */ 396 #define UBIDI_LEVEL_OVERRIDE 0x80 397 398 /** 399 * Special value which can be returned by the mapping functions when a logical 400 * index has no corresponding visual index or vice-versa. This may happen 401 * for the logical-to-visual mapping of a Bidi control when option 402 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen 403 * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted 404 * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 405 * @see ubidi_getVisualIndex 406 * @see ubidi_getVisualMap 407 * @see ubidi_getLogicalIndex 408 * @see ubidi_getLogicalMap 409 * @stable ICU 3.6 410 */ 411 #define UBIDI_MAP_NOWHERE (-1) 412 413 /** 414 * <code>UBiDiDirection</code> values indicate the text direction. 415 * @stable ICU 2.0 416 */ 417 enum UBiDiDirection { 418 /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */ 419 UBIDI_LTR, 420 /** All right-to-left text. This is a 1 value.
@stable ICU 2.0 */ 421 UBIDI_RTL, 422 /** Mixed-directional text. This is a 2 value. @stable ICU 2.0 */ 423 UBIDI_MIXED 424 }; 425 426 /** Type name for <code>enum UBiDiDirection</code>. @stable ICU 2.0 */ 427 typedef enum UBiDiDirection UBiDiDirection; 428 429 /** 430 * Forward declaration of the <code>UBiDi</code> structure for the declaration of 431 * the API functions. Its fields are implementation-specific.<p> 432 * This structure holds information about a paragraph (or multiple paragraphs) 433 * of text with Bidi-algorithm-related details, or about one line of 434 * such a paragraph.<p> 435 * Reordering can be done on a line, or on one or more paragraphs which are 436 * then interpreted each as one single line. 437 * @stable ICU 2.0 438 */ 439 struct UBiDi; 440 441 /** Type name for the opaque <code>struct UBiDi</code>. @stable ICU 2.0 */ 442 typedef struct UBiDi UBiDi; 443 444 /** 445 * Allocate a <code>UBiDi</code> structure. 446 * Such an object is initially empty. It is assigned 447 * the Bidi properties of a piece of text containing one or more paragraphs 448 * by <code>ubidi_setPara()</code> 449 * or the Bidi properties of a line within a paragraph by 450 * <code>ubidi_setLine()</code>.<p> 451 * This object can be reused for as long as it is not deallocated 452 * by calling <code>ubidi_close()</code>.<p> 453 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate 454 * additional memory for internal structures as necessary. 455 * 456 * @return An empty <code>UBiDi</code> object. 457 * @stable ICU 2.0 458 */ 459 U_STABLE UBiDi * U_EXPORT2 460 ubidi_open(void); 461 462 /** 463 * Allocate a <code>UBiDi</code> structure with preallocated memory 464 * for internal structures.
465 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code> 466 * with no arguments, but it also preallocates memory for internal structures 467 * according to the sizings supplied by the caller.<p> 468 * Subsequent functions will not allocate any more memory, and are thus 469 * guaranteed not to fail because of lack of memory.<p> 470 * The preallocation can be limited to some of the internal memory 471 * by setting some values to 0 here. That means that if, e.g., 472 * <code>maxRunCount</code> cannot be reasonably predetermined and should not 473 * be set to <code>maxLength</code> (the only failproof value) to avoid 474 * wasting memory, then <code>maxRunCount</code> could be set to 0 here 475 * and the internal structures that are associated with it will be allocated 476 * on demand, just like with <code>ubidi_open()</code>. 477 * 478 * @param maxLength is the maximum text or line length that internal memory 479 * will be preallocated for. An attempt to associate this object with a 480 * longer text will fail, unless this value is 0, which leaves the allocation 481 * up to the implementation. 482 * 483 * @param maxRunCount is the maximum anticipated number of same-level runs 484 * that internal memory will be preallocated for. An attempt to access 485 * visual runs on an object that was not preallocated for as many runs 486 * as the text was actually resolved to will fail, 487 * unless this value is 0, which leaves the allocation up to the implementation.<br><br> 488 * The number of runs depends on the actual text and may be anywhere between 489 * 1 and <code>maxLength</code>. It is typically small. 490 * 491 * @param pErrorCode must be a valid pointer to an error code value. 492 * 493 * @return An empty <code>UBiDi</code> object with preallocated memory.
494 * @stable ICU 2.0 495 */ 496 U_STABLE UBiDi * U_EXPORT2 497 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode); 498 499 /** 500 * <code>ubidi_close()</code> must be called to free the memory 501 * associated with a <code>UBiDi</code> object.<p> 502 * 503 * <strong>Important: </strong> 504 * A parent <code>UBiDi</code> object must not be destroyed or reused if 505 * it still has children. 506 * If a <code>UBiDi</code> object has become the <i>child</i> 507 * of another one (its <i>parent</i>) by calling 508 * <code>ubidi_setLine()</code>, then the child object must 509 * be destroyed (closed) or reused (by calling 510 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>) 511 * before the parent object. 512 * 513 * @param pBiDi is a <code>UBiDi</code> object. 514 * 515 * @see ubidi_setPara 516 * @see ubidi_setLine 517 * @stable ICU 2.0 518 */ 519 U_STABLE void U_EXPORT2 520 ubidi_close(UBiDi *pBiDi); 521 522 /** 523 * Modify the operation of the Bidi algorithm such that it 524 * approximates an "inverse Bidi" algorithm. This function 525 * must be called before <code>ubidi_setPara()</code>. 526 * 527 * <p>The normal operation of the Bidi algorithm as described 528 * in the Unicode Standard Annex #9 is to take text stored in logical 529 * (keyboard, typing) order and to determine the reordering of it for visual 530 * rendering. 531 * Some legacy systems store text in visual order, and for operations 532 * with standard, Unicode-based algorithms, the text needs to be transformed 533 * to logical order. This is effectively the inverse algorithm of the 534 * described Bidi algorithm.
Note that there is no standard algorithm for 535 * this "inverse Bidi" and that the current implementation provides only an 536 * approximation of "inverse Bidi".</p> 537 * 538 * <p>With <code>isInverse</code> set to <code>TRUE</code>, 539 * this function changes the behavior of some of the subsequent functions 540 * in a way that they can be used for the inverse Bidi algorithm. 541 * Specifically, runs of text with numeric characters will be treated in a 542 * special way and may need to be surrounded with LRM characters when they are 543 * written in reordered sequence.</p> 544 * 545 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>. 546 * Since the actual input for "inverse Bidi" is visually ordered text and 547 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually 548 * the runs of the logically ordered output.</p> 549 * 550 * <p>Calling this function with argument <code>isInverse</code> set to 551 * <code>TRUE</code> is equivalent to calling 552 * <code>ubidi_setReorderingMode</code> with argument 553 * <code>reorderingMode</code> 554 * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> 555 * Calling this function with argument <code>isInverse</code> set to 556 * <code>FALSE</code> is equivalent to calling 557 * <code>ubidi_setReorderingMode</code> with argument 558 * <code>reorderingMode</code> 559 * set to <code>#UBIDI_REORDER_DEFAULT</code>.</p> 560 * 561 * @param pBiDi is a <code>UBiDi</code> object. 562 * 563 * @param isInverse specifies "forward" (<code>FALSE</code>) or "inverse" (<code>TRUE</code>) Bidi operation. 564 * 565 * @see ubidi_setPara 566 * @see ubidi_writeReordered 567 * @see ubidi_setReorderingMode 568 * @stable ICU 2.0 569 */ 570 U_STABLE void U_EXPORT2 571 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse); 572 573 /** 574 * Is this Bidi object set to perform the inverse Bidi algorithm?
575 * <p>Note: calling this function after setting the reordering mode with 576 * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the 577 * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, 578 * <code>FALSE</code> for all other values.</p> 579 * 580 * @param pBiDi is a <code>UBiDi</code> object. 581 * @return <code>TRUE</code> if the Bidi object is set to perform the inverse Bidi algorithm 582 * by handling numbers as L, <code>FALSE</code> otherwise. 583 * 584 * @see ubidi_setInverse 585 * @see ubidi_setReorderingMode 586 * @stable ICU 2.0 587 */ 588 589 U_STABLE UBool U_EXPORT2 590 ubidi_isInverse(UBiDi *pBiDi); 591 592 /** 593 * Specify whether block separators (paragraph separators, B) must be assigned level zero, 594 * so that successive paragraphs will progress from left to right. 595 * This function must be called before <code>ubidi_setPara()</code>. 596 * Paragraph separators (B) may appear in the text. Setting them to level zero 597 * means that all paragraph separators (including one possibly appearing 598 * in the last text position) are kept in the reordered text after the text 599 * that they follow in the source text. 600 * When this feature is not enabled, a paragraph separator at the last 601 * position of the text before reordering will go to the first position 602 * of the reordered text when the paragraph level is odd. 603 * 604 * @param pBiDi is a <code>UBiDi</code> object. 605 * 606 * @param orderParagraphsLTR specifies whether paragraph separators (B) must 607 * receive level 0, so that successive paragraphs progress from left to right. 608 * 609 * @see ubidi_setPara 610 * @stable ICU 3.4 611 */ 612 U_STABLE void U_EXPORT2 613 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR); 614 615 /** 616 * Is this Bidi object set to allocate level 0 to block separators so that 617 * successive paragraphs progress from left to right? 618 * 619 * @param pBiDi is a <code>UBiDi</code> object.
620 * @return TRUE if the Bidi object is set to allocate level 0 to block 621 * separators. 622 * 623 * @see ubidi_orderParagraphsLTR 624 * @stable ICU 3.4 625 */ 626 U_STABLE UBool U_EXPORT2 627 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi); 628 629 /** 630 * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi 631 * algorithm to use. 632 * 633 * @see ubidi_setReorderingMode 634 * @stable ICU 3.6 635 */ 636 typedef enum UBiDiReorderingMode { 637 /** Regular Logical to Visual Bidi algorithm according to Unicode. 638 * This is a 0 value. 639 * @stable ICU 3.6 */ 640 UBIDI_REORDER_DEFAULT = 0, 641 /** Logical to Visual algorithm which handles numbers in a way which 642 * mimics the behavior of Windows XP. 643 * @stable ICU 3.6 */ 644 UBIDI_REORDER_NUMBERS_SPECIAL, 645 /** Logical to Visual algorithm grouping numbers with adjacent R characters 646 * (reversible algorithm). 647 * @stable ICU 3.6 */ 648 UBIDI_REORDER_GROUP_NUMBERS_WITH_R, 649 /** Reorder runs only to transform a Logical LTR string to the Logical RTL 650 * string with the same display, or vice-versa.<br> 651 * If this mode is set together with option 652 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source 653 * text may be removed and other controls may be added to produce the 654 * minimum combination which has the required display. 655 * @stable ICU 3.6 */ 656 UBIDI_REORDER_RUNS_ONLY, 657 /** Visual to Logical algorithm which handles numbers like L 658 * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>). 659 * @see ubidi_setInverse 660 * @stable ICU 3.6 */ 661 UBIDI_REORDER_INVERSE_NUMBERS_AS_L, 662 /** Visual to Logical algorithm equivalent to the regular Logical to Visual 663 * algorithm. 664 * @stable ICU 3.6 */ 665 UBIDI_REORDER_INVERSE_LIKE_DIRECT, 666 /** Inverse Bidi (Visual to Logical) algorithm for the 667 * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
668 * @stable ICU 3.6 */ 669 UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, 670 /** Number of values for reordering mode. 671 * @stable ICU 3.6 */ 672 UBIDI_REORDER_COUNT 673 } UBiDiReorderingMode; 674 675 /** 676 * Modify the operation of the Bidi algorithm such that it implements some 677 * variant of the basic Bidi algorithm or approximates an "inverse Bidi" 678 * algorithm, depending on different values of the "reordering mode". 679 * This function must be called before <code>ubidi_setPara()</code>, and stays 680 * in effect until called again with a different argument. 681 * 682 * <p>The normal operation of the Bidi algorithm as described 683 * in the Unicode Standard Annex #9 is to take text stored in logical 684 * (keyboard, typing) order and to determine how to reorder it for visual 685 * rendering.</p> 686 * 687 * <p>With the reordering mode set to a value other than 688 * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of 689 * some of the subsequent functions in a way such that they implement an 690 * inverse Bidi algorithm or some other algorithm variants.</p> 691 * 692 * <p>Some legacy systems store text in visual order, and for operations 693 * with standard, Unicode-based algorithms, the text needs to be transformed 694 * into logical order. This is effectively the inverse algorithm of the 695 * described Bidi algorithm. Note that there is no standard algorithm for 696 * this "inverse Bidi", so a number of variants are implemented here.</p> 697 * 698 * <p>In other cases, it may be desirable to emulate some variant of the 699 * Logical to Visual algorithm (e.g.
one used in MS Windows), or perform a 700 * Logical to Logical transformation.</p> 701 * 702 * <ul> 703 * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>, 704 * the standard Bidi Logical to Visual algorithm is applied.</li> 705 * 706 * <li>When the reordering mode is set to 707 * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>, 708 * the algorithm used to perform Bidi transformations when calling 709 * <code>ubidi_setPara</code> should approximate the algorithm used in 710 * Microsoft Windows XP rather than strictly conform to the Unicode Bidi 711 * algorithm. 712 * <br> 713 * The differences between the basic algorithm and the algorithm addressed 714 * by this option are as follows: 715 * <ul> 716 * <li>Within text at an even embedding level, the sequence "123AB" 717 * (where AB represent R or AL letters) is transformed to "123BA" by the 718 * Unicode algorithm and to "BA123" by the Windows algorithm.</li> 719 * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just 720 * like regular numbers (EN).</li> 721 * </ul></li> 722 * 723 * <li>When the reordering mode is set to 724 * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>, 725 * numbers located between LTR text and RTL text are associated with the RTL 726 * text. For instance, an LTR paragraph with content "abc 123 DEF" (where 727 * upper case letters represent RTL characters) will be transformed to 728 * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed 729 * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc". 730 * This makes the algorithm reversible and makes it useful when round trip 731 * (from visual to logical and back to visual) must be achieved without 732 * adding LRM characters. 
However, this is a variation from the standard 733 * Unicode Bidi algorithm.<br> 734 * The source text should not contain Bidi control characters other than LRM 735 * or RLM.</li> 736 * 737 * <li>When the reordering mode is set to 738 * <code>#UBIDI_REORDER_RUNS_ONLY</code>, 739 * a "Logical to Logical" transformation must be performed: 740 * <ul> 741 * <li>If the default text level of the source text (argument <code>paraLevel</code> 742 * in <code>ubidi_setPara</code>) is even, the source text will be handled as 743 * LTR logical text and will be transformed to the RTL logical text which has 744 * the same LTR visual display.</li> 745 * <li>If the default level of the source text is odd, the source text 746 * will be handled as RTL logical text and will be transformed to the 747 * LTR logical text which has the same LTR visual display.</li> 748 * </ul> 749 * This mode may be needed when logical text which is basically Arabic or 750 * Hebrew, with possible included numbers or phrases in English, has to be 751 * displayed as if it had an even embedding level (this can happen if the 752 * displaying application treats all text as if it was basically LTR). 753 * <br> 754 * This mode may also be needed in the reverse case, when logical text which is 755 * basically English, with possible included phrases in Arabic or Hebrew, has to 756 * be displayed as if it had an odd embedding level. 757 * <br> 758 * Both cases could be handled by adding LRE or RLE at the head of the text, 759 * if the display subsystem supports these formatting controls. If it does not, 760 * the problem may be handled by transforming the source text in this mode 761 * before displaying it, so that it will be displayed properly.<br> 762 * The source text should not contain Bidi control characters other than LRM 763 * or RLM.</li> 764 * 765 * <li>When the reordering mode is set to 766 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm 767 * is applied. 
768 * Runs of text with numeric characters will be treated like LTR letters and 769 * may need to be surrounded with LRM characters when they are written in 770 * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can 771 * be used with function <code>ubidi_writeReordered</code> to this end). This 772 * mode is equivalent to calling <code>ubidi_setInverse()</code> with 773 * argument <code>isInverse</code> set to <code>TRUE</code>.</li> 774 * 775 * <li>When the reordering mode is set to 776 * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual 777 * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm. 778 * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> 779 * but is closer to the regular Bidi algorithm. 780 * <br> 781 * For example, an LTR paragraph with the content "FED 123 456 CBA" (where 782 * upper case represents RTL characters) will be transformed to 783 * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC" 784 * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> 785 * When used in conjunction with option 786 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally 787 * adds Bidi marks to the output significantly more sparingly than mode 788 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option 789 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to 790 * <code>ubidi_writeReordered</code>.</li> 791 * 792 * <li>When the reordering mode is set to 793 * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual 794 * Bidi algorithm used in Windows XP is used as an approximation of an 795 * "inverse Bidi" algorithm. 796 * <br> 797 * For example, an LTR paragraph with the content "abc FED123" (where 798 * upper case represents RTL characters) will be transformed to 799 * "abc 123DEF".</li> 800 * </ul> 801 * 802 * <p>In all the reordering modes specifying an "inverse Bidi" algorithm 803 * (i.e.
those with a name starting with <code>UBIDI_REORDER_INVERSE</code>), 804 * output runs should be retrieved using 805 * <code>ubidi_getVisualRun()</code>, and the output text with 806 * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in 807 * "inverse Bidi" modes the input is actually visually ordered text and 808 * reordered output returned by <code>ubidi_getVisualRun()</code> or 809 * <code>ubidi_writeReordered()</code> are actually runs or character string 810 * of logically ordered output.<br> 811 * For all the "inverse Bidi" modes, the source text should not contain 812 * Bidi control characters other than LRM or RLM.</p> 813 * 814 * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of 815 * <code>ubidi_writeReordered</code> has no useful meaning and should not be 816 * used in conjunction with any value of the reordering mode specifying 817 * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>. 818 * 819 * @param pBiDi is a <code>UBiDi</code> object. 820 * @param reorderingMode specifies the required variant of the Bidi algorithm. 821 * 822 * @see UBiDiReorderingMode 823 * @see ubidi_setInverse 824 * @see ubidi_setPara 825 * @see ubidi_writeReordered 826 * @stable ICU 3.6 827 */ 828 U_STABLE void U_EXPORT2 829 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode); 830 831 /** 832 * What is the requested reordering mode for a given Bidi object? 833 * 834 * @param pBiDi is a <code>UBiDi</code> object. 835 * @return the current reordering mode of the Bidi object 836 * @see ubidi_setReorderingMode 837 * @stable ICU 3.6 838 */ 839 U_STABLE UBiDiReorderingMode U_EXPORT2 840 ubidi_getReorderingMode(UBiDi *pBiDi); 841 842 /** 843 * <code>UBiDiReorderingOption</code> values indicate which options are 844 * specified to affect the Bidi algorithm. 
845 * 846 * @see ubidi_setReorderingOptions 847 * @stable ICU 3.6 848 */ 849 typedef enum UBiDiReorderingOption { 850 /** 851 * option value for <code>ubidi_setReorderingOptions</code>: 852 * disable all the options which can be set with this function 853 * @see ubidi_setReorderingOptions 854 * @stable ICU 3.6 855 */ 856 UBIDI_OPTION_DEFAULT = 0, 857 858 /** 859 * option bit for <code>ubidi_setReorderingOptions</code>: 860 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 861 * a reordering to a Logical order 862 * 863 * <p>This option must be set or reset before calling 864 * <code>ubidi_setPara</code>.</p> 865 * 866 * <p>This option is significant only with reordering modes which generate 867 * a result with Logical order, specifically:</p> 868 * <ul> 869 * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li> 870 * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li> 871 * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li> 872 * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 873 * </ul> 874 * 875 * <p>If this option is set in conjunction with reordering mode 876 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 877 * <code>ubidi_setInverse(TRUE)</code>, it implies 878 * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> 879 * in calls to function <code>ubidi_writeReordered()</code>.</p> 880 * 881 * <p>For other reordering modes, a minimum number of LRM or RLM characters 882 * will be added to the source text after reordering it so as to ensure 883 * round trip, i.e. when applying the inverse reordering mode on the 884 * resulting logical text with removal of Bidi marks 885 * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling 886 * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> 887 * in <code>ubidi_writeReordered</code>), the result will be identical to the 888 * source text in the first transformation.</p>
889 * 890 * <p>This option will be ignored if specified together with option 891 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option 892 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function 893 * <code>ubidi_writeReordered()</code> and it implies option 894 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function 895 * <code>ubidi_writeReordered()</code> if the reordering mode is 896 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 897 * 898 * @see ubidi_setReorderingMode 899 * @see ubidi_setReorderingOptions 900 * @stable ICU 3.6 901 */ 902 UBIDI_OPTION_INSERT_MARKS = 1, 903 904 /** 905 * option bit for <code>ubidi_setReorderingOptions</code>: 906 * remove Bidi control characters 907 * 908 * <p>This option must be set or reset before calling 909 * <code>ubidi_setPara</code>.</p> 910 * 911 * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 912 * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls 913 * to function <code>ubidi_writeReordered()</code> and it implies option 914 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p> 915 * 916 * @see ubidi_setReorderingMode 917 * @see ubidi_setReorderingOptions 918 * @stable ICU 3.6 919 */ 920 UBIDI_OPTION_REMOVE_CONTROLS = 2, 921 922 /** 923 * option bit for <code>ubidi_setReorderingOptions</code>: 924 * process the output as part of a stream to be continued 925 * 926 * <p>This option must be set or reset before calling 927 * <code>ubidi_setPara</code>.</p> 928 * 929 * <p>This option specifies that the caller is interested in processing a large 930 * text object in parts. 931 * The results of the successive calls are expected to be concatenated by the 932 * caller. Only the call for the last part will have this option bit off.</p> 933 * 934 * <p>When this option bit is on, <code>ubidi_setPara()</code> may process 935 * less than the full source text in order to truncate the text at a meaningful 936 * boundary.
The caller should call <code>ubidi_getProcessedLength()</code> 937 * immediately after calling <code>ubidi_setPara()</code> in order to 938 * determine how much of the source text has been processed. 939 * Source text beyond that length should be resubmitted in following calls to 940 * <code>ubidi_setPara</code>. The processed length may be less than 941 * the length of the source text if a character preceding the last character of 942 * the source text constitutes a reasonable boundary (like a block separator) 943 * for text to be continued.<br> 944 * If the last character of the source text constitutes a reasonable 945 * boundary, the whole text will be processed at once.<br> 946 * If nowhere in the source text there exists 947 * such a reasonable boundary, the processed length will be zero.<br> 948 * The caller should check for such an occurrence and do one of the following: 949 * <ul><li>submit a larger amount of text with a better chance to include 950 * a reasonable boundary.</li> 951 * <li>resubmit the same text after turning off option 952 * <code>UBIDI_OPTION_STREAMING</code>.</li></ul> 953 * In all cases, this option should be turned off before processing the last 954 * part of the text.</p> 955 * 956 * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used, 957 * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with 958 * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before 959 * calling <code>ubidi_setPara</code> so that later paragraphs may be 960 * concatenated to previous paragraphs on the right.</p> 961 * 962 * @see ubidi_setReorderingMode 963 * @see ubidi_setReorderingOptions 964 * @see ubidi_getProcessedLength 965 * @see ubidi_orderParagraphsLTR 966 * @stable ICU 3.6 967 */ 968 UBIDI_OPTION_STREAMING = 4 969 } UBiDiReorderingOption; 970 971 /** 972 * Specify which of the reordering options 973 * should be applied during Bidi transformations. 974 * 975 * @param pBiDi is a <code>UBiDi</code> object.
976 * @param reorderingOptions is a combination of zero or more of the following 977 * options: 978 * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>, 979 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>. 980 * 981 * @see ubidi_getReorderingOptions 982 * @stable ICU 3.6 983 */ 984 U_STABLE void U_EXPORT2 985 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); 986 987 /** 988 * What are the reordering options applied to a given Bidi object? 989 * 990 * @param pBiDi is a <code>UBiDi</code> object. 991 * @return the current reordering options of the Bidi object 992 * @see ubidi_setReorderingOptions 993 * @stable ICU 3.6 994 */ 995 U_STABLE uint32_t U_EXPORT2 996 ubidi_getReorderingOptions(UBiDi *pBiDi); 997 998 /** 999 * Perform the Unicode Bidi algorithm. It is defined in the 1000 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 1001 * version 13, 1002 * also described in The Unicode Standard, Version 4.0 .<p> 1003 * 1004 * This function takes a piece of plain text containing one or more paragraphs, 1005 * with or without externally specified embedding levels from <i>styled</i> 1006 * text and computes the left-right-directionality of each character.<p> 1007 * 1008 * If the entire text is all of the same directionality, then 1009 * the function may not perform all the steps described by the algorithm, 1010 * i.e., some levels may not be the same as if all steps were performed. 1011 * This is not relevant for unidirectional text.<br> 1012 * For example, in pure LTR text with numbers the numbers would get 1013 * a resolved level of 2 higher than the surrounding text according to 1014 * the algorithm. This implementation may set all resolved levels to 1015 * the same value in such a case.<p> 1016 * 1017 * The text can be composed of multiple paragraphs.
Occurrence of a block 1018 * separator in the text terminates a paragraph, and whatever comes next starts 1019 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 1020 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 1021 * in that case, the pair of characters is considered as terminating the 1022 * preceding paragraph, and a new paragraph will be started by a character 1023 * coming after the LF. 1024 * 1025 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code> 1026 * which will be set to contain the reordering information, 1027 * especially the resolved levels for all the characters in <code>text</code>. 1028 * 1029 * @param text is a pointer to the text that the Bidi algorithm will be performed on. 1030 * This pointer is stored in the UBiDi object and can be retrieved 1031 * with <code>ubidi_getText()</code>.<br> 1032 * <strong>Note:</strong> the text must be (at least) <code>length</code> long. 1033 * 1034 * @param length is the length of the text; if <code>length==-1</code> then 1035 * the text must be zero-terminated. 1036 * 1037 * @param paraLevel specifies the default level for the text; 1038 * it is typically 0 (LTR) or 1 (RTL). 1039 * If the function shall determine the paragraph level from the text, 1040 * then <code>paraLevel</code> can be set to 1041 * either <code>#UBIDI_DEFAULT_LTR</code> 1042 * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple 1043 * paragraphs, the paragraph level shall be determined separately for 1044 * each paragraph; if a paragraph does not include any strongly typed 1045 * character, then the desired default is used (0 for LTR or 1 for RTL). 1046 * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code> 1047 * is also valid, with odd levels indicating RTL. 1048 * 1049 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 1050 * ignoring characters like LRE and PDF in the text. 
1051 * A level overrides the directional property of its corresponding 1052 * (same index) character if the level has the 1053 * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br> 1054 * Except for that bit, it must be 1055 * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>, 1056 * with one exception: a level of zero may be specified for a paragraph 1057 * separator even if <code>paraLevel>0</code> when multiple paragraphs 1058 * are submitted in the same call to <code>ubidi_setPara()</code>.<br><br> 1059 * <strong>Caution: </strong>A copy of this pointer, not of the levels, 1060 * will be stored in the <code>UBiDi</code> object; 1061 * the <code>embeddingLevels</code> array must not be 1062 * deallocated before the <code>UBiDi</code> structure is destroyed or reused, 1063 * and the <code>embeddingLevels</code> 1064 * should not be modified to avoid unexpected results on subsequent Bidi operations. 1065 * However, the <code>ubidi_setPara()</code> and 1066 * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br> 1067 * After the <code>UBiDi</code> object is reused or destroyed, the caller 1068 * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br> 1069 * <strong>Note:</strong> the <code>embeddingLevels</code> array must be 1070 * at least <code>length</code> long. 1071 * 1072 * @param pErrorCode must be a valid pointer to an error code value. 1073 * @stable ICU 2.0 1074 */ 1075 U_STABLE void U_EXPORT2 1076 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1077 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1078 UErrorCode *pErrorCode); 1079 1080 /** 1081 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to 1082 * contain the reordering information, especially the resolved levels, 1083 * for all the characters in a line of text. 
This line of text is 1084 * specified by referring to a <code>UBiDi</code> object representing 1085 * this information for a piece of text containing one or more paragraphs, 1086 * and by specifying a range of indexes in this text.<p> 1087 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 1088 * 1089 * This is used after calling <code>ubidi_setPara()</code> 1090 * for a piece of text, and after line-breaking on that text. 1091 * It is not necessary if each paragraph is treated as a single line.<p> 1092 * 1093 * After line-breaking, rules (L1) and (L2) for the treatment of 1094 * trailing WS and for reordering are performed on 1095 * a <code>UBiDi</code> object that represents a line.<p> 1096 * 1097 * <strong>Important: </strong><code>pLineBiDi</code> shares data with 1098 * <code>pParaBiDi</code>. 1099 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>. 1100 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line 1101 * before the object for its parent paragraph.<p> 1102 * 1103 * The text pointer that was stored in <code>pParaBiDi</code> is also copied, 1104 * and <code>start</code> is added to it so that it points to the beginning of the 1105 * line for this object. 1106 * 1107 * @param pParaBiDi is the parent paragraph object. It must have been set 1108 * by a successful call to ubidi_setPara. 1109 * 1110 * @param start is the line's first index into the text. 1111 * 1112 * @param limit is just behind the line's last index into the text 1113 * (its last index +1).<br> 1114 * It must be <code>0<=start<limit<=</code>containing paragraph limit. 1115 * If the specified line crosses a paragraph boundary, the function 1116 * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR. 1117 * 1118 * @param pLineBiDi is the object that will now represent a line of the text. 1119 * 1120 * @param pErrorCode must be a valid pointer to an error code value. 
1121 * 1122 * @see ubidi_setPara 1123 * @see ubidi_getProcessedLength 1124 * @stable ICU 2.0 1125 */ 1126 U_STABLE void U_EXPORT2 1127 ubidi_setLine(const UBiDi *pParaBiDi, 1128 int32_t start, int32_t limit, 1129 UBiDi *pLineBiDi, 1130 UErrorCode *pErrorCode); 1131 1132 /** 1133 * Get the directionality of the text. 1134 * 1135 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1136 * 1137 * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code> 1138 * or <code>UBIDI_MIXED</code> 1139 * that indicates if the entire text 1140 * represented by this object is unidirectional, 1141 * and which direction, or if it is mixed-directional. 1142 * 1143 * @see UBiDiDirection 1144 * @stable ICU 2.0 1145 */ 1146 U_STABLE UBiDiDirection U_EXPORT2 1147 ubidi_getDirection(const UBiDi *pBiDi); 1148 1149 /** 1150 * Get the pointer to the text. 1151 * 1152 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1153 * 1154 * @return The pointer to the text that the UBiDi object was created for. 1155 * 1156 * @see ubidi_setPara 1157 * @see ubidi_setLine 1158 * @stable ICU 2.0 1159 */ 1160 U_STABLE const UChar * U_EXPORT2 1161 ubidi_getText(const UBiDi *pBiDi); 1162 1163 /** 1164 * Get the length of the text. 1165 * 1166 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1167 * 1168 * @return The length of the text that the UBiDi object was created for. 1169 * @stable ICU 2.0 1170 */ 1171 U_STABLE int32_t U_EXPORT2 1172 ubidi_getLength(const UBiDi *pBiDi); 1173 1174 /** 1175 * Get the paragraph level of the text. 1176 * 1177 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1178 * 1179 * @return The paragraph level. If there are multiple paragraphs, their 1180 * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or 1181 * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph 1182 * is returned. 
1183 * 1184 * @see UBiDiLevel 1185 * @see ubidi_getParagraph 1186 * @see ubidi_getParagraphByIndex 1187 * @stable ICU 2.0 1188 */ 1189 U_STABLE UBiDiLevel U_EXPORT2 1190 ubidi_getParaLevel(const UBiDi *pBiDi); 1191 1192 /** 1193 * Get the number of paragraphs. 1194 * 1195 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1196 * 1197 * @return The number of paragraphs. 1198 * @stable ICU 3.4 1199 */ 1200 U_STABLE int32_t U_EXPORT2 1201 ubidi_countParagraphs(UBiDi *pBiDi); 1202 1203 /** 1204 * Get a paragraph, given a position within the text. 1205 * This function returns information about a paragraph.<br> 1206 * Note: if the paragraph index is known, it is more efficient to 1207 * retrieve the paragraph information using ubidi_getParagraphByIndex().<p> 1208 * 1209 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1210 * 1211 * @param charIndex is the index of a character within the text, in the 1212 * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>. 1213 * 1214 * @param pParaStart will receive the index of the first character of the 1215 * paragraph in the text. 1216 * This pointer can be <code>NULL</code> if this 1217 * value is not necessary. 1218 * 1219 * @param pParaLimit will receive the limit of the paragraph. 1220 * The l-value that you point to here may be the 1221 * same expression (variable) as the one for 1222 * <code>charIndex</code>. 1223 * This pointer can be <code>NULL</code> if this 1224 * value is not necessary. 1225 * 1226 * @param pParaLevel will receive the level of the paragraph. 1227 * This pointer can be <code>NULL</code> if this 1228 * value is not necessary. 1229 * 1230 * @param pErrorCode must be a valid pointer to an error code value. 1231 * 1232 * @return The index of the paragraph containing the specified position. 
1233 * 1234 * @see ubidi_getProcessedLength 1235 * @stable ICU 3.4 1236 */ 1237 U_STABLE int32_t U_EXPORT2 1238 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, 1239 int32_t *pParaLimit, UBiDiLevel *pParaLevel, 1240 UErrorCode *pErrorCode); 1241 1242 /** 1243 * Get a paragraph, given the index of this paragraph. 1244 * 1245 * This function returns information about a paragraph.<p> 1246 * 1247 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1248 * 1249 * @param paraIndex is the number of the paragraph, in the 1250 * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>. 1251 * 1252 * @param pParaStart will receive the index of the first character of the 1253 * paragraph in the text. 1254 * This pointer can be <code>NULL</code> if this 1255 * value is not necessary. 1256 * 1257 * @param pParaLimit will receive the limit of the paragraph. 1258 * This pointer can be <code>NULL</code> if this 1259 * value is not necessary. 1260 * 1261 * @param pParaLevel will receive the level of the paragraph. 1262 * This pointer can be <code>NULL</code> if this 1263 * value is not necessary. 1264 * 1265 * @param pErrorCode must be a valid pointer to an error code value. 1266 * 1267 * @stable ICU 3.4 1268 */ 1269 U_STABLE void U_EXPORT2 1270 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 1271 int32_t *pParaStart, int32_t *pParaLimit, 1272 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); 1273 1274 /** 1275 * Get the level for one character. 1276 * 1277 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1278 * 1279 * @param charIndex the index of a character. It must be in the range 1280 * [0..ubidi_getProcessedLength(pBiDi)-1]. 1281 * 1282 * @return The level for the character at charIndex (0 if charIndex is not 1283 * in the valid range).
1284 * 1285 * @see UBiDiLevel 1286 * @see ubidi_getProcessedLength 1287 * @stable ICU 2.0 1288 */ 1289 U_STABLE UBiDiLevel U_EXPORT2 1290 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); 1291 1292 /** 1293 * Get an array of levels for each character.<p> 1294 * 1295 * Note that this function may allocate memory under some 1296 * circumstances, unlike <code>ubidi_getLevelAt()</code>. 1297 * 1298 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose 1299 * text length must be strictly positive. 1300 * 1301 * @param pErrorCode must be a valid pointer to an error code value. 1302 * 1303 * @return The levels array for the text, 1304 * or <code>NULL</code> if an error occurs. 1305 * 1306 * @see UBiDiLevel 1307 * @see ubidi_getProcessedLength 1308 * @stable ICU 2.0 1309 */ 1310 U_STABLE const UBiDiLevel * U_EXPORT2 1311 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); 1312 1313 /** 1314 * Get a logical run. 1315 * This function returns information about a run and is used 1316 * to retrieve runs in logical order.<p> 1317 * This is especially useful for line-breaking on a paragraph. 1318 * 1319 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1320 * 1321 * @param logicalPosition is a logical position within the source text. 1322 * 1323 * @param pLogicalLimit will receive the limit of the corresponding run. 1324 * The l-value that you point to here may be the 1325 * same expression (variable) as the one for 1326 * <code>logicalPosition</code>. 1327 * This pointer can be <code>NULL</code> if this 1328 * value is not necessary. 1329 * 1330 * @param pLevel will receive the level of the corresponding run. 1331 * This pointer can be <code>NULL</code> if this 1332 * value is not necessary. 
1333 * 1334 * @see ubidi_getProcessedLength 1335 * @stable ICU 2.0 1336 */ 1337 U_STABLE void U_EXPORT2 1338 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, 1339 int32_t *pLogicalLimit, UBiDiLevel *pLevel); 1340 1341 /** 1342 * Get the number of runs. 1343 * This function may invoke the actual reordering on the 1344 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code> 1345 * may have resolved only the levels of the text. Therefore, 1346 * <code>ubidi_countRuns()</code> may have to allocate memory, 1347 * and may fail doing so. 1348 * 1349 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1350 * 1351 * @param pErrorCode must be a valid pointer to an error code value. 1352 * 1353 * @return The number of runs. 1354 * @stable ICU 2.0 1355 */ 1356 U_STABLE int32_t U_EXPORT2 1357 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); 1358 1359 /** 1360 * Get one run's logical start, length, and directionality, 1361 * which can be 0 for LTR or 1 for RTL. 1362 * In an RTL run, the character at the logical start is 1363 * visually on the right of the displayed run. 1364 * The length is the number of characters in the run.<p> 1365 * <code>ubidi_countRuns()</code> should be called 1366 * before the runs are retrieved. 1367 * 1368 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1369 * 1370 * @param runIndex is the number of the run in visual order, in the 1371 * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>. 1372 * 1373 * @param pLogicalStart is the first logical character index in the text. 1374 * The pointer may be <code>NULL</code> if this index is not needed. 1375 * 1376 * @param pLength is the number of characters (at least one) in the run. 1377 * The pointer may be <code>NULL</code> if this is not needed. 1378 * 1379 * @return the directionality of the run, 1380 * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>, 1381 * never <code>UBIDI_MIXED</code>. 
1382 * 1383 * @see ubidi_countRuns 1384 * 1385 * Example: 1386 * <pre> 1387 * \code 1388 * int32_t i, count=ubidi_countRuns(pBiDi), 1389 * logicalStart, visualIndex=0, length; 1390 * for(i=0; i<count; ++i) { 1391 * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { 1392 * do { // LTR 1393 * show_char(text[logicalStart++], visualIndex++); 1394 * } while(--length>0); 1395 * } else { 1396 * logicalStart+=length; // logicalLimit 1397 * do { // RTL 1398 * show_char(text[--logicalStart], visualIndex++); 1399 * } while(--length>0); 1400 * } 1401 * } 1402 *\endcode 1403 * </pre> 1404 * 1405 * Note that in right-to-left runs, code like this places 1406 * modifier letters before base characters and second surrogates 1407 * before first ones. 1408 * @stable ICU 2.0 1409 */ 1410 U_STABLE UBiDiDirection U_EXPORT2 1411 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, 1412 int32_t *pLogicalStart, int32_t *pLength); 1413 1414 /** 1415 * Get the visual position from a logical text position. 1416 * If such a mapping is used many times on the same 1417 * <code>UBiDi</code> object, then calling 1418 * <code>ubidi_getLogicalMap()</code> is more efficient.<p> 1419 * 1420 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no 1421 * visual position because the corresponding text character is a Bidi control 1422 * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 1423 * <p> 1424 * When the visual output is altered by using options of 1425 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1426 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1427 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not 1428 * be correct. It is advised to use, when possible, reordering options 1429 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 
1430 * <p> 1431 * Note that in right-to-left runs, this mapping places 1432 * modifier letters before base characters and second surrogates 1433 * before first ones. 1434 * 1435 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1436 * 1437 * @param logicalIndex is the index of a character in the text. 1438 * 1439 * @param pErrorCode must be a valid pointer to an error code value. 1440 * 1441 * @return The visual position of this character. 1442 * 1443 * @see ubidi_getLogicalMap 1444 * @see ubidi_getLogicalIndex 1445 * @see ubidi_getProcessedLength 1446 * @stable ICU 2.0 1447 */ 1448 U_STABLE int32_t U_EXPORT2 1449 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); 1450 1451 /** 1452 * Get the logical text position from a visual position. 1453 * If such a mapping is used many times on the same 1454 * <code>UBiDi</code> object, then calling 1455 * <code>ubidi_getVisualMap()</code> is more efficient.<p> 1456 * 1457 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no 1458 * logical position because the corresponding text character is a Bidi mark 1459 * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 1460 * <p> 1461 * This is the inverse function to <code>ubidi_getVisualIndex()</code>. 1462 * <p> 1463 * When the visual output is altered by using options of 1464 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1465 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1466 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not 1467 * be correct. It is advised to use, when possible, reordering options 1468 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1469 * 1470 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1471 * 1472 * @param visualIndex is the visual position of a character. 
1473 * 1474 * @param pErrorCode must be a valid pointer to an error code value. 1475 * 1476 * @return The index of this character in the text. 1477 * 1478 * @see ubidi_getVisualMap 1479 * @see ubidi_getVisualIndex 1480 * @see ubidi_getResultLength 1481 * @stable ICU 2.0 1482 */ 1483 U_STABLE int32_t U_EXPORT2 1484 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); 1485 1486 /** 1487 * Get a logical-to-visual index map (array) for the characters in the UBiDi 1488 * (paragraph or line) object. 1489 * <p> 1490 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the 1491 * corresponding text characters are Bidi controls removed from the visual 1492 * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 1493 * <p> 1494 * When the visual output is altered by using options of 1495 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1496 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1497 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not 1498 * be correct. It is advised to use, when possible, reordering options 1499 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1500 * 1501 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1502 * 1503 * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code> 1504 * indexes which will reflect the reordering of the characters. 1505 * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number 1506 * of elements allocated in <code>indexMap</code> must be no less than 1507 * <code>ubidi_getResultLength()</code>. 1508 * The array does not need to be initialized.<br><br> 1509 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 1510 * 1511 * @param pErrorCode must be a valid pointer to an error code value. 
1512 * 1513 * @see ubidi_getVisualMap 1514 * @see ubidi_getVisualIndex 1515 * @see ubidi_getProcessedLength 1516 * @see ubidi_getResultLength 1517 * @stable ICU 2.0 1518 */ 1519 U_STABLE void U_EXPORT2 1520 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); 1521 1522 /** 1523 * Get a visual-to-logical index map (array) for the characters in the UBiDi 1524 * (paragraph or line) object. 1525 * <p> 1526 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the 1527 * corresponding text characters are Bidi marks inserted in the visual output 1528 * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>. 1529 * <p> 1530 * When the visual output is altered by using options of 1531 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1532 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, 1533 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 1534 * be correct. It is advised to use, when possible, reordering options 1535 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 1536 * 1537 * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 1538 * 1539 * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code> 1540 * indexes which will reflect the reordering of the characters. 1541 * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number 1542 * of elements allocated in <code>indexMap</code> must be no less than 1543 * <code>ubidi_getProcessedLength()</code>. 1544 * The array does not need to be initialized.<br><br> 1545 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. 1546 * 1547 * @param pErrorCode must be a valid pointer to an error code value. 
1548 * 1549 * @see ubidi_getLogicalMap 1550 * @see ubidi_getLogicalIndex 1551 * @see ubidi_getProcessedLength 1552 * @see ubidi_getResultLength 1553 * @stable ICU 2.0 1554 */ 1555 U_STABLE void U_EXPORT2 1556 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); 1557 1558 /** 1559 * This is a convenience function that does not use a UBiDi object. 1560 * It is intended to be used when an application has determined the levels 1561 * of objects (character sequences) and just needs to have them reordered (L2). 1562 * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a 1563 * <code>UBiDi</code> object. 1564 * 1565 * @param levels is an array with <code>length</code> levels that have been determined by 1566 * the application. 1567 * 1568 * @param length is the number of levels in the array, or, semantically, 1569 * the number of objects to be reordered. 1570 * It must be <code>length>0</code>. 1571 * 1572 * @param indexMap is a pointer to an array of <code>length</code> 1573 * indexes which will reflect the reordering of the characters. 1574 * The array does not need to be initialized.<p> 1575 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 1576 * @stable ICU 2.0 1577 */ 1578 U_STABLE void U_EXPORT2 1579 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); 1580 1581 /** 1582 * This is a convenience function that does not use a UBiDi object. 1583 * It is intended to be used when an application has determined the levels 1584 * of objects (character sequences) and just needs to have them reordered (L2). 1585 * This is equivalent to using <code>ubidi_getVisualMap()</code> on a 1586 * <code>UBiDi</code> object. 1587 * 1588 * @param levels is an array with <code>length</code> levels that have been determined by 1589 * the application. 1590 * 1591 * @param length is the number of levels in the array, or, semantically, 1592 * the number of objects to be reordered.
1593 * It must be <code>length>0</code>. 1594 * 1595 * @param indexMap is a pointer to an array of <code>length</code> 1596 * indexes which will reflect the reordering of the characters. 1597 * The array does not need to be initialized.<p> 1598 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. 1599 * @stable ICU 2.0 1600 */ 1601 U_STABLE void U_EXPORT2 1602 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); 1603 1604 /** 1605 * Invert an index map. 1606 * The index mapping of the first map is inverted and written to 1607 * the second one. 1608 * 1609 * @param srcMap is an array with <code>length</code> elements 1610 * which defines the original mapping from a source array containing 1611 * <code>length</code> elements to a destination array. 1612 * Some elements of the source array may have no mapping in the 1613 * destination array. In that case, their value will be 1614 * the special value <code>UBIDI_MAP_NOWHERE</code>. 1615 * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>. 1616 * Some elements may have a value >= <code>length</code>, if the 1617 * destination array has more elements than the source array. 1618 * There must be no duplicate indexes (two or more elements with the 1619 * same value except <code>UBIDI_MAP_NOWHERE</code>). 1620 * 1621 * @param destMap is an array with a number of elements equal to 1 + the highest 1622 * value in <code>srcMap</code>. 1623 * <code>destMap</code> will be filled with the inverse mapping. 1624 * If element with index i in <code>srcMap</code> has a value k different 1625 * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of 1626 * the source array maps to element k in the destination array. 1627 * The inverse map will have value i in its k-th element.
1628 * For all elements of the destination array which do not map to 1629 * an element in the source array, the corresponding element in the 1630 * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>. 1631 * 1632 * @param length is the number of elements in <code>srcMap</code>; <code>destMap</code> may be longer, as described above. 1633 * @see UBIDI_MAP_NOWHERE 1634 * @stable ICU 2.0 1635 */ 1636 U_STABLE void U_EXPORT2 1637 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); 1638 1639 /** option flags for ubidi_writeReordered() */ 1640 1641 /** 1642 * option bit for ubidi_writeReordered(): 1643 * keep combining characters after their base characters in RTL runs 1644 * 1645 * @see ubidi_writeReordered 1646 * @stable ICU 2.0 1647 */ 1648 #define UBIDI_KEEP_BASE_COMBINING 1 1649 1650 /** 1651 * option bit for ubidi_writeReordered(): 1652 * replace characters with the "mirrored" property in RTL runs 1653 * by their mirror-image mappings 1654 * 1655 * @see ubidi_writeReordered 1656 * @stable ICU 2.0 1657 */ 1658 #define UBIDI_DO_MIRRORING 2 1659 1660 /** 1661 * option bit for ubidi_writeReordered(): 1662 * surround the run with LRMs if necessary; 1663 * this is part of the approximate "inverse Bidi" algorithm 1664 * 1665 * <p>This option does not imply corresponding adjustment of the index 1666 * mappings.</p> 1667 * 1668 * @see ubidi_setInverse 1669 * @see ubidi_writeReordered 1670 * @stable ICU 2.0 1671 */ 1672 #define UBIDI_INSERT_LRM_FOR_NUMERIC 4 1673 1674 /** 1675 * option bit for ubidi_writeReordered(): 1676 * remove Bidi control characters 1677 * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC) 1678 * 1679 * <p>This option does not imply corresponding adjustment of the index 1680 * mappings.</p> 1681 * 1682 * @see ubidi_writeReordered 1683 * @stable ICU 2.0 1684 */ 1685 #define UBIDI_REMOVE_BIDI_CONTROLS 8 1686 1687 /** 1688 * option bit for ubidi_writeReordered(): 1689 * write the output in reverse order 1690 * 1691 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
1692 * first without this option, and then calling 1693 * <code>ubidi_writeReverse()</code> without mirroring. 1694 * Doing this in the same step is faster and avoids a temporary buffer. 1695 * An example for using this option is output to a character terminal that 1696 * is designed for RTL scripts and stores text in reverse order.</p> 1697 * 1698 * @see ubidi_writeReordered 1699 * @stable ICU 2.0 1700 */ 1701 #define UBIDI_OUTPUT_REVERSE 16 1702 1703 /** 1704 * Get the length of the source text processed by the last call to 1705 * <code>ubidi_setPara()</code>. This length may be different from the length 1706 * of the source text if option <code>#UBIDI_OPTION_STREAMING</code> 1707 * has been set. 1708 * <br> 1709 * Note that whenever the length of the text affects the execution or the 1710 * result of a function, it is the processed length which must be considered, 1711 * except for <code>ubidi_setPara</code> (which receives unprocessed source 1712 * text) and <code>ubidi_getLength</code> (which returns the original length 1713 * of the source text).<br> 1714 * In particular, the processed length is the one to consider in the following 1715 * cases: 1716 * <ul> 1717 * <li>maximum value of the <code>limit</code> argument of 1718 * <code>ubidi_setLine</code></li> 1719 * <li>maximum value of the <code>charIndex</code> argument of 1720 * <code>ubidi_getParagraph</code></li> 1721 * <li>maximum value of the <code>charIndex</code> argument of 1722 * <code>ubidi_getLevelAt</code></li> 1723 * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li> 1724 * <li>maximum value of the <code>logicalStart</code> argument of 1725 * <code>ubidi_getLogicalRun</code></li> 1726 * <li>maximum value of the <code>logicalIndex</code> argument of 1727 * <code>ubidi_getVisualIndex</code></li> 1728 * <li>number of elements filled in the <code>*indexMap</code> argument of 1729 * <code>ubidi_getLogicalMap</code></li> 1730 * <li>length of text processed by 
<code>ubidi_writeReordered</code></li> 1731 * </ul> 1732 * 1733 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1734 * 1735 * @return The length of the part of the source text processed by 1736 * the last call to <code>ubidi_setPara</code>. 1737 * @see ubidi_setPara 1738 * @see UBIDI_OPTION_STREAMING 1739 * @stable ICU 3.6 1740 */ 1741 U_STABLE int32_t U_EXPORT2 1742 ubidi_getProcessedLength(const UBiDi *pBiDi); 1743 1744 /** 1745 * Get the length of the reordered text resulting from the last call to 1746 * <code>ubidi_setPara()</code>. This length may be different from the length 1747 * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code> 1748 * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set. 1749 * <br> 1750 * This resulting length is the one to consider in the following cases: 1751 * <ul> 1752 * <li>maximum value of the <code>visualIndex</code> argument of 1753 * <code>ubidi_getLogicalIndex</code></li> 1754 * <li>number of elements of the <code>*indexMap</code> argument of 1755 * <code>ubidi_getVisualMap</code></li> 1756 * </ul> 1757 * Note that this length stays identical to the source text length if 1758 * Bidi marks are inserted or removed using option bits of 1759 * <code>ubidi_writeReordered</code>, or if option 1760 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set. 1761 * 1762 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1763 * 1764 * @return The length of the reordered text resulting from 1765 * the last call to <code>ubidi_setPara</code>. 1766 * @see ubidi_setPara 1767 * @see UBIDI_OPTION_INSERT_MARKS 1768 * @see UBIDI_OPTION_REMOVE_CONTROLS 1769 * @stable ICU 3.6 1770 */ 1771 U_STABLE int32_t U_EXPORT2 1772 ubidi_getResultLength(const UBiDi *pBiDi); 1773 1774 U_CDECL_BEGIN 1775 /** 1776 * value returned by <code>UBiDiClassCallback</code> callbacks when 1777 * there is no need to override the standard Bidi class for a given code point. 
1778 * @see UBiDiClassCallback 1779 * @stable ICU 3.6 1780 */ 1781 #define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT 1782 1783 /** 1784 * Callback type declaration for overriding default Bidi class values with 1785 * custom ones. 1786 * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code> 1787 * object by calling the <code>ubidi_setClassCallback()</code> function; 1788 * then the callback will be invoked by the UBA implementation any time the 1789 * class of a character is to be determined.</p> 1790 * 1791 * @param context is a pointer to the callback private data. 1792 * 1793 * @param c is the code point to get a Bidi class for. 1794 * 1795 * @return The directional property / Bidi class for the given code point 1796 * <code>c</code> if the default class has been overridden, or 1797 * <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value 1798 * for <code>c</code> is to be used. 1799 * @see ubidi_setClassCallback 1800 * @see ubidi_getClassCallback 1801 * @stable ICU 3.6 1802 */ 1803 typedef UCharDirection U_CALLCONV 1804 UBiDiClassCallback(const void *context, UChar32 c); 1805 1806 U_CDECL_END 1807 1808 /** 1809 * Retrieve the Bidi class for a given code point. 1810 * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a 1811 * value other than <code>#U_BIDI_CLASS_DEFAULT</code>, that value is used; 1812 * otherwise the default class determination mechanism is invoked.</p> 1813 * 1814 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1815 * 1816 * @param c is the code point whose Bidi class must be retrieved. 1817 * 1818 * @return The Bidi class for character <code>c</code> based 1819 * on the given <code>pBiDi</code> instance. 
1820 * @see UBiDiClassCallback 1821 * @stable ICU 3.6 1822 */ 1823 U_STABLE UCharDirection U_EXPORT2 1824 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c); 1825 1826 /** 1827 * Set the callback function and callback data used by the UBA 1828 * implementation for Bidi class determination. 1829 * <p>This may be useful for assigning Bidi classes to PUA characters, or 1830 * for special application needs. For instance, an application may want to 1831 * handle all spaces like L or R characters (according to the base direction) 1832 * when creating the visual ordering of logical lines which are part of a report 1833 * organized in columns: there should not be interaction between adjacent 1834 * cells.</p> 1835 * 1836 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1837 * 1838 * @param newFn is the new callback function pointer. 1839 * 1840 * @param newContext is the new callback context pointer. This can be NULL. 1841 * 1842 * @param oldFn fillin: Returns the old callback function pointer. This can be 1843 * NULL. 1844 * 1845 * @param oldContext fillin: Returns the old callback's context. This can be 1846 * NULL. 1847 * 1848 * @param pErrorCode must be a valid pointer to an error code value. 1849 * 1850 * @see ubidi_getClassCallback 1851 * @stable ICU 3.6 1852 */ 1853 U_STABLE void U_EXPORT2 1854 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 1855 const void *newContext, UBiDiClassCallback **oldFn, 1856 const void **oldContext, UErrorCode *pErrorCode); 1857 1858 /** 1859 * Get the current callback function used for Bidi class determination. 1860 * 1861 * @param pBiDi is the paragraph <code>UBiDi</code> object. 1862 * 1863 * @param fn fillin: Returns the callback function pointer. 1864 * 1865 * @param context fillin: Returns the callback's private context.
1866 * 1867 * @see ubidi_setClassCallback 1868 * @stable ICU 3.6 1869 */ 1870 U_STABLE void U_EXPORT2 1871 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context); 1872 1873 /** 1874 * Take a <code>UBiDi</code> object containing the reordering 1875 * information for a piece of text (one or more paragraphs) set by 1876 * <code>ubidi_setPara()</code> or for a line of text set by 1877 * <code>ubidi_setLine()</code> and write a reordered string to the 1878 * destination buffer. 1879 * 1880 * This function preserves the integrity of characters with multiple 1881 * code units and (optionally) combining characters. 1882 * Characters in RTL runs can be replaced by mirror-image characters 1883 * in the destination buffer. Note that "real" mirroring has 1884 * to be done in a rendering engine by glyph selection 1885 * and that for many "mirrored" characters there are no 1886 * Unicode characters as mirror-image equivalents. 1887 * There are also options to insert or remove Bidi control 1888 * characters; see the description of the <code>destSize</code> 1889 * and <code>options</code> parameters and of the option bit flags. 1890 * 1891 * @param pBiDi A pointer to a <code>UBiDi</code> object that 1892 * is set by <code>ubidi_setPara()</code> or 1893 * <code>ubidi_setLine()</code> and contains the reordering 1894 * information for the text that it was defined for, 1895 * as well as a pointer to that text.<br><br> 1896 * The text was aliased (only the pointer was stored 1897 * without copying the contents) and must not have been modified 1898 * since the <code>ubidi_setPara()</code> call. 1899 * 1900 * @param dest A pointer to where the reordered text is to be copied. 1901 * The source text and <code>dest[destSize]</code> 1902 * must not overlap. 1903 * 1904 * @param destSize The size of the <code>dest</code> buffer, 1905 * in number of UChars.
1906 * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code> 1907 * option is set, then the destination length could be 1908 * as large as 1909 * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>. 1910 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option 1911 * is set, then the destination length may be less than 1912 * <code>ubidi_getLength(pBiDi)</code>. 1913 * If none of these options is set, then the destination length 1914 * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>. 1915 * 1916 * @param options A bit set of options for the reordering that control 1917 * how the reordered text is written. 1918 * The options include mirroring the characters on a code 1919 * point basis and inserting LRM characters, which is used 1920 * especially for transforming visually stored text 1921 * to logically stored text (although this is still an 1922 * imperfect implementation of an "inverse Bidi" algorithm 1923 * because it uses the "forward Bidi" algorithm at its core). 1924 * The available options are: 1925 * <code>#UBIDI_DO_MIRRORING</code>, 1926 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>, 1927 * <code>#UBIDI_KEEP_BASE_COMBINING</code>, 1928 * <code>#UBIDI_OUTPUT_REVERSE</code>, 1929 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> 1930 * 1931 * @param pErrorCode must be a valid pointer to an error code value. 1932 * 1933 * @return The length of the output string. 1934 * 1935 * @see ubidi_getProcessedLength 1936 * @stable ICU 2.0 1937 */ 1938 U_STABLE int32_t U_EXPORT2 1939 ubidi_writeReordered(UBiDi *pBiDi, 1940 UChar *dest, int32_t destSize, 1941 uint16_t options, 1942 UErrorCode *pErrorCode); 1943 1944 /** 1945 * Reverse a Right-To-Left run of Unicode text. 1946 * 1947 * This function preserves the integrity of characters with multiple 1948 * code units and (optionally) combining characters. 1949 * Characters can be replaced by mirror-image characters 1950 * in the destination buffer.
Note that "real" mirroring has 1951 * to be done in a rendering engine by glyph selection 1952 * and that for many "mirrored" characters there are no 1953 * Unicode characters as mirror-image equivalents. 1954 * There is also an option to remove Bidi control 1955 * characters. 1956 * 1957 * This function is the implementation for reversing RTL runs as part 1958 * of <code>ubidi_writeReordered()</code>. For detailed descriptions 1959 * of the parameters, see there. 1960 * Since no Bidi controls are inserted here, the output string length 1961 * will never exceed <code>srcLength</code>. 1962 * 1963 * @see ubidi_writeReordered 1964 * 1965 * @param src A pointer to the RTL run text. 1966 * 1967 * @param srcLength The length of the RTL run. 1968 * 1969 * @param dest A pointer to where the reordered text is to be copied. 1970 * <code>src[srcLength]</code> and <code>dest[destSize]</code> 1971 * must not overlap. 1972 * 1973 * @param destSize The size of the <code>dest</code> buffer, 1974 * in number of UChars. 1975 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option 1976 * is set, then the destination length may be less than 1977 * <code>srcLength</code>. 1978 * If this option is not set, then the destination length 1979 * will be exactly <code>srcLength</code>. 1980 * 1981 * @param options A bit set of options for the reordering that control 1982 * how the reordered text is written. 1983 * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>. 1984 * 1985 * @param pErrorCode must be a valid pointer to an error code value. 1986 * 1987 * @return The length of the output string. 1988 * @stable ICU 2.0 1989 */ 1990 U_STABLE int32_t U_EXPORT2 1991 ubidi_writeReverse(const UChar *src, int32_t srcLength, 1992 UChar *dest, int32_t destSize, 1993 uint16_t options, 1994 UErrorCode *pErrorCode); 1995 1996 /*#define BIDI_SAMPLE_CODE*/ 1997 /*@}*/ 1998 1999 #endif 2000