• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2010, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  ubidi.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999jul27
14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
15 */
16 
17 #ifndef UBIDI_H
18 #define UBIDI_H
19 
20 #include "unicode/utypes.h"
21 #include "unicode/uchar.h"
22 #include "unicode/localpointer.h"
23 
24 /**
25  *\file
26  * \brief C API: Bidi algorithm
27  *
28  * <h2>Bidi algorithm for ICU</h2>
29  *
30  * This is an implementation of the Unicode Bidirectional Algorithm.
31  * The algorithm is defined in the
32  * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p>
33  *
34  * Note: Libraries that perform a bidirectional algorithm and
35  * reorder strings accordingly are sometimes called "Storage Layout Engines".
36  * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
37  * "Storage Layout Engines".
38  *
39  * <h3>General remarks about the API:</h3>
40  *
41  * In functions with an error code parameter,
42  * the <code>pErrorCode</code> pointer must be valid
43  * and the value that it points to must not indicate a failure before
44  * the function call. Otherwise, the function returns immediately.
45  * After the function call, the value indicates success or failure.<p>
46  *
47  * The &quot;limit&quot; of a sequence of characters is the position just after their
48  * last character, i.e., one more than that position.<p>
49  *
50  * Some of the API functions provide access to &quot;runs&quot;.
51  * Such a &quot;run&quot; is defined as a sequence of characters
52  * that are at the same embedding level
53  * after performing the Bidi algorithm.<p>
54  *
55  * @author Markus W. Scherer
56  * @version 1.0
57  *
58  *
59  * <h4> Sample code for the ICU Bidi API </h4>
60  *
61  * <h5>Rendering a paragraph with the ICU Bidi API</h5>
62  *
63  * This is (hypothetical) sample code that illustrates
64  * how the ICU Bidi API could be used to render a paragraph of text.
65  * Rendering code depends highly on the graphics system,
66  * therefore this sample code must make a lot of assumptions,
67  * which may or may not match any existing graphics system's properties.
68  *
69  * <p>The basic assumptions are:</p>
70  * <ul>
71  * <li>Rendering is done from left to right on a horizontal line.</li>
72  * <li>A run of single-style, unidirectional text can be rendered at once.</li>
73  * <li>Such a run of text is passed to the graphics system with
74  *     characters (code units) in logical order.</li>
75  * <li>The line-breaking algorithm is very complicated
76  *     and Locale-dependent -
77  *     and therefore its implementation is omitted from this sample code.</li>
78  * </ul>
79  *
80  * <pre>
81  * \code
82  *#include "unicode/ubidi.h"
83  *
84  *typedef enum {
85  *     styleNormal=0, styleSelected=1,
86  *     styleBold=2, styleItalics=4,
87  *     styleSuper=8, styleSub=16
88  *} Style;
89  *
90  *typedef struct { int32_t limit; Style style; } StyleRun;
91  *
92  *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
93  *                  const StyleRun *styleRuns, int styleRunCount);
94  *
95  * // set *pLimit and *pStyleRunLimit for a line
96  * // from text[start] and from styleRuns[styleRunStart]
97  * // using ubidi_getLogicalRun(para, ...)
98  *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
99  *                  UBiDi *para,
100  *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
101  *                  int *pLineWidth);
102  *
103  * // render runs on a line sequentially, always from left to right
104  *
105  * // prepare rendering a new line
106  * void startLine(UBiDiDirection textDirection, int lineWidth);
107  *
108  * // render a run of text and advance to the right by the run width
109  * // the text[start..limit-1] is always in logical order
110  * void renderRun(const UChar *text, int32_t start, int32_t limit,
111  *               UBiDiDirection textDirection, Style style);
112  *
113  * // We could compute a cross-product
114  * // from the style runs with the directional runs
115  * // and then reorder it.
116  * // Instead, here we iterate over each run type
117  * // and render the intersections -
118  * // with shortcuts in simple (and common) cases.
119  * // renderParagraph() is the main function.
120  *
121  * // render a directional run with
122  * // (possibly) multiple style runs intersecting with it
123  * void renderDirectionalRun(const UChar *text,
124  *                           int32_t start, int32_t limit,
125  *                           UBiDiDirection direction,
126  *                           const StyleRun *styleRuns, int styleRunCount) {
127  *     int i;
128  *
129  *     // iterate over style runs
130  *     if(direction==UBIDI_LTR) {
131  *         int styleLimit;
132  *
133  *         for(i=0; i<styleRunCount; ++i) {
134  *             styleLimit=styleRuns[i].limit;
135  *             if(start<styleLimit) {
136  *                 if(styleLimit>limit) { styleLimit=limit; }
137  *                 renderRun(text, start, styleLimit,
138  *                           direction, styleRuns[i].style);
139  *                 if(styleLimit==limit) { break; }
140  *                 start=styleLimit;
141  *             }
142  *         }
143  *     } else {
144  *         int styleStart;
145  *
146  *         for(i=styleRunCount-1; i>=0; --i) {
147  *             if(i>0) {
148  *                 styleStart=styleRuns[i-1].limit;
149  *             } else {
150  *                 styleStart=0;
151  *             }
152  *             if(limit>=styleStart) {
153  *                 if(styleStart<start) { styleStart=start; }
154  *                 renderRun(text, styleStart, limit,
155  *                           direction, styleRuns[i].style);
156  *                 if(styleStart==start) { break; }
157  *                 limit=styleStart;
158  *             }
159  *         }
160  *     }
161  * }
162  *
163  * // the line object represents text[start..limit-1]
164  * void renderLine(UBiDi *line, const UChar *text,
165  *                 int32_t start, int32_t limit,
166  *                 const StyleRun *styleRuns, int styleRunCount) {
167  *     UBiDiDirection direction=ubidi_getDirection(line);
168  *     if(direction!=UBIDI_MIXED) {
169  *         // unidirectional
170  *         if(styleRunCount<=1) {
171  *             renderRun(text, start, limit, direction, styleRuns[0].style);
172  *         } else {
173  *             renderDirectionalRun(text, start, limit,
174  *                                  direction, styleRuns, styleRunCount);
175  *         }
176  *     } else {
177  *         // mixed-directional
178  *         int32_t count, i, length;
179  *         UErrorCode errorCode=U_ZERO_ERROR;
180  *
181  *         count=ubidi_countRuns(line, &errorCode);
182  *         if(U_SUCCESS(errorCode)) {
183  *             if(styleRunCount<=1) {
184  *                 Style style=styleRuns[0].style;
185  *
186  *                 // iterate over directional runs
187  *                for(i=0; i<count; ++i) {
188  *                    direction=ubidi_getVisualRun(line, i, &start, &length);
189  *                     renderRun(text, start, start+length, direction, style);
190  *                }
191  *             } else {
192  *                 int32_t j;
193  *
194  *                 // iterate over both directional and style runs
195  *                 for(i=0; i<count; ++i) {
196  *                     direction=ubidi_getVisualRun(line, i, &start, &length);
197  *                     renderDirectionalRun(text, start, start+length,
198  *                                          direction, styleRuns, styleRunCount);
199  *                 }
200  *             }
201  *         }
202  *     }
203  * }
204  *
205  *void renderParagraph(const UChar *text, int32_t length,
206  *                     UBiDiDirection textDirection,
207  *                      const StyleRun *styleRuns, int styleRunCount,
208  *                      int lineWidth,
209  *                      UErrorCode *pErrorCode) {
210  *     UBiDi *para;
211  *
212  *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
213  *         return;
214  *     }
215  *
216  *     para=ubidi_openSized(length, 0, pErrorCode);
217  *     if(para==NULL) { return; }
218  *
219  *     ubidi_setPara(para, text, length,
220  *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
221  *                   NULL, pErrorCode);
222  *     if(U_SUCCESS(*pErrorCode)) {
223  *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
224  *         StyleRun styleRun={ length, styleNormal };
225  *         int width;
226  *
227  *         if(styleRuns==NULL || styleRunCount<=0) {
228  *            styleRunCount=1;
229  *             styleRuns=&styleRun;
230  *         }
231  *
232  *        // assume styleRuns[styleRunCount-1].limit>=length
233  *
234  *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
235  *         if(width<=lineWidth) {
236  *             // everything fits onto one line
237  *
238  *            // prepare rendering a new line from either left or right
239  *             startLine(paraLevel, width);
240  *
241  *             renderLine(para, text, 0, length,
242  *                        styleRuns, styleRunCount);
243  *         } else {
244  *             UBiDi *line;
245  *
246  *             // we need to render several lines
247  *             line=ubidi_openSized(length, 0, pErrorCode);
248  *             if(line!=NULL) {
249  *                 int32_t start=0, limit;
250  *                 int styleRunStart=0, styleRunLimit;
251  *
252  *                 for(;;) {
253  *                     limit=length;
254  *                     styleRunLimit=styleRunCount;
255  *                     getLineBreak(text, start, &limit, para,
256  *                                  styleRuns, styleRunStart, &styleRunLimit,
257  *                                 &width);
258  *                     ubidi_setLine(para, start, limit, line, pErrorCode);
259  *                     if(U_SUCCESS(*pErrorCode)) {
260  *                         // prepare rendering a new line
261  *                         // from either left or right
262  *                         startLine(paraLevel, width);
263  *
264  *                         renderLine(line, text, start, limit,
265  *                                    styleRuns+styleRunStart,
266  *                                    styleRunLimit-styleRunStart);
267  *                     }
268  *                     if(limit==length) { break; }
269  *                     start=limit;
270  *                     styleRunStart=styleRunLimit-1;
271  *                     if(start>=styleRuns[styleRunStart].limit) {
272  *                         ++styleRunStart;
273  *                     }
274  *                 }
275  *
276  *                 ubidi_close(line);
277  *             }
278  *        }
279  *    }
280  *
281  *     ubidi_close(para);
282  *}
283  *\endcode
284  * </pre>
285  */
286 
287 /*DOCXX_TAG*/
288 /*@{*/
289 
290 /**
291  * UBiDiLevel is the type of the level values in this
292  * Bidi implementation.
293  * It holds an embedding level and indicates the visual direction
294  * by its bit&nbsp;0 (even/odd value).<p>
295  *
296  * It can also hold non-level values for the
297  * <code>paraLevel</code> and <code>embeddingLevels</code>
298  * arguments of <code>ubidi_setPara()</code>; there:
299  * <ul>
300  * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
301  * value indicates whether the using application is
302  * specifying the level of a character to <i>override</i> whatever the
303  * Bidi implementation would resolve it to.</li>
304  * <li><code>paraLevel</code> can be set to the
305  * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
306  * and <code>UBIDI_DEFAULT_RTL</code>.</li>
307  * </ul>
308  *
309  * @see ubidi_setPara
310  *
311  * <p>The related constants are not real, valid level values.
312  * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
313  * a default for the paragraph level for
314  * when the <code>ubidi_setPara()</code> function
315  * shall determine it but there is no
316  * strongly typed character in the input.<p>
317  *
318  * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
319  * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
320  * just like with normal LTR and RTL level values -
321  * these special values are designed that way. Also, the implementation
322  * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
323  *
324  * @see UBIDI_DEFAULT_LTR
325  * @see UBIDI_DEFAULT_RTL
326  * @see UBIDI_LEVEL_OVERRIDE
327  * @see UBIDI_MAX_EXPLICIT_LEVEL
328  * @stable ICU 2.0
329  */
330 typedef uint8_t UBiDiLevel;
331 
332 /** Paragraph level setting.<p>
333  *
334  * Constant indicating that the base direction depends on the first strong
335  * directional character in the text according to the Unicode Bidirectional
336  * Algorithm. If no strong directional character is present,
337  * then set the paragraph level to 0 (left-to-right).<p>
338  *
339  * If this value is used in conjunction with reordering modes
340  * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
341  * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
342  * is assumed to be visual LTR, and the text after reordering is required
343  * to be the corresponding logical string with appropriate contextual
344  * direction. The direction of the result string will be RTL if either
345  * the rightmost or leftmost strong character of the source text is RTL
346  * or Arabic Letter; the direction will be LTR otherwise.<p>
347  *
348  * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
349  * be added at the beginning of the result string to ensure round trip
350  * (that the result string, when reordered back to visual, will produce
351  * the original source text).
352  * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
353  * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
354  * @stable ICU 2.0
355  */
356 #define UBIDI_DEFAULT_LTR 0xfe
357 
358 /** Paragraph level setting.<p>
359  *
360  * Constant indicating that the base direction depends on the first strong
361  * directional character in the text according to the Unicode Bidirectional
362  * Algorithm. If no strong directional character is present,
363  * then set the paragraph level to 1 (right-to-left).<p>
364  *
365  * If this value is used in conjunction with reordering modes
366  * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
367  * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
368  * is assumed to be visual LTR, and the text after reordering is required
369  * to be the corresponding logical string with appropriate contextual
370  * direction. The direction of the result string will be RTL if either
371  * the rightmost or leftmost strong character of the source text is RTL
372  * or Arabic Letter, or if the text contains no strong character;
373  * the direction will be LTR otherwise.<p>
374  *
375  * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
376  * be added at the beginning of the result string to ensure round trip
377  * (that the result string, when reordered back to visual, will produce
378  * the original source text).
379  * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
380  * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
381  * @stable ICU 2.0
382  */
383 #define UBIDI_DEFAULT_RTL 0xff
384 
385 /**
386  * Maximum explicit embedding level.
387  * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
388  * @stable ICU 2.0
389  */
390 #define UBIDI_MAX_EXPLICIT_LEVEL 61
391 
392 /** Bit flag for level input.
393  *  Overrides directional properties.
394  * @stable ICU 2.0
395  */
396 #define UBIDI_LEVEL_OVERRIDE 0x80
397 
398 /**
399  * Special value which can be returned by the mapping functions when a logical
400  * index has no corresponding visual index or vice-versa. This may happen
401  * for the logical-to-visual mapping of a Bidi control when option
402  * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
403  * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
404  * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
405  * @see ubidi_getVisualIndex
406  * @see ubidi_getVisualMap
407  * @see ubidi_getLogicalIndex
408  * @see ubidi_getLogicalMap
409  * @stable ICU 3.6
410  */
411 #define UBIDI_MAP_NOWHERE   (-1)
412 
413 /**
414  * <code>UBiDiDirection</code> values indicate the text direction.
415  * @stable ICU 2.0
416  */
417 enum UBiDiDirection {
418   /** Left-to-right text. This is a 0 value.
419    * <ul>
420    * <li>As return value for <code>ubidi_getDirection()</code>, it means
421    *     that the source string contains no right-to-left characters, or
422    *     that the source string is empty and the paragraph level is even.</li>
423    * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
424    *      means that the first strong character of the source string has
425    *      a left-to-right direction.</li>
426    * </ul>
427    * @stable ICU 2.0
428    */
429   UBIDI_LTR,
430   /** Right-to-left text. This is a 1 value.
431    * <ul>
432    * <li>As return value for <code>ubidi_getDirection()</code>, it means
433    *     that the source string contains no left-to-right characters, or
434    *     that the source string is empty and the paragraph level is odd.</li>
435    * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
436    *      means that the first strong character of the source string has
437    *      a right-to-left direction.</li>
438    * </ul>
439    * @stable ICU 2.0
440    */
441   UBIDI_RTL,
442   /** Mixed-directional text.
443    * <p>As return value for <code>ubidi_getDirection()</code>, it means
444    *    that the source string contains both left-to-right and
445    *    right-to-left characters.
446    * @stable ICU 2.0
447    */
448   UBIDI_MIXED,
449   /** No strongly directional text.
450    * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means
451    *    that the source string is missing or empty, or contains neither left-to-right
452    *    nor right-to-left characters.
453    * @draft ICU 4.6
454    */
455   UBIDI_NEUTRAL
456 };
457 
458 /** @stable ICU 2.0 */
459 typedef enum UBiDiDirection UBiDiDirection;
460 
461 /**
462  * Forward declaration of the <code>UBiDi</code> structure for the declaration of
463  * the API functions. Its fields are implementation-specific.<p>
464  * This structure holds information about a paragraph (or multiple paragraphs)
465  * of text with Bidi-algorithm-related details, or about one line of
466  * such a paragraph.<p>
467  * Reordering can be done on a line, or on one or more paragraphs which are
468  * then interpreted each as one single line.
469  * @stable ICU 2.0
470  */
471 struct UBiDi;
472 
473 /** @stable ICU 2.0 */
474 typedef struct UBiDi UBiDi;
475 
476 /**
477  * Allocate a <code>UBiDi</code> structure.
478  * Such an object is initially empty. It is assigned
479  * the Bidi properties of a piece of text containing one or more paragraphs
480  * by <code>ubidi_setPara()</code>
481  * or the Bidi properties of a line within a paragraph by
482  * <code>ubidi_setLine()</code>.<p>
483  * This object can be reused for as long as it is not deallocated
484  * by calling <code>ubidi_close()</code>.<p>
485  * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
486  * additional memory for internal structures as necessary.
487  *
488  * @return An empty <code>UBiDi</code> object.
489  * @stable ICU 2.0
490  */
491 U_STABLE UBiDi * U_EXPORT2
492 ubidi_open(void);
493 
494 /**
495  * Allocate a <code>UBiDi</code> structure with preallocated memory
496  * for internal structures.
497  * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
498  * with no arguments, but it also preallocates memory for internal structures
499  * according to the sizings supplied by the caller.<p>
500  * Subsequent functions will not allocate any more memory, and are thus
501  * guaranteed not to fail because of lack of memory.<p>
502  * The preallocation can be limited to some of the internal memory
503  * by setting some values to 0 here. That means that if, e.g.,
504  * <code>maxRunCount</code> cannot be reasonably predetermined and should not
505  * be set to <code>maxLength</code> (the only failproof value) to avoid
506  * wasting memory, then <code>maxRunCount</code> could be set to 0 here
507  * and the internal structures that are associated with it will be allocated
508  * on demand, just like with <code>ubidi_open()</code>.
509  *
510  * @param maxLength is the maximum text or line length that internal memory
511  *        will be preallocated for. An attempt to associate this object with a
512  *        longer text will fail, unless this value is 0, which leaves the allocation
513  *        up to the implementation.
514  *
515  * @param maxRunCount is the maximum anticipated number of same-level runs
516  *        that internal memory will be preallocated for. An attempt to access
517  *        visual runs on an object that was not preallocated for as many runs
518  *        as the text was actually resolved to will fail,
519  *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
520  *        The number of runs depends on the actual text and may be anywhere between
521  *        1 and <code>maxLength</code>. It is typically small.
522  *
523  * @param pErrorCode must be a valid pointer to an error code value.
524  *
525  * @return An empty <code>UBiDi</code> object with preallocated memory.
526  * @stable ICU 2.0
527  */
528 U_STABLE UBiDi * U_EXPORT2
529 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
530 
531 /**
532  * <code>ubidi_close()</code> must be called to free the memory
533  * associated with a UBiDi object.<p>
534  *
535  * <strong>Important: </strong>
536  * A parent <code>UBiDi</code> object must not be destroyed or reused if
537  * it still has children.
538  * If a <code>UBiDi</code> object has become the <i>child</i>
539  * of another one (its <i>parent</i>) by calling
540  * <code>ubidi_setLine()</code>, then the child object must
541  * be destroyed (closed) or reused (by calling
542  * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
543  * before the parent object.
544  *
545  * @param pBiDi is a <code>UBiDi</code> object.
546  *
547  * @see ubidi_setPara
548  * @see ubidi_setLine
549  * @stable ICU 2.0
550  */
551 U_STABLE void U_EXPORT2
552 ubidi_close(UBiDi *pBiDi);
553 
554 #if U_SHOW_CPLUSPLUS_API
555 
556 U_NAMESPACE_BEGIN
557 
558 /**
559  * \class LocalUBiDiPointer
560  * "Smart pointer" class, closes a UBiDi via ubidi_close().
561  * For most methods see the LocalPointerBase base class.
562  *
563  * @see LocalPointerBase
564  * @see LocalPointer
565  * @stable ICU 4.4
566  */
567 U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
568 
569 U_NAMESPACE_END
570 
571 #endif
572 
573 /**
574  * Modify the operation of the Bidi algorithm such that it
575  * approximates an "inverse Bidi" algorithm. This function
576  * must be called before <code>ubidi_setPara()</code>.
577  *
578  * <p>The normal operation of the Bidi algorithm as described
579  * in the Unicode Standard Annex #9 is to take text stored in logical
580  * (keyboard, typing) order and to determine the reordering of it for visual
581  * rendering.
582  * Some legacy systems store text in visual order, and for operations
583  * with standard, Unicode-based algorithms, the text needs to be transformed
584  * to logical order. This is effectively the inverse algorithm of the
585  * described Bidi algorithm. Note that there is no standard algorithm for
586  * this "inverse Bidi" and that the current implementation provides only an
587  * approximation of "inverse Bidi".</p>
588  *
589  * <p>With <code>isInverse</code> set to <code>TRUE</code>,
590  * this function changes the behavior of some of the subsequent functions
591  * in a way that they can be used for the inverse Bidi algorithm.
592  * Specifically, runs of text with numeric characters will be treated in a
593  * special way and may need to be surrounded with LRM characters when they are
594  * written in reordered sequence.</p>
595  *
596  * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
597  * Since the actual input for "inverse Bidi" is visually ordered text and
598  * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
599  * the runs of the logically ordered output.</p>
600  *
601  * <p>Calling this function with argument <code>isInverse</code> set to
602  * <code>TRUE</code> is equivalent to calling
603  * <code>ubidi_setReorderingMode</code> with argument
604  * <code>reorderingMode</code>
605  * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
606  * Calling this function with argument <code>isInverse</code> set to
607  * <code>FALSE</code> is equivalent to calling
608  * <code>ubidi_setReorderingMode</code> with argument
609  * <code>reorderingMode</code>
610  * set to <code>#UBIDI_REORDER_DEFAULT</code>.</p>
611  *
612  * @param pBiDi is a <code>UBiDi</code> object.
613  *
614  * @param isInverse specifies "forward" or "inverse" Bidi operation.
615  *
616  * @see ubidi_setPara
617  * @see ubidi_writeReordered
618  * @see ubidi_setReorderingMode
619  * @stable ICU 2.0
620  */
621 U_STABLE void U_EXPORT2
622 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
623 
624 /**
625  * Is this Bidi object set to perform the inverse Bidi algorithm?
626  * <p>Note: calling this function after setting the reordering mode with
627  * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the
628  * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
629  * <code>FALSE</code> for all other values.</p>
630  *
631  * @param pBiDi is a <code>UBiDi</code> object.
632  * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
633  * by handling numbers as L.
634  *
635  * @see ubidi_setInverse
636  * @see ubidi_setReorderingMode
637  * @stable ICU 2.0
638  */
639 
640 U_STABLE UBool U_EXPORT2
641 ubidi_isInverse(UBiDi *pBiDi);
642 
643 /**
644  * Specify whether block separators must be allocated level zero,
645  * so that successive paragraphs will progress from left to right.
646  * This function must be called before <code>ubidi_setPara()</code>.
647  * Paragraph separators (B) may appear in the text.  Setting them to level zero
648  * means that all paragraph separators (including one possibly appearing
649  * in the last text position) are kept in the reordered text after the text
650  * that they follow in the source text.
651  * When this feature is not enabled, a paragraph separator at the last
652  * position of the text before reordering will go to the first position
653  * of the reordered text when the paragraph level is odd.
654  *
655  * @param pBiDi is a <code>UBiDi</code> object.
656  *
657  * @param orderParagraphsLTR specifies whether paragraph separators (B) must
658  * receive level 0, so that successive paragraphs progress from left to right.
659  *
660  * @see ubidi_setPara
661  * @stable ICU 3.4
662  */
663 U_STABLE void U_EXPORT2
664 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
665 
666 /**
667  * Is this Bidi object set to allocate level 0 to block separators so that
668  * successive paragraphs progress from left to right?
669  *
670  * @param pBiDi is a <code>UBiDi</code> object.
671  * @return TRUE if the Bidi object is set to allocate level 0 to block
672  *         separators.
673  *
674  * @see ubidi_orderParagraphsLTR
675  * @stable ICU 3.4
676  */
677 U_STABLE UBool U_EXPORT2
678 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
679 
680 /**
681  * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
682  * algorithm to use.
683  *
684  * @see ubidi_setReorderingMode
685  * @stable ICU 3.6
686  */
687 typedef enum UBiDiReorderingMode {
688     /** Regular Logical to Visual Bidi algorithm according to Unicode.
689       * This is a 0 value.
690       * @stable ICU 3.6 */
691     UBIDI_REORDER_DEFAULT = 0,
692     /** Logical to Visual algorithm which handles numbers in a way which
693       * mimics the behavior of Windows XP.
694       * @stable ICU 3.6 */
695     UBIDI_REORDER_NUMBERS_SPECIAL,
696     /** Logical to Visual algorithm grouping numbers with adjacent R characters
697       * (reversible algorithm).
698       * @stable ICU 3.6 */
699     UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
700     /** Reorder runs only to transform a Logical LTR string to the Logical RTL
701       * string with the same display, or vice-versa.<br>
702       * If this mode is set together with option
703       * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
704       * text may be removed and other controls may be added to produce the
705       * minimum combination which has the required display.
706       * @stable ICU 3.6 */
707     UBIDI_REORDER_RUNS_ONLY,
708     /** Visual to Logical algorithm which handles numbers like L
709       * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>).
710       * @see ubidi_setInverse
711       * @stable ICU 3.6 */
712     UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
713     /** Visual to Logical algorithm equivalent to the regular Logical to Visual
714       * algorithm.
715       * @stable ICU 3.6 */
716     UBIDI_REORDER_INVERSE_LIKE_DIRECT,
717     /** Inverse Bidi (Visual to Logical) algorithm for the
718       * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
719       * @stable ICU 3.6 */
720     UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
721     /** Number of values for reordering mode.
722       * @stable ICU 3.6 */
723     UBIDI_REORDER_COUNT
724 } UBiDiReorderingMode;
725 
726 /**
727  * Modify the operation of the Bidi algorithm such that it implements some
728  * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
729  * algorithm, depending on different values of the "reordering mode".
730  * This function must be called before <code>ubidi_setPara()</code>, and stays
731  * in effect until called again with a different argument.
732  *
733  * <p>The normal operation of the Bidi algorithm as described
734  * in the Unicode Standard Annex #9 is to take text stored in logical
735  * (keyboard, typing) order and to determine how to reorder it for visual
736  * rendering.</p>
737  *
738  * <p>With the reordering mode set to a value other than
739  * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
740  * some of the subsequent functions in a way such that they implement an
741  * inverse Bidi algorithm or some other algorithm variants.</p>
742  *
743  * <p>Some legacy systems store text in visual order, and for operations
744  * with standard, Unicode-based algorithms, the text needs to be transformed
745  * into logical order. This is effectively the inverse algorithm of the
746  * described Bidi algorithm. Note that there is no standard algorithm for
747  * this "inverse Bidi", so a number of variants are implemented here.</p>
748  *
749  * <p>In other cases, it may be desirable to emulate some variant of the
750  * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
751  * Logical to Logical transformation.</p>
752  *
753  * <ul>
754  * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
755  * the standard Bidi Logical to Visual algorithm is applied.</li>
756  *
757  * <li>When the reordering mode is set to
758  * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
759  * the algorithm used to perform Bidi transformations when calling
760  * <code>ubidi_setPara</code> should approximate the algorithm used in
761  * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
762  * algorithm.
763  * <br>
764  * The differences between the basic algorithm and the algorithm addressed
765  * by this option are as follows:
766  * <ul>
767  *   <li>Within text at an even embedding level, the sequence "123AB"
768  *   (where AB represent R or AL letters) is transformed to "123BA" by the
769  *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
770  *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
771  *   like regular numbers (EN).</li>
772  * </ul></li>
773  *
774  * <li>When the reordering mode is set to
775  * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
776  * numbers located between LTR text and RTL text are associated with the RTL
777  * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
778  * upper case letters represent RTL characters) will be transformed to
779  * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
780  * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
781  * This makes the algorithm reversible and makes it useful when round trip
782  * (from visual to logical and back to visual) must be achieved without
783  * adding LRM characters. However, this is a variation from the standard
784  * Unicode Bidi algorithm.<br>
785  * The source text should not contain Bidi control characters other than LRM
786  * or RLM.</li>
787  *
788  * <li>When the reordering mode is set to
789  * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
790  * a "Logical to Logical" transformation must be performed:
791  * <ul>
792  * <li>If the default text level of the source text (argument <code>paraLevel</code>
793  * in <code>ubidi_setPara</code>) is even, the source text will be handled as
794  * LTR logical text and will be transformed to the RTL logical text which has
795  * the same LTR visual display.</li>
796  * <li>If the default level of the source text is odd, the source text
797  * will be handled as RTL logical text and will be transformed to the
798  * LTR logical text which has the same LTR visual display.</li>
799  * </ul>
800  * This mode may be needed when logical text which is basically Arabic or
801  * Hebrew, with possible included numbers or phrases in English, has to be
802  * displayed as if it had an even embedding level (this can happen if the
803  * displaying application treats all text as if it was basically LTR).
804  * <br>
805  * This mode may also be needed in the reverse case, when logical text which is
806  * basically English, with possible included phrases in Arabic or Hebrew, has to
807  * be displayed as if it had an odd embedding level.
808  * <br>
809  * Both cases could be handled by adding LRE or RLE at the head of the text,
810  * if the display subsystem supports these formatting controls. If it does not,
811  * the problem may be handled by transforming the source text in this mode
812  * before displaying it, so that it will be displayed properly.<br>
813  * The source text should not contain Bidi control characters other than LRM
814  * or RLM.</li>
815  *
816  * <li>When the reordering mode is set to
817  * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
818  * is applied.
819  * Runs of text with numeric characters will be treated like LTR letters and
820  * may need to be surrounded with LRM characters when they are written in
821  * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
822  * be used with function <code>ubidi_writeReordered</code> to this end). This
823  * mode is equivalent to calling <code>ubidi_setInverse()</code> with
824  * argument <code>isInverse</code> set to <code>TRUE</code>.</li>
825  *
826  * <li>When the reordering mode is set to
827  * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
828  * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
829  * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
830  * but is closer to the regular Bidi algorithm.
831  * <br>
832  * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
833  * upper case represents RTL characters) will be transformed to
834  * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
835  * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
836  * When used in conjunction with option
837  * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
838  * adds Bidi marks to the output significantly more sparingly than mode
839  * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
840  * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
841  * <code>ubidi_writeReordered</code>.</li>
842  *
843  * <li>When the reordering mode is set to
844  * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
845  * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
846  * <br>
847  * For example, an LTR paragraph with the content "abc FED123" (where
848  * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
849  * </ul>
850  *
851  * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
852  * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
853  * output runs should be retrieved using
854  * <code>ubidi_getVisualRun()</code>, and the output text with
855  * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
856  * "inverse Bidi" modes the input is actually visually ordered text and
857  * the reordered output returned by <code>ubidi_getVisualRun()</code> or
858  * <code>ubidi_writeReordered()</code> consists of runs or a character string
859  * in logical order.<br>
860  * For all the "inverse Bidi" modes, the source text should not contain
861  * Bidi control characters other than LRM or RLM.</p>
862  *
863  * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
864  * <code>ubidi_writeReordered</code> has no useful meaning and should not be
865  * used in conjunction with any value of the reordering mode specifying
866  * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.</p>
867  *
868  * @param pBiDi is a <code>UBiDi</code> object.
869  * @param reorderingMode specifies the required variant of the Bidi algorithm.
870  *
871  * @see UBiDiReorderingMode
872  * @see ubidi_setInverse
873  * @see ubidi_setPara
874  * @see ubidi_writeReordered
875  * @stable ICU 3.6
876  */
877 U_STABLE void U_EXPORT2
878 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
879 
880 /**
881  * What is the requested reordering mode for a given Bidi object?
882  *
883  * @param pBiDi is a <code>UBiDi</code> object.
884  * @return the current reordering mode of the Bidi object
885  * @see ubidi_setReorderingMode
886  * @stable ICU 3.6
887  */
888 U_STABLE UBiDiReorderingMode U_EXPORT2
889 ubidi_getReorderingMode(UBiDi *pBiDi);
890 
891 /**
892  * <code>UBiDiReorderingOption</code> values indicate which options are
893  * specified to affect the Bidi algorithm.
894  *
895  * @see ubidi_setReorderingOptions
896  * @stable ICU 3.6
897  */
898 typedef enum UBiDiReorderingOption {
899     /**
900      * option value for <code>ubidi_setReorderingOptions</code>:
901      * disable all the options which can be set with this function
902      * @see ubidi_setReorderingOptions
903      * @stable ICU 3.6
904      */
905     UBIDI_OPTION_DEFAULT = 0,
906 
907     /**
908      * option bit for <code>ubidi_setReorderingOptions</code>:
909      * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
910      * a reordering to a Logical order
911      *
912      * <p>This option must be set or reset before calling
913      * <code>ubidi_setPara</code>.</p>
914      *
915      * <p>This option is significant only with reordering modes which generate
916      * a result with Logical order, specifically:</p>
917      * <ul>
918      *   <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
919      *   <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
920      *   <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
921      *   <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
922      * </ul>
923      *
924      * <p>If this option is set in conjunction with reordering mode
925      * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
926      * <code>ubidi_setInverse(TRUE)</code>, it implies
927      * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
928      * in calls to function <code>ubidi_writeReordered()</code>.</p>
929      *
930      * <p>For other reordering modes, a minimum number of LRM or RLM characters
931      * will be added to the source text after reordering it so as to ensure
932      * round trip, i.e. when applying the inverse reordering mode on the
933      * resulting logical text with removal of Bidi marks
934      * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
935      * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
936      * in <code>ubidi_writeReordered</code>), the result will be identical to the
937      * source text in the first transformation.</p>
938      *
939      * <p>This option will be ignored if specified together with option
940      * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
941      * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
942      * <code>ubidi_writeReordered()</code> and it implies option
943      * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
944      * <code>ubidi_writeReordered()</code> if the reordering mode is
945      * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
946      *
947      * @see ubidi_setReorderingMode
948      * @see ubidi_setReorderingOptions
949      * @stable ICU 3.6
950      */
951     UBIDI_OPTION_INSERT_MARKS = 1,
952 
953     /**
954      * option bit for <code>ubidi_setReorderingOptions</code>:
955      * remove Bidi control characters
956      *
957      * <p>This option must be set or reset before calling
958      * <code>ubidi_setPara</code>.</p>
959      *
960      * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
961      * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
962      * to function <code>ubidi_writeReordered()</code> and it implies option
963      * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
964      *
965      * @see ubidi_setReorderingMode
966      * @see ubidi_setReorderingOptions
967      * @stable ICU 3.6
968      */
969     UBIDI_OPTION_REMOVE_CONTROLS = 2,
970 
971     /**
972      * option bit for <code>ubidi_setReorderingOptions</code>:
973      * process the output as part of a stream to be continued
974      *
975      * <p>This option must be set or reset before calling
976      * <code>ubidi_setPara</code>.</p>
977      *
978      * <p>This option specifies that the caller is interested in processing a
979      * large text object in parts.
980      * The results of the successive calls are expected to be concatenated by the
981      * caller. Only the call for the last part will have this option bit off.</p>
982      *
983      * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
984      * less than the full source text in order to truncate the text at a meaningful
985      * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
986      * immediately after calling <code>ubidi_setPara()</code> in order to
987      * determine how much of the source text has been processed.
988      * Source text beyond that length should be resubmitted in following calls to
989      * <code>ubidi_setPara</code>. The processed length may be less than
990      * the length of the source text if a character preceding the last character of
991      * the source text constitutes a reasonable boundary (like a block separator)
992      * for text to be continued.<br>
993      * If the last character of the source text constitutes a reasonable
994      * boundary, the whole text will be processed at once.<br>
995      * If no such reasonable boundary exists anywhere in the source text,
996      * the processed length will be zero.<br>
997      * The caller should check for such an occurrence and do one of the following:
998      * <ul><li>submit a larger amount of text with a better chance to include
999      *         a reasonable boundary.</li>
1000      *     <li>resubmit the same text after turning off option
1001      *         <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
1002      * In all cases, this option should be turned off before processing the last
1003      * part of the text.</p>
1004      *
1005      * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
1006      * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
1007      * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before
1008      * calling <code>ubidi_setPara</code> so that later paragraphs may be
1009      * concatenated to previous paragraphs on the right.</p>
1010      *
1011      * @see ubidi_setReorderingMode
1012      * @see ubidi_setReorderingOptions
1013      * @see ubidi_getProcessedLength
1014      * @see ubidi_orderParagraphsLTR
1015      * @stable ICU 3.6
1016      */
1017     UBIDI_OPTION_STREAMING = 4
1018 } UBiDiReorderingOption;
1019 
1020 /**
1021  * Specify which of the reordering options
1022  * should be applied during Bidi transformations.
1023  *
1024  * @param pBiDi is a <code>UBiDi</code> object.
1025  * @param reorderingOptions is a combination of zero or more of the following
1026  * options:
1027  * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
1028  * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
1029  *
1030  * @see ubidi_getReorderingOptions
1031  * @stable ICU 3.6
1032  */
1033 U_STABLE void U_EXPORT2
1034 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
1035 
1036 /**
1037  * What are the reordering options applied to a given Bidi object?
1038  *
1039  * @param pBiDi is a <code>UBiDi</code> object.
1040  * @return the current reordering options of the Bidi object
1041  * @see ubidi_setReorderingOptions
1042  * @stable ICU 3.6
1043  */
1044 U_STABLE uint32_t U_EXPORT2
1045 ubidi_getReorderingOptions(UBiDi *pBiDi);
1046 
1047 /**
1048  * Perform the Unicode Bidi algorithm. It is defined in the
1049  * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
1050  * version 13,
1051  * also described in The Unicode Standard, Version 4.0 .<p>
1052  *
1053  * This function takes a piece of plain text containing one or more paragraphs,
1054  * with or without externally specified embedding levels from <i>styled</i>
1055  * text and computes the left-right-directionality of each character.<p>
1056  *
1057  * If the entire text is all of the same directionality, then
1058  * the function may not perform all the steps described by the algorithm,
1059  * i.e., some levels may not be the same as if all steps were performed.
1060  * This is not relevant for unidirectional text.<br>
1061  * For example, in pure LTR text with numbers the numbers would get
1062  * a resolved level of 2 higher than the surrounding text according to
1063  * the algorithm. This implementation may set all resolved levels to
1064  * the same value in such a case.<p>
1065  *
1066  * The text can be composed of multiple paragraphs. Occurrence of a block
1067  * separator in the text terminates a paragraph, and whatever comes next starts
1068  * a new paragraph. The exception to this rule is when a Carriage Return (CR)
1069  * is followed by a Line Feed (LF). Both CR and LF are block separators, but
1070  * in that case, the pair of characters is considered as terminating the
1071  * preceding paragraph, and a new paragraph will be started by a character
1072  * coming after the LF.
1073  *
1074  * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
1075  *        which will be set to contain the reordering information,
1076  *        especially the resolved levels for all the characters in <code>text</code>.
1077  *
1078  * @param text is a pointer to the text that the Bidi algorithm will be performed on.
1079  *        This pointer is stored in the UBiDi object and can be retrieved
1080  *        with <code>ubidi_getText()</code>.<br>
1081  *        <strong>Note:</strong> the text must be (at least) <code>length</code> long.
1082  *
1083  * @param length is the length of the text; if <code>length==-1</code> then
1084  *        the text must be zero-terminated.
1085  *
1086  * @param paraLevel specifies the default level for the text;
1087  *        it is typically 0 (LTR) or 1 (RTL).
1088  *        If the function shall determine the paragraph level from the text,
1089  *        then <code>paraLevel</code> can be set to
1090  *        either <code>#UBIDI_DEFAULT_LTR</code>
1091  *        or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
1092  *        paragraphs, the paragraph level shall be determined separately for
1093  *        each paragraph; if a paragraph does not include any strongly typed
1094  *        character, then the desired default is used (0 for LTR or 1 for RTL).
1095  *        Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
1096  *        is also valid, with odd levels indicating RTL.
1097  *
1098  * @param embeddingLevels (in) may be used to preset the embedding and override levels,
1099  *        ignoring characters like LRE and PDF in the text.
1100  *        A level overrides the directional property of its corresponding
1101  *        (same index) character if the level has the
1102  *        <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
1103  *        Except for that bit, it must be
1104  *        <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
1105  *        with one exception: a level of zero may be specified for a paragraph
1106  *        separator even if <code>paraLevel>0</code> when multiple paragraphs
1107  *        are submitted in the same call to <code>ubidi_setPara()</code>.<br><br>
1108  *        <strong>Caution: </strong>A copy of this pointer, not of the levels,
1109  *        will be stored in the <code>UBiDi</code> object;
1110  *        the <code>embeddingLevels</code> array must not be
1111  *        deallocated before the <code>UBiDi</code> structure is destroyed or reused,
1112  *        and the <code>embeddingLevels</code>
1113  *        should not be modified to avoid unexpected results on subsequent Bidi operations.
1114  *        However, the <code>ubidi_setPara()</code> and
1115  *        <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
1116  *        After the <code>UBiDi</code> object is reused or destroyed, the caller
1117  *        must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
1118  *        <strong>Note:</strong> the <code>embeddingLevels</code> array must be
1119  *        at least <code>length</code> long.
1120  *        This pointer can be <code>NULL</code> if this
1121  *        value is not necessary.
1122  *
1123  * @param pErrorCode must be a valid pointer to an error code value.
1124  * @stable ICU 2.0
1125  */
1126 U_STABLE void U_EXPORT2
1127 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1128               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1129               UErrorCode *pErrorCode);
1130 
1131 /**
1132  * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
1133  * contain the reordering information, especially the resolved levels,
1134  * for all the characters in a line of text. This line of text is
1135  * specified by referring to a <code>UBiDi</code> object representing
1136  * this information for a piece of text containing one or more paragraphs,
1137  * and by specifying a range of indexes in this text.<p>
1138  * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
1139  *
1140  * This is used after calling <code>ubidi_setPara()</code>
1141  * for a piece of text, and after line-breaking on that text.
1142  * It is not necessary if each paragraph is treated as a single line.<p>
1143  *
1144  * After line-breaking, rules (L1) and (L2) for the treatment of
1145  * trailing WS and for reordering are performed on
1146  * a <code>UBiDi</code> object that represents a line.<p>
1147  *
1148  * <strong>Important: </strong><code>pLineBiDi</code> shares data with
1149  * <code>pParaBiDi</code>.
1150  * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
1151  * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
1152  * before the object for its parent paragraph.<p>
1153  *
1154  * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
1155  * and <code>start</code> is added to it so that it points to the beginning of the
1156  * line for this object.
1157  *
1158  * @param pParaBiDi is the parent paragraph object. It must have been set
1159  * by a successful call to ubidi_setPara.
1160  *
1161  * @param start is the line's first index into the text.
1162  *
1163  * @param limit is just behind the line's last index into the text
1164  *        (its last index +1).<br>
1165  *        It must be <code>0<=start<limit<=</code>containing paragraph limit.
1166  *        If the specified line crosses a paragraph boundary, the function
1167  *        will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
1168  *
1169  * @param pLineBiDi is the object that will now represent a line of the text.
1170  *
1171  * @param pErrorCode must be a valid pointer to an error code value.
1172  *
1173  * @see ubidi_setPara
1174  * @see ubidi_getProcessedLength
1175  * @stable ICU 2.0
1176  */
1177 U_STABLE void U_EXPORT2
1178 ubidi_setLine(const UBiDi *pParaBiDi,
1179               int32_t start, int32_t limit,
1180               UBiDi *pLineBiDi,
1181               UErrorCode *pErrorCode);
1182 
1183 /**
1184  * Get the directionality of the text.
1185  *
1186  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1187  *
1188  * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
1189  *         or <code>UBIDI_MIXED</code>
1190  *         that indicates if the entire text
1191  *         represented by this object is unidirectional,
1192  *         and which direction, or if it is mixed-directional.
1193  * Note -  The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
1194  *
1195  * @see UBiDiDirection
1196  * @stable ICU 2.0
1197  */
1198 U_STABLE UBiDiDirection U_EXPORT2
1199 ubidi_getDirection(const UBiDi *pBiDi);
1200 
1201 /**
1202  * Gets the base direction of the text provided according
1203  * to the Unicode Bidirectional Algorithm. The base direction
1204  * is derived from the first character in the string with bidirectional
1205  * character type L, R, or AL. If the first such character has type L,
1206  * <code>UBIDI_LTR</code> is returned. If the first such character has
1207  * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
1208  * not contain any character of these types, then
1209  * <code>UBIDI_NEUTRAL</code> is returned.
1210  *
1211  * This is a lightweight function for use when only the base direction
1212  * is needed and no further bidi processing of the text is needed.
1213  *
1214  * @param text is a pointer to the text whose base
1215  *             direction is needed.
1216  * Note: the text must be (at least) @c length long.
1217  *
1218  * @param length is the length of the text;
1219  *               if <code>length==-1</code> then the text
1220  *               must be zero-terminated.
1221  *
1222  * @return  <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
1223  *          <code>UBIDI_NEUTRAL</code>
1224  *
1225  * @see UBiDiDirection
1226  * @draft ICU 4.6
1227  */
1228 U_DRAFT UBiDiDirection U_EXPORT2
1229 ubidi_getBaseDirection(const UChar *text,  int32_t length );
1230 
1231 /**
1232  * Get the pointer to the text.
1233  *
1234  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1235  *
1236  * @return The pointer to the text that the UBiDi object was created for.
1237  *
1238  * @see ubidi_setPara
1239  * @see ubidi_setLine
1240  * @stable ICU 2.0
1241  */
1242 U_STABLE const UChar * U_EXPORT2
1243 ubidi_getText(const UBiDi *pBiDi);
1244 
1245 /**
1246  * Get the length of the text.
1247  *
1248  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1249  *
1250  * @return The length of the text that the UBiDi object was created for.
1251  * @stable ICU 2.0
1252  */
1253 U_STABLE int32_t U_EXPORT2
1254 ubidi_getLength(const UBiDi *pBiDi);
1255 
1256 /**
1257  * Get the paragraph level of the text.
1258  *
1259  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1260  *
1261  * @return The paragraph level. If there are multiple paragraphs, their
1262  *         level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
1263  *         UBIDI_DEFAULT_RTL.  In that case, the level of the first paragraph
1264  *         is returned.
1265  *
1266  * @see UBiDiLevel
1267  * @see ubidi_getParagraph
1268  * @see ubidi_getParagraphByIndex
1269  * @stable ICU 2.0
1270  */
1271 U_STABLE UBiDiLevel U_EXPORT2
1272 ubidi_getParaLevel(const UBiDi *pBiDi);
1273 
1274 /**
1275  * Get the number of paragraphs.
1276  *
1277  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1278  *
1279  * @return The number of paragraphs.
1280  * @stable ICU 3.4
1281  */
1282 U_STABLE int32_t U_EXPORT2
1283 ubidi_countParagraphs(UBiDi *pBiDi);
1284 
1285 /**
1286  * Get a paragraph, given a position within the text.
1287  * This function returns information about a paragraph.<br>
1288  * Note: if the paragraph index is known, it is more efficient to
1289  * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
1290  *
1291  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1292  *
1293  * @param charIndex is the index of a character within the text, in the
1294  *        range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
1295  *
1296  * @param pParaStart will receive the index of the first character of the
1297  *        paragraph in the text.
1298  *        This pointer can be <code>NULL</code> if this
1299  *        value is not necessary.
1300  *
1301  * @param pParaLimit will receive the limit of the paragraph.
1302  *        The l-value that you point to here may be the
1303  *        same expression (variable) as the one for
1304  *        <code>charIndex</code>.
1305  *        This pointer can be <code>NULL</code> if this
1306  *        value is not necessary.
1307  *
1308  * @param pParaLevel will receive the level of the paragraph.
1309  *        This pointer can be <code>NULL</code> if this
1310  *        value is not necessary.
1311  *
1312  * @param pErrorCode must be a valid pointer to an error code value.
1313  *
1314  * @return The index of the paragraph containing the specified position.
1315  *
1316  * @see ubidi_getProcessedLength
1317  * @stable ICU 3.4
1318  */
1319 U_STABLE int32_t U_EXPORT2
1320 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
1321                    int32_t *pParaLimit, UBiDiLevel *pParaLevel,
1322                    UErrorCode *pErrorCode);
1323 
1324 /**
1325  * Get a paragraph, given the index of this paragraph.
1326  *
1327  * This function returns information about a paragraph.<p>
1328  *
1329  * @param pBiDi is the paragraph <code>UBiDi</code> object.
1330  *
1331  * @param paraIndex is the number of the paragraph, in the
1332  *        range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
1333  *
1334  * @param pParaStart will receive the index of the first character of the
1335  *        paragraph in the text.
1336  *        This pointer can be <code>NULL</code> if this
1337  *        value is not necessary.
1338  *
1339  * @param pParaLimit will receive the limit of the paragraph.
1340  *        This pointer can be <code>NULL</code> if this
1341  *        value is not necessary.
1342  *
1343  * @param pParaLevel will receive the level of the paragraph.
1344  *        This pointer can be <code>NULL</code> if this
1345  *        value is not necessary.
1346  *
1347  * @param pErrorCode must be a valid pointer to an error code value.
1348  *
1349  * @stable ICU 3.4
1350  */
1351 U_STABLE void U_EXPORT2
1352 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
1353                           int32_t *pParaStart, int32_t *pParaLimit,
1354                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
1355 
1356 /**
1357  * Get the level for one character.
1358  *
1359  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1360  *
1361  * @param charIndex the index of a character. It must be in the range
1362  *         [0..ubidi_getProcessedLength(pBiDi)-1].
1363  *
1364  * @return The level for the character at charIndex (0 if charIndex is not
1365  *         in the valid range).
1366  *
1367  * @see UBiDiLevel
1368  * @see ubidi_getProcessedLength
1369  * @stable ICU 2.0
1370  */
1371 U_STABLE UBiDiLevel U_EXPORT2
1372 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
1373 
1374 /**
1375  * Get an array of levels for each character.<p>
1376  *
1377  * Note that this function may allocate memory under some
1378  * circumstances, unlike <code>ubidi_getLevelAt()</code>.
1379  *
1380  * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
1381  *        text length must be strictly positive.
1382  *
1383  * @param pErrorCode must be a valid pointer to an error code value.
1384  *
1385  * @return The levels array for the text,
1386  *         or <code>NULL</code> if an error occurs.
1387  *
1388  * @see UBiDiLevel
1389  * @see ubidi_getProcessedLength
1390  * @stable ICU 2.0
1391  */
1392 U_STABLE const UBiDiLevel * U_EXPORT2
1393 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
1394 
1395 /**
1396  * Get a logical run.
1397  * This function returns information about a run and is used
1398  * to retrieve runs in logical order.<p>
1399  * This is especially useful for line-breaking on a paragraph.
1400  *
1401  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1402  *
1403  * @param logicalPosition is a logical position within the source text.
1404  *
1405  * @param pLogicalLimit will receive the limit of the corresponding run.
1406  *        The l-value that you point to here may be the
1407  *        same expression (variable) as the one for
1408  *        <code>logicalPosition</code>.
1409  *        This pointer can be <code>NULL</code> if this
1410  *        value is not necessary.
1411  *
1412  * @param pLevel will receive the level of the corresponding run.
1413  *        This pointer can be <code>NULL</code> if this
1414  *        value is not necessary.
1415  *
1416  * @see ubidi_getProcessedLength
1417  * @stable ICU 2.0
1418  */
1419 U_STABLE void U_EXPORT2
1420 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
1421                     int32_t *pLogicalLimit, UBiDiLevel *pLevel);
1422 
1423 /**
1424  * Get the number of runs.
1425  * <code>ubidi_setPara()</code> may have resolved only the levels of
1426  * the text, in which case this function may have to perform the actual
1427  * reordering on the <code>UBiDi</code> object. Therefore,
1428  * <code>ubidi_countRuns()</code> may have to allocate memory,
1429  * and may fail doing so.
1430  *
1431  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1432  *
1433  * @param pErrorCode must be a valid pointer to an error code value.
1434  *
1435  * @return The number of runs.
1436  * @stable ICU 2.0
1437  */
1438 U_STABLE int32_t U_EXPORT2
1439 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
1440 
1441 /**
1442  * Get one run's logical start, length, and directionality,
1443  * which can be 0 for LTR or 1 for RTL.
1444  * In an RTL run, the character at the logical start is
1445  * visually on the right of the displayed run.
1446  * The length is the number of characters in the run.<p>
1447  * <code>ubidi_countRuns()</code> should be called
1448  * before the runs are retrieved.
1449  *
1450  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1451  *
1452  * @param runIndex is the number of the run in visual order, in the
1453  *        range <code>[0..ubidi_countRuns()-1]</code>.
1454  *
1455  * @param pLogicalStart is the first logical character index in the text.
1456  *        The pointer may be <code>NULL</code> if this index is not needed.
1457  *
1458  * @param pLength is the number of characters (at least one) in the run.
1459  *        The pointer may be <code>NULL</code> if this is not needed.
1460  *
1461  * @return the directionality of the run,
1462  *         <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
1463  *         never <code>UBIDI_MIXED</code>,
1464  *         never <code>UBIDI_NEUTRAL</code>.
1465  *
1466  * @see ubidi_countRuns
1467  *
1468  * Example:
1469  * <pre>
1470  * \code
1471  * UErrorCode errorCode=U_ZERO_ERROR;
1472  * int32_t i, count=ubidi_countRuns(pBiDi, &errorCode), logicalStart, visualIndex=0, length;
1473  * for(i=0; i<count; ++i) {
1474  *     if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
1475  *         do { // LTR
1476  *             show_char(text[logicalStart++], visualIndex++);
1477  *         } while(--length>0);
1478  *     } else {
1479  *         logicalStart+=length;  // logicalLimit
1480  *         do { // RTL
1481  *             show_char(text[--logicalStart], visualIndex++);
1482  *         } while(--length>0);
1483  *     }
1484  * }
1485  *\endcode
1486  * </pre>
1487  *
1488  * Note that in right-to-left runs, code like this places
1489  * second surrogates before first ones (which is generally a bad idea)
1490  * and combining characters before base characters.
1491  * <p>
1492  * Use of <code>ubidi_writeReordered()</code>, optionally with the
1493  * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
1494  * to avoid these issues.
1495  * @stable ICU 2.0
1496  */
1497 U_STABLE UBiDiDirection U_EXPORT2
1498 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
1499                    int32_t *pLogicalStart, int32_t *pLength);
1500 
1501 /**
1502  * Get the visual position from a logical text position.
1503  * If such a mapping is used many times on the same
1504  * <code>UBiDi</code> object, then calling
1505  * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
1506  *
1507  * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
1508  * visual position because the corresponding text character is a Bidi control
1509  * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1510  * <p>
1511  * When the visual output is altered by using options of
1512  * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1513  * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1514  * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
1515  * be correct. It is advised to use, when possible, reordering options
1516  * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1517  * <p>
1518  * Note that in right-to-left runs, this mapping places
1519  * second surrogates before first ones (which is generally a bad idea)
1520  * and combining characters before base characters.
1521  * Use of <code>ubidi_writeReordered()</code>, optionally with the
1522  * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered instead
1523  * of using the mapping, in order to avoid these issues.
1524  *
1525  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1526  *
1527  * @param logicalIndex is the index of a character in the text, bounded by <code>ubidi_getProcessedLength()</code>.
1528  *
1529  * @param pErrorCode must be a valid pointer to an error code value.
1530  *
1531  * @return The visual position of this character.
1532  *
1533  * @see ubidi_getLogicalMap
1534  * @see ubidi_getLogicalIndex
1535  * @see ubidi_getProcessedLength
1536  * @stable ICU 2.0
1537  */
1538 U_STABLE int32_t U_EXPORT2
1539 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
1540 
1541 /**
1542  * Get the logical text position from a visual position.
1543  * If such a mapping is used many times on the same
1544  * <code>UBiDi</code> object, then calling
1545  * <code>ubidi_getVisualMap()</code> is more efficient.<p>
1546  *
1547  * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
1548  * logical position because the corresponding text character is a Bidi mark
1549  * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
1550  * <p>
1551  * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
1552  * <p>
1553  * When the visual output is altered by using options of
1554  * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1555  * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1556  * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
1557  * be correct. It is advised to use, when possible, reordering options
1558  * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1559  *
1560  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1561  *
1562  * @param visualIndex is the visual position of a character, bounded by <code>ubidi_getResultLength()</code>.
1563  *
1564  * @param pErrorCode must be a valid pointer to an error code value.
1565  *
1566  * @return The index of this character in the text.
1567  *
1568  * @see ubidi_getVisualMap
1569  * @see ubidi_getVisualIndex
1570  * @see ubidi_getResultLength
1571  * @stable ICU 2.0
1572  */
1573 U_STABLE int32_t U_EXPORT2
1574 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
1575 
1576 /**
1577  * Get a logical-to-visual index map (array) for the characters in the UBiDi
1578  * (paragraph or line) object.
1579  * <p>
1580  * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
1581  * corresponding text characters are Bidi controls removed from the visual
1582  * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1583  * <p>
1584  * When the visual output is altered by using options of
1585  * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1586  * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1587  * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
1588  * be correct. It is advised to use, when possible, reordering options
1589  * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1590  * <p>
1591  * Note that in right-to-left runs, this mapping places
1592  * second surrogates before first ones (which is generally a bad idea)
1593  * and combining characters before base characters.
1594  * Use of <code>ubidi_writeReordered()</code>, optionally with the
1595  * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered instead
1596  * of using the mapping, in order to avoid these issues.
1597  *
1598  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1599  *
1600  * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
1601  *        indexes which will reflect the reordering of the characters.
1602  *        If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
1603  *        of elements allocated in <code>indexMap</code> must be no less than
1604  *        <code>ubidi_getResultLength()</code>.
1605  *        The array does not need to be initialized.<br><br>
1606  *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1607  *
1608  * @param pErrorCode must be a valid pointer to an error code value.
1609  *
1610  * @see ubidi_getVisualMap
1611  * @see ubidi_getVisualIndex
1612  * @see ubidi_getProcessedLength
1613  * @see ubidi_getResultLength
1614  * @stable ICU 2.0
1615  */
1616 U_STABLE void U_EXPORT2
1617 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1618 
1619 /**
1620  * Get a visual-to-logical index map (array) for the characters in the UBiDi
1621  * (paragraph or line) object.
1622  * <p>
1623  * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
1624  * corresponding text characters are Bidi marks inserted in the visual output
1625  * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
1626  * <p>
1627  * When the visual output is altered by using options of
1628  * <code>ubidi_writeReordered()</code> such as <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1629  * <code>#UBIDI_KEEP_BASE_COMBINING</code>, <code>#UBIDI_OUTPUT_REVERSE</code>,
1630  * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
1631  * be correct. It is advised to use, when possible, reordering options
1632  * such as <code>#UBIDI_OPTION_INSERT_MARKS</code> and <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1633  *
1634  * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1635  *
1636  * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
1637  *        indexes which will reflect the reordering of the characters.
1638  *        If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
1639  *        of elements allocated in <code>indexMap</code> must be no less than
1640  *        <code>ubidi_getProcessedLength()</code>.
1641  *        The array does not need to be initialized.<br><br>
1642  *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1643  *
1644  * @param pErrorCode must be a valid pointer to an error code value.
1645  *
1646  * @see ubidi_getLogicalMap
1647  * @see ubidi_getLogicalIndex
1648  * @see ubidi_getProcessedLength
1649  * @see ubidi_getResultLength
1650  * @stable ICU 2.0
1651  */
1652 U_STABLE void U_EXPORT2
1653 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1654 
1655 /**
1656  * This is a convenience function that does not use a UBiDi object.
1657  * It is intended to be used when an application has determined the levels
1658  * of objects (character sequences) and just needs to have them reordered (L2).
1659  * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
1660  * <code>UBiDi</code> object.
1661  *
1662  * @param levels is an array with <code>length</code> levels that have been determined by
1663  *        the application.
1664  *
1665  * @param length is the number of levels in the array, or, semantically,
1666  *        the number of objects to be reordered.
1667  *        It must be <code>length>0</code>.
1668  *
1669  * @param indexMap is a pointer to an array of <code>length</code>
1670  *        indexes which will reflect the reordering of the characters.
1671  *        The array does not need to be initialized.<p>
1672  *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1673  * @stable ICU 2.0
1674  */
1675 U_STABLE void U_EXPORT2
1676 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1677 
1678 /**
1679  * This is a convenience function that does not use a UBiDi object.
1680  * It is intended to be used when an application has determined the levels
1681  * of objects (character sequences) and just needs to have them reordered (L2).
1682  * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
1683  * <code>UBiDi</code> object.
1684  *
1685  * @param levels is an array with <code>length</code> levels that have been determined by
1686  *        the application.
1687  *
1688  * @param length is the number of levels in the array, or, semantically,
1689  *        the number of objects to be reordered.
1690  *        It must be <code>length>0</code>.
1691  *
1692  * @param indexMap is a pointer to an array of <code>length</code>
1693  *        indexes which will reflect the reordering of the characters.
1694  *        The array does not need to be initialized.<p>
1695  *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1696  * @stable ICU 2.0
1697  */
1698 U_STABLE void U_EXPORT2
1699 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1700 
1701 /**
1702  * Invert an index map.
1703  * The index mapping of the first map is inverted and written to
1704  * the second one.
1705  *
1706  * @param srcMap is an array with <code>length</code> elements
1707  *        which defines the original mapping from a source array containing
1708  *        <code>length</code> elements to a destination array.
1709  *        Some elements of the source array may have no mapping in the
1710  *        destination array. In that case, their value will be
1711  *        the special value <code>UBIDI_MAP_NOWHERE</code>.
1712  *        All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
1713  *        Some elements may have a value >= <code>length</code>, if the
1714  *        destination array has more elements than the source array.
1715  *        There must be no duplicate indexes (two or more elements with the
1716  *        same value except <code>UBIDI_MAP_NOWHERE</code>).
1717  *
1718  * @param destMap is an array with a number of elements equal to 1 + the highest
1719  *        value in <code>srcMap</code>.
1720  *        <code>destMap</code> will be filled with the inverse mapping.
1721  *        If element with index i in <code>srcMap</code> has a value k different
1722  *        from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
1723  *        the source array maps to element k in the destination array.
1724  *        The inverse map will have value i in its k-th element.
1725  *        For all elements of the destination array which do not map to
1726  *        an element in the source array, the corresponding element in the
1727  *        inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
1728  *
1729  * @param length is the number of elements in <code>srcMap</code>.
1730  * @see UBIDI_MAP_NOWHERE
1731  * @stable ICU 2.0
1732  */
1733 U_STABLE void U_EXPORT2
1734 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
1735 
1736 /** Option flags for ubidi_writeReordered(); each flag is a distinct bit, so flags can be OR-ed together. */
1737 
1738 /**
1739  * option bit for ubidi_writeReordered():
1740  * keep combining characters after their base characters in RTL runs
1741  *
1742  * @see ubidi_writeReordered
1743  * @stable ICU 2.0
1744  */
1745 #define UBIDI_KEEP_BASE_COMBINING       1
1746 
1747 /**
1748  * option bit for ubidi_writeReordered():
1749  * replace characters with the "mirrored" property in RTL runs
1750  * by their mirror-image mappings
1751  *
1752  * @see ubidi_writeReordered
1753  * @stable ICU 2.0
1754  */
1755 #define UBIDI_DO_MIRRORING              2
1756 
1757 /**
1758  * option bit for ubidi_writeReordered():
1759  * surround the run with LRMs if necessary;
1760  * this is part of the approximate "inverse Bidi" algorithm
1761  *
1762  * <p>This option does not imply corresponding adjustment of the index
1763  * mappings.</p>
1764  *
1765  * @see ubidi_setInverse
1766  * @see ubidi_writeReordered
1767  * @stable ICU 2.0
1768  */
1769 #define UBIDI_INSERT_LRM_FOR_NUMERIC    4
1770 
1771 /**
1772  * option bit for ubidi_writeReordered():
1773  * remove Bidi control characters
1774  * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
1775  *
1776  * <p>This option does not imply corresponding adjustment of the index
1777  * mappings.</p>
1778  *
1779  * @see ubidi_writeReordered
1780  * @stable ICU 2.0
1781  */
1782 #define UBIDI_REMOVE_BIDI_CONTROLS      8
1783 
1784 /**
1785  * option bit for ubidi_writeReordered():
1786  * write the output in reverse order
1787  *
1788  * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
1789  * first without this option, and then calling
1790  * <code>ubidi_writeReverse()</code> without mirroring.
1791  * Doing this in the same step is faster and avoids a temporary buffer.
1792  * An example for using this option is output to a character terminal that
1793  * is designed for RTL scripts and stores text in reverse order.</p>
1794  *
1795  * @see ubidi_writeReordered
1796  * @stable ICU 2.0
1797  */
1798 #define UBIDI_OUTPUT_REVERSE            16
1799 
1800 /**
1801  * Get the length of the source text processed by the last call to
1802  * <code>ubidi_setPara()</code>. This length may be different from the length
1803  * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
1804  * has been set.
1805  * <br>
1806  * Note that whenever the length of the text affects the execution or the
1807  * result of a function, it is the processed length which must be considered,
1808  * except for <code>ubidi_setPara</code> (which receives unprocessed source
1809  * text) and <code>ubidi_getLength</code> (which returns the original length
1810  * of the source text).<br>
1811  * In particular, the processed length is the one to consider in the following
1812  * cases:
1813  * <ul>
1814  * <li>maximum value of the <code>limit</code> argument of
1815  * <code>ubidi_setLine</code></li>
1816  * <li>maximum value of the <code>charIndex</code> argument of
1817  * <code>ubidi_getParagraph</code></li>
1818  * <li>maximum value of the <code>charIndex</code> argument of
1819  * <code>ubidi_getLevelAt</code></li>
1820  * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
1821  * <li>maximum value of the <code>logicalPosition</code> argument of
1822  * <code>ubidi_getLogicalRun</code></li>
1823  * <li>maximum value of the <code>logicalIndex</code> argument of
1824  * <code>ubidi_getVisualIndex</code></li>
1825  * <li>number of elements filled in the <code>*indexMap</code> argument of
1826  * <code>ubidi_getLogicalMap</code></li>
1827  * <li>length of text processed by <code>ubidi_writeReordered</code></li>
1828  * </ul>
1829  *
1830  * @param pBiDi is the paragraph <code>UBiDi</code> object.
1831  *
1832  * @return The length of the part of the source text processed by
1833  *         the last call to <code>ubidi_setPara</code>.
1834  * @see ubidi_setPara
1835  * @see UBIDI_OPTION_STREAMING
1836  * @stable ICU 3.6
1837  */
1838 U_STABLE int32_t U_EXPORT2
1839 ubidi_getProcessedLength(const UBiDi *pBiDi);
1840 
1841 /**
1842  * Get the length of the reordered text resulting from the last call to
1843  * <code>ubidi_setPara()</code>. This length may be different from the length
1844  * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
1845  * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
1846  * <br>
1847  * This resulting length is the one to consider in the following cases:
1848  * <ul>
1849  * <li>maximum value of the <code>visualIndex</code> argument of
1850  * <code>ubidi_getLogicalIndex</code></li>
1851  * <li>number of elements of the <code>*indexMap</code> argument of
1852  * <code>ubidi_getVisualMap</code></li>
1853  * </ul>
1854  * Note that this length stays identical to the source text length if
1855  * Bidi marks are inserted or Bidi controls are removed using option bits
1856  * of <code>ubidi_writeReordered</code>, or if option
1857  * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
1858  *
1859  * @param pBiDi is the paragraph <code>UBiDi</code> object.
1860  *
1861  * @return The length of the reordered text resulting from
1862  *         the last call to <code>ubidi_setPara</code>.
1863  * @see ubidi_setPara
1864  * @see UBIDI_OPTION_INSERT_MARKS
1865  * @see UBIDI_OPTION_REMOVE_CONTROLS
1866  * @stable ICU 3.6
1867  */
1868 U_STABLE int32_t U_EXPORT2
1869 ubidi_getResultLength(const UBiDi *pBiDi);
1870 
1871 U_CDECL_BEGIN
1872 /**
1873  * Value returned by <code>UBiDiClassCallback</code> callbacks when
1874  * there is no need to override the standard Bidi class for a given code point.
1875  * @see UBiDiClassCallback
1876  * @stable ICU 3.6
1877  */
1878 #define U_BIDI_CLASS_DEFAULT  U_CHAR_DIRECTION_COUNT
1879 
1880 /**
1881  * Callback function type (used as <code>UBiDiClassCallback *</code>) for
1882  * overriding default Bidi class values with custom ones.
1883  * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
1884  * object by calling the <code>ubidi_setClassCallback()</code> function;
1885  * then the callback will be invoked by the UBA implementation any time the
1886  * class of a character is to be determined.</p>
1887  *
1888  * @param context is a pointer to the callback private data.
1889  *
1890  * @param c       is the code point to get a Bidi class for.
1891  *
1892  * @return The directional property / Bidi class for the given code point
1893  *         <code>c</code> if the default class has been overridden, or
1894  *         <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value
1895  *         for <code>c</code> is to be used.
1896  * @see ubidi_setClassCallback
1897  * @see ubidi_getClassCallback
1898  * @stable ICU 3.6
1899  */
1900 typedef UCharDirection U_CALLCONV
1901 UBiDiClassCallback(const void *context, UChar32 c);
1902 
1903 U_CDECL_END
1904 
1905 /**
1906  * Retrieve the Bidi class for a given code point.
1907  * <p>If a <code>#UBiDiClassCallback</code> callback has been set with
1908  * <code>ubidi_setClassCallback()</code> and returns a value other than <code>#U_BIDI_CLASS_DEFAULT</code>,
1909  * that value is used; otherwise the default class determination mechanism is invoked.</p>
1910  *
1911  * @param pBiDi is the paragraph <code>UBiDi</code> object.
1912  *
1913  * @param c     is the code point whose Bidi class must be retrieved.
1914  *
1915  * @return The Bidi class for character <code>c</code> based
1916  *         on the given <code>pBiDi</code> instance.
1917  * @see UBiDiClassCallback
1918  * @stable ICU 3.6
1919  */
1920 U_STABLE UCharDirection U_EXPORT2
1921 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
1922 
1923 /**
1924  * Set the callback function and callback data used by the UBA
1925  * implementation for Bidi class determination.
1926  * <p>This may be useful for assigning Bidi classes to PUA characters, or
1927  * for special application needs. For instance, an application may want to
1928  * handle all spaces like L or R characters (according to the base direction)
1929  * when creating the visual ordering of logical lines which are part of a report
1930  * organized in columns: there should not be interaction between adjacent
1931  * cells.</p>
1932  *
1933  * @param pBiDi is the paragraph <code>UBiDi</code> object.
1934  *
1935  * @param newFn is the new callback function pointer.
1936  *
1937  * @param newContext is the new callback context pointer. This can be NULL.
1938  *
1939  * @param oldFn fillin: Returns the old callback function pointer. This can be
1940  *                      NULL.
1941  *
1942  * @param oldContext fillin: Returns the old callback's context. This can be
1943  *                           NULL.
1944  *
1945  * @param pErrorCode must be a valid pointer to an error code value.
1946  *
1947  * @see ubidi_getClassCallback
1948  * @stable ICU 3.6
1949  */
1950 U_STABLE void U_EXPORT2
1951 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
1952                        const void *newContext, UBiDiClassCallback **oldFn,
1953                        const void **oldContext, UErrorCode *pErrorCode);
1954 
1955 /**
1956  * Get the current callback function (and its context) used for Bidi class determination.
1957  *
1958  * @param pBiDi is the paragraph <code>UBiDi</code> object.
1959  *
1960  * @param fn fillin: Returns the callback function pointer.
1961  *
1962  * @param context fillin: Returns the callback's private context.
1963  *
1964  * @see ubidi_setClassCallback
1965  * @stable ICU 3.6
1966  */
1967 U_STABLE void U_EXPORT2
1968 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
1969 
1970 /**
1971  * Take a <code>UBiDi</code> object containing the reordering
1972  * information for a piece of text (one or more paragraphs) set by
1973  * <code>ubidi_setPara()</code> or for a line of text set by
1974  * <code>ubidi_setLine()</code> and write a reordered string to the
1975  * destination buffer.
1976  *
1977  * This function preserves the integrity of characters with multiple
1978  * code units and (optionally) combining characters.
1979  * Characters in RTL runs can be replaced by mirror-image characters
1980  * in the destination buffer. Note that "real" mirroring has
1981  * to be done in a rendering engine by glyph selection
1982  * and that for many "mirrored" characters there are no
1983  * Unicode characters as mirror-image equivalents.
1984  * There are also options to insert or remove Bidi control
1985  * characters; see the description of the <code>destSize</code>
1986  * and <code>options</code> parameters and of the option bit flags.
1987  *
1988  * @param pBiDi A pointer to a <code>UBiDi</code> object that
1989  *              is set by <code>ubidi_setPara()</code> or
1990  *              <code>ubidi_setLine()</code> and contains the reordering
1991  *              information for the text that it was defined for,
1992  *              as well as a pointer to that text.<br><br>
1993  *              The text was aliased (only the pointer was stored
1994  *              without copying the contents) and must not have been modified
1995  *              since the <code>ubidi_setPara()</code> call.
1996  *
1997  * @param dest A pointer to where the reordered text is to be copied.
1998  *             The source text and <code>dest[destSize]</code>
1999  *             must not overlap.
2000  *
2001  * @param destSize The size of the <code>dest</code> buffer,
2002  *                 in number of UChars.
2003  *                 If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
2004  *                 option is set, then the destination length could be
2005  *                 as large as
2006  *                 <code>ubidi_getProcessedLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
2007  *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
2008  *                 is set, then the destination length may be less than
2009  *                 <code>ubidi_getProcessedLength(pBiDi)</code>.
2010  *                 If none of these options is set, then the destination length
2011  *                 will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
2012  *
2013  * @param options A bit set of options for the reordering that control
2014  *                how the reordered text is written.
2015  *                The options include mirroring the characters on a code
2016  *                point basis and inserting LRM characters, which is used
2017  *                especially for transforming visually stored text
2018  *                to logically stored text (although this is still an
2019  *                imperfect implementation of an "inverse Bidi" algorithm
2020  *                because it uses the "forward Bidi" algorithm at its core).
2021  *                The available options are:
2022  *                <code>#UBIDI_DO_MIRRORING</code>,
2023  *                <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
2024  *                <code>#UBIDI_KEEP_BASE_COMBINING</code>,
2025  *                <code>#UBIDI_OUTPUT_REVERSE</code>,
2026  *                <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
2027  *
2028  * @param pErrorCode must be a valid pointer to an error code value.
2029  *
2030  * @return The length of the output string.
2031  *
2032  * @see ubidi_getProcessedLength
2033  * @stable ICU 2.0
2034  */
2035 U_STABLE int32_t U_EXPORT2
2036 ubidi_writeReordered(UBiDi *pBiDi,
2037                      UChar *dest, int32_t destSize,
2038                      uint16_t options,
2039                      UErrorCode *pErrorCode);
2040 
2041 /**
2042  * Reverse a Right-To-Left run of Unicode text.
2043  *
2044  * This function preserves the integrity of characters with multiple
2045  * code units and (optionally) combining characters.
2046  * Characters can be replaced by mirror-image characters
2047  * in the destination buffer. Note that "real" mirroring has
2048  * to be done in a rendering engine by glyph selection
2049  * and that for many "mirrored" characters there are no
2050  * Unicode characters as mirror-image equivalents.
2051  * There is also an option to remove Bidi control
2052  * characters.
2053  *
2054  * This function is the implementation for reversing RTL runs as part
2055  * of <code>ubidi_writeReordered()</code>. For detailed descriptions
2056  * of the parameters, see there.
2057  * Since no Bidi controls are inserted here, the output string length
2058  * will never exceed <code>srcLength</code>.
2059  *
2060  * @see ubidi_writeReordered
2061  *
2062  * @param src A pointer to the RTL run text.
2063  *
2064  * @param srcLength The length of the RTL run.
2065  *
2066  * @param dest A pointer to where the reordered text is to be copied.
2067  *             <code>src[srcLength]</code> and <code>dest[destSize]</code>
2068  *             must not overlap.
2069  *
2070  * @param destSize The size of the <code>dest</code> buffer,
2071  *                 in number of UChars.
2072  *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
2073  *                 is set, then the destination length may be less than
2074  *                 <code>srcLength</code>.
2075  *                 If this option is not set, then the destination length
2076  *                 will be exactly <code>srcLength</code>.
2077  *
2078  * @param options A bit set of options for the reordering that control
2079  *                how the reordered text is written.
2080  *                See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
2081  *
2082  * @param pErrorCode must be a valid pointer to an error code value.
2083  *
2084  * @return The length of the output string.
2085  * @stable ICU 2.0
2086  */
2087 U_STABLE int32_t U_EXPORT2
2088 ubidi_writeReverse(const UChar *src, int32_t srcLength,
2089                    UChar *dest, int32_t destSize,
2090                    uint16_t options,
2091                    UErrorCode *pErrorCode);
2092 
2093 /*#define BIDI_SAMPLE_CODE*/
2094 /*@}*/
2095 
2096 #endif
2097