• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  ubidi.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999jul27
14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
15 */
16 
17 #include "cmemory.h"
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
22 #include "ubidi_props.h"
23 #include "ubidiimp.h"
24 #include "uassert.h"
25 
26 /*
27  * General implementation notes:
28  *
29  * Throughout the implementation, there are comments like (W2) that refer to
30  * rules of the BiDi algorithm in its version 5, in this example to the second
31  * rule of the resolution of weak types.
32  *
33  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34  * character according to UTF-16, the second UChar gets the directional property of
35  * the entire character assigned, while the first one gets a BN, a boundary
36  * neutral, type, which is ignored by most of the algorithm according to
37  * rule (X9) and the implementation suggestions of the BiDi algorithm.
38  *
39  * Later, adjustWSLevels() will set the level for each BN to that of the
40  * following character (UChar), which results in surrogate pairs getting the
41  * same level on each of their surrogates.
42  *
43  * In a UTF-8 implementation, the same thing could be done: the last byte of
44  * a multi-byte sequence would get the "real" property, while all previous
45  * bytes of that sequence would get BN.
46  *
47  * It is not possible to assign all those parts of a character the same real
48  * property because this would fail in the resolution of weak types with rules
49  * that look at immediately surrounding types.
50  *
51  * As a related topic, this implementation does not remove Boundary Neutral
52  * types from the input, but ignores them wherever this is relevant.
53  * For example, the loop for the resolution of the weak types reads
54  * types until it finds a non-BN.
55  * Also, explicit embedding codes are neither changed into BN nor removed.
56  * They are only treated the same way real BNs are.
57  * As stated before, adjustWSLevels() takes care of them at the end.
58  * For the purpose of conformance, the levels of all these codes
59  * do not matter.
60  *
61  * Note that this implementation never modifies the dirProps
62  * after the initial setup.
63  *
64  *
65  * In this implementation, the resolution of weak types (Wn),
66  * neutrals (Nn), and the assignment of the resolved level (In)
67  * are all done in one single loop, in resolveImplicitLevels().
68  * Changes of dirProp values are done on the fly, without writing
69  * them back to the dirProps array.
70  *
71  *
72  * This implementation contains code that allows to bypass steps of the
73  * algorithm that are not needed on the specific paragraph
74  * in order to speed up the most common cases considerably,
75  * like text that is entirely LTR, or RTL text without numbers.
76  *
77  * Most of this is done by setting a bit for each directional property
78  * in a flags variable and later checking for whether there are
79  * any LTR characters or any RTL characters, or both, whether
80  * there are any explicit embedding codes, etc.
81  *
82  * If the (Xn) steps are performed, then the flags are re-evaluated,
83  * because they will then not contain the embedding codes any more
84  * and will be adjusted for override codes, so that subsequently
85  * more bypassing may be possible than what the initial flags suggested.
86  *
87  * If the text is not mixed-directional, then the
88  * algorithm steps for the weak type resolution are not performed,
89  * and all levels are set to the paragraph level.
90  *
91  * If there are no explicit embedding codes, then the (Xn) steps
92  * are not performed.
93  *
94  * If embedding levels are supplied as a parameter, then all
95  * explicit embedding codes are ignored, and the (Xn) steps
96  * are not performed.
97  *
98  * White Space types could get the level of the run they belong to,
99  * and are checked with a test of (flags&MASK_EMBEDDING) to
100  * consider if the paragraph direction should be considered in
101  * the flags variable.
102  *
103  * If there are no White Space types in the paragraph, then
104  * (L1) is not necessary in adjustWSLevels().
105  */
106 
107 /* to avoid some conditional statements, use tiny constant arrays */
108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
111 
112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
115 
116 /* UBiDi object management -------------------------------------------------- */
117 
118 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)119 ubidi_open(void)
120 {
121     UErrorCode errorCode=U_ZERO_ERROR;
122     return ubidi_openSized(0, 0, &errorCode);
123 }
124 
125 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
127     UBiDi *pBiDi;
128 
129     /* check the argument values */
130     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
131         return NULL;
132     } else if(maxLength<0 || maxRunCount<0) {
133         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
134         return NULL;    /* invalid arguments */
135     }
136 
137     /* allocate memory for the object */
138     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
139     if(pBiDi==NULL) {
140         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
141         return NULL;
142     }
143 
144     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145     uprv_memset(pBiDi, 0, sizeof(UBiDi));
146 
147     /* get BiDi properties */
148     pBiDi->bdp=ubidi_getSingleton(pErrorCode);
149     if(U_FAILURE(*pErrorCode)) {
150         uprv_free(pBiDi);
151         return NULL;
152     }
153 
154     /* allocate memory for arrays as requested */
155     if(maxLength>0) {
156         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
157             !getInitialLevelsMemory(pBiDi, maxLength)
158         ) {
159             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
160         }
161     } else {
162         pBiDi->mayAllocateText=TRUE;
163     }
164 
165     if(maxRunCount>0) {
166         if(maxRunCount==1) {
167             /* use simpleRuns[] */
168             pBiDi->runsSize=sizeof(Run);
169         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
170             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
171         }
172     } else {
173         pBiDi->mayAllocateRuns=TRUE;
174     }
175 
176     if(U_SUCCESS(*pErrorCode)) {
177         return pBiDi;
178     } else {
179         ubidi_close(pBiDi);
180         return NULL;
181     }
182 }
183 
184 /*
185  * We are allowed to allocate memory if memory==NULL or
186  * mayAllocate==TRUE for each array that we need.
187  * We also try to grow and shrink memory as needed if we
188  * allocate it.
189  *
190  * Assume sizeNeeded>0.
191  * If *pMemory!=NULL, then assume *pSize>0.
192  *
193  * ### this realloc() may unnecessarily copy the old data,
194  * which we know we don't need any more;
195  * is this the best way to do this??
196  */
197 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)198 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
199     void **pMemory = (void **)bidiMem;
200     /* check for existing memory */
201     if(*pMemory==NULL) {
202         /* we need to allocate memory */
203         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
204             *pSize=sizeNeeded;
205             return TRUE;
206         } else {
207             return FALSE;
208         }
209     } else {
210         /* there is some memory, is it enough or too much? */
211         if(sizeNeeded>*pSize && !mayAllocate) {
212             /* not enough memory, and we must not allocate */
213             return FALSE;
214         } else if(sizeNeeded!=*pSize && mayAllocate) {
215             /* FOOD FOR THOUGHT: in hope to improve performance, we should
216              * try never shrinking memory, only growing it when required.
217              */
218             /* we may try to grow or shrink */
219             void *memory;
220 
221             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
222                 *pMemory=memory;
223                 *pSize=sizeNeeded;
224                 return TRUE;
225             } else {
226                 /* we failed to grow */
227                 return FALSE;
228             }
229         } else {
230             /* we have at least enough memory and must not allocate */
231             return TRUE;
232         }
233     }
234 }
235 
236 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)237 ubidi_close(UBiDi *pBiDi) {
238     if(pBiDi!=NULL) {
239         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
240         if(pBiDi->dirPropsMemory!=NULL) {
241             uprv_free(pBiDi->dirPropsMemory);
242         }
243         if(pBiDi->levelsMemory!=NULL) {
244             uprv_free(pBiDi->levelsMemory);
245         }
246         if(pBiDi->runsMemory!=NULL) {
247             uprv_free(pBiDi->runsMemory);
248         }
249         if(pBiDi->parasMemory!=NULL) {
250             uprv_free(pBiDi->parasMemory);
251         }
252         if(pBiDi->insertPoints.points!=NULL) {
253             uprv_free(pBiDi->insertPoints.points);
254         }
255 
256         uprv_free(pBiDi);
257     }
258 }
259 
260 /* set to approximate "inverse BiDi" ---------------------------------------- */
261 
262 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)263 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
264     if(pBiDi!=NULL) {
265         pBiDi->isInverse=isInverse;
266         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
267                                           : UBIDI_REORDER_DEFAULT;
268     }
269 }
270 
271 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)272 ubidi_isInverse(UBiDi *pBiDi) {
273     if(pBiDi!=NULL) {
274         return pBiDi->isInverse;
275     } else {
276         return FALSE;
277     }
278 }
279 
280 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
281  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
282  * concept of RUNS_ONLY which is a double operation.
283  * It could be advantageous to divide this into 3 concepts:
284  * a) Operation: direct / inverse / RUNS_ONLY
285  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
286  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
287  * This would allow combinations not possible today like RUNS_ONLY with
288  * NUMBERS_SPECIAL.
289  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
290  * REMOVE_CONTROLS for the inverse step.
291  * Not all combinations would be supported, and probably not all do make sense.
292  * This would need to document which ones are supported and what are the
293  * fallbacks for unsupported combinations.
294  */
295 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)296 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
297     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
298                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
299         pBiDi->reorderingMode = reorderingMode;
300         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
301     }
302 }
303 
304 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)305 ubidi_getReorderingMode(UBiDi *pBiDi) {
306     if (pBiDi!=NULL) {
307         return pBiDi->reorderingMode;
308     } else {
309         return UBIDI_REORDER_DEFAULT;
310     }
311 }
312 
313 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)314 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
315     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
316         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
317     }
318     if (pBiDi!=NULL) {
319         pBiDi->reorderingOptions=reorderingOptions;
320     }
321 }
322 
323 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)324 ubidi_getReorderingOptions(UBiDi *pBiDi) {
325     if (pBiDi!=NULL) {
326         return pBiDi->reorderingOptions;
327     } else {
328         return 0;
329     }
330 }
331 
332 /* perform (P2)..(P3) ------------------------------------------------------- */
333 
334 /*
335  * Get the directional properties for the text,
336  * calculate the flags bit-set, and
337  * determine the paragraph level if necessary.
338  */
339 static void
getDirProps(UBiDi * pBiDi)340 getDirProps(UBiDi *pBiDi) {
341     const UChar *text=pBiDi->text;
342     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
343 
344     int32_t i=0, i0, i1, length=pBiDi->originalLength;
345     Flags flags=0;      /* collect all directionalities in the text */
346     UChar32 uchar;
347     DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
348     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
349     /* for inverse BiDi, the default para level is set to RTL if there is a
350        strong R or AL character at either end of the text                           */
351     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
352             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
353              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
354     int32_t lastArabicPos=-1;
355     int32_t controlCount=0;
356     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
357                                        UBIDI_OPTION_REMOVE_CONTROLS);
358 
359     typedef enum {
360          NOT_CONTEXTUAL,                /* 0: not contextual paraLevel */
361          LOOKING_FOR_STRONG,            /* 1: looking for first strong char */
362          FOUND_STRONG_CHAR              /* 2: found first strong char       */
363     } State;
364     State state;
365     int32_t paraStart=0;                /* index of first char in paragraph */
366     DirProp paraDir;                    /* == CONTEXT_RTL within paragraphs
367                                            starting with strong R char      */
368     DirProp lastStrongDir=0;            /* for default level & inverse BiDi */
369     int32_t lastStrongLTR=0;            /* for STREAMING option             */
370 
371     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
372         pBiDi->length=0;
373         lastStrongLTR=0;
374     }
375     if(isDefaultLevel) {
376         paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
377         paraDir=paraDirDefault;
378         lastStrongDir=paraDirDefault;
379         state=LOOKING_FOR_STRONG;
380     } else {
381         state=NOT_CONTEXTUAL;
382         paraDir=0;
383     }
384     /* count paragraphs and determine the paragraph level (P2..P3) */
385     /*
386      * see comment in ubidi.h:
387      * the DEFAULT_XXX values are designed so that
388      * their bit 0 alone yields the intended default
389      */
390     for( /* i=0 above */ ; i<length; ) {
391         /* i is incremented by UTF_NEXT_CHAR */
392         i0=i;           /* index of first code unit */
393         UTF_NEXT_CHAR(text, i, length, uchar);
394         i1=i-1;         /* index of last code unit, gets the directional property */
395         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
396         dirProps[i1]=dirProp|paraDir;
397         if(i1>i0) {     /* set previous code units' properties to BN */
398             flags|=DIRPROP_FLAG(BN);
399             do {
400                 dirProps[--i1]=(DirProp)(BN|paraDir);
401             } while(i1>i0);
402         }
403         if(state==LOOKING_FOR_STRONG) {
404             if(dirProp==L) {
405                 state=FOUND_STRONG_CHAR;
406                 if(paraDir) {
407                     paraDir=0;
408                     for(i1=paraStart; i1<i; i1++) {
409                         dirProps[i1]&=~CONTEXT_RTL;
410                     }
411                 }
412                 continue;
413             }
414             if(dirProp==R || dirProp==AL) {
415                 state=FOUND_STRONG_CHAR;
416                 if(paraDir==0) {
417                     paraDir=CONTEXT_RTL;
418                     for(i1=paraStart; i1<i; i1++) {
419                         dirProps[i1]|=CONTEXT_RTL;
420                     }
421                 }
422                 continue;
423             }
424         }
425         if(dirProp==L) {
426             lastStrongDir=0;
427             lastStrongLTR=i;            /* i is index to next character */
428         }
429         else if(dirProp==R) {
430             lastStrongDir=CONTEXT_RTL;
431         }
432         else if(dirProp==AL) {
433             lastStrongDir=CONTEXT_RTL;
434             lastArabicPos=i-1;
435         }
436         else if(dirProp==B) {
437             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
438                 pBiDi->length=i;        /* i is index to next character */
439             }
440             if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
441                 for( ; paraStart<i; paraStart++) {
442                     dirProps[paraStart]|=CONTEXT_RTL;
443                 }
444             }
445             if(i<length) {              /* B not last char in text */
446                 if(!((uchar==CR) && (text[i]==LF))) {
447                     pBiDi->paraCount++;
448                 }
449                 if(isDefaultLevel) {
450                     state=LOOKING_FOR_STRONG;
451                     paraStart=i;        /* i is index to next character */
452                     paraDir=paraDirDefault;
453                     lastStrongDir=paraDirDefault;
454                 }
455             }
456         }
457         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
458             controlCount++;
459         }
460     }
461     if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
462         for(i1=paraStart; i1<length; i1++) {
463             dirProps[i1]|=CONTEXT_RTL;
464         }
465     }
466     if(isDefaultLevel) {
467         pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
468     }
469     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
470         if((lastStrongLTR>pBiDi->length) &&
471            (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
472             pBiDi->length = lastStrongLTR;
473         }
474         if(pBiDi->length<pBiDi->originalLength) {
475             pBiDi->paraCount--;
476         }
477     }
478     /* The following line does nothing new for contextual paraLevel, but is
479        needed for absolute paraLevel.                               */
480     flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
481 
482     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
483         flags|=DIRPROP_FLAG(L);
484     }
485 
486     pBiDi->controlCount = controlCount;
487     pBiDi->flags=flags;
488     pBiDi->lastArabicPos=lastArabicPos;
489 }
490 
491 /* perform (X1)..(X9) ------------------------------------------------------- */
492 
493 /* determine if the text is mixed-directional or single-directional */
494 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)495 directionFromFlags(UBiDi *pBiDi) {
496     Flags flags=pBiDi->flags;
497     /* if the text contains AN and neutrals, then some neutrals may become RTL */
498     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
499         return UBIDI_LTR;
500     } else if(!(flags&MASK_LTR)) {
501         return UBIDI_RTL;
502     } else {
503         return UBIDI_MIXED;
504     }
505 }
506 
507 /*
508  * Resolve the explicit levels as specified by explicit embedding codes.
509  * Recalculate the flags to have them reflect the real properties
510  * after taking the explicit embeddings into account.
511  *
512  * The BiDi algorithm is designed to result in the same behavior whether embedding
513  * levels are externally specified (from "styled text", supposedly the preferred
514  * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
515  * That is why (X9) instructs to remove all explicit codes (and BN).
516  * However, in a real implementation, this removal of these codes and their index
517  * positions in the plain text is undesirable since it would result in
518  * reallocated, reindexed text.
519  * Instead, this implementation leaves the codes in there and just ignores them
520  * in the subsequent processing.
521  * In order to get the same reordering behavior, positions with a BN or an
522  * explicit embedding code just get the same level assigned as the last "real"
523  * character.
524  *
525  * Some implementations, not this one, then overwrite some of these
526  * directionality properties at "real" same-level-run boundaries by
527  * L or R codes so that the resolution of weak types can be performed on the
528  * entire paragraph at once instead of having to parse it once more and
529  * perform that resolution on same-level-runs.
530  * This limits the scope of the implicit rules in effectively
531  * the same way as the run limits.
532  *
533  * Instead, this implementation does not modify these codes.
534  * On one hand, the paragraph has to be scanned for same-level-runs, but
535  * on the other hand, this saves another loop to reset these codes,
536  * or saves making and modifying a copy of dirProps[].
537  *
538  *
539  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
540  *
541  *
542  * Handling the stack of explicit levels (Xn):
543  *
544  * With the BiDi stack of explicit levels,
545  * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
546  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
547  *
548  * In order to have a correct push-pop semantics even in the case of overflows,
549  * there are two overflow counters:
550  * - countOver60 is incremented with each LRx at level 60
551  * - from level 60, one RLx increases the level to 61
552  * - countOver61 is incremented with each LRx and RLx at level 61
553  *
554  * Popping levels with PDF must work in the opposite order so that level 61
555  * is correct at the correct point. Underflows (too many PDFs) must be checked.
556  *
557  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
558  */
559 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi)560 resolveExplicitLevels(UBiDi *pBiDi) {
561     const DirProp *dirProps=pBiDi->dirProps;
562     UBiDiLevel *levels=pBiDi->levels;
563     const UChar *text=pBiDi->text;
564 
565     int32_t i=0, length=pBiDi->length;
566     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
567     DirProp dirProp;
568     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
569 
570     UBiDiDirection direction;
571     int32_t paraIndex=0;
572 
573     /* determine if the text is mixed-directional or single-directional */
574     direction=directionFromFlags(pBiDi);
575 
576     /* we may not need to resolve any explicit levels, but for multiple
577        paragraphs we want to loop on all chars to set the para boundaries */
578     if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
579         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
580     } else if((pBiDi->paraCount==1) &&
581               (!(flags&MASK_EXPLICIT) ||
582                (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
583         /* mixed, but all characters are at the same embedding level */
584         /* or we are in "inverse BiDi" */
585         /* and we don't have contextual multiple paragraphs with some B char */
586         /* set all levels to the paragraph level */
587         for(i=0; i<length; ++i) {
588             levels[i]=level;
589         }
590     } else {
591         /* continue to perform (Xn) */
592 
593         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
594         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
595         UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
596 
597         UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
598         uint32_t countOver60=0, countOver61=0;  /* count overflows of explicit levels */
599 
600         /* recalculate the flags */
601         flags=0;
602 
603         for(i=0; i<length; ++i) {
604             dirProp=NO_CONTEXT_RTL(dirProps[i]);
605             switch(dirProp) {
606             case LRE:
607             case LRO:
608                 /* (X3, X5) */
609                 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
610                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
611                     stack[stackTop]=embeddingLevel;
612                     ++stackTop;
613                     embeddingLevel=newLevel;
614                     if(dirProp==LRO) {
615                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
616                     }
617                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
618                        since this has already been done for newLevel which is
619                        the source for embeddingLevel.
620                      */
621                 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
622                     ++countOver61;
623                 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
624                     ++countOver60;
625                 }
626                 flags|=DIRPROP_FLAG(BN);
627                 break;
628             case RLE:
629             case RLO:
630                 /* (X2, X4) */
631                 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
632                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
633                     stack[stackTop]=embeddingLevel;
634                     ++stackTop;
635                     embeddingLevel=newLevel;
636                     if(dirProp==RLO) {
637                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
638                     }
639                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
640                        since this has already been done for newLevel which is
641                        the source for embeddingLevel.
642                      */
643                 } else {
644                     ++countOver61;
645                 }
646                 flags|=DIRPROP_FLAG(BN);
647                 break;
648             case PDF:
649                 /* (X7) */
650                 /* handle all the overflow cases first */
651                 if(countOver61>0) {
652                     --countOver61;
653                 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
654                     /* handle LRx overflows from level 60 */
655                     --countOver60;
656                 } else if(stackTop>0) {
657                     /* this is the pop operation; it also pops level 61 while countOver60>0 */
658                     --stackTop;
659                     embeddingLevel=stack[stackTop];
660                 /* } else { (underflow) */
661                 }
662                 flags|=DIRPROP_FLAG(BN);
663                 break;
664             case B:
665                 stackTop=0;
666                 countOver60=countOver61=0;
667                 level=GET_PARALEVEL(pBiDi, i);
668                 if((i+1)<length) {
669                     embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
670                     if(!((text[i]==CR) && (text[i+1]==LF))) {
671                         pBiDi->paras[paraIndex++]=i+1;
672                     }
673                 }
674                 flags|=DIRPROP_FLAG(B);
675                 break;
676             case BN:
677                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
678                 /* they will get their levels set correctly in adjustWSLevels() */
679                 flags|=DIRPROP_FLAG(BN);
680                 break;
681             default:
682                 /* all other types get the "real" level */
683                 if(level!=embeddingLevel) {
684                     level=embeddingLevel;
685                     if(level&UBIDI_LEVEL_OVERRIDE) {
686                         flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
687                     } else {
688                         flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
689                     }
690                 }
691                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
692                     flags|=DIRPROP_FLAG(dirProp);
693                 }
694                 break;
695             }
696 
697             /*
698              * We need to set reasonable levels even on BN codes and
699              * explicit codes because we will later look at same-level runs (X10).
700              */
701             levels[i]=level;
702         }
703         if(flags&MASK_EMBEDDING) {
704             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
705         }
706         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
707             flags|=DIRPROP_FLAG(L);
708         }
709 
710         /* subsequently, ignore the explicit codes and BN (X9) */
711 
712         /* again, determine if the text is mixed-directional or single-directional */
713         pBiDi->flags=flags;
714         direction=directionFromFlags(pBiDi);
715     }
716 
717     return direction;
718 }
719 
720 /*
721  * Use a pre-specified embedding levels array:
722  *
723  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
724  * ignore all explicit codes (X9),
725  * and check all the preset levels.
726  *
727  * Recalculate the flags to have them reflect the real properties
728  * after taking the explicit embeddings into account.
729  */
730 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)731 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
732     const DirProp *dirProps=pBiDi->dirProps;
733     DirProp dirProp;
734     UBiDiLevel *levels=pBiDi->levels;
735     const UChar *text=pBiDi->text;
736 
737     int32_t i, length=pBiDi->length;
738     Flags flags=0;  /* collect all directionalities in the text */
739     UBiDiLevel level;
740     uint32_t paraIndex=0;
741 
742     for(i=0; i<length; ++i) {
743         level=levels[i];
744         dirProp=NO_CONTEXT_RTL(dirProps[i]);
745         if(level&UBIDI_LEVEL_OVERRIDE) {
746             /* keep the override flag in levels[i] but adjust the flags */
747             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
748             flags|=DIRPROP_FLAG_O(level);
749         } else {
750             /* set the flags */
751             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
752         }
753         if((level<GET_PARALEVEL(pBiDi, i) &&
754             !((0==level)&&(dirProp==B))) ||
755            (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
756             /* level out of bounds */
757             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
758             return UBIDI_LTR;
759         }
760         if((dirProp==B) && ((i+1)<length)) {
761             if(!((text[i]==CR) && (text[i+1]==LF))) {
762                 pBiDi->paras[paraIndex++]=i+1;
763             }
764         }
765     }
766     if(flags&MASK_EMBEDDING) {
767         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
768     }
769 
770     /* determine if the text is mixed-directional or single-directional */
771     pBiDi->flags=flags;
772     return directionFromFlags(pBiDi);
773 }
774 
/******************************************************************
 The Properties state machine table
*******************************************************************

 All table cells are 8 bits:
      bits 0..4:  next state
      bits 5..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

*******************************************************************
 Definitions and type for properties state table
*******************************************************************
*/
/* Number of columns in impTabProps: 13 property groups + the Res column. */
#define IMPTABPROPS_COLUMNS 14
/* Index of the Res column (the last one). */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* Bits 0..4 of a properties cell: next state. */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* Bits 5..7 of a properties cell: action to perform. */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Build a properties cell; arguments are parenthesized so that
 * expression arguments are packed correctly. */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))

/* Maps each dirProp value (in the order of the header comment below)
 * to a column index of impTabProps. */
static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
};
enum { _L=0, _R=1, _EN=2, _AN=3, _ON=4, _S=5, _B=6 }; /* reduced dirProp */
804 
/******************************************************************

      PROPERTIES  STATE  TABLE

 In table impTabProps,
      - the ON column regroups ON and WS
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
      - the Res column is the reduced property assigned to a run

 Action 1: process current run1, init new run1
        2: init new run2
        3: process run1, process run2, init new run1
        4: process run1, set run1=run2, init new run2

 Notes:
  1) This table is used in resolveImplicitLevels().
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.


*/
840 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
841 {
842 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,  Res */
843 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,  _ON },
844 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),   _L },
845 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),   _R },
846 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,   _R },
847 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),  _EN },
848 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),  _AN },
849 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),  _AN },
850 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),  _ON },
851 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),  _ON },
852 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),  _ON },
853 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),  _EN },
854 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),  _EN },
855 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),  _AN },
856 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),  _AN },
857 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),  _ON },
858 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),   _S },
859 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),   _S },
860 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),   _B }
861 };
862 
863 /*  we must undef macro s because the levels table have a different
864  *  structure (4 bits for action and 4 bits for next state.
865  */
866 #undef s
867 
/******************************************************************
 The levels state machine tables
*******************************************************************

 All table cells are 8 bits:
      bits 0..3:  next state
      bits 4..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

 This format limits each table to 16 states and to 15 actions.

*******************************************************************
 Definitions and type for levels state tables
*******************************************************************
*/
887 #define IMPTABLEVELS_COLUMNS (_B + 2)
888 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
889 #define GET_STATE(cell) ((cell)&0x0f)
890 #define GET_ACTION(cell) ((cell)>>4)
891 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
892 
893 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
894 typedef uint8_t ImpAct[];
895 
896 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
897  * instead of having a pair of ImpTab and a pair of ImpAct.
898  */
899 typedef struct ImpTabPair {
900     const void * pImpTab[2];
901     const void * pImpAct[2];
902 } ImpTabPair;
903 
/******************************************************************

      LEVELS  STATE  TABLES

 In all levels state tables,
      - state 0 is the initial state
      - the Res column is the increment to add to the text level
        for this property sequence.

 The impAct arrays for each table of a pair map the local action
 numbers of the table to the total list of actions. For instance,
 action 2 in a given table corresponds to the action number which
 appears in entry [2] of the impAct array for that table.
 The first entry of all impAct arrays must be 0.

 Action 1: init conditional sequence
        2: prepend conditional sequence to current sequence
        3: set ON sequence to new level - 1
        4: init EN/AN/ON sequence
        5: fix EN/AN/ON sequence followed by R
        6: set previous level sequence to level 2

 Notes:
  1) These tables are used in processPropertySeq(). The input
     is property sequences as determined by resolveImplicitLevels.
  2) Most such property sequences are processed immediately
     (levels are assigned).
  3) However, some sequences cannot be assigned a final level till
     one or more following sequences are received. For instance,
     ON following an R sequence within an even-level paragraph.
     If the following sequence is R, the ON sequence will be
     assigned basic run level+1, and so will the R sequence.
  4) S is generally handled like ON, since its level will be fixed
     to paragraph level in adjustWSLevels().

*/
940 
941 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
942 /*  In this table, conditional sequences receive the higher possible level
943     until proven otherwise.
944 */
945 {
946 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
947 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
948 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
949 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
950 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
951 /* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
952 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
953 };
954 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
955 /*  In this table, conditional sequences receive the lower possible level
956     until proven otherwise.
957 */
958 {
959 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
960 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
961 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
962 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
963 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
964 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
965 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
966 };
967 static const ImpAct impAct0 = {0,1,2,3,4,5,6};
968 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
969                                            &impTabR_DEFAULT},
970                                           {&impAct0, &impAct0}};
971 
972 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
973 /*  In this table, conditional sequences receive the higher possible level
974     until proven otherwise.
975 */
976 {
977 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
978 /* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
979 /* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
980 /* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
981 /* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
982 /* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
983   };
984 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
985                                                    &impTabR_DEFAULT},
986                                                   {&impAct0, &impAct0}};
987 
988 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
989 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
990     until proven that there is L or sor/eor on both sides. AN is handled like EN.
991 */
992 {
993 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
994 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
995 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
996 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
997 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
998 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
999 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1000 };
1001 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1002 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1003     until proven that there is L on both sides. AN is handled like EN.
1004 */
1005 {
1006 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1007 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1008 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1009 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1010 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1011 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1012 };
1013 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1014                         {&impTabL_GROUP_NUMBERS_WITH_R,
1015                          &impTabR_GROUP_NUMBERS_WITH_R},
1016                         {&impAct0, &impAct0}};
1017 
1018 
1019 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1020 /*  This table is identical to the Default LTR table except that EN and AN are
1021     handled like L.
1022 */
1023 {
1024 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1025 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1026 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1027 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1028 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1029 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1030 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1031 };
1032 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1033 /*  This table is identical to the Default RTL table except that EN and AN are
1034     handled like L.
1035 */
1036 {
1037 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1038 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1039 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1040 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1041 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1042 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1043 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1044 };
1045 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1046                         {&impTabL_INVERSE_NUMBERS_AS_L,
1047                          &impTabR_INVERSE_NUMBERS_AS_L},
1048                         {&impAct0, &impAct0}};
1049 
1050 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1051 /*  In this table, conditional sequences receive the lower possible level
1052     until proven otherwise.
1053 */
1054 {
1055 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1056 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1057 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1058 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1059 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1060 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1061 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1062 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1063 };
1064 static const ImpAct impAct1 = {0,1,11,12};
1065 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1066  */
1067 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1068                         {&impTabL_DEFAULT,
1069                          &impTabR_INVERSE_LIKE_DIRECT},
1070                         {&impAct0, &impAct1}};
1071 
1072 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1073 /*  The case handled in this table is (visually):  R EN L
1074 */
1075 {
1076 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1077 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1078 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1079 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1080 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1081 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1082 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1083 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1084 };
1085 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1086 /*  The cases handled in this table are (visually):  R EN L
1087                                                      R L AN L
1088 */
1089 {
1090 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1091 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1092 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1093 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1094 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1095 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1096 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1097 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1098 };
1099 static const ImpAct impAct2 = {0,1,7,8,9,10};
1100 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1101                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1102                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1103                         {&impAct0, &impAct2}};
1104 
1105 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1106                         {&impTabL_NUMBERS_SPECIAL,
1107                          &impTabR_INVERSE_LIKE_DIRECT},
1108                         {&impAct0, &impAct1}};
1109 
1110 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1111 /*  The case handled in this table is (visually):  R EN L
1112 */
1113 {
1114 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1115 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1116 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1117 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1118 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1119 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1120 };
1121 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1122                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1123                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1124                         {&impAct0, &impAct2}};
1125 
1126 #undef s
1127 
1128 typedef struct {
1129     const ImpTab * pImpTab;             /* level table pointer          */
1130     const ImpAct * pImpAct;             /* action map array             */
1131     int32_t startON;                    /* start of ON sequence         */
1132     int32_t startL2EN;                  /* start of level 2 sequence    */
1133     int32_t lastStrongRTL;              /* index of last found R or AL  */
1134     int32_t state;                      /* current state                */
1135     UBiDiLevel runLevel;                /* run level before implicit solving */
1136 } LevState;
1137 
1138 /*------------------------------------------------------------------------*/
1139 
1140 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1141 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1142   /* param pos:     position where to insert
1143      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1144   */
1145 {
1146 #define FIRSTALLOC  10
1147     Point point;
1148     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1149 
1150     if (pInsertPoints->capacity == 0)
1151     {
1152         pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1153         if (pInsertPoints->points == NULL)
1154         {
1155             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1156             return;
1157         }
1158         pInsertPoints->capacity=FIRSTALLOC;
1159     }
1160     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1161     {
1162         void * savePoints=pInsertPoints->points;
1163         pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1164                                            pInsertPoints->capacity*2*sizeof(Point));
1165         if (pInsertPoints->points == NULL)
1166         {
1167             pInsertPoints->points=savePoints;
1168             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1169             return;
1170         }
1171         else  pInsertPoints->capacity*=2;
1172     }
1173     point.pos=pos;
1174     point.flag=flag;
1175     pInsertPoints->points[pInsertPoints->size]=point;
1176     pInsertPoints->size++;
1177 #undef FIRSTALLOC
1178 }
1179 
/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */

/*
 * This implementation of the (Wn) rules applies all rules in one pass.
 * In order to do so, it needs a look-ahead of typically 1 character
 * (except for W5: sequences of ET) and keeps track of changes
 * in a rule Wp that affect a later Wq (p<q).
 *
 * The (Nn) and (In) rules are also performed in that same single loop,
 * but effectively one iteration behind for white space.
 *
 * Since all implicit rules are performed in one step, it is not necessary
 * to actually store the intermediate directional properties in dirProps[].
 */
1194 
1195 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1196 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1197                    int32_t start, int32_t limit) {
1198     uint8_t cell, oldStateSeq, actionSeq;
1199     const ImpTab * pImpTab=pLevState->pImpTab;
1200     const ImpAct * pImpAct=pLevState->pImpAct;
1201     UBiDiLevel * levels=pBiDi->levels;
1202     UBiDiLevel level, addLevel;
1203     InsertPoints * pInsertPoints;
1204     int32_t start0, k;
1205 
1206     start0=start;                           /* save original start position */
1207     oldStateSeq=(uint8_t)pLevState->state;
1208     cell=(*pImpTab)[oldStateSeq][_prop];
1209     pLevState->state=GET_STATE(cell);       /* isolate the new state */
1210     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1211     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1212 
1213     if(actionSeq) {
1214         switch(actionSeq) {
1215         case 1:                         /* init ON seq */
1216             pLevState->startON=start0;
1217             break;
1218 
1219         case 2:                         /* prepend ON seq to current seq */
1220             start=pLevState->startON;
1221             break;
1222 
1223         case 3:                         /* L or S after possible relevant EN/AN */
1224             /* check if we had EN after R/AL */
1225             if (pLevState->startL2EN >= 0) {
1226                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1227             }
1228             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1229             /* check if we had any relevant EN/AN after R/AL */
1230             pInsertPoints=&(pBiDi->insertPoints);
1231             if ((pInsertPoints->capacity == 0) ||
1232                 (pInsertPoints->size <= pInsertPoints->confirmed))
1233             {
1234                 /* nothing, just clean up */
1235                 pLevState->lastStrongRTL=-1;
1236                 /* check if we have a pending conditional segment */
1237                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1238                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1239                     start=pLevState->startON;   /* reset to basic run level */
1240                 }
1241                 if (_prop == _S)                /* add LRM before S */
1242                 {
1243                     addPoint(pBiDi, start0, LRM_BEFORE);
1244                     pInsertPoints->confirmed=pInsertPoints->size;
1245                 }
1246                 break;
1247             }
1248             /* reset previous RTL cont to level for LTR text */
1249             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1250             {
1251                 /* reset odd level, leave runLevel+2 as is */
1252                 levels[k]=(levels[k] - 2) & ~1;
1253             }
1254             /* mark insert points as confirmed */
1255             pInsertPoints->confirmed=pInsertPoints->size;
1256             pLevState->lastStrongRTL=-1;
1257             if (_prop == _S)            /* add LRM before S */
1258             {
1259                 addPoint(pBiDi, start0, LRM_BEFORE);
1260                 pInsertPoints->confirmed=pInsertPoints->size;
1261             }
1262             break;
1263 
1264         case 4:                         /* R/AL after possible relevant EN/AN */
1265             /* just clean up */
1266             pInsertPoints=&(pBiDi->insertPoints);
1267             if (pInsertPoints->capacity > 0)
1268                 /* remove all non confirmed insert points */
1269                 pInsertPoints->size=pInsertPoints->confirmed;
1270             pLevState->startON=-1;
1271             pLevState->startL2EN=-1;
1272             pLevState->lastStrongRTL=limit - 1;
1273             break;
1274 
1275         case 5:                         /* EN/AN after R/AL + possible cont */
1276             /* check for real AN */
1277             if ((_prop == _AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
1278                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1279             {
1280                 /* real AN */
1281                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1282                 {
1283                     /* just note the righmost digit as a strong RTL */
1284                     pLevState->lastStrongRTL=limit - 1;
1285                     break;
1286                 }
1287                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1288                 {
1289                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1290                     pLevState->startL2EN=-2;
1291                 }
1292                 /* note AN */
1293                 addPoint(pBiDi, start0, LRM_BEFORE);
1294                 break;
1295             }
1296             /* if first EN/AN after R/AL */
1297             if (pLevState->startL2EN == -1) {
1298                 pLevState->startL2EN=start0;
1299             }
1300             break;
1301 
1302         case 6:                         /* note location of latest R/AL */
1303             pLevState->lastStrongRTL=limit - 1;
1304             pLevState->startON=-1;
1305             break;
1306 
1307         case 7:                         /* L after R+ON/EN/AN */
1308             /* include possible adjacent number on the left */
1309             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1310             if(k>=0) {
1311                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1312                 pInsertPoints=&(pBiDi->insertPoints);
1313                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1314             }
1315             pLevState->startON=start0;
1316             break;
1317 
1318         case 8:                         /* AN after L */
1319             /* AN numbers between L text on both sides may be trouble. */
1320             /* tentatively bracket with LRMs; will be confirmed if followed by L */
1321             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1322             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1323             break;
1324 
1325         case 9:                         /* R after L+ON/EN/AN */
1326             /* false alert, infirm LRMs around previous AN */
1327             pInsertPoints=&(pBiDi->insertPoints);
1328             pInsertPoints->size=pInsertPoints->confirmed;
1329             if (_prop == _S)            /* add RLM before S */
1330             {
1331                 addPoint(pBiDi, start0, RLM_BEFORE);
1332                 pInsertPoints->confirmed=pInsertPoints->size;
1333             }
1334             break;
1335 
1336         case 10:                        /* L after L+ON/AN */
1337             level=pLevState->runLevel + addLevel;
1338             for(k=pLevState->startON; k<start0; k++) {
1339                 if (levels[k]<level)
1340                     levels[k]=level;
1341             }
1342             pInsertPoints=&(pBiDi->insertPoints);
1343             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
1344             pLevState->startON=start0;
1345             break;
1346 
1347         case 11:                        /* L after L+ON+EN/AN/ON */
1348             level=pLevState->runLevel;
1349             for(k=start0-1; k>=pLevState->startON; k--) {
1350                 if(levels[k]==level+3) {
1351                     while(levels[k]==level+3) {
1352                         levels[k--]-=2;
1353                     }
1354                     while(levels[k]==level) {
1355                         k--;
1356                     }
1357                 }
1358                 if(levels[k]==level+2) {
1359                     levels[k]=level;
1360                     continue;
1361                 }
1362                 levels[k]=level+1;
1363             }
1364             break;
1365 
1366         case 12:                        /* R after L+ON+EN/AN/ON */
1367             level=pLevState->runLevel+1;
1368             for(k=start0-1; k>=pLevState->startON; k--) {
1369                 if(levels[k]>level) {
1370                     levels[k]-=2;
1371                 }
1372             }
1373             break;
1374 
1375         default:                        /* we should never get here */
1376             U_ASSERT(FALSE);
1377             break;
1378         }
1379     }
1380     if((addLevel) || (start < start0)) {
1381         level=pLevState->runLevel + addLevel;
1382         for(k=start; k<limit; k++) {
1383             levels[k]=level;
1384         }
1385     }
1386 }
1387 
1388 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)1389 resolveImplicitLevels(UBiDi *pBiDi,
1390                       int32_t start, int32_t limit,
1391                       DirProp sor, DirProp eor) {
1392     const DirProp *dirProps=pBiDi->dirProps;
1393 
1394     LevState levState;
1395     int32_t i, start1, start2;
1396     uint8_t oldStateImp, stateImp, actionImp;
1397     uint8_t gprop, resProp, cell;
1398     UBool inverseRTL;
1399     DirProp nextStrongProp=R;
1400     int32_t nextStrongPos=-1;
1401 
1402     /* check for RTL inverse BiDi mode */
1403     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1404      * loop on the text characters from end to start.
1405      * This would need a different properties state table (at least different
1406      * actions) and different levels state tables (maybe very similar to the
1407      * LTR corresponding ones.
1408      */
1409     inverseRTL=(UBool)
1410         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
1411          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
1412           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
1413     /* initialize for levels state table */
1414     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1415     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1416     levState.state=0;
1417     levState.runLevel=pBiDi->levels[start];
1418     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
1419     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
1420     processPropertySeq(pBiDi, &levState, sor, start, start);
1421     /* initialize for property state table */
1422     if(dirProps[start]==NSM) {
1423         stateImp = 1 + sor;
1424     } else {
1425         stateImp=0;
1426     }
1427     start1=start;
1428     start2=start;
1429 
1430     for(i=start; i<=limit; i++) {
1431         if(i>=limit) {
1432             gprop=eor;
1433         } else {
1434             DirProp prop, prop1;
1435             prop=NO_CONTEXT_RTL(dirProps[i]);
1436             if(inverseRTL) {
1437                 if(prop==AL) {
1438                     /* AL before EN does not make it AN */
1439                     prop=R;
1440                 } else if(prop==EN) {
1441                     if(nextStrongPos<=i) {
1442                         /* look for next strong char (L/R/AL) */
1443                         int32_t j;
1444                         nextStrongProp=R;   /* set default */
1445                         nextStrongPos=limit;
1446                         for(j=i+1; j<limit; j++) {
1447                             prop1=NO_CONTEXT_RTL(dirProps[j]);
1448                             if(prop1==L || prop1==R || prop1==AL) {
1449                                 nextStrongProp=prop1;
1450                                 nextStrongPos=j;
1451                                 break;
1452                             }
1453                         }
1454                     }
1455                     if(nextStrongProp==AL) {
1456                         prop=AN;
1457                     }
1458                 }
1459             }
1460             gprop=groupProp[prop];
1461         }
1462         oldStateImp=stateImp;
1463         cell=impTabProps[oldStateImp][gprop];
1464         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
1465         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
1466         if((i==limit) && (actionImp==0)) {
1467             /* there is an unprocessed sequence if its property == eor   */
1468             actionImp=1;                    /* process the last sequence */
1469         }
1470         if(actionImp) {
1471             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
1472             switch(actionImp) {
1473             case 1:             /* process current seq1, init new seq1 */
1474                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
1475                 start1=i;
1476                 break;
1477             case 2:             /* init new seq2 */
1478                 start2=i;
1479                 break;
1480             case 3:             /* process seq1, process seq2, init new seq1 */
1481                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1482                 processPropertySeq(pBiDi, &levState, _ON, start2, i);
1483                 start1=i;
1484                 break;
1485             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
1486                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1487                 start1=start2;
1488                 start2=i;
1489                 break;
1490             default:            /* we should never get here */
1491                 U_ASSERT(FALSE);
1492                 break;
1493             }
1494         }
1495     }
1496     /* flush possible pending sequence, e.g. ON */
1497     processPropertySeq(pBiDi, &levState, eor, limit, limit);
1498 }
1499 
/* perform (L1) and (X9) ---------------------------------------------------- */
1501 
1502 /*
1503  * Reset the embedding levels for some non-graphic characters (L1).
1504  * This function also sets appropriate levels for BN, and
1505  * explicit embedding types that are supposed to have been removed
1506  * from the paragraph in (X9).
1507  */
1508 static void
adjustWSLevels(UBiDi * pBiDi)1509 adjustWSLevels(UBiDi *pBiDi) {
1510     const DirProp *dirProps=pBiDi->dirProps;
1511     UBiDiLevel *levels=pBiDi->levels;
1512     int32_t i;
1513 
1514     if(pBiDi->flags&MASK_WS) {
1515         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
1516         Flags flag;
1517 
1518         i=pBiDi->trailingWSStart;
1519         while(i>0) {
1520             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1521             while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
1522                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1523                     levels[i]=0;
1524                 } else {
1525                     levels[i]=GET_PARALEVEL(pBiDi, i);
1526                 }
1527             }
1528 
1529             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1530             /* here, i+1 is guaranteed to be <length */
1531             while(i>0) {
1532                 flag=DIRPROP_FLAG_NC(dirProps[--i]);
1533                 if(flag&MASK_BN_EXPLICIT) {
1534                     levels[i]=levels[i+1];
1535                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1536                     levels[i]=0;
1537                     break;
1538                 } else if(flag&MASK_B_S) {
1539                     levels[i]=GET_PARALEVEL(pBiDi, i);
1540                     break;
1541                 }
1542             }
1543         }
1544     }
1545 }
1546 
/*
 * Small arithmetic helpers used by setParaRunsOnly().
 * Both macros evaluate their arguments more than once, so they must only
 * be given side-effect-free expressions.
 */
/* smaller of x and y (y when they compare equal) */
#define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
/* absolute value of x */
#define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
1549 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)1550 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
1551                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
1552     void *runsOnlyMemory;
1553     int32_t *visualMap;
1554     UChar *visualText;
1555     int32_t saveLength, saveTrailingWSStart;
1556     const UBiDiLevel *levels;
1557     UBiDiLevel *saveLevels;
1558     UBiDiDirection saveDirection;
1559     UBool saveMayAllocateText;
1560     Run *runs;
1561     int32_t visualLength, i, j, visualStart, logicalStart,
1562             runCount, runLength, addedRuns, insertRemove,
1563             start, limit, step, indexOddBit, logicalPos,
1564             index, index1;
1565     uint32_t saveOptions;
1566 
1567     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
1568     if(length==0) {
1569         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1570         goto cleanup3;
1571     }
1572     /* obtain memory for mapping table and visual text */
1573     runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
1574     if(runsOnlyMemory==NULL) {
1575         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1576         goto cleanup3;
1577     }
1578     visualMap=runsOnlyMemory;
1579     visualText=(UChar *)&visualMap[length];
1580     saveLevels=(UBiDiLevel *)&visualText[length];
1581     saveOptions=pBiDi->reorderingOptions;
1582     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
1583         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
1584         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
1585     }
1586     paraLevel&=1;                       /* accept only 0 or 1 */
1587     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1588     if(U_FAILURE(*pErrorCode)) {
1589         goto cleanup3;
1590     }
1591     /* we cannot access directly pBiDi->levels since it is not yet set if
1592      * direction is not MIXED
1593      */
1594     levels=ubidi_getLevels(pBiDi, pErrorCode);
1595     uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
1596     saveTrailingWSStart=pBiDi->trailingWSStart;
1597     saveLength=pBiDi->length;
1598     saveDirection=pBiDi->direction;
1599 
1600     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1601      * the visual map and the dirProps array to drive the second call
1602      * to ubidi_setPara (but must make provision for possible removal of
1603      * BiDi controls.  Alternatively, only use the dirProps array via
1604      * customized classifier callback.
1605      */
1606     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
1607                                       UBIDI_DO_MIRRORING, pErrorCode);
1608     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
1609     if(U_FAILURE(*pErrorCode)) {
1610         goto cleanup2;
1611     }
1612     pBiDi->reorderingOptions=saveOptions;
1613 
1614     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
1615     paraLevel^=1;
1616     /* Because what we did with reorderingOptions, visualText may be shorter
1617      * than the original text. But we don't want the levels memory to be
1618      * reallocated shorter than the original length, since we need to restore
1619      * the levels as after the first call to ubidi_setpara() before returning.
1620      * We will force mayAllocateText to FALSE before the second call to
1621      * ubidi_setpara(), and will restore it afterwards.
1622      */
1623     saveMayAllocateText=pBiDi->mayAllocateText;
1624     pBiDi->mayAllocateText=FALSE;
1625     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
1626     pBiDi->mayAllocateText=saveMayAllocateText;
1627     ubidi_getRuns(pBiDi, pErrorCode);
1628     if(U_FAILURE(*pErrorCode)) {
1629         goto cleanup1;
1630     }
1631     /* check if some runs must be split, count how many splits */
1632     addedRuns=0;
1633     runCount=pBiDi->runCount;
1634     runs=pBiDi->runs;
1635     visualStart=0;
1636     for(i=0; i<runCount; i++, visualStart+=runLength) {
1637         runLength=runs[i].visualLimit-visualStart;
1638         if(runLength<2) {
1639             continue;
1640         }
1641         logicalStart=GET_INDEX(runs[i].logicalStart);
1642         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
1643             index=visualMap[j];
1644             index1=visualMap[j-1];
1645             if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
1646                 addedRuns++;
1647             }
1648         }
1649     }
1650     if(addedRuns) {
1651         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
1652             if(runCount==1) {
1653                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1654                 pBiDi->runsMemory[0]=runs[0];
1655             }
1656             runs=pBiDi->runs=pBiDi->runsMemory;
1657             pBiDi->runCount+=addedRuns;
1658         } else {
1659             goto cleanup1;
1660         }
1661     }
1662     /* split runs which are not consecutive in source text */
1663     for(i=runCount-1; i>=0; i--) {
1664         runLength= i==0 ? runs[0].visualLimit :
1665                           runs[i].visualLimit-runs[i-1].visualLimit;
1666         logicalStart=runs[i].logicalStart;
1667         indexOddBit=GET_ODD_BIT(logicalStart);
1668         logicalStart=GET_INDEX(logicalStart);
1669         if(runLength<2) {
1670             if(addedRuns) {
1671                 runs[i+addedRuns]=runs[i];
1672             }
1673             logicalPos=visualMap[logicalStart];
1674             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1675                                             saveLevels[logicalPos]^indexOddBit);
1676             continue;
1677         }
1678         if(indexOddBit) {
1679             start=logicalStart;
1680             limit=logicalStart+runLength-1;
1681             step=1;
1682         } else {
1683             start=logicalStart+runLength-1;
1684             limit=logicalStart;
1685             step=-1;
1686         }
1687         for(j=start; j!=limit; j+=step) {
1688             index=visualMap[j];
1689             index1=visualMap[j+step];
1690             if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
1691                 logicalPos=BIDI_MIN(visualMap[start], index);
1692                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1693                                             saveLevels[logicalPos]^indexOddBit);
1694                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
1695                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
1696                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
1697                 runs[i+addedRuns].insertRemove=insertRemove;
1698                 runs[i].insertRemove&=~insertRemove;
1699                 start=j+step;
1700                 addedRuns--;
1701             }
1702         }
1703         if(addedRuns) {
1704             runs[i+addedRuns]=runs[i];
1705         }
1706         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
1707         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1708                                             saveLevels[logicalPos]^indexOddBit);
1709     }
1710 
1711   cleanup1:
1712     /* restore initial paraLevel */
1713     pBiDi->paraLevel^=1;
1714   cleanup2:
1715     /* restore real text */
1716     pBiDi->text=text;
1717     pBiDi->length=saveLength;
1718     pBiDi->originalLength=length;
1719     pBiDi->direction=saveDirection;
1720     /* the saved levels should never excess levelsSize, but we check anyway */
1721     if(saveLength>pBiDi->levelsSize) {
1722         saveLength=pBiDi->levelsSize;
1723     }
1724     uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
1725     pBiDi->trailingWSStart=saveTrailingWSStart;
1726     /* free memory for mapping table and visual text */
1727     uprv_free(runsOnlyMemory);
1728     if(pBiDi->runCount>1) {
1729         pBiDi->direction=UBIDI_MIXED;
1730     }
1731   cleanup3:
1732     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
1733 }
1734 
/* ubidi_setPara ------------------------------------------------------------ */
1736 
1737 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)1738 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1739               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1740               UErrorCode *pErrorCode) {
1741     UBiDiDirection direction;
1742 
1743     /* check the argument values */
1744     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1745     if(pBiDi==NULL || text==NULL || length<-1 ||
1746        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
1747         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1748         return;
1749     }
1750 
1751     if(length==-1) {
1752         length=u_strlen(text);
1753     }
1754 
1755     /* special treatment for RUNS_ONLY mode */
1756     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
1757         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
1758         return;
1759     }
1760 
1761     /* initialize the UBiDi structure */
1762     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
1763     pBiDi->text=text;
1764     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
1765     pBiDi->paraLevel=paraLevel;
1766     pBiDi->direction=UBIDI_LTR;
1767     pBiDi->paraCount=1;
1768 
1769     pBiDi->dirProps=NULL;
1770     pBiDi->levels=NULL;
1771     pBiDi->runs=NULL;
1772     pBiDi->insertPoints.size=0;         /* clean up from last call */
1773     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
1774 
1775     /*
1776      * Save the original paraLevel if contextual; otherwise, set to 0.
1777      */
1778     if(IS_DEFAULT_LEVEL(paraLevel)) {
1779         pBiDi->defaultParaLevel=paraLevel;
1780     } else {
1781         pBiDi->defaultParaLevel=0;
1782     }
1783 
1784     if(length==0) {
1785         /*
1786          * For an empty paragraph, create a UBiDi object with the paraLevel and
1787          * the flags and the direction set but without allocating zero-length arrays.
1788          * There is nothing more to do.
1789          */
1790         if(IS_DEFAULT_LEVEL(paraLevel)) {
1791             pBiDi->paraLevel&=1;
1792             pBiDi->defaultParaLevel=0;
1793         }
1794         if(paraLevel&1) {
1795             pBiDi->flags=DIRPROP_FLAG(R);
1796             pBiDi->direction=UBIDI_RTL;
1797         } else {
1798             pBiDi->flags=DIRPROP_FLAG(L);
1799             pBiDi->direction=UBIDI_LTR;
1800         }
1801 
1802         pBiDi->runCount=0;
1803         pBiDi->paraCount=0;
1804         pBiDi->pParaBiDi=pBiDi;         /* mark successful setPara */
1805         return;
1806     }
1807 
1808     pBiDi->runCount=-1;
1809 
1810     /*
1811      * Get the directional properties,
1812      * the flags bit-set, and
1813      * determine the paragraph level if necessary.
1814      */
1815     if(getDirPropsMemory(pBiDi, length)) {
1816         pBiDi->dirProps=pBiDi->dirPropsMemory;
1817         getDirProps(pBiDi);
1818     } else {
1819         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1820         return;
1821     }
1822     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1823     length= pBiDi->length;
1824     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
1825     /* allocate paras memory */
1826     if(pBiDi->paraCount>1) {
1827         if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
1828             pBiDi->paras=pBiDi->parasMemory;
1829             pBiDi->paras[pBiDi->paraCount-1]=length;
1830         } else {
1831             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1832             return;
1833         }
1834     } else {
1835         /* initialize paras for single paragraph */
1836         pBiDi->paras=pBiDi->simpleParas;
1837         pBiDi->simpleParas[0]=length;
1838     }
1839 
1840     /* are explicit levels specified? */
1841     if(embeddingLevels==NULL) {
1842         /* no: determine explicit levels according to the (Xn) rules */\
1843         if(getLevelsMemory(pBiDi, length)) {
1844             pBiDi->levels=pBiDi->levelsMemory;
1845             direction=resolveExplicitLevels(pBiDi);
1846         } else {
1847             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1848             return;
1849         }
1850     } else {
1851         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1852         pBiDi->levels=embeddingLevels;
1853         direction=checkExplicitLevels(pBiDi, pErrorCode);
1854         if(U_FAILURE(*pErrorCode)) {
1855             return;
1856         }
1857     }
1858 
1859     /*
1860      * The steps after (X9) in the UBiDi algorithm are performed only if
1861      * the paragraph text has mixed directionality!
1862      */
1863     pBiDi->direction=direction;
1864     switch(direction) {
1865     case UBIDI_LTR:
1866         /* make sure paraLevel is even */
1867         pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
1868 
1869         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1870         pBiDi->trailingWSStart=0;
1871         break;
1872     case UBIDI_RTL:
1873         /* make sure paraLevel is odd */
1874         pBiDi->paraLevel|=1;
1875 
1876         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1877         pBiDi->trailingWSStart=0;
1878         break;
1879     default:
1880         /*
1881          *  Choose the right implicit state table
1882          */
1883         switch(pBiDi->reorderingMode) {
1884         case UBIDI_REORDER_DEFAULT:
1885             pBiDi->pImpTabPair=&impTab_DEFAULT;
1886             break;
1887         case UBIDI_REORDER_NUMBERS_SPECIAL:
1888             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
1889             break;
1890         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
1891             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
1892             break;
1893         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
1894             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
1895             break;
1896         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
1897             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1898                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
1899             } else {
1900                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
1901             }
1902             break;
1903         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
1904             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1905                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
1906             } else {
1907                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
1908             }
1909             break;
1910         default:
1911             /* we should never get here */
1912             U_ASSERT(FALSE);
1913             break;
1914         }
1915         /*
1916          * If there are no external levels specified and there
1917          * are no significant explicit level codes in the text,
1918          * then we can treat the entire paragraph as one run.
1919          * Otherwise, we need to perform the following rules on runs of
1920          * the text with the same embedding levels. (X10)
1921          * "Significant" explicit level codes are ones that actually
1922          * affect non-BN characters.
1923          * Examples for "insignificant" ones are empty embeddings
1924          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1925          */
1926         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
1927                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
1928             resolveImplicitLevels(pBiDi, 0, length,
1929                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
1930                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
1931         } else {
1932             /* sor, eor: start and end types of same-level-run */
1933             UBiDiLevel *levels=pBiDi->levels;
1934             int32_t start, limit=0;
1935             UBiDiLevel level, nextLevel;
1936             DirProp sor, eor;
1937 
1938             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1939             level=GET_PARALEVEL(pBiDi, 0);
1940             nextLevel=levels[0];
1941             if(level<nextLevel) {
1942                 eor=GET_LR_FROM_LEVEL(nextLevel);
1943             } else {
1944                 eor=GET_LR_FROM_LEVEL(level);
1945             }
1946 
1947             do {
1948                 /* determine start and limit of the run (end points just behind the run) */
1949 
1950                 /* the values for this run's start are the same as for the previous run's end */
1951                 start=limit;
1952                 level=nextLevel;
1953                 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
1954                     /* except if this is a new paragraph, then set sor = para level */
1955                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
1956                 } else {
1957                     sor=eor;
1958                 }
1959 
1960                 /* search for the limit of this run */
1961                 while(++limit<length && levels[limit]==level) {}
1962 
1963                 /* get the correct level of the next run */
1964                 if(limit<length) {
1965                     nextLevel=levels[limit];
1966                 } else {
1967                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
1968                 }
1969 
1970                 /* determine eor from max(level, nextLevel); sor is last run's eor */
1971                 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
1972                     eor=GET_LR_FROM_LEVEL(nextLevel);
1973                 } else {
1974                     eor=GET_LR_FROM_LEVEL(level);
1975                 }
1976 
1977                 /* if the run consists of overridden directional types, then there
1978                    are no implicit types to be resolved */
1979                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
1980                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
1981                 } else {
1982                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
1983                     do {
1984                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
1985                     } while(start<limit);
1986                 }
1987             } while(limit<length);
1988         }
1989         /* check if we got any memory shortage while adding insert points */
1990         if (U_FAILURE(pBiDi->insertPoints.errorCode))
1991         {
1992             *pErrorCode=pBiDi->insertPoints.errorCode;
1993             return;
1994         }
1995         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
1996         adjustWSLevels(pBiDi);
1997         break;
1998     }
1999     /* add RLM for inverse Bidi with contextual orientation resolving
2000      * to RTL which would not round-trip otherwise
2001      */
2002     if((pBiDi->defaultParaLevel>0) &&
2003        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2004        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2005         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2006         int32_t i, j, start, last;
2007         DirProp dirProp;
2008         for(i=0; i<pBiDi->paraCount; i++) {
2009             last=pBiDi->paras[i]-1;
2010             if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
2011                 continue;           /* LTR paragraph */
2012             }
2013             start= i==0 ? 0 : pBiDi->paras[i - 1];
2014             for(j=last; j>=start; j--) {
2015                 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
2016                 if(dirProp==L) {
2017                     if(j<last) {
2018                         while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
2019                             last--;
2020                         }
2021                     }
2022                     addPoint(pBiDi, last, RLM_BEFORE);
2023                     break;
2024                 }
2025                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2026                     break;
2027                 }
2028             }
2029         }
2030     }
2031 
2032     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2033         pBiDi->resultLength -= pBiDi->controlCount;
2034     } else {
2035         pBiDi->resultLength += pBiDi->insertPoints.size;
2036     }
2037     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2038 }
2039 
2040 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2041 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2042     if(pBiDi!=NULL) {
2043         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2044     }
2045 }
2046 
2047 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2048 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2049     if(pBiDi!=NULL) {
2050         return pBiDi->orderParagraphsLTR;
2051     } else {
2052         return FALSE;
2053     }
2054 }
2055 
2056 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2057 ubidi_getDirection(const UBiDi *pBiDi) {
2058     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2059         return pBiDi->direction;
2060     } else {
2061         return UBIDI_LTR;
2062     }
2063 }
2064 
2065 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2066 ubidi_getText(const UBiDi *pBiDi) {
2067     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2068         return pBiDi->text;
2069     } else {
2070         return NULL;
2071     }
2072 }
2073 
2074 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2075 ubidi_getLength(const UBiDi *pBiDi) {
2076     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2077         return pBiDi->originalLength;
2078     } else {
2079         return 0;
2080     }
2081 }
2082 
2083 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2084 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2085     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2086         return pBiDi->length;
2087     } else {
2088         return 0;
2089     }
2090 }
2091 
2092 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2093 ubidi_getResultLength(const UBiDi *pBiDi) {
2094     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2095         return pBiDi->resultLength;
2096     } else {
2097         return 0;
2098     }
2099 }
2100 
/* paragraphs API functions ------------------------------------------------- */
2102 
2103 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2104 ubidi_getParaLevel(const UBiDi *pBiDi) {
2105     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2106         return pBiDi->paraLevel;
2107     } else {
2108         return 0;
2109     }
2110 }
2111 
2112 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2113 ubidi_countParagraphs(UBiDi *pBiDi) {
2114     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2115         return 0;
2116     } else {
2117         return pBiDi->paraCount;
2118     }
2119 }
2120 
2121 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2122 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2123                           int32_t *pParaStart, int32_t *pParaLimit,
2124                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2125     int32_t paraStart;
2126 
2127     /* check the argument values */
2128     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2129     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2130     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2131 
2132     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2133     if(paraIndex) {
2134         paraStart=pBiDi->paras[paraIndex-1];
2135     } else {
2136         paraStart=0;
2137     }
2138     if(pParaStart!=NULL) {
2139         *pParaStart=paraStart;
2140     }
2141     if(pParaLimit!=NULL) {
2142         *pParaLimit=pBiDi->paras[paraIndex];
2143     }
2144     if(pParaLevel!=NULL) {
2145         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2146     }
2147 }
2148 
2149 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2150 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2151                           int32_t *pParaStart, int32_t *pParaLimit,
2152                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2153     uint32_t paraIndex;
2154 
2155     /* check the argument values */
2156     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2157     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2158     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2159     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2160     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2161 
2162     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
2163     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2164     return paraIndex;
2165 }
2166 
2167 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2168 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2169                        const void *newContext, UBiDiClassCallback **oldFn,
2170                        const void **oldContext, UErrorCode *pErrorCode)
2171 {
2172     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2173     if(pBiDi==NULL) {
2174         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2175         return;
2176     }
2177     if( oldFn )
2178     {
2179         *oldFn = pBiDi->fnClassCallback;
2180     }
2181     if( oldContext )
2182     {
2183         *oldContext = pBiDi->coClassCallback;
2184     }
2185     pBiDi->fnClassCallback = newFn;
2186     pBiDi->coClassCallback = newContext;
2187 }
2188 
2189 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)2190 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2191 {
2192     if(pBiDi==NULL) {
2193         return;
2194     }
2195     if( fn )
2196     {
2197         *fn = pBiDi->fnClassCallback;
2198     }
2199     if( context )
2200     {
2201         *context = pBiDi->coClassCallback;
2202     }
2203 }
2204 
2205 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)2206 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2207 {
2208     UCharDirection dir;
2209 
2210     if( pBiDi->fnClassCallback == NULL ||
2211         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2212     {
2213         return ubidi_getClass(pBiDi->bdp, c);
2214     } else {
2215         return dir;
2216     }
2217 }
2218 
2219